Refactor: Format Code with Ruff and Update Deprecated G2PW Link (#2255)
* ruff check --fix
* ruff format --line-length 120 --target-version py39
* Change the link for the G2PW model
* Update PyTorch version and Colab
@@ -8,19 +8,17 @@ exp_name = os.environ.get("exp_name")
 i_part = os.environ.get("i_part")
 all_parts = os.environ.get("all_parts")
 if "_CUDA_VISIBLE_DEVICES" in os.environ:
     os.environ["CUDA_VISIBLE_DEVICES"] = os.environ["_CUDA_VISIBLE_DEVICES"]
 opt_dir = os.environ.get("opt_dir")
 bert_pretrained_dir = os.environ.get("bert_pretrained_dir")
 import torch
 
 is_half = eval(os.environ.get("is_half", "True")) and torch.cuda.is_available()
-version = os.environ.get('version', None)
-import sys, numpy as np, traceback, pdb
+version = os.environ.get("version", None)
+import traceback
 import os.path
 from glob import glob
 from tqdm import tqdm
 from text.cleaner import clean_text
 from transformers import AutoModelForMaskedLM, AutoTokenizer
 import numpy as np
 from tools.my_utils import clean_path
 
 # inp_text=sys.argv[1]
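For context on the hunk above: the script takes all of its configuration from environment variables rather than CLI arguments. A minimal launcher sketch — every value and the script name below are assumptions for illustration, not part of this commit:

```python
# Hypothetical launcher: all values below are assumptions for illustration.
import os
import subprocess

env = dict(os.environ)
env.update({
    "i_part": "0",            # index of the shard this process handles
    "all_parts": "2",         # total number of parallel shards
    "opt_dir": "logs/exp1",   # hypothetical output directory
    "bert_pretrained_dir": "pretrained_models/chinese-roberta-wwm-ext-large",  # hypothetical
    "is_half": "True",        # note: the script parses this with eval()
    "version": "v2",          # hypothetical version tag
})
subprocess.run(["python", "get_text.py"], env=env, check=True)  # hypothetical script name
```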
@@ -36,13 +34,13 @@ from time import time as ttime
 import shutil
 
 
-def my_save(fea,path):#####fix issue: torch.save doesn't support chinese path
-    dir=os.path.dirname(path)
-    name=os.path.basename(path)
+def my_save(fea, path):  #####fix issue: torch.save doesn't support chinese path
+    dir = os.path.dirname(path)
+    name = os.path.basename(path)
     # tmp_path="%s/%s%s.pth"%(dir,ttime(),i_part)
-    tmp_path="%s%s.pth"%(ttime(),i_part)
-    torch.save(fea,tmp_path)
-    shutil.move(tmp_path,"%s/%s"%(dir,name))
+    tmp_path = "%s%s.pth" % (ttime(), i_part)
+    torch.save(fea, tmp_path)
+    shutil.move(tmp_path, "%s/%s" % (dir, name))
 
 
 txt_path = "%s/2-name2text-%s.txt" % (opt_dir, i_part)
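The `my_save` helper being reformatted here is a workaround for `torch.save` failing on paths containing Chinese characters on some platforms: it saves to an ASCII-only temporary file in the working directory, then moves it into place with `shutil.move`, which handles non-ASCII paths fine. A standalone sketch of the same pattern — simplified, with a uuid-based temp name standing in for the `ttime()`/`i_part` naming used in the diff:

```python
# Sketch of the same workaround, simplified (assumption: uuid-based temp
# name replaces the ttime()/i_part naming from the diff).
import shutil
import uuid

import torch


def save_supporting_unicode_path(obj, path):
    """torch.save via an ASCII-only temp file in the CWD, then move into place.

    Works around torch.save failing on some platforms when `path` contains
    non-ASCII (e.g. Chinese) characters; shutil.move copes with such paths.
    """
    tmp_path = "%s.pth" % uuid.uuid4().hex  # guaranteed ASCII-only filename
    torch.save(obj, tmp_path)
    shutil.move(tmp_path, path)
```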
@@ -56,8 +54,10 @@ if os.path.exists(txt_path) == False:
     # device = "mps"
     else:
         device = "cpu"
-    if os.path.exists(bert_pretrained_dir):...
-    else:raise FileNotFoundError(bert_pretrained_dir)
+    if os.path.exists(bert_pretrained_dir):
+        ...
+    else:
+        raise FileNotFoundError(bert_pretrained_dir)
     tokenizer = AutoTokenizer.from_pretrained(bert_pretrained_dir)
     bert_model = AutoModelForMaskedLM.from_pretrained(bert_pretrained_dir)
     if is_half == True:
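This hunk only unfolds the one-line existence check into a readable block; the point of the check is to fail fast with the offending path rather than a less obvious error from `transformers`. A self-contained sketch of the load path — the model path is hypothetical, and the `.half().to(device)` tail is an assumption inferred from the `if is_half == True:` context line:

```python
import os

import torch
from transformers import AutoModelForMaskedLM, AutoTokenizer

bert_pretrained_dir = "pretrained_models/chinese-roberta-wwm-ext-large"  # hypothetical path
device = "cuda:0" if torch.cuda.is_available() else "cpu"
is_half = torch.cuda.is_available()

# Fail fast with the offending path instead of an opaque downstream error.
if os.path.exists(bert_pretrained_dir):
    ...
else:
    raise FileNotFoundError(bert_pretrained_dir)

tokenizer = AutoTokenizer.from_pretrained(bert_pretrained_dir)
bert_model = AutoModelForMaskedLM.from_pretrained(bert_pretrained_dir)
if is_half:
    bert_model = bert_model.half().to(device)  # assumption: mirrors the surrounding file
else:
    bert_model = bert_model.to(device)
```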
@@ -86,12 +86,10 @@ if os.path.exists(txt_path) == False:
|
||||
def process(data, res):
|
||||
for name, text, lan in data:
|
||||
try:
|
||||
name=clean_path(name)
|
||||
name = clean_path(name)
|
||||
name = os.path.basename(name)
|
||||
print(name)
|
||||
phones, word2ph, norm_text = clean_text(
|
||||
text.replace("%", "-").replace("¥", ","), lan, version
|
||||
)
|
||||
phones, word2ph, norm_text = clean_text(text.replace("%", "-").replace("¥", ","), lan, version)
|
||||
path_bert = "%s/%s.pt" % (bert_dir, name)
|
||||
if os.path.exists(path_bert) == False and lan == "zh":
|
||||
bert_feature = get_bert_feature(norm_text, word2ph)
|
||||
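Besides the spacing fix, the change here is `ruff format --line-length 120` re-joining the `clean_text(...)` call that an 88-column formatter had wrapped. For readers unfamiliar with the `word2ph` value it returns: such a list is commonly used to expand character-level BERT features to phone level, which is presumably what `get_bert_feature(norm_text, word2ph)` relies on. An illustrative sketch — an assumption, not code from this commit:

```python
# Illustrative sketch (assumption): expanding character-level BERT features
# to phone level with a word2ph list.
import torch


def expand_to_phone_level(char_features, word2ph):
    # char_features: (num_chars, hidden); word2ph[i] = number of phones for char i
    repeats = torch.tensor(word2ph)
    return char_features.repeat_interleave(repeats, dim=0)  # (sum(word2ph), hidden)


feats = torch.randn(3, 1024)  # 3 characters, hidden size 1024
print(expand_to_phone_level(feats, [2, 1, 3]).shape)  # torch.Size([6, 1024])
```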
@@ -131,9 +129,7 @@ if os.path.exists(txt_path) == False:
             wav_name, spk_name, language, text = line.split("|")
             # todo.append([name,text,"zh"])
             if language in language_v1_to_language_v2.keys():
-                todo.append(
-                    [wav_name, text, language_v1_to_language_v2.get(language, language)]
-                )
+                todo.append([wav_name, text, language_v1_to_language_v2.get(language, language)])
             else:
                 print(f"\033[33m[Waring] The {language = } of {wav_name} is not supported for training.\033[0m")
         except:
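Same story in this final hunk: at 120 columns the `todo.append(...)` call fits on one line. The `language_v1_to_language_v2.get(language, language)` idiom translates known v1 language codes and passes anything else through unchanged, although the surrounding `in ... .keys()` check means the fallback never actually fires here. A tiny sketch with hypothetical mapping entries:

```python
# Hypothetical mapping entries, for illustration only.
language_v1_to_language_v2 = {"ZH": "zh", "JA": "ja"}

print(language_v1_to_language_v2.get("ZH", "ZH"))  # -> "zh" (known code, translated)
print(language_v1_to_language_v2.get("ko", "ko"))  # -> "ko" (unknown code, passed through)
```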