Refactor: Format Code with Ruff and Update Deprecated G2PW Link (#2255)

* ruff check --fix * ruff format --line-length 120 --target-version py39 * Change the link for G2PW Model * update pytorch version and colab
2025-04-07 09:42:47 +01:00
parent 9da7e17efe
commit 53cac93589
132 changed files with 8185 additions and 6648 deletions
--- a/GPT_SoVITS/prepare_datasets/1-get-text.py
+++ b/GPT_SoVITS/prepare_datasets/1-get-text.py
@@ -8,19 +8,17 @@ exp_name = os.environ.get("exp_name")
 i_part = os.environ.get("i_part")
 all_parts = os.environ.get("all_parts")
 if "_CUDA_VISIBLE_DEVICES" in os.environ:
-     os.environ["CUDA_VISIBLE_DEVICES"] = os.environ["_CUDA_VISIBLE_DEVICES"]
+    os.environ["CUDA_VISIBLE_DEVICES"] = os.environ["_CUDA_VISIBLE_DEVICES"]
 opt_dir = os.environ.get("opt_dir")
 bert_pretrained_dir = os.environ.get("bert_pretrained_dir")
 import torch
+
 is_half = eval(os.environ.get("is_half", "True")) and torch.cuda.is_available()
-version = os.environ.get('version', None)
-import sys, numpy as np, traceback, pdb
+version = os.environ.get("version", None)
+import traceback
 import os.path
-from glob import glob
-from tqdm import tqdm
 from text.cleaner import clean_text
 from transformers import AutoModelForMaskedLM, AutoTokenizer
-import numpy as np
 from tools.my_utils import clean_path

 # inp_text=sys.argv[1]
@@ -36,13 +34,13 @@ from time import time as ttime
 import shutil


-def my_save(fea,path):#####fix issue: torch.save doesn't support chinese path
-    dir=os.path.dirname(path)
-    name=os.path.basename(path)
+def my_save(fea, path):  #####fix issue: torch.save doesn't support chinese path
+    dir = os.path.dirname(path)
+    name = os.path.basename(path)
    # tmp_path="%s/%s%s.pth"%(dir,ttime(),i_part)
-    tmp_path="%s%s.pth"%(ttime(),i_part)
-    torch.save(fea,tmp_path)
-    shutil.move(tmp_path,"%s/%s"%(dir,name))
+    tmp_path = "%s%s.pth" % (ttime(), i_part)
+    torch.save(fea, tmp_path)
+    shutil.move(tmp_path, "%s/%s" % (dir, name))


 txt_path = "%s/2-name2text-%s.txt" % (opt_dir, i_part)
@@ -56,8 +54,10 @@ if os.path.exists(txt_path) == False:
    #     device = "mps"
    else:
        device = "cpu"
-    if os.path.exists(bert_pretrained_dir):...
-    else:raise FileNotFoundError(bert_pretrained_dir)
+    if os.path.exists(bert_pretrained_dir):
+        ...
+    else:
+        raise FileNotFoundError(bert_pretrained_dir)
    tokenizer = AutoTokenizer.from_pretrained(bert_pretrained_dir)
    bert_model = AutoModelForMaskedLM.from_pretrained(bert_pretrained_dir)
    if is_half == True:
@@ -86,12 +86,10 @@ if os.path.exists(txt_path) == False:
    def process(data, res):
        for name, text, lan in data:
            try:
-                name=clean_path(name)
+                name = clean_path(name)
                name = os.path.basename(name)
                print(name)
-                phones, word2ph, norm_text = clean_text(
-                    text.replace("%", "-").replace("￥", ","), lan, version
-                )
+                phones, word2ph, norm_text = clean_text(text.replace("%", "-").replace("￥", ","), lan, version)
                path_bert = "%s/%s.pt" % (bert_dir, name)
                if os.path.exists(path_bert) == False and lan == "zh":
                    bert_feature = get_bert_feature(norm_text, word2ph)
@@ -131,9 +129,7 @@ if os.path.exists(txt_path) == False:
            wav_name, spk_name, language, text = line.split("|")
            # todo.append([name,text,"zh"])
            if language in language_v1_to_language_v2.keys():
-                todo.append(
-                    [wav_name, text, language_v1_to_language_v2.get(language, language)]
-                )
+                todo.append([wav_name, text, language_v1_to_language_v2.get(language, language)])
            else:
                print(f"\033[33m[Waring] The {language = } of {wav_name} is not supported for training.\033[0m")
        except:
--- a/GPT_SoVITS/prepare_datasets/2-get-hubert-wav32k.py
+++ b/GPT_SoVITS/prepare_datasets/2-get-hubert-wav32k.py
@@ -1,25 +1,31 @@
 # -*- coding: utf-8 -*-

-import sys,os
-inp_text=                           os.environ.get("inp_text")
-inp_wav_dir=                        os.environ.get("inp_wav_dir")
-exp_name=                           os.environ.get("exp_name")
-i_part=                             os.environ.get("i_part")
-all_parts=                          os.environ.get("all_parts")
+import sys
+import os
+
+inp_text = os.environ.get("inp_text")
+inp_wav_dir = os.environ.get("inp_wav_dir")
+exp_name = os.environ.get("exp_name")
+i_part = os.environ.get("i_part")
+all_parts = os.environ.get("all_parts")
 if "_CUDA_VISIBLE_DEVICES" in os.environ:
-     os.environ["CUDA_VISIBLE_DEVICES"] = os.environ["_CUDA_VISIBLE_DEVICES"]
+    os.environ["CUDA_VISIBLE_DEVICES"] = os.environ["_CUDA_VISIBLE_DEVICES"]
 from feature_extractor import cnhubert
-opt_dir=                            os.environ.get("opt_dir")
-cnhubert.cnhubert_base_path=                os.environ.get("cnhubert_base_dir")
+
+opt_dir = os.environ.get("opt_dir")
+cnhubert.cnhubert_base_path = os.environ.get("cnhubert_base_dir")
 import torch
+
 is_half = eval(os.environ.get("is_half", "True")) and torch.cuda.is_available()

-import pdb,traceback,numpy as np,logging
+import traceback
+import numpy as np
 from scipy.io import wavfile
 import librosa
+
 now_dir = os.getcwd()
 sys.path.append(now_dir)
-from tools.my_utils import load_audio,clean_path
+from tools.my_utils import load_audio, clean_path

 # from config import cnhubert_base_path
 # cnhubert.cnhubert_base_path=cnhubert_base_path
@@ -34,90 +40,95 @@ from tools.my_utils import load_audio,clean_path

 from time import time as ttime
 import shutil
-def my_save(fea,path):#####fix issue: torch.save doesn't support chinese path
-    dir=os.path.dirname(path)
-    name=os.path.basename(path)
+
+
+def my_save(fea, path):  #####fix issue: torch.save doesn't support chinese path
+    dir = os.path.dirname(path)
+    name = os.path.basename(path)
    # tmp_path="%s/%s%s.pth"%(dir,ttime(),i_part)
-    tmp_path="%s%s.pth"%(ttime(),i_part)
-    torch.save(fea,tmp_path)
-    shutil.move(tmp_path,"%s/%s"%(dir,name))
+    tmp_path = "%s%s.pth" % (ttime(), i_part)
+    torch.save(fea, tmp_path)
+    shutil.move(tmp_path, "%s/%s" % (dir, name))

-hubert_dir="%s/4-cnhubert"%(opt_dir)
-wav32dir="%s/5-wav32k"%(opt_dir)
-os.makedirs(opt_dir,exist_ok=True)
-os.makedirs(hubert_dir,exist_ok=True)
-os.makedirs(wav32dir,exist_ok=True)

-maxx=0.95
-alpha=0.5
+hubert_dir = "%s/4-cnhubert" % (opt_dir)
+wav32dir = "%s/5-wav32k" % (opt_dir)
+os.makedirs(opt_dir, exist_ok=True)
+os.makedirs(hubert_dir, exist_ok=True)
+os.makedirs(wav32dir, exist_ok=True)
+
+maxx = 0.95
+alpha = 0.5
 if torch.cuda.is_available():
    device = "cuda:0"
 # elif torch.backends.mps.is_available():
 #     device = "mps"
 else:
    device = "cpu"
-model=cnhubert.get_model()
+model = cnhubert.get_model()
 # is_half=False
-if(is_half==True):
-    model=model.half().to(device)
+if is_half == True:
+    model = model.half().to(device)
 else:
    model = model.to(device)

-nan_fails=[]
-def name2go(wav_name,wav_path):
-    hubert_path="%s/%s.pt"%(hubert_dir,wav_name)
-    if(os.path.exists(hubert_path)):return
+nan_fails = []
+
+
+def name2go(wav_name, wav_path):
+    hubert_path = "%s/%s.pt" % (hubert_dir, wav_name)
+    if os.path.exists(hubert_path):
+        return
    tmp_audio = load_audio(wav_path, 32000)
    tmp_max = np.abs(tmp_audio).max()
    if tmp_max > 2.2:
        print("%s-filtered,%s" % (wav_name, tmp_max))
        return
-    tmp_audio32 = (tmp_audio / tmp_max * (maxx * alpha*32768)) + ((1 - alpha)*32768) * tmp_audio
-    tmp_audio32b = (tmp_audio / tmp_max * (maxx * alpha*1145.14)) + ((1 - alpha)*1145.14) * tmp_audio
-    tmp_audio = librosa.resample(
-        tmp_audio32b, orig_sr=32000, target_sr=16000
-    )#不是重采样问题
+    tmp_audio32 = (tmp_audio / tmp_max * (maxx * alpha * 32768)) + ((1 - alpha) * 32768) * tmp_audio
+    tmp_audio32b = (tmp_audio / tmp_max * (maxx * alpha * 1145.14)) + ((1 - alpha) * 1145.14) * tmp_audio
+    tmp_audio = librosa.resample(tmp_audio32b, orig_sr=32000, target_sr=16000)  # 不是重采样问题
    tensor_wav16 = torch.from_numpy(tmp_audio)
-    if (is_half == True):
-        tensor_wav16=tensor_wav16.half().to(device)
+    if is_half == True:
+        tensor_wav16 = tensor_wav16.half().to(device)
    else:
        tensor_wav16 = tensor_wav16.to(device)
-    ssl=model.model(tensor_wav16.unsqueeze(0))["last_hidden_state"].transpose(1,2).cpu()#torch.Size([1, 768, 215])
-    if np.isnan(ssl.detach().numpy()).sum()!= 0:
-        nan_fails.append((wav_name,wav_path))
-        print("nan filtered:%s"%wav_name)
+    ssl = model.model(tensor_wav16.unsqueeze(0))["last_hidden_state"].transpose(1, 2).cpu()  # torch.Size([1, 768, 215])
+    if np.isnan(ssl.detach().numpy()).sum() != 0:
+        nan_fails.append((wav_name, wav_path))
+        print("nan filtered:%s" % wav_name)
        return
    wavfile.write(
-        "%s/%s"%(wav32dir,wav_name),
+        "%s/%s" % (wav32dir, wav_name),
        32000,
        tmp_audio32.astype("int16"),
    )
-    my_save(ssl,hubert_path)
+    my_save(ssl, hubert_path)

-with open(inp_text,"r",encoding="utf8")as f:
-    lines=f.read().strip("\n").split("\n")

-for line in lines[int(i_part)::int(all_parts)]:
+with open(inp_text, "r", encoding="utf8") as f:
+    lines = f.read().strip("\n").split("\n")
+
+for line in lines[int(i_part) :: int(all_parts)]:
    try:
        # wav_name,text=line.split("\t")
        wav_name, spk_name, language, text = line.split("|")
-        wav_name=clean_path(wav_name)
-        if (inp_wav_dir != "" and inp_wav_dir != None):
+        wav_name = clean_path(wav_name)
+        if inp_wav_dir != "" and inp_wav_dir != None:
            wav_name = os.path.basename(wav_name)
-            wav_path = "%s/%s"%(inp_wav_dir, wav_name)
+            wav_path = "%s/%s" % (inp_wav_dir, wav_name)

        else:
-            wav_path=wav_name
+            wav_path = wav_name
            wav_name = os.path.basename(wav_name)
-        name2go(wav_name,wav_path)
+        name2go(wav_name, wav_path)
    except:
-        print(line,traceback.format_exc())
+        print(line, traceback.format_exc())

-if(len(nan_fails)>0 and is_half==True):
-    is_half=False
-    model=model.float()
+if len(nan_fails) > 0 and is_half == True:
+    is_half = False
+    model = model.float()
    for wav in nan_fails:
        try:
-            name2go(wav[0],wav[1])
+            name2go(wav[0], wav[1])
        except:
-            print(wav_name,traceback.format_exc())
+            print(wav_name, traceback.format_exc())
--- a/GPT_SoVITS/prepare_datasets/3-get-semantic.py
+++ b/GPT_SoVITS/prepare_datasets/3-get-semantic.py
@@ -5,13 +5,15 @@ exp_name = os.environ.get("exp_name")
 i_part = os.environ.get("i_part")
 all_parts = os.environ.get("all_parts")
 if "_CUDA_VISIBLE_DEVICES" in os.environ:
-     os.environ["CUDA_VISIBLE_DEVICES"] = os.environ["_CUDA_VISIBLE_DEVICES"]
+    os.environ["CUDA_VISIBLE_DEVICES"] = os.environ["_CUDA_VISIBLE_DEVICES"]
 opt_dir = os.environ.get("opt_dir")
 pretrained_s2G = os.environ.get("pretrained_s2G")
 s2config_path = os.environ.get("s2config_path")

-if os.path.exists(pretrained_s2G):...
-else:raise FileNotFoundError(pretrained_s2G)
+if os.path.exists(pretrained_s2G):
+    ...
+else:
+    raise FileNotFoundError(pretrained_s2G)
 # version=os.environ.get("version","v2")
 size = os.path.getsize(pretrained_s2G)
 if size < 82978 * 1024:
@@ -25,23 +27,22 @@ elif size < 700 * 1024 * 1024:
 else:
    version = "v3"
 import torch
+
 is_half = eval(os.environ.get("is_half", "True")) and torch.cuda.is_available()
-import math, traceback
-import multiprocessing
-import sys, pdb
+import traceback
+import sys

 now_dir = os.getcwd()
 sys.path.append(now_dir)
-from random import shuffle
-import torch.multiprocessing as mp
-from glob import glob
-from tqdm import tqdm
-import logging, librosa, utils
-if version!="v3":
+import logging
+import utils
+
+if version != "v3":
    from module.models import SynthesizerTrn
 else:
    from module.models import SynthesizerTrnV3 as SynthesizerTrn
 from tools.my_utils import clean_path
+
 logging.getLogger("numba").setLevel(logging.WARNING)
 # from config import pretrained_s2G

@@ -70,7 +71,7 @@ if os.path.exists(semantic_path) == False:
        hps.train.segment_size // hps.data.hop_length,
        n_speakers=hps.data.n_speakers,
        version=version,
-        **hps.model
+        **hps.model,
    )
    if is_half == True:
        vq_model = vq_model.half().to(device)
@@ -107,7 +108,7 @@ if os.path.exists(semantic_path) == False:
        try:
            # wav_name,text=line.split("\t")
            wav_name, spk_name, language, text = line.split("|")
-            wav_name=clean_path(wav_name)
+            wav_name = clean_path(wav_name)
            wav_name = os.path.basename(wav_name)
            # name2go(name,lines1)
            name2go(wav_name, lines1)