Refactor: Format Code with Ruff and Update Deprecated G2PW Link (#2255)
* ruff check --fix
* ruff format --line-length 120 --target-version py39
* Change the link for the G2PW model
* Update the PyTorch version and the Colab notebook
@@ -8,10 +8,10 @@ from text.symbols import punctuation
 from text.symbols2 import symbols

 import unicodedata
 from builtins import str as unicode
 from text.en_normalization.expend import normalize
 from nltk.tokenize import TweetTokenizer

 word_tokenize = TweetTokenizer().tokenize

 from nltk import pos_tag
@@ -122,9 +122,9 @@ def replace_phs(phs):


 def replace_consecutive_punctuation(text):
-    punctuations = ''.join(re.escape(p) for p in punctuation)
-    pattern = f'([{punctuations}\s])([{punctuations}])+'
-    result = re.sub(pattern, r'\1', text)
+    punctuations = "".join(re.escape(p) for p in punctuation)
+    pattern = f"([{punctuations}\s])([{punctuations}])+"
+    result = re.sub(pattern, r"\1", text)
     return result
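The hunk above is purely a quote-style change; the helper's behavior is unchanged. For readers skimming the diff, a minimal self-contained sketch of what the function does, with `punctuation` stubbed to a few marks since the real list lives in `text/symbols.py`:

```python
import re

# hypothetical stand-in for text.symbols.punctuation
punctuation = ["!", "?", ",", "."]


def replace_consecutive_punctuation(text):
    # collapse a run of punctuation marks (optionally led by whitespace)
    # down to its first character; \s is escaped here to avoid the
    # invalid-escape warning the original f-string triggers
    punctuations = "".join(re.escape(p) for p in punctuation)
    pattern = f"([{punctuations}\\s])([{punctuations}])+"
    return re.sub(pattern, r"\1", text)


print(replace_consecutive_punctuation("wait!!! what?!"))  # -> "wait! what?"
```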
@@ -183,6 +183,7 @@ def read_dict_new():

     return g2p_dict

+
 def hot_reload_hot(g2p_dict):
     with open(CMU_DICT_HOT_PATH) as f:
         line = f.readline()
@@ -259,9 +260,12 @@ class en_G2p(G2p):
             del self.cmu[word.lower()]

         # fix heteronym entries
-        self.homograph2features["read"] = (['R', 'IY1', 'D'], ['R', 'EH1', 'D'], 'VBP')
-        self.homograph2features["complex"] = (['K', 'AH0', 'M', 'P', 'L', 'EH1', 'K', 'S'], ['K', 'AA1', 'M', 'P', 'L', 'EH0', 'K', 'S'], 'JJ')
+        self.homograph2features["read"] = (["R", "IY1", "D"], ["R", "EH1", "D"], "VBP")
+        self.homograph2features["complex"] = (
+            ["K", "AH0", "M", "P", "L", "EH1", "K", "S"],
+            ["K", "AA1", "M", "P", "L", "EH0", "K", "S"],
+            "JJ",
+        )

     def __call__(self, text):
         # tokenization
@@ -280,7 +284,7 @@ class en_G2p(G2p):
             elif len(word) == 1:
                 # fix the pronunciation of a standalone "A"; the original-case o_word is needed to detect the capital
                 if o_word == "A":
-                    pron = ['EY1']
+                    pron = ["EY1"]
             else:
                 pron = self.cmu[word][0]
             # g2p_en's original heteronym handling
@@ -289,7 +293,7 @@ class en_G2p(G2p):
                 if pos.startswith(pos1):
                     pron = pron1
                 # pos1 being longer than pos only happens for "read"
-                elif len(pos) < len(pos1) and pos == pos1[:len(pos)]:
+                elif len(pos) < len(pos1) and pos == pos1[: len(pos)]:
                     pron = pron1
                 else:
                     pron = pron2
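To make the branch above concrete: each `homograph2features` entry stores two pronunciations plus the POS-tag prefix that selects the first one. A minimal sketch of that selection rule, using the "read" entry from this commit (the `pick_pron` helper is hypothetical, extracted for illustration):

```python
homograph2features = {
    # (pron1, pron2, pos1): present-tense "read" vs. past-tense "read"
    "read": (["R", "IY1", "D"], ["R", "EH1", "D"], "VBP"),
}


def pick_pron(word, pos):
    # choose pron1 when the tagged POS matches the stored prefix, else pron2
    pron1, pron2, pos1 = homograph2features[word]
    if pos.startswith(pos1):
        return pron1
    # pos shorter than pos1 but still a prefix match, e.g. tag "VB" vs. stored "VBP"
    elif len(pos) < len(pos1) and pos == pos1[: len(pos)]:
        return pron1
    return pron2


print(pick_pron("read", "VBD"))  # past tense  -> ["R", "EH1", "D"]
print(pick_pron("read", "VB"))   # prefix case -> ["R", "IY1", "D"]
```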
@@ -302,7 +306,6 @@ class en_G2p(G2p):

         return prons[:-1]

-
     def qryword(self, o_word):
         word = o_word.lower()
@@ -320,7 +323,7 @@ class en_G2p(G2p):
             for w in word:
                 # fix the pronunciation of a standalone "a"; no uppercase form occurs here
                 if w == "a":
-                    phones.extend(['EY1'])
+                    phones.extend(["EY1"])
                 elif not w.isalpha():
                     phones.extend([w])
                 else:
@@ -331,23 +334,23 @@ class en_G2p(G2p):
         if re.match(r"^([a-z]+)('s)$", word):
             phones = self.qryword(word[:-2])[:]
             # after the voiceless consonants P T K F TH HH, 's is pronounced ['S']
-            if phones[-1] in ['P', 'T', 'K', 'F', 'TH', 'HH']:
-                phones.extend(['S'])
+            if phones[-1] in ["P", "T", "K", "F", "TH", "HH"]:
+                phones.extend(["S"])
             # after the sibilants S Z SH ZH CH JH, 's is pronounced ['IH1', 'Z'] or ['AH0', 'Z']
-            elif phones[-1] in ['S', 'Z', 'SH', 'ZH', 'CH', 'JH']:
-                phones.extend(['AH0', 'Z'])
+            elif phones[-1] in ["S", "Z", "SH", "ZH", "CH", "JH"]:
+                phones.extend(["AH0", "Z"])
             # after the voiced consonants B D G DH V M N NG L R W Y, 's is pronounced ['Z']
             # after the vowels AH0 AH1 AH2 EY0 EY1 EY2 AE0 AE1 AE2 EH0 EH1 EH2 OW0 OW1 OW2 UH0 UH1 UH2 IY0 IY1 IY2 AA0 AA1 AA2 AO0 AO1 AO2
             # ER ER0 ER1 ER2 UW0 UW1 UW2 AY0 AY1 AY2 AW0 AW1 AW2 OY0 OY1 OY2 IH IH0 IH1 IH2, 's is also pronounced ['Z']
             else:
-                phones.extend(['Z'])
+                phones.extend(["Z"])
             return phones

         # try word segmentation to handle compound words
         comps = wordsegment.segment(word.lower())

         # words that cannot be segmented go back to the model for prediction
-        if len(comps)==1:
+        if len(comps) == 1:
             return self.predict(word)

         # segmentable words are handled recursively, component by component
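To summarize the logic this last hunk reformats: a possessive 's gets its phoneme chosen by the final sound of the stem, and out-of-dictionary words are first split with `wordsegment` before falling back to model prediction. A standalone sketch of those two rules under that reading (the `possessive_phones` helper is hypothetical; `wordsegment.load()`/`wordsegment.segment()` are the library's actual API):

```python
import wordsegment

wordsegment.load()  # wordsegment requires loading its corpus before segment()


def possessive_phones(stem_phones):
    # pick the phoneme(s) for a trailing 's from the stem's final sound
    if stem_phones[-1] in ["P", "T", "K", "F", "TH", "HH"]:      # voiceless -> ['S']
        return stem_phones + ["S"]
    elif stem_phones[-1] in ["S", "Z", "SH", "ZH", "CH", "JH"]:  # sibilant  -> ['AH0', 'Z']
        return stem_phones + ["AH0", "Z"]
    return stem_phones + ["Z"]                                   # voiced or vowel -> ['Z']


print(possessive_phones(["K", "AE1", "T"]))  # cat's  -> [..., 'S']
print(possessive_phones(["R", "OW1", "Z"]))  # rose's -> [..., 'AH0', 'Z']

# compound handling: recurse only when segmentation actually splits the word
print(wordsegment.segment("catsanddogs"))  # ['cats', 'and', 'dogs']
```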