Refactor: Format Code with Ruff and Update Deprecated G2PW Link (#2255)

* ruff check --fix
* ruff format --line-length 120 --target-version py39
* Change the link for the G2PW model
* Update PyTorch version and Colab
2025-04-07 09:42:47 +01:00
parent 9da7e17efe
commit 53cac93589
132 changed files with 8185 additions and 6648 deletions
--- a/GPT_SoVITS/text/g2pw/utils.py
+++ b/GPT_SoVITS/text/g2pw/utils.py
@@ -15,6 +15,7 @@
 Credits
    This code is modified from https://github.com/GitYCC/g2pW
 """
+
 import os
 import re

@@ -24,14 +25,14 @@ def wordize_and_map(text: str):
    index_map_from_text_to_word = []
    index_map_from_word_to_text = []
    while len(text) > 0:
-        match_space = re.match(r'^ +', text)
+        match_space = re.match(r"^ +", text)
        if match_space:
            space_str = match_space.group(0)
            index_map_from_text_to_word += [None] * len(space_str)
-            text = text[len(space_str):]
+            text = text[len(space_str) :]
            continue

-        match_en = re.match(r'^[a-zA-Z0-9]+', text)
+        match_en = re.match(r"^[a-zA-Z0-9]+", text)
        if match_en:
            en_word = match_en.group(0)

@@ -42,7 +43,7 @@ def wordize_and_map(text: str):
            index_map_from_text_to_word += [len(words)] * len(en_word)

            words.append(en_word)
-            text = text[len(en_word):]
+            text = text[len(en_word) :]
        else:
            word_start_pos = len(index_map_from_text_to_word)
            word_end_pos = word_start_pos + 1
@@ -63,15 +64,14 @@ def tokenize_and_map(tokenizer, text: str):
    for word, (word_start, word_end) in zip(words, word2text):
        word_tokens = tokenizer.tokenize(word)

-        if len(word_tokens) == 0 or word_tokens == ['[UNK]']:
+        if len(word_tokens) == 0 or word_tokens == ["[UNK]"]:
            index_map_from_token_to_text.append((word_start, word_end))
-            tokens.append('[UNK]')
+            tokens.append("[UNK]")
        else:
            current_word_start = word_start
            for word_token in word_tokens:
-                word_token_len = len(re.sub(r'^##', '', word_token))
-                index_map_from_token_to_text.append(
-                    (current_word_start, current_word_start + word_token_len))
+                word_token_len = len(re.sub(r"^##", "", word_token))
+                index_map_from_token_to_text.append((current_word_start, current_word_start + word_token_len))
                current_word_start = current_word_start + word_token_len
                tokens.append(word_token)

@@ -85,53 +85,51 @@ def tokenize_and_map(tokenizer, text: str):

 def _load_config(config_path: os.PathLike):
    import importlib.util
-    spec = importlib.util.spec_from_file_location('__init__', config_path)
+
+    spec = importlib.util.spec_from_file_location("__init__", config_path)
    config = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(config)
    return config


 default_config_dict = {
-    'manual_seed': 1313,
-    'model_source': 'bert-base-chinese',
-    'window_size': 32,
-    'num_workers': 2,
-    'use_mask': True,
-    'use_char_phoneme': False,
-    'use_conditional': True,
-    'param_conditional': {
-        'affect_location': 'softmax',
-        'bias': True,
-        'char-linear': True,
-        'pos-linear': False,
-        'char+pos-second': True,
-        'char+pos-second_lowrank': False,
-        'lowrank_size': 0,
-        'char+pos-second_fm': False,
-        'fm_size': 0,
-        'fix_mode': None,
-        'count_json': 'train.count.json'
+    "manual_seed": 1313,
+    "model_source": "bert-base-chinese",
+    "window_size": 32,
+    "num_workers": 2,
+    "use_mask": True,
+    "use_char_phoneme": False,
+    "use_conditional": True,
+    "param_conditional": {
+        "affect_location": "softmax",
+        "bias": True,
+        "char-linear": True,
+        "pos-linear": False,
+        "char+pos-second": True,
+        "char+pos-second_lowrank": False,
+        "lowrank_size": 0,
+        "char+pos-second_fm": False,
+        "fm_size": 0,
+        "fix_mode": None,
+        "count_json": "train.count.json",
    },
-    'lr': 5e-5,
-    'val_interval': 200,
-    'num_iter': 10000,
-    'use_focal': False,
-    'param_focal': {
-        'alpha': 0.0,
-        'gamma': 0.7
+    "lr": 5e-5,
+    "val_interval": 200,
+    "num_iter": 10000,
+    "use_focal": False,
+    "param_focal": {"alpha": 0.0, "gamma": 0.7},
+    "use_pos": True,
+    "param_pos ": {
+        "weight": 0.1,
+        "pos_joint_training": True,
+        "train_pos_path": "train.pos",
+        "valid_pos_path": "dev.pos",
+        "test_pos_path": "test.pos",
    },
-    'use_pos': True,
-    'param_pos ': {
-        'weight': 0.1,
-        'pos_joint_training': True,
-        'train_pos_path': 'train.pos',
-        'valid_pos_path': 'dev.pos',
-        'test_pos_path': 'test.pos'
-    }
 }


-def load_config(config_path: os.PathLike, use_default: bool=False):
+def load_config(config_path: os.PathLike, use_default: bool = False):
    config = _load_config(config_path)
    if use_default:
        for attr, val in default_config_dict.items():