more code refactor

2024-01-16 17:14:18 +01:00
parent 0d92575115
commit 0d3d47f3c3
44 changed files with 4516 additions and 2623 deletions
--- a/GPT_SoVITS/text/chinese.py
+++ b/GPT_SoVITS/text/chinese.py
@@ -6,49 +6,56 @@ import cn2an
 from pypinyin import lazy_pinyin, Style

 import sys
+
 sys.path.append("/data/docker/liujing04/gpt-vits/gpt-vits-master")

 from text.symbols import punctuation
 from text.tone_sandhi import ToneSandhi

 current_file_path = os.path.dirname(__file__)
-pinyin_to_symbol_map = {line.split("\t")[0]: line.strip().split("\t")[1] for line in
-                        open(os.path.join(current_file_path, 'opencpop-strict.txt')).readlines()}
+pinyin_to_symbol_map = {
+    line.split("\t")[0]: line.strip().split("\t")[1]
+    for line in open(os.path.join(current_file_path, "opencpop-strict.txt")).readlines()
+}

 import jieba.posseg as psg


 rep_map = {
-    '：': ',',
-    '；': ',',
-    '，': ',',
-    '。': '.',
-    '！': '!',
-    '？': '?',
-    '\n': '.',
+    "：": ",",
+    "；": ",",
+    "，": ",",
+    "。": ".",
+    "！": "!",
+    "？": "?",
+    "\n": ".",
    "·": ",",
-    '、': ",",
-    '...': '…',
-    '$': '.',
-    '/': ',',
-    '—': "-"
+    "、": ",",
+    "...": "…",
+    "$": ".",
+    "/": ",",
+    "—": "-",
 }

 tone_modifier = ToneSandhi()

+
 def replace_punctuation(text):
-    text = text.replace("嗯", "恩").replace("呣","母")
-    pattern = re.compile('|'.join(re.escape(p) for p in rep_map.keys()))
+    text = text.replace("嗯", "恩").replace("呣", "母")
+    pattern = re.compile("|".join(re.escape(p) for p in rep_map.keys()))

    replaced_text = pattern.sub(lambda x: rep_map[x.group()], text)

-    replaced_text = re.sub(r'[^\u4e00-\u9fa5'+"".join(punctuation)+r']+', '', replaced_text)
+    replaced_text = re.sub(
+        r"[^\u4e00-\u9fa5" + "".join(punctuation) + r"]+", "", replaced_text
+    )

    return replaced_text

+
 def g2p(text):
-    pattern = r'(?<=[{0}])\s*'.format(''.join(punctuation))
-    sentences = [i for i in re.split(pattern, text) if i.strip()!='']
+    pattern = r"(?<=[{0}])\s*".format("".join(punctuation))
+    sentences = [i for i in re.split(pattern, text) if i.strip() != ""]
    phones, word2ph = _g2p(sentences)
    return phones, word2ph

@@ -56,10 +63,10 @@ def g2p(text):
 def _get_initials_finals(word):
    initials = []
    finals = []
-    orig_initials = lazy_pinyin(
-        word, neutral_tone_with_five=True, style=Style.INITIALS)
+    orig_initials = lazy_pinyin(word, neutral_tone_with_five=True, style=Style.INITIALS)
    orig_finals = lazy_pinyin(
-        word, neutral_tone_with_five=True, style=Style.FINALS_TONE3)
+        word, neutral_tone_with_five=True, style=Style.FINALS_TONE3
+    )
    for c, v in zip(orig_initials, orig_finals):
        initials.append(c)
        finals.append(v)
@@ -72,17 +79,16 @@ def _g2p(segments):
    for seg in segments:
        pinyins = []
        # Replace all English words in the sentence
-        seg = re.sub('[a-zA-Z]+', '', seg)
+        seg = re.sub("[a-zA-Z]+", "", seg)
        seg_cut = psg.lcut(seg)
        initials = []
        finals = []
        seg_cut = tone_modifier.pre_merge_for_modify(seg_cut)
        for word, pos in seg_cut:
-            if pos == 'eng':
+            if pos == "eng":
                continue
            sub_initials, sub_finals = _get_initials_finals(word)
-            sub_finals = tone_modifier.modified_tone(word, pos,
-                                                          sub_finals)
+            sub_finals = tone_modifier.modified_tone(word, pos, sub_finals)
            initials.append(sub_initials)
            finals.append(sub_finals)

@@ -91,7 +97,7 @@ def _g2p(segments):
        finals = sum(finals, [])
        #
        for c, v in zip(initials, finals):
-            raw_pinyin = c+v
+            raw_pinyin = c + v
            # NOTE: post process for pypinyin outputs
            # we discriminate i, ii and iii
            if c == v:
@@ -102,40 +108,40 @@ def _g2p(segments):
                v_without_tone = v[:-1]
                tone = v[-1]

-                pinyin = c+v_without_tone
-                assert tone in '12345'
+                pinyin = c + v_without_tone
+                assert tone in "12345"

                if c:
                    # 多音节
                    v_rep_map = {
-                        "uei": 'ui',
-                        'iou': 'iu',
-                        'uen': 'un',
+                        "uei": "ui",
+                        "iou": "iu",
+                        "uen": "un",
                    }
                    if v_without_tone in v_rep_map.keys():
-                        pinyin = c+v_rep_map[v_without_tone]
+                        pinyin = c + v_rep_map[v_without_tone]
                else:
                    # 单音节
                    pinyin_rep_map = {
-                        'ing': 'ying',
-                        'i': 'yi',
-                        'in': 'yin',
-                        'u': 'wu',
+                        "ing": "ying",
+                        "i": "yi",
+                        "in": "yin",
+                        "u": "wu",
                    }
                    if pinyin in pinyin_rep_map.keys():
                        pinyin = pinyin_rep_map[pinyin]
                    else:
                        single_rep_map = {
-                            'v': 'yu',
-                            'e': 'e',
-                            'i': 'y',
-                            'u': 'w',
+                            "v": "yu",
+                            "e": "e",
+                            "i": "y",
+                            "u": "w",
                        }
                        if pinyin[0] in single_rep_map.keys():
-                            pinyin = single_rep_map[pinyin[0]]+pinyin[1:]
+                            pinyin = single_rep_map[pinyin[0]] + pinyin[1:]

                assert pinyin in pinyin_to_symbol_map.keys(), (pinyin, seg, raw_pinyin)
-                new_c, new_v = pinyin_to_symbol_map[pinyin].split(' ')
+                new_c, new_v = pinyin_to_symbol_map[pinyin].split(" ")
                new_v = new_v + tone
                phone = [new_c, new_v]
                word2ph.append(len(phone))
@@ -144,9 +150,8 @@ def _g2p(segments):
    return phones_list, word2ph


-
 def text_normalize(text):
-    numbers = re.findall(r'\d+(?:\.?\d+)?', text)
+    numbers = re.findall(r"\d+(?:\.?\d+)?", text)
    for number in numbers:
        text = text.replace(number, cn2an.an2cn(number), 1)
    text = replace_punctuation(text)
@@ -154,7 +159,7 @@ def text_normalize(text):
    return text


-if __name__ == '__main__':
+if __name__ == "__main__":
    text = "啊——但是《原神》是由,米哈\游自主，研发的一款全.新开放世界.冒险游戏"
    text = "呣呣呣～就是…大人的鼹鼠党吧？"
    text = "你好"
--- a/GPT_SoVITS/text/cleaner.py
+++ b/GPT_SoVITS/text/cleaner.py
@@ -1,29 +1,27 @@
 from text import chinese, japanese, cleaned_text_to_sequence, symbols, english

-language_module_map = {
-    'zh': chinese,
-    "ja": japanese,
-    'en': english
-}
+language_module_map = {"zh": chinese, "ja": japanese, "en": english}
 special = [
-    ('%', 'zh', "SP"),
-    ('￥', 'zh', "SP2"),
-    ('^', 'zh', "SP3"),
+    ("%", "zh", "SP"),
+    ("￥", "zh", "SP2"),
+    ("^", "zh", "SP3"),
    # ('@', 'zh', "SP4")#不搞鬼畜了，和第二版保持一致吧
 ]
+
+
 def clean_text(text, language):
    for special_s, special_l, target_symbol in special:
        if special_s in text and language == special_l:
            return clean_special(text, language, special_s, target_symbol)
    language_module = language_module_map[language]
    norm_text = language_module.text_normalize(text)
-    if(language=="zh"):
+    if language == "zh":
        phones, word2ph = language_module.g2p(norm_text)
        assert len(phones) == sum(word2ph)
        assert len(norm_text) == len(word2ph)
    else:
        phones = language_module.g2p(norm_text)
-        word2ph=None
+        word2ph = None

    for ph in phones:
        assert ph in symbols
@@ -41,17 +39,17 @@ def clean_special(text, language, special_s, target_symbol):
    new_ph = []
    for ph in phones:
        assert ph in symbols
-        if ph == ',':
+        if ph == ",":
            new_ph.append(target_symbol)
        else:
            new_ph.append(ph)
    return new_ph

+
 def text_to_sequence(text, language):
    phones = clean_text(text)
    return cleaned_text_to_sequence(phones)

-if __name__ == '__main__':
-    print(clean_text("你好%啊啊啊额、还是到付红四方。", 'zh'))
-

+if __name__ == "__main__":
+    print(clean_text("你好%啊啊啊额、还是到付红四方。", "zh"))
--- a/GPT_SoVITS/text/english.py
+++ b/GPT_SoVITS/text/english.py
@@ -8,20 +8,87 @@ from string import punctuation
 from text import symbols

 current_file_path = os.path.dirname(__file__)
-CMU_DICT_PATH = os.path.join(current_file_path, 'cmudict.rep')
-CACHE_PATH = os.path.join(current_file_path, 'cmudict_cache.pickle')
+CMU_DICT_PATH = os.path.join(current_file_path, "cmudict.rep")
+CACHE_PATH = os.path.join(current_file_path, "cmudict_cache.pickle")
 _g2p = G2p()

-arpa = {'AH0', 'S', 'AH1', 'EY2', 'AE2', 'EH0', 'OW2', 'UH0', 'NG', 'B', 'G', 'AY0', 'M', 'AA0', 'F', 'AO0', 'ER2', 'UH1', 'IY1', 'AH2', 'DH', 'IY0', 'EY1', 'IH0', 'K', 'N', 'W', 'IY2', 'T', 'AA1', 'ER1', 'EH2', 'OY0', 'UH2', 'UW1', 'Z', 'AW2', 'AW1', 'V', 'UW2', 'AA2', 'ER', 'AW0', 'UW0', 'R', 'OW1', 'EH1', 'ZH', 'AE0', 'IH2', 'IH', 'Y', 'JH', 'P', 'AY1', 'EY0', 'OY2', 'TH', 'HH', 'D', 'ER0', 'CH', 'AO1', 'AE1', 'AO2', 'OY1', 'AY2', 'IH1', 'OW0', 'L', 'SH'}
+arpa = {
+    "AH0",
+    "S",
+    "AH1",
+    "EY2",
+    "AE2",
+    "EH0",
+    "OW2",
+    "UH0",
+    "NG",
+    "B",
+    "G",
+    "AY0",
+    "M",
+    "AA0",
+    "F",
+    "AO0",
+    "ER2",
+    "UH1",
+    "IY1",
+    "AH2",
+    "DH",
+    "IY0",
+    "EY1",
+    "IH0",
+    "K",
+    "N",
+    "W",
+    "IY2",
+    "T",
+    "AA1",
+    "ER1",
+    "EH2",
+    "OY0",
+    "UH2",
+    "UW1",
+    "Z",
+    "AW2",
+    "AW1",
+    "V",
+    "UW2",
+    "AA2",
+    "ER",
+    "AW0",
+    "UW0",
+    "R",
+    "OW1",
+    "EH1",
+    "ZH",
+    "AE0",
+    "IH2",
+    "IH",
+    "Y",
+    "JH",
+    "P",
+    "AY1",
+    "EY0",
+    "OY2",
+    "TH",
+    "HH",
+    "D",
+    "ER0",
+    "CH",
+    "AO1",
+    "AE1",
+    "AO2",
+    "OY1",
+    "AY2",
+    "IH1",
+    "OW0",
+    "L",
+    "SH",
+}


 def replace_phs(phs):
-    rep_map = {
-        ';': ',',
-        ':': ',',
-        '\'': '-',
-        '"': '-'
-    }
+    rep_map = {";": ",", ":": ",", "'": "-", '"': "-"}
    phs_new = []
    for ph in phs:
        if ph in symbols:
@@ -29,9 +96,10 @@ def replace_phs(phs):
        elif ph in rep_map.keys():
            phs_new.append(rep_map[ph])
        else:
-            print('ph not in symbols: ', ph)
+            print("ph not in symbols: ", ph)
    return phs_new

+
 def read_dict():
    g2p_dict = {}
    start_line = 49
@@ -41,13 +109,13 @@ def read_dict():
        while line:
            if line_index >= start_line:
                line = line.strip()
-                word_split = line.split('  ')
+                word_split = line.split("  ")
                word = word_split[0]

-                syllable_split = word_split[1].split(' - ')
+                syllable_split = word_split[1].split(" - ")
                g2p_dict[word] = []
                for syllable in syllable_split:
-                    phone_split = syllable.split(' ')
+                    phone_split = syllable.split(" ")
                    g2p_dict[word].append(phone_split)

            line_index = line_index + 1
@@ -57,13 +125,13 @@ def read_dict():


 def cache_dict(g2p_dict, file_path):
-    with open(file_path, 'wb') as pickle_file:
+    with open(file_path, "wb") as pickle_file:
        pickle.dump(g2p_dict, pickle_file)


 def get_dict():
    if os.path.exists(CACHE_PATH):
-        with open(CACHE_PATH, 'rb') as pickle_file:
+        with open(CACHE_PATH, "rb") as pickle_file:
            g2p_dict = pickle.load(pickle_file)
    else:
        g2p_dict = read_dict()
@@ -71,6 +139,7 @@ def get_dict():

    return g2p_dict

+
 eng_dict = get_dict()


@@ -78,8 +147,8 @@ def text_normalize(text):
    # todo: eng text normalize
    return text.replace(";", ",")

-def g2p(text):

+def g2p(text):
    phones = []
    words = re.split(r"([,;.\-\?\!\s+])", text)
    for w in words:
@@ -97,6 +166,7 @@ def g2p(text):

    return replace_phs(phones)

+
 if __name__ == "__main__":
    # print(get_dict())
    print(g2p("hello"))
@@ -106,4 +176,4 @@ if __name__ == "__main__":
    #     for group in syllables:
    #         for ph in group:
    #             all_phones.add(ph)
-    # print(all_phones)
+    # print(all_phones)
--- a/GPT_SoVITS/text/japanese.py
+++ b/GPT_SoVITS/text/japanese.py
@@ -8,57 +8,63 @@ from text import symbols

 # Regular expression matching Japanese without punctuation marks:
 _japanese_characters = re.compile(
-    r'[A-Za-z\d\u3005\u3040-\u30ff\u4e00-\u9fff\uff11-\uff19\uff21-\uff3a\uff41-\uff5a\uff66-\uff9d]')
+    r"[A-Za-z\d\u3005\u3040-\u30ff\u4e00-\u9fff\uff11-\uff19\uff21-\uff3a\uff41-\uff5a\uff66-\uff9d]"
+)

 # Regular expression matching non-Japanese characters or punctuation marks:
 _japanese_marks = re.compile(
-    r'[^A-Za-z\d\u3005\u3040-\u30ff\u4e00-\u9fff\uff11-\uff19\uff21-\uff3a\uff41-\uff5a\uff66-\uff9d]')
+    r"[^A-Za-z\d\u3005\u3040-\u30ff\u4e00-\u9fff\uff11-\uff19\uff21-\uff3a\uff41-\uff5a\uff66-\uff9d]"
+)

 # List of (symbol, Japanese) pairs for marks:
-_symbols_to_japanese = [(re.compile('%s' % x[0]), x[1]) for x in [
-    ('％', 'パーセント')
-]]
+_symbols_to_japanese = [(re.compile("%s" % x[0]), x[1]) for x in [("％", "パーセント")]]


 # List of (consonant, sokuon) pairs:
-_real_sokuon = [(re.compile('%s' % x[0]), x[1]) for x in [
-    (r'Q([↑↓]*[kg])', r'k#\1'),
-    (r'Q([↑↓]*[tdjʧ])', r't#\1'),
-    (r'Q([↑↓]*[sʃ])', r's\1'),
-    (r'Q([↑↓]*[pb])', r'p#\1')
-]]
+_real_sokuon = [
+    (re.compile("%s" % x[0]), x[1])
+    for x in [
+        (r"Q([↑↓]*[kg])", r"k#\1"),
+        (r"Q([↑↓]*[tdjʧ])", r"t#\1"),
+        (r"Q([↑↓]*[sʃ])", r"s\1"),
+        (r"Q([↑↓]*[pb])", r"p#\1"),
+    ]
+]

 # List of (consonant, hatsuon) pairs:
-_real_hatsuon = [(re.compile('%s' % x[0]), x[1]) for x in [
-    (r'N([↑↓]*[pbm])', r'm\1'),
-    (r'N([↑↓]*[ʧʥj])', r'n^\1'),
-    (r'N([↑↓]*[tdn])', r'n\1'),
-    (r'N([↑↓]*[kg])', r'ŋ\1')
-]]
-
+_real_hatsuon = [
+    (re.compile("%s" % x[0]), x[1])
+    for x in [
+        (r"N([↑↓]*[pbm])", r"m\1"),
+        (r"N([↑↓]*[ʧʥj])", r"n^\1"),
+        (r"N([↑↓]*[tdn])", r"n\1"),
+        (r"N([↑↓]*[kg])", r"ŋ\1"),
+    ]
+]


 def post_replace_ph(ph):
    rep_map = {
-        '：': ',',
-        '；': ',',
-        '，': ',',
-        '。': '.',
-        '！': '!',
-        '？': '?',
-        '\n': '.',
+        "：": ",",
+        "；": ",",
+        "，": ",",
+        "。": ".",
+        "！": "!",
+        "？": "?",
+        "\n": ".",
        "·": ",",
-        '、': ",",
-        '...': '…'
+        "、": ",",
+        "...": "…",
    }
    if ph in rep_map.keys():
        ph = rep_map[ph]
    if ph in symbols:
        return ph
    if ph not in symbols:
-        ph = 'UNK'
+        ph = "UNK"
    return ph

+
 def symbols_to_japanese(text):
    for regex, replacement in _symbols_to_japanese:
        text = re.sub(regex, replacement, text)
@@ -66,7 +72,7 @@ def symbols_to_japanese(text):


 def preprocess_jap(text):
-    '''Reference https://r9y9.github.io/ttslearn/latest/notebooks/ch10_Recipe-Tacotron.html'''
+    """Reference https://r9y9.github.io/ttslearn/latest/notebooks/ch10_Recipe-Tacotron.html"""
    text = symbols_to_japanese(text)
    sentences = re.split(_japanese_marks, text)
    marks = re.findall(_japanese_marks, text)
@@ -77,13 +83,15 @@ def preprocess_jap(text):
            text += p.split(" ")

        if i < len(marks):
-            text += [marks[i].replace(' ', '')]
+            text += [marks[i].replace(" ", "")]
    return text

+
 def text_normalize(text):
    # todo: jap text normalize
    return text

+
 def g2p(norm_text):
    phones = preprocess_jap(norm_text)
    phones = [post_replace_ph(i) for i in phones]
@@ -91,7 +99,7 @@ def g2p(norm_text):
    return phones


-if __name__ == '__main__':
+if __name__ == "__main__":
    for line in open("../../../Downloads/transcript_utf8.txt").readlines():
        text = line.split(":")[1]
        phones = g2p(text)
--- a/GPT_SoVITS/text/symbols.py
+++ b/GPT_SoVITS/text/symbols.py
@@ -1,24 +1,397 @@
 import os

 # punctuation = ['!', '?', '…', ",", ".","@"]#@是SP停顿
-punctuation = ['!', '?', '…', ",", "."]#@是SP停顿
+punctuation = ["!", "?", "…", ",", "."]  # @是SP停顿
 punctuation.append("-")
-pu_symbols = punctuation + ["SP", 'SP2', 'SP3', "UNK"]
+pu_symbols = punctuation + ["SP", "SP2", "SP3", "UNK"]
 # pu_symbols = punctuation + ["SP", 'SP2', 'SP3','SP4', "UNK"]
-pad = '_'
+pad = "_"

-c = ['AA', 'EE', 'OO', 'b', 'c', 'ch', 'd', 'f', 'g', 'h', 'j', 'k', 'l', 'm', 'n', 'p', 'q', 'r', 's', 'sh', 't', 'w', 'x', 'y', 'z', 'zh']
-v = ['E1', 'En1', 'a1', 'ai1', 'an1', 'ang1', 'ao1', 'e1', 'ei1', 'en1', 'eng1', 'er1', 'i1', 'i01', 'ia1', 'ian1', 'iang1', 'iao1', 'ie1', 'in1', 'ing1', 'iong1', 'ir1', 'iu1', 'o1', 'ong1', 'ou1', 'u1', 'ua1', 'uai1', 'uan1', 'uang1', 'ui1', 'un1', 'uo1', 'v1', 'van1', 've1', 'vn1', 'E2', 'En2', 'a2', 'ai2', 'an2', 'ang2', 'ao2', 'e2', 'ei2', 'en2', 'eng2', 'er2', 'i2', 'i02', 'ia2', 'ian2', 'iang2', 'iao2', 'ie2', 'in2', 'ing2', 'iong2', 'ir2', 'iu2', 'o2', 'ong2', 'ou2', 'u2', 'ua2', 'uai2', 'uan2', 'uang2', 'ui2', 'un2', 'uo2', 'v2', 'van2', 've2', 'vn2', 'E3', 'En3', 'a3', 'ai3', 'an3', 'ang3', 'ao3', 'e3', 'ei3', 'en3', 'eng3', 'er3', 'i3', 'i03', 'ia3', 'ian3', 'iang3', 'iao3', 'ie3', 'in3', 'ing3', 'iong3', 'ir3', 'iu3', 'o3', 'ong3', 'ou3', 'u3', 'ua3', 'uai3', 'uan3', 'uang3', 'ui3', 'un3', 'uo3', 'v3', 'van3', 've3', 'vn3', 'E4', 'En4', 'a4', 'ai4', 'an4', 'ang4', 'ao4', 'e4', 'ei4', 'en4', 'eng4', 'er4', 'i4', 'i04', 'ia4', 'ian4', 'iang4', 'iao4', 'ie4', 'in4', 'ing4', 'iong4', 'ir4', 'iu4', 'o4', 'ong4', 'ou4', 'u4', 'ua4', 'uai4', 'uan4', 'uang4', 'ui4', 'un4', 'uo4', 'v4', 'van4', 've4', 'vn4', 'E5', 'En5', 'a5', 'ai5', 'an5', 'ang5', 'ao5', 'e5', 'ei5', 'en5', 'eng5', 'er5', 'i5', 'i05', 'ia5', 'ian5', 'iang5', 'iao5', 'ie5', 'in5', 'ing5', 'iong5', 'ir5', 'iu5', 'o5', 'ong5', 'ou5', 'u5', 'ua5', 'uai5', 'uan5', 'uang5', 'ui5', 'un5', 'uo5', 'v5', 'van5', 've5', 'vn5']
+c = [
+    "AA",
+    "EE",
+    "OO",
+    "b",
+    "c",
+    "ch",
+    "d",
+    "f",
+    "g",
+    "h",
+    "j",
+    "k",
+    "l",
+    "m",
+    "n",
+    "p",
+    "q",
+    "r",
+    "s",
+    "sh",
+    "t",
+    "w",
+    "x",
+    "y",
+    "z",
+    "zh",
+]
+v = [
+    "E1",
+    "En1",
+    "a1",
+    "ai1",
+    "an1",
+    "ang1",
+    "ao1",
+    "e1",
+    "ei1",
+    "en1",
+    "eng1",
+    "er1",
+    "i1",
+    "i01",
+    "ia1",
+    "ian1",
+    "iang1",
+    "iao1",
+    "ie1",
+    "in1",
+    "ing1",
+    "iong1",
+    "ir1",
+    "iu1",
+    "o1",
+    "ong1",
+    "ou1",
+    "u1",
+    "ua1",
+    "uai1",
+    "uan1",
+    "uang1",
+    "ui1",
+    "un1",
+    "uo1",
+    "v1",
+    "van1",
+    "ve1",
+    "vn1",
+    "E2",
+    "En2",
+    "a2",
+    "ai2",
+    "an2",
+    "ang2",
+    "ao2",
+    "e2",
+    "ei2",
+    "en2",
+    "eng2",
+    "er2",
+    "i2",
+    "i02",
+    "ia2",
+    "ian2",
+    "iang2",
+    "iao2",
+    "ie2",
+    "in2",
+    "ing2",
+    "iong2",
+    "ir2",
+    "iu2",
+    "o2",
+    "ong2",
+    "ou2",
+    "u2",
+    "ua2",
+    "uai2",
+    "uan2",
+    "uang2",
+    "ui2",
+    "un2",
+    "uo2",
+    "v2",
+    "van2",
+    "ve2",
+    "vn2",
+    "E3",
+    "En3",
+    "a3",
+    "ai3",
+    "an3",
+    "ang3",
+    "ao3",
+    "e3",
+    "ei3",
+    "en3",
+    "eng3",
+    "er3",
+    "i3",
+    "i03",
+    "ia3",
+    "ian3",
+    "iang3",
+    "iao3",
+    "ie3",
+    "in3",
+    "ing3",
+    "iong3",
+    "ir3",
+    "iu3",
+    "o3",
+    "ong3",
+    "ou3",
+    "u3",
+    "ua3",
+    "uai3",
+    "uan3",
+    "uang3",
+    "ui3",
+    "un3",
+    "uo3",
+    "v3",
+    "van3",
+    "ve3",
+    "vn3",
+    "E4",
+    "En4",
+    "a4",
+    "ai4",
+    "an4",
+    "ang4",
+    "ao4",
+    "e4",
+    "ei4",
+    "en4",
+    "eng4",
+    "er4",
+    "i4",
+    "i04",
+    "ia4",
+    "ian4",
+    "iang4",
+    "iao4",
+    "ie4",
+    "in4",
+    "ing4",
+    "iong4",
+    "ir4",
+    "iu4",
+    "o4",
+    "ong4",
+    "ou4",
+    "u4",
+    "ua4",
+    "uai4",
+    "uan4",
+    "uang4",
+    "ui4",
+    "un4",
+    "uo4",
+    "v4",
+    "van4",
+    "ve4",
+    "vn4",
+    "E5",
+    "En5",
+    "a5",
+    "ai5",
+    "an5",
+    "ang5",
+    "ao5",
+    "e5",
+    "ei5",
+    "en5",
+    "eng5",
+    "er5",
+    "i5",
+    "i05",
+    "ia5",
+    "ian5",
+    "iang5",
+    "iao5",
+    "ie5",
+    "in5",
+    "ing5",
+    "iong5",
+    "ir5",
+    "iu5",
+    "o5",
+    "ong5",
+    "ou5",
+    "u5",
+    "ua5",
+    "uai5",
+    "uan5",
+    "uang5",
+    "ui5",
+    "un5",
+    "uo5",
+    "v5",
+    "van5",
+    "ve5",
+    "vn5",
+]

-v_without_tone = ['E', 'En', 'a', 'ai', 'an', 'ang', 'ao', 'e', 'ei', 'en', 'eng', 'er', 'i', 'i0', 'ia', 'ian', 'iang', 'iao', 'ie', 'in', 'ing', 'iong', 'ir', 'iu', 'o', 'ong', 'ou', 'u', 'ua', 'uai', 'uan', 'uang', 'ui', 'un', 'uo', 'v', 'van', 've', 'vn']
+v_without_tone = [
+    "E",
+    "En",
+    "a",
+    "ai",
+    "an",
+    "ang",
+    "ao",
+    "e",
+    "ei",
+    "en",
+    "eng",
+    "er",
+    "i",
+    "i0",
+    "ia",
+    "ian",
+    "iang",
+    "iao",
+    "ie",
+    "in",
+    "ing",
+    "iong",
+    "ir",
+    "iu",
+    "o",
+    "ong",
+    "ou",
+    "u",
+    "ua",
+    "uai",
+    "uan",
+    "uang",
+    "ui",
+    "un",
+    "uo",
+    "v",
+    "van",
+    "ve",
+    "vn",
+]

 # japanese
-ja_symbols = ['I', 'N', 'U', 'a', 'b', 'by', 'ch', 'cl', 'd', 'dy', 'e', 'f', 'g', 'gy', 'h', 'hy', 'i', 'j', 'k', 'ky',
-              'm', 'my', 'n', 'ny', 'o', 'p', 'py', 'r', 'ry', 's', 'sh', 't', 'ts', 'u', 'v', 'w', 'y', 'z']
+ja_symbols = [
+    "I",
+    "N",
+    "U",
+    "a",
+    "b",
+    "by",
+    "ch",
+    "cl",
+    "d",
+    "dy",
+    "e",
+    "f",
+    "g",
+    "gy",
+    "h",
+    "hy",
+    "i",
+    "j",
+    "k",
+    "ky",
+    "m",
+    "my",
+    "n",
+    "ny",
+    "o",
+    "p",
+    "py",
+    "r",
+    "ry",
+    "s",
+    "sh",
+    "t",
+    "ts",
+    "u",
+    "v",
+    "w",
+    "y",
+    "z",
+]

-arpa = {'AH0', 'S', 'AH1', 'EY2', 'AE2', 'EH0', 'OW2', 'UH0', 'NG', 'B', 'G', 'AY0', 'M', 'AA0', 'F', 'AO0', 'ER2', 'UH1', 'IY1', 'AH2', 'DH', 'IY0', 'EY1', 'IH0', 'K', 'N', 'W', 'IY2', 'T', 'AA1', 'ER1', 'EH2', 'OY0', 'UH2', 'UW1', 'Z', 'AW2', 'AW1', 'V', 'UW2', 'AA2', 'ER', 'AW0', 'UW0', 'R', 'OW1', 'EH1', 'ZH', 'AE0', 'IH2', 'IH', 'Y', 'JH', 'P', 'AY1', 'EY0', 'OY2', 'TH', 'HH', 'D', 'ER0', 'CH', 'AO1', 'AE1', 'AO2', 'OY1', 'AY2', 'IH1', 'OW0', 'L', 'SH'}
+arpa = {
+    "AH0",
+    "S",
+    "AH1",
+    "EY2",
+    "AE2",
+    "EH0",
+    "OW2",
+    "UH0",
+    "NG",
+    "B",
+    "G",
+    "AY0",
+    "M",
+    "AA0",
+    "F",
+    "AO0",
+    "ER2",
+    "UH1",
+    "IY1",
+    "AH2",
+    "DH",
+    "IY0",
+    "EY1",
+    "IH0",
+    "K",
+    "N",
+    "W",
+    "IY2",
+    "T",
+    "AA1",
+    "ER1",
+    "EH2",
+    "OY0",
+    "UH2",
+    "UW1",
+    "Z",
+    "AW2",
+    "AW1",
+    "V",
+    "UW2",
+    "AA2",
+    "ER",
+    "AW0",
+    "UW0",
+    "R",
+    "OW1",
+    "EH1",
+    "ZH",
+    "AE0",
+    "IH2",
+    "IH",
+    "Y",
+    "JH",
+    "P",
+    "AY1",
+    "EY0",
+    "OY2",
+    "TH",
+    "HH",
+    "D",
+    "ER0",
+    "CH",
+    "AO1",
+    "AE1",
+    "AO2",
+    "OY1",
+    "AY2",
+    "IH1",
+    "OW0",
+    "L",
+    "SH",
+}

 symbols = [pad] + c + v + ja_symbols + pu_symbols + list(arpa)
 symbols = sorted(set(symbols))
-if __name__ == '__main__':
-    print(len(symbols))
+if __name__ == "__main__":
+    print(len(symbols))
--- a/GPT_SoVITS/text/tone_sandhi.py
+++ b/GPT_SoVITS/text/tone_sandhi.py
@@ -19,51 +19,442 @@ from pypinyin import lazy_pinyin
 from pypinyin import Style


-class ToneSandhi():
+class ToneSandhi:
    def __init__(self):
        self.must_neural_tone_words = {
-            '麻烦', '麻利', '鸳鸯', '高粱', '骨头', '骆驼', '马虎', '首饰', '馒头', '馄饨', '风筝',
-            '难为', '队伍', '阔气', '闺女', '门道', '锄头', '铺盖', '铃铛', '铁匠', '钥匙', '里脊',
-            '里头', '部分', '那么', '道士', '造化', '迷糊', '连累', '这么', '这个', '运气', '过去',
-            '软和', '转悠', '踏实', '跳蚤', '跟头', '趔趄', '财主', '豆腐', '讲究', '记性', '记号',
-            '认识', '规矩', '见识', '裁缝', '补丁', '衣裳', '衣服', '衙门', '街坊', '行李', '行当',
-            '蛤蟆', '蘑菇', '薄荷', '葫芦', '葡萄', '萝卜', '荸荠', '苗条', '苗头', '苍蝇', '芝麻',
-            '舒服', '舒坦', '舌头', '自在', '膏药', '脾气', '脑袋', '脊梁', '能耐', '胳膊', '胭脂',
-            '胡萝', '胡琴', '胡同', '聪明', '耽误', '耽搁', '耷拉', '耳朵', '老爷', '老实', '老婆',
-            '老头', '老太', '翻腾', '罗嗦', '罐头', '编辑', '结实', '红火', '累赘', '糨糊', '糊涂',
-            '精神', '粮食', '簸箕', '篱笆', '算计', '算盘', '答应', '笤帚', '笑语', '笑话', '窟窿',
-            '窝囊', '窗户', '稳当', '稀罕', '称呼', '秧歌', '秀气', '秀才', '福气', '祖宗', '砚台',
-            '码头', '石榴', '石头', '石匠', '知识', '眼睛', '眯缝', '眨巴', '眉毛', '相声', '盘算',
-            '白净', '痢疾', '痛快', '疟疾', '疙瘩', '疏忽', '畜生', '生意', '甘蔗', '琵琶', '琢磨',
-            '琉璃', '玻璃', '玫瑰', '玄乎', '狐狸', '状元', '特务', '牲口', '牙碜', '牌楼', '爽快',
-            '爱人', '热闹', '烧饼', '烟筒', '烂糊', '点心', '炊帚', '灯笼', '火候', '漂亮', '滑溜',
-            '溜达', '温和', '清楚', '消息', '浪头', '活泼', '比方', '正经', '欺负', '模糊', '槟榔',
-            '棺材', '棒槌', '棉花', '核桃', '栅栏', '柴火', '架势', '枕头', '枇杷', '机灵', '本事',
-            '木头', '木匠', '朋友', '月饼', '月亮', '暖和', '明白', '时候', '新鲜', '故事', '收拾',
-            '收成', '提防', '挖苦', '挑剔', '指甲', '指头', '拾掇', '拳头', '拨弄', '招牌', '招呼',
-            '抬举', '护士', '折腾', '扫帚', '打量', '打算', '打点', '打扮', '打听', '打发', '扎实',
-            '扁担', '戒指', '懒得', '意识', '意思', '情形', '悟性', '怪物', '思量', '怎么', '念头',
-            '念叨', '快活', '忙活', '志气', '心思', '得罪', '张罗', '弟兄', '开通', '应酬', '庄稼',
-            '干事', '帮手', '帐篷', '希罕', '师父', '师傅', '巴结', '巴掌', '差事', '工夫', '岁数',
-            '屁股', '尾巴', '少爷', '小气', '小伙', '将就', '对头', '对付', '寡妇', '家伙', '客气',
-            '实在', '官司', '学问', '学生', '字号', '嫁妆', '媳妇', '媒人', '婆家', '娘家', '委屈',
-            '姑娘', '姐夫', '妯娌', '妥当', '妖精', '奴才', '女婿', '头发', '太阳', '大爷', '大方',
-            '大意', '大夫', '多少', '多么', '外甥', '壮实', '地道', '地方', '在乎', '困难', '嘴巴',
-            '嘱咐', '嘟囔', '嘀咕', '喜欢', '喇嘛', '喇叭', '商量', '唾沫', '哑巴', '哈欠', '哆嗦',
-            '咳嗽', '和尚', '告诉', '告示', '含糊', '吓唬', '后头', '名字', '名堂', '合同', '吆喝',
-            '叫唤', '口袋', '厚道', '厉害', '千斤', '包袱', '包涵', '匀称', '勤快', '动静', '动弹',
-            '功夫', '力气', '前头', '刺猬', '刺激', '别扭', '利落', '利索', '利害', '分析', '出息',
-            '凑合', '凉快', '冷战', '冤枉', '冒失', '养活', '关系', '先生', '兄弟', '便宜', '使唤',
-            '佩服', '作坊', '体面', '位置', '似的', '伙计', '休息', '什么', '人家', '亲戚', '亲家',
-            '交情', '云彩', '事情', '买卖', '主意', '丫头', '丧气', '两口', '东西', '东家', '世故',
-            '不由', '不在', '下水', '下巴', '上头', '上司', '丈夫', '丈人', '一辈', '那个', '菩萨',
-            '父亲', '母亲', '咕噜', '邋遢', '费用', '冤家', '甜头', '介绍', '荒唐', '大人', '泥鳅',
-            '幸福', '熟悉', '计划', '扑腾', '蜡烛', '姥爷', '照顾', '喉咙', '吉他', '弄堂', '蚂蚱',
-            '凤凰', '拖沓', '寒碜', '糟蹋', '倒腾', '报复', '逻辑', '盘缠', '喽啰', '牢骚', '咖喱',
-            '扫把', '惦记'
+            "麻烦",
+            "麻利",
+            "鸳鸯",
+            "高粱",
+            "骨头",
+            "骆驼",
+            "马虎",
+            "首饰",
+            "馒头",
+            "馄饨",
+            "风筝",
+            "难为",
+            "队伍",
+            "阔气",
+            "闺女",
+            "门道",
+            "锄头",
+            "铺盖",
+            "铃铛",
+            "铁匠",
+            "钥匙",
+            "里脊",
+            "里头",
+            "部分",
+            "那么",
+            "道士",
+            "造化",
+            "迷糊",
+            "连累",
+            "这么",
+            "这个",
+            "运气",
+            "过去",
+            "软和",
+            "转悠",
+            "踏实",
+            "跳蚤",
+            "跟头",
+            "趔趄",
+            "财主",
+            "豆腐",
+            "讲究",
+            "记性",
+            "记号",
+            "认识",
+            "规矩",
+            "见识",
+            "裁缝",
+            "补丁",
+            "衣裳",
+            "衣服",
+            "衙门",
+            "街坊",
+            "行李",
+            "行当",
+            "蛤蟆",
+            "蘑菇",
+            "薄荷",
+            "葫芦",
+            "葡萄",
+            "萝卜",
+            "荸荠",
+            "苗条",
+            "苗头",
+            "苍蝇",
+            "芝麻",
+            "舒服",
+            "舒坦",
+            "舌头",
+            "自在",
+            "膏药",
+            "脾气",
+            "脑袋",
+            "脊梁",
+            "能耐",
+            "胳膊",
+            "胭脂",
+            "胡萝",
+            "胡琴",
+            "胡同",
+            "聪明",
+            "耽误",
+            "耽搁",
+            "耷拉",
+            "耳朵",
+            "老爷",
+            "老实",
+            "老婆",
+            "老头",
+            "老太",
+            "翻腾",
+            "罗嗦",
+            "罐头",
+            "编辑",
+            "结实",
+            "红火",
+            "累赘",
+            "糨糊",
+            "糊涂",
+            "精神",
+            "粮食",
+            "簸箕",
+            "篱笆",
+            "算计",
+            "算盘",
+            "答应",
+            "笤帚",
+            "笑语",
+            "笑话",
+            "窟窿",
+            "窝囊",
+            "窗户",
+            "稳当",
+            "稀罕",
+            "称呼",
+            "秧歌",
+            "秀气",
+            "秀才",
+            "福气",
+            "祖宗",
+            "砚台",
+            "码头",
+            "石榴",
+            "石头",
+            "石匠",
+            "知识",
+            "眼睛",
+            "眯缝",
+            "眨巴",
+            "眉毛",
+            "相声",
+            "盘算",
+            "白净",
+            "痢疾",
+            "痛快",
+            "疟疾",
+            "疙瘩",
+            "疏忽",
+            "畜生",
+            "生意",
+            "甘蔗",
+            "琵琶",
+            "琢磨",
+            "琉璃",
+            "玻璃",
+            "玫瑰",
+            "玄乎",
+            "狐狸",
+            "状元",
+            "特务",
+            "牲口",
+            "牙碜",
+            "牌楼",
+            "爽快",
+            "爱人",
+            "热闹",
+            "烧饼",
+            "烟筒",
+            "烂糊",
+            "点心",
+            "炊帚",
+            "灯笼",
+            "火候",
+            "漂亮",
+            "滑溜",
+            "溜达",
+            "温和",
+            "清楚",
+            "消息",
+            "浪头",
+            "活泼",
+            "比方",
+            "正经",
+            "欺负",
+            "模糊",
+            "槟榔",
+            "棺材",
+            "棒槌",
+            "棉花",
+            "核桃",
+            "栅栏",
+            "柴火",
+            "架势",
+            "枕头",
+            "枇杷",
+            "机灵",
+            "本事",
+            "木头",
+            "木匠",
+            "朋友",
+            "月饼",
+            "月亮",
+            "暖和",
+            "明白",
+            "时候",
+            "新鲜",
+            "故事",
+            "收拾",
+            "收成",
+            "提防",
+            "挖苦",
+            "挑剔",
+            "指甲",
+            "指头",
+            "拾掇",
+            "拳头",
+            "拨弄",
+            "招牌",
+            "招呼",
+            "抬举",
+            "护士",
+            "折腾",
+            "扫帚",
+            "打量",
+            "打算",
+            "打点",
+            "打扮",
+            "打听",
+            "打发",
+            "扎实",
+            "扁担",
+            "戒指",
+            "懒得",
+            "意识",
+            "意思",
+            "情形",
+            "悟性",
+            "怪物",
+            "思量",
+            "怎么",
+            "念头",
+            "念叨",
+            "快活",
+            "忙活",
+            "志气",
+            "心思",
+            "得罪",
+            "张罗",
+            "弟兄",
+            "开通",
+            "应酬",
+            "庄稼",
+            "干事",
+            "帮手",
+            "帐篷",
+            "希罕",
+            "师父",
+            "师傅",
+            "巴结",
+            "巴掌",
+            "差事",
+            "工夫",
+            "岁数",
+            "屁股",
+            "尾巴",
+            "少爷",
+            "小气",
+            "小伙",
+            "将就",
+            "对头",
+            "对付",
+            "寡妇",
+            "家伙",
+            "客气",
+            "实在",
+            "官司",
+            "学问",
+            "学生",
+            "字号",
+            "嫁妆",
+            "媳妇",
+            "媒人",
+            "婆家",
+            "娘家",
+            "委屈",
+            "姑娘",
+            "姐夫",
+            "妯娌",
+            "妥当",
+            "妖精",
+            "奴才",
+            "女婿",
+            "头发",
+            "太阳",
+            "大爷",
+            "大方",
+            "大意",
+            "大夫",
+            "多少",
+            "多么",
+            "外甥",
+            "壮实",
+            "地道",
+            "地方",
+            "在乎",
+            "困难",
+            "嘴巴",
+            "嘱咐",
+            "嘟囔",
+            "嘀咕",
+            "喜欢",
+            "喇嘛",
+            "喇叭",
+            "商量",
+            "唾沫",
+            "哑巴",
+            "哈欠",
+            "哆嗦",
+            "咳嗽",
+            "和尚",
+            "告诉",
+            "告示",
+            "含糊",
+            "吓唬",
+            "后头",
+            "名字",
+            "名堂",
+            "合同",
+            "吆喝",
+            "叫唤",
+            "口袋",
+            "厚道",
+            "厉害",
+            "千斤",
+            "包袱",
+            "包涵",
+            "匀称",
+            "勤快",
+            "动静",
+            "动弹",
+            "功夫",
+            "力气",
+            "前头",
+            "刺猬",
+            "刺激",
+            "别扭",
+            "利落",
+            "利索",
+            "利害",
+            "分析",
+            "出息",
+            "凑合",
+            "凉快",
+            "冷战",
+            "冤枉",
+            "冒失",
+            "养活",
+            "关系",
+            "先生",
+            "兄弟",
+            "便宜",
+            "使唤",
+            "佩服",
+            "作坊",
+            "体面",
+            "位置",
+            "似的",
+            "伙计",
+            "休息",
+            "什么",
+            "人家",
+            "亲戚",
+            "亲家",
+            "交情",
+            "云彩",
+            "事情",
+            "买卖",
+            "主意",
+            "丫头",
+            "丧气",
+            "两口",
+            "东西",
+            "东家",
+            "世故",
+            "不由",
+            "不在",
+            "下水",
+            "下巴",
+            "上头",
+            "上司",
+            "丈夫",
+            "丈人",
+            "一辈",
+            "那个",
+            "菩萨",
+            "父亲",
+            "母亲",
+            "咕噜",
+            "邋遢",
+            "费用",
+            "冤家",
+            "甜头",
+            "介绍",
+            "荒唐",
+            "大人",
+            "泥鳅",
+            "幸福",
+            "熟悉",
+            "计划",
+            "扑腾",
+            "蜡烛",
+            "姥爷",
+            "照顾",
+            "喉咙",
+            "吉他",
+            "弄堂",
+            "蚂蚱",
+            "凤凰",
+            "拖沓",
+            "寒碜",
+            "糟蹋",
+            "倒腾",
+            "报复",
+            "逻辑",
+            "盘缠",
+            "喽啰",
+            "牢骚",
+            "咖喱",
+            "扫把",
+            "惦记",
        }
        self.must_not_neural_tone_words = {
-            "男子", "女子", "分子", "原子", "量子", "莲子", "石子", "瓜子", "电子", "人人", "虎虎"
+            "男子",
+            "女子",
+            "分子",
+            "原子",
+            "量子",
+            "莲子",
+            "石子",
+            "瓜子",
+            "电子",
+            "人人",
+            "虎虎",
        }
        self.punc = "：，；。？！“”‘’':,;.?!"

@@ -72,14 +463,15 @@ class ToneSandhi():
    # word: "家里"
    # pos: "s"
    # finals: ['ia1', 'i3']
-    def _neural_sandhi(self, word: str, pos: str,
-                       finals: List[str]) -> List[str]:
-
+    def _neural_sandhi(self, word: str, pos: str, finals: List[str]) -> List[str]:
        # reduplication words for n., v. and a., e.g. 奶奶, 试试, 旺旺
        for j, item in enumerate(word):
-            if j - 1 >= 0 and item == word[j - 1] and pos[0] in {
-                    "n", "v", "a"
-            } and word not in self.must_not_neural_tone_words:
+            if (
+                j - 1 >= 0
+                and item == word[j - 1]
+                and pos[0] in {"n", "v", "a"}
+                and word not in self.must_not_neural_tone_words
+            ):
                finals[j] = finals[j][:-1] + "5"
        ge_idx = word.find("个")
        if len(word) >= 1 and word[-1] in "吧呢哈啊呐噻嘛吖嗨呐哦哒额滴哩哟喽啰耶喔诶":
@@ -89,9 +481,12 @@ class ToneSandhi():
        # e.g. 走了, 看着, 去过
        elif len(word) == 1 and word in "了着过" and pos in {"ul", "uz", "ug"}:
            finals[-1] = finals[-1][:-1] + "5"
-        elif len(word) > 1 and word[-1] in "们子" and pos in {
-                "r", "n"
-        } and word not in self.must_not_neural_tone_words:
+        elif (
+            len(word) > 1
+            and word[-1] in "们子"
+            and pos in {"r", "n"}
+            and word not in self.must_not_neural_tone_words
+        ):
            finals[-1] = finals[-1][:-1] + "5"
        # e.g. 桌上, 地下, 家里
        elif len(word) > 1 and word[-1] in "上下里" and pos in {"s", "l", "f"}:
@@ -100,21 +495,26 @@ class ToneSandhi():
        elif len(word) > 1 and word[-1] in "来去" and word[-2] in "上下进出回过起开":
            finals[-1] = finals[-1][:-1] + "5"
        # "个" used as a measure word
-        elif (ge_idx >= 1 and
-              (word[ge_idx - 1].isnumeric() or
-               word[ge_idx - 1] in "几有两半多各整每做是")) or word == '个':
+        elif (
+            ge_idx >= 1
+            and (word[ge_idx - 1].isnumeric() or word[ge_idx - 1] in "几有两半多各整每做是")
+        ) or word == "个":
            finals[ge_idx] = finals[ge_idx][:-1] + "5"
        else:
-            if word in self.must_neural_tone_words or word[
-                    -2:] in self.must_neural_tone_words:
+            if (
+                word in self.must_neural_tone_words
+                or word[-2:] in self.must_neural_tone_words
+            ):
                finals[-1] = finals[-1][:-1] + "5"

        word_list = self._split_word(word)
-        finals_list = [finals[:len(word_list[0])], finals[len(word_list[0]):]]
+        finals_list = [finals[: len(word_list[0])], finals[len(word_list[0]) :]]
        for i, word in enumerate(word_list):
            # words conventionally read with a neutral tone in Chinese
-            if word in self.must_neural_tone_words or word[
-                    -2:] in self.must_neural_tone_words:
+            if (
+                word in self.must_neural_tone_words
+                or word[-2:] in self.must_neural_tone_words
+            ):
                finals_list[i][-1] = finals_list[i][-1][:-1] + "5"
        finals = sum(finals_list, [])
        return finals
@@ -126,15 +526,15 @@ class ToneSandhi():
        else:
            for i, char in enumerate(word):
                # "不" before tone4 should be bu2, e.g. 不怕
-                if char == "不" and i + 1 < len(word) and finals[i +
-                                                                1][-1] == "4":
+                if char == "不" and i + 1 < len(word) and finals[i + 1][-1] == "4":
                    finals[i] = finals[i][:-1] + "2"
        return finals

    def _yi_sandhi(self, word: str, finals: List[str]) -> List[str]:
        # "一" in number sequences, e.g. 一零零, 二一零
        if word.find("一") != -1 and all(
-            [item.isnumeric() for item in word if item != "一"]):
+            [item.isnumeric() for item in word if item != "一"]
+        ):
            return finals
        # "一" between reduplication words should be yi5, e.g. 看一看
        elif len(word) == 3 and word[1] == "一" and word[0] == word[-1]:
@@ -161,10 +561,10 @@ class ToneSandhi():
        first_subword = word_list[0]
        first_begin_idx = word.find(first_subword)
        if first_begin_idx == 0:
-            second_subword = word[len(first_subword):]
+            second_subword = word[len(first_subword) :]
            new_word_list = [first_subword, second_subword]
        else:
-            second_subword = word[:-len(first_subword)]
+            second_subword = word[: -len(first_subword)]
            new_word_list = [second_subword, first_subword]
        return new_word_list

@@ -182,18 +582,19 @@ class ToneSandhi():
                elif len(word_list[0]) == 1:
                    finals[1] = finals[1][:-1] + "2"
            else:
-                finals_list = [
-                    finals[:len(word_list[0])], finals[len(word_list[0]):]
-                ]
+                finals_list = [finals[: len(word_list[0])], finals[len(word_list[0]) :]]
                if len(finals_list) == 2:
                    for i, sub in enumerate(finals_list):
                        # e.g. 所有/人
                        if self._all_tone_three(sub) and len(sub) == 2:
                            finals_list[i][0] = finals_list[i][0][:-1] + "2"
                        # e.g. 好/喜欢
-                        elif i == 1 and not self._all_tone_three(sub) and finals_list[i][0][-1] == "3" and \
-                                finals_list[0][-1][-1] == "3":
-
+                        elif (
+                            i == 1
+                            and not self._all_tone_three(sub)
+                            and finals_list[i][0][-1] == "3"
+                            and finals_list[0][-1][-1] == "3"
+                        ):
                            finals_list[0][-1] = finals_list[0][-1][:-1] + "2"
                        finals = sum(finals_list, [])
        # split idiom into two words whose length is 2
@@ -222,7 +623,7 @@ class ToneSandhi():
                new_seg.append((word, pos))
            last_word = word[:]
        if last_word == "不":
-            new_seg.append((last_word, 'd'))
+            new_seg.append((last_word, "d"))
            last_word = ""
        return new_seg

@@ -236,12 +637,21 @@ class ToneSandhi():
        new_seg = []
        # function 1
        for i, (word, pos) in enumerate(seg):
-            if i - 1 >= 0 and word == "一" and i + 1 < len(seg) and seg[i - 1][
-                    0] == seg[i + 1][0] and seg[i - 1][1] == "v":
+            if (
+                i - 1 >= 0
+                and word == "一"
+                and i + 1 < len(seg)
+                and seg[i - 1][0] == seg[i + 1][0]
+                and seg[i - 1][1] == "v"
+            ):
                new_seg[i - 1][0] = new_seg[i - 1][0] + "一" + new_seg[i - 1][0]
            else:
-                if i - 2 >= 0 and seg[i - 1][0] == "一" and seg[i - 2][
-                        0] == word and pos == "v":
+                if (
+                    i - 2 >= 0
+                    and seg[i - 1][0] == "一"
+                    and seg[i - 2][0] == word
+                    and pos == "v"
+                ):
                    continue
                else:
                    new_seg.append([word, pos])
@@ -257,22 +667,27 @@ class ToneSandhi():

    # the first and the second words are all_tone_three
    def _merge_continuous_three_tones(
-            self, seg: List[Tuple[str, str]]) -> List[Tuple[str, str]]:
+        self, seg: List[Tuple[str, str]]
+    ) -> List[Tuple[str, str]]:
        new_seg = []
        sub_finals_list = [
-            lazy_pinyin(
-                word, neutral_tone_with_five=True, style=Style.FINALS_TONE3)
+            lazy_pinyin(word, neutral_tone_with_five=True, style=Style.FINALS_TONE3)
            for (word, pos) in seg
        ]
        assert len(sub_finals_list) == len(seg)
        merge_last = [False] * len(seg)
        for i, (word, pos) in enumerate(seg):
-            if i - 1 >= 0 and self._all_tone_three(
-                    sub_finals_list[i - 1]) and self._all_tone_three(
-                        sub_finals_list[i]) and not merge_last[i - 1]:
+            if (
+                i - 1 >= 0
+                and self._all_tone_three(sub_finals_list[i - 1])
+                and self._all_tone_three(sub_finals_list[i])
+                and not merge_last[i - 1]
+            ):
                # if the previous word is a reduplication, do not merge: reduplicated words must go through _neural_sandhi
-                if not self._is_reduplication(seg[i - 1][0]) and len(
-                        seg[i - 1][0]) + len(seg[i][0]) <= 3:
+                if (
+                    not self._is_reduplication(seg[i - 1][0])
+                    and len(seg[i - 1][0]) + len(seg[i][0]) <= 3
+                ):
                    new_seg[-1][0] = new_seg[-1][0] + seg[i][0]
                    merge_last[i] = True
                else:
@@ -287,21 +702,27 @@ class ToneSandhi():

    # the last char of first word and the first char of second word is tone_three
    def _merge_continuous_three_tones_2(
-            self, seg: List[Tuple[str, str]]) -> List[Tuple[str, str]]:
+        self, seg: List[Tuple[str, str]]
+    ) -> List[Tuple[str, str]]:
        new_seg = []
        sub_finals_list = [
-            lazy_pinyin(
-                word, neutral_tone_with_five=True, style=Style.FINALS_TONE3)
+            lazy_pinyin(word, neutral_tone_with_five=True, style=Style.FINALS_TONE3)
            for (word, pos) in seg
        ]
        assert len(sub_finals_list) == len(seg)
        merge_last = [False] * len(seg)
        for i, (word, pos) in enumerate(seg):
-            if i - 1 >= 0 and sub_finals_list[i - 1][-1][-1] == "3" and sub_finals_list[i][0][-1] == "3" and not \
-                    merge_last[i - 1]:
+            if (
+                i - 1 >= 0
+                and sub_finals_list[i - 1][-1][-1] == "3"
+                and sub_finals_list[i][0][-1] == "3"
+                and not merge_last[i - 1]
+            ):
                # if the previous word is a reduplication, do not merge: reduplicated words must go through _neural_sandhi
-                if not self._is_reduplication(seg[i - 1][0]) and len(
-                        seg[i - 1][0]) + len(seg[i][0]) <= 3:
+                if (
+                    not self._is_reduplication(seg[i - 1][0])
+                    and len(seg[i - 1][0]) + len(seg[i][0]) <= 3
+                ):
                    new_seg[-1][0] = new_seg[-1][0] + seg[i][0]
                    merge_last[i] = True
                else:
@@ -313,14 +734,13 @@ class ToneSandhi():
    def _merge_er(self, seg: List[Tuple[str, str]]) -> List[Tuple[str, str]]:
        new_seg = []
        for i, (word, pos) in enumerate(seg):
-            if i - 1 >= 0 and word == "儿" and seg[i-1][0] != "#":
+            if i - 1 >= 0 and word == "儿" and seg[i - 1][0] != "#":
                new_seg[-1][0] = new_seg[-1][0] + seg[i][0]
            else:
                new_seg.append([word, pos])
        return new_seg

-    def _merge_reduplication(
-            self, seg: List[Tuple[str, str]]) -> List[Tuple[str, str]]:
+    def _merge_reduplication(self, seg: List[Tuple[str, str]]) -> List[Tuple[str, str]]:
        new_seg = []
        for i, (word, pos) in enumerate(seg):
            if new_seg and word == new_seg[-1][0]:
@@ -329,8 +749,7 @@ class ToneSandhi():
                new_seg.append([word, pos])
        return new_seg

-    def pre_merge_for_modify(
-            self, seg: List[Tuple[str, str]]) -> List[Tuple[str, str]]:
+    def pre_merge_for_modify(self, seg: List[Tuple[str, str]]) -> List[Tuple[str, str]]:
        seg = self._merge_bu(seg)
        try:
            seg = self._merge_yi(seg)
@@ -349,8 +768,7 @@ class ToneSandhi():
        seg = self._merge_er(seg)
        return seg

-    def modified_tone(self, word: str, pos: str,
-                      finals: List[str]) -> List[str]:
+    def modified_tone(self, word: str, pos: str, finals: List[str]) -> List[str]:
        finals = self._bu_sandhi(word, finals)
        finals = self._yi_sandhi(word, finals)
        finals = self._neural_sandhi(word, pos, finals)