Better normalization (#1351)

2024-07-27 16:03:43 +08:00
parent f042030cca
commit e851ae34c9
6 changed files with 93 additions and 18 deletions
--- a/GPT_SoVITS/text/english.py
+++ b/GPT_SoVITS/text/english.py
@@ -4,7 +4,7 @@ import re
 import wordsegment
 from g2p_en import G2p

-from string import punctuation
+from text.symbols import punctuation

 from text import symbols

@@ -110,6 +110,13 @@ def replace_phs(phs):
    return phs_new


+def replace_consecutive_punctuation(text):
+    punctuations = ''.join(re.escape(p) for p in punctuation)
+    pattern = f'([{punctuations}])([{punctuations}])+'
+    result = re.sub(pattern, r'\1', text)
+    return result
+
+
 def read_dict():
    g2p_dict = {}
    start_line = 49
@@ -234,6 +241,9 @@ def text_normalize(text):
    text = re.sub(r"(?i)i\.e\.", "that is", text)
    text = re.sub(r"(?i)e\.g\.", "for example", text)

+    # Collapse repeated punctuation to avoid reference leakage caused by it
+    text = replace_consecutive_punctuation(text)
+
    return text