Adjust text normalization

2024-02-21 01:14:09 +08:00
parent a16de2e7c6
commit 84062074a3
4 changed files with 27 additions and 5 deletions
--- a/GPT_SoVITS/text/zh_normalization/text_normlization.py
+++ b/GPT_SoVITS/text/zh_normalization/text_normlization.py
@@ -33,6 +33,7 @@ from .num import RE_NUMBER
 from .num import RE_PERCENTAGE
 from .num import RE_POSITIVE_QUANTIFIERS
 from .num import RE_RANGE
+from .num import RE_TO_RANGE
 from .num import replace_default_num
 from .num import replace_frac
 from .num import replace_negative_num
@@ -40,6 +41,7 @@ from .num import replace_number
 from .num import replace_percentage
 from .num import replace_positive_quantifier
 from .num import replace_range
+from .num import replace_to_range
 from .phonecode import RE_MOBILE_PHONE
 from .phonecode import RE_NATIONAL_UNIFORM_NUMBER
 from .phonecode import RE_TELEPHONE
@@ -65,7 +67,7 @@ class TextNormalizer():
        if lang == "zh":
            text = text.replace(" ", "")
            # 过滤掉特殊字符
-            text = re.sub(r'[——《》【】<=>{}()（）#&@“”^_|…\\]', '', text)
+            text = re.sub(r'[——《》【】<=>{}()（）#&@“”^_|\\]', '', text)
        text = self.SENTENCE_SPLITOR.sub(r'\1\n', text)
        text = text.strip()
        sentences = [sentence.strip() for sentence in re.split(r'\n+', text)]
@@ -73,8 +75,8 @@ class TextNormalizer():

    def _post_replace(self, sentence: str) -> str:
        sentence = sentence.replace('/', '每')
-        sentence = sentence.replace('~', '至')
-        sentence = sentence.replace('～', '至')
+        # sentence = sentence.replace('~', '至')
+        # sentence = sentence.replace('～', '至')
        sentence = sentence.replace('①', '一')
        sentence = sentence.replace('②', '二')
        sentence = sentence.replace('③', '三')
@@ -128,6 +130,8 @@ class TextNormalizer():
        sentence = RE_TIME_RANGE.sub(replace_time, sentence)
        sentence = RE_TIME.sub(replace_time, sentence)

+        # 处理~波浪号作为至的替换
+        sentence = RE_TO_RANGE.sub(replace_to_range, sentence)
        sentence = RE_TEMPERATURE.sub(replace_temperature, sentence)
        sentence = replace_measure(sentence)
        sentence = RE_FRAC.sub(replace_frac, sentence)