Version Check (#1390)

* version check

* fix webui and symbols

* fix v1 language map
This commit is contained in:
KamioRinn
2024-08-05 17:24:42 +08:00
committed by GitHub
parent 0c25e57959
commit 4e34814c70
8 changed files with 157 additions and 78 deletions

View File

@@ -1,18 +1,26 @@
import os
if os.environ.get("version","v1")=="v1":
from text.symbols import symbols
else:
from text.symbols2 import symbols
# if os.environ.get("version","v1")=="v1":
# from text.symbols import symbols
# else:
# from text.symbols2 import symbols
_symbol_to_id = {s: i for i, s in enumerate(symbols)}
from text import symbols as symbols_v1
from text import symbols2 as symbols_v2
def cleaned_text_to_sequence(cleaned_text):
_symbol_to_id_v1 = {s: i for i, s in enumerate(symbols_v1.symbols)}
_symbol_to_id_v2 = {s: i for i, s in enumerate(symbols_v2.symbols)}
def cleaned_text_to_sequence(cleaned_text, version):
    '''Converts a sequence of cleaned symbols to the IDs of those symbols.
    Args:
      cleaned_text: iterable of symbols; every element must be a key of the
        symbol table selected by `version`
      version: "v1" selects the table built from symbols_v1.symbols; any
        other value selects the table built from symbols_v2.symbols
    Returns:
      List of integers, one per input symbol, giving each symbol's index in
      the selected symbol list
    Raises:
      KeyError: if a symbol is not present in the selected table
    '''
    # Pick the lookup table once; only the version-specific table is consulted,
    # so symbols that exist in just one symbol set resolve against the right one.
    symbol_to_id = _symbol_to_id_v1 if version == "v1" else _symbol_to_id_v2
    return [symbol_to_id[symbol] for symbol in cleaned_text]

View File

@@ -1,13 +1,17 @@
from text import japanese, cleaned_text_to_sequence, english,korean,cantonese
import os
if os.environ.get("version","v1")=="v1":
from text import chinese
from text.symbols import symbols
else:
from text import chinese2 as chinese
from text.symbols2 import symbols
# if os.environ.get("version","v1")=="v1":
# from text import chinese
# from text.symbols import symbols
# else:
# from text import chinese2 as chinese
# from text.symbols2 import symbols
from text import symbols as symbols_v1
from text import symbols2 as symbols_v2
from text import chinese as chinese_v1
from text import chinese2 as chinese_v2
language_module_map = {"zh": chinese, "ja": japanese, "en": english, "ko": korean,"yue":cantonese}
special = [
# ("%", "zh", "SP"),
("", "zh", "SP2"),
@@ -16,13 +20,20 @@ special = [
]
def clean_text(text, language):
def clean_text(text, language, version):
if version == "v1":
symbols = symbols_v1.symbols
language_module_map = {"zh": chinese_v1, "ja": japanese, "en": english}
else:
symbols = symbols_v2.symbols
language_module_map = {"zh": chinese_v2, "ja": japanese, "en": english, "ko": korean,"yue":cantonese}
if(language not in language_module_map):
language="en"
text=" "
for special_s, special_l, target_symbol in special:
if special_s in text and language == special_l:
return clean_special(text, language, special_s, target_symbol)
return clean_special(text, language, special_s, target_symbol, version)
language_module = language_module_map[language]
if hasattr(language_module,"text_normalize"):
norm_text = language_module.text_normalize(text)
@@ -42,11 +53,18 @@ def clean_text(text, language):
word2ph = None
for ph in phones:
assert ph in symbols
phones = ['UNK' if ph not in symbols else ph for ph in phones]
return phones, word2ph, norm_text
def clean_special(text, language, special_s, target_symbol):
def clean_special(text, language, special_s, target_symbol, version):
if version == "v1":
symbols = symbols_v1.symbols
language_module_map = {"zh": chinese_v1, "ja": japanese, "en": english}
else:
symbols = symbols_v2.symbols
language_module_map = {"zh": chinese_v2, "ja": japanese, "en": english, "ko": korean,"yue":cantonese}
"""
特殊静音段sp符号处理
"""

View File

@@ -6,10 +6,7 @@ from g2p_en import G2p
from text.symbols import punctuation
if os.environ.get("version","v1")=="v1":
from text.symbols import symbols
else:
from text.symbols2 import symbols
from text.symbols2 import symbols
import unicodedata
from builtins import str as unicode

View File

@@ -4,12 +4,6 @@ import sys
import pyopenjtalk
import os
if os.environ.get("version","v1")=="v1":
from text.symbols import symbols
else:
from text.symbols2 import symbols
from text.symbols import punctuation
# Regular expression matching Japanese without punctuation marks:
_japanese_characters = re.compile(
@@ -61,12 +55,13 @@ def post_replace_ph(ph):
"": ",",
"...": "",
}
if ph in rep_map.keys():
ph = rep_map[ph]
if ph in symbols:
return ph
if ph not in symbols:
ph = "UNK"
# if ph in symbols:
# return ph
# if ph not in symbols:
# ph = "UNK"
return ph

View File

@@ -2,11 +2,8 @@ import re
from jamo import h2j, j2hcj
import ko_pron
from g2pk2 import G2p
import os
if os.environ.get("version","v1")=="v1":
from text.symbols import symbols
else:
from text.symbols2 import symbols
from text.symbols2 import symbols
# This is a list of Korean classifiers preceded by pure Korean numerals.
_korean_classifiers = '군데 권 개 그루 닢 대 두 마리 모 모금 뭇 발 발짝 방 번 벌 보루 살 수 술 시 쌈 움큼 정 짝 채 척 첩 축 켤레 톨 통'