more code refactor

This commit is contained in:
Blaise
2024-01-16 17:14:18 +01:00
parent 0d92575115
commit 0d3d47f3c3
44 changed files with 4516 additions and 2623 deletions

View File

@@ -1,29 +1,27 @@
from text import chinese, japanese, cleaned_text_to_sequence, symbols, english
language_module_map = {
'zh': chinese,
"ja": japanese,
'en': english
}
language_module_map = {"zh": chinese, "ja": japanese, "en": english}
special = [
('%', 'zh', "SP"),
('', 'zh', "SP2"),
('^', 'zh', "SP3"),
("%", "zh", "SP"),
("", "zh", "SP2"),
("^", "zh", "SP3"),
# ('@', 'zh', "SP4")  # No more "guichu" (meme-remix) handling; keep this consistent with the second version
]
def clean_text(text, language):
for special_s, special_l, target_symbol in special:
if special_s in text and language == special_l:
return clean_special(text, language, special_s, target_symbol)
language_module = language_module_map[language]
norm_text = language_module.text_normalize(text)
if(language=="zh"):
if language == "zh":
phones, word2ph = language_module.g2p(norm_text)
assert len(phones) == sum(word2ph)
assert len(norm_text) == len(word2ph)
else:
phones = language_module.g2p(norm_text)
word2ph=None
word2ph = None
for ph in phones:
assert ph in symbols
@@ -41,17 +39,17 @@ def clean_special(text, language, special_s, target_symbol):
new_ph = []
for ph in phones:
assert ph in symbols
if ph == ',':
if ph == ",":
new_ph.append(target_symbol)
else:
new_ph.append(ph)
return new_ph
def text_to_sequence(text, language):
    """Convert raw text into a symbol sequence for the given language.

    Args:
        text: Raw input text to clean and convert.
        language: Key into ``language_module_map`` (e.g. "zh", "ja", "en")
            selecting the language module used for cleaning.

    Returns:
        The result of ``cleaned_text_to_sequence`` applied to the cleaned
        phones.
    """
    # clean_text takes two required arguments; the previous call omitted
    # `language`, so every invocation failed with a TypeError.
    # NOTE(review): clean_text's return statement is not visible in this hunk;
    # if it returns a tuple rather than a bare phone list, unpack the phones
    # before passing them on -- TODO confirm.
    phones = clean_text(text, language)
    return cleaned_text_to_sequence(phones)
if __name__ == '__main__':
print(clean_text("你好%啊啊啊额、还是到付红四方。", 'zh'))
if __name__ == "__main__":
print(clean_text("你好%啊啊啊额、还是到付红四方。", "zh"))