Version Check (#1390)
* version check
* fix webui and symbols
* fix v1 language map
@@ -1,18 +1,26 @@
 import os
-if os.environ.get("version","v1")=="v1":
-    from text.symbols import symbols
-else:
-    from text.symbols2 import symbols
+# if os.environ.get("version","v1")=="v1":
+#     from text.symbols import symbols
+# else:
+#     from text.symbols2 import symbols

-_symbol_to_id = {s: i for i, s in enumerate(symbols)}
+from text import symbols as symbols_v1
+from text import symbols2 as symbols_v2

-def cleaned_text_to_sequence(cleaned_text):
+_symbol_to_id_v1 = {s: i for i, s in enumerate(symbols_v1.symbols)}
+_symbol_to_id_v2 = {s: i for i, s in enumerate(symbols_v2.symbols)}
+
+
+def cleaned_text_to_sequence(cleaned_text, version):
     '''Converts a string of text to a sequence of IDs corresponding to the symbols in the text.
     Args:
       text: string to convert to a sequence
     Returns:
       List of integers corresponding to the symbols in the text
     '''
-    phones = [_symbol_to_id[symbol] for symbol in cleaned_text]
+    if version == "v1":
+        phones = [_symbol_to_id_v1[symbol] for symbol in cleaned_text]
+    else:
+        phones = [_symbol_to_id_v2[symbol] for symbol in cleaned_text]

     return phones
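The hunk above stops choosing the symbol table at import time and instead selects it per call. A minimal usage sketch, not part of the commit; the phoneme list is illustrative and assumed to exist in both symbol tables:

```python
from text import cleaned_text_to_sequence

# "," and "UNK" are assumed to be present in both text.symbols and text.symbols2
phones = [",", "UNK"]
ids_v1 = cleaned_text_to_sequence(phones, "v1")  # indices from the v1 table (text.symbols)
ids_v2 = cleaned_text_to_sequence(phones, "v2")  # indices from the v2 table (text.symbols2)
```

The same phoneme can map to a different integer ID depending on `version`, which is why the caller now has to pass it explicitly.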
@@ -1,13 +1,17 @@
 from text import japanese, cleaned_text_to_sequence, english,korean,cantonese
 import os
-if os.environ.get("version","v1")=="v1":
-    from text import chinese
-    from text.symbols import symbols
-else:
-    from text import chinese2 as chinese
-    from text.symbols2 import symbols
+# if os.environ.get("version","v1")=="v1":
+#     from text import chinese
+#     from text.symbols import symbols
+# else:
+#     from text import chinese2 as chinese
+#     from text.symbols2 import symbols

-language_module_map = {"zh": chinese, "ja": japanese, "en": english, "ko": korean,"yue":cantonese}
+from text import symbols as symbols_v1
+from text import symbols2 as symbols_v2
+from text import chinese as chinese_v1
+from text import chinese2 as chinese_v2
+
 special = [
     # ("%", "zh", "SP"),
     ("¥", "zh", "SP2"),
@@ -16,13 +20,20 @@ special = [
 ]


-def clean_text(text, language):
+def clean_text(text, language, version):
+    if version == "v1":
+        symbols = symbols_v1.symbols
+        language_module_map = {"zh": chinese_v1, "ja": japanese, "en": english}
+    else:
+        symbols = symbols_v2.symbols
+        language_module_map = {"zh": chinese_v2, "ja": japanese, "en": english, "ko": korean,"yue":cantonese}
+
     if(language not in language_module_map):
         language="en"
         text=" "
     for special_s, special_l, target_symbol in special:
         if special_s in text and language == special_l:
-            return clean_special(text, language, special_s, target_symbol)
+            return clean_special(text, language, special_s, target_symbol, version)
     language_module = language_module_map[language]
     if hasattr(language_module,"text_normalize"):
         norm_text = language_module.text_normalize(text)
@@ -42,11 +53,18 @@ def clean_text(text, language):
     word2ph = None

-    for ph in phones:
-        assert ph in symbols
+    phones = ['UNK' if ph not in symbols else ph for ph in phones]
+
     return phones, word2ph, norm_text


-def clean_special(text, language, special_s, target_symbol):
+def clean_special(text, language, special_s, target_symbol, version):
+    if version == "v1":
+        symbols = symbols_v1.symbols
+        language_module_map = {"zh": chinese_v1, "ja": japanese, "en": english}
+    else:
+        symbols = symbols_v2.symbols
+        language_module_map = {"zh": chinese_v2, "ja": japanese, "en": english, "ko": korean,"yue":cantonese}
+
     """
     特殊静音段sp符号处理
     """
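Taken together, the two hunks above make `clean_text` pick the symbol table and the Chinese frontend per call, and replace the hard `assert ph in symbols` with an `'UNK'` fallback. A hedged sketch of the new call site, assuming these hunks live in the text package's cleaner module and using an illustrative input string:

```python
from text.cleaner import clean_text  # assumed module path for the hunks above

phones, word2ph, norm_text = clean_text("你好。", "zh", "v2")  # "zh" routes to chinese2 under v2
# Under "v1" the same call would use chinese_v1 and the v1 symbol table; any phoneme
# missing from the selected table now comes back as "UNK" instead of failing an assert.
```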
@@ -6,10 +6,7 @@ from g2p_en import G2p

 from text.symbols import punctuation

-if os.environ.get("version","v1")=="v1":
-    from text.symbols import symbols
-else:
-    from text.symbols2 import symbols
+from text.symbols2 import symbols

 import unicodedata
 from builtins import str as unicode
@@ -4,12 +4,6 @@ import sys

 import pyopenjtalk

-
-import os
-if os.environ.get("version","v1")=="v1":
-    from text.symbols import symbols
-else:
-    from text.symbols2 import symbols
 from text.symbols import punctuation
 # Regular expression matching Japanese without punctuation marks:
 _japanese_characters = re.compile(
@@ -61,12 +55,13 @@ def post_replace_ph(ph):
         "、": ",",
         "...": "…",
     }

     if ph in rep_map.keys():
         ph = rep_map[ph]
-    if ph in symbols:
-        return ph
-    if ph not in symbols:
-        ph = "UNK"
+    # if ph in symbols:
+    #     return ph
+    # if ph not in symbols:
+    #     ph = "UNK"
+
     return ph

@@ -2,11 +2,8 @@ import re
 from jamo import h2j, j2hcj
 import ko_pron
 from g2pk2 import G2p
-import os
-if os.environ.get("version","v1")=="v1":
-    from text.symbols import symbols
-else:
-    from text.symbols2 import symbols
+
+from text.symbols2 import symbols

 # This is a list of Korean classifiers preceded by pure Korean numerals.
 _korean_classifiers = '군데 권 개 그루 닢 대 두 마리 모 모금 뭇 발 발짝 방 번 벌 보루 살 수 술 시 쌈 움큼 정 짝 채 척 첩 축 켤레 톨 통'
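With the per-module `os.environ.get("version","v1")` checks removed from the english, japanese, and korean frontends, the version is decided once by the caller and threaded through the public functions. A hedged end-to-end sketch of how a caller (for example the webui mentioned in the commit message) might wire this up; the environment-variable fallback shown here is an assumption, not part of the diff:

```python
import os

from text import cleaned_text_to_sequence
from text.cleaner import clean_text  # assumed module path

version = os.environ.get("version", "v1")  # decided once at the entry point (assumption)
phones, word2ph, norm_text = clean_text("Hello there.", "en", version)
phone_ids = cleaned_text_to_sequence(phones, version)  # same version passed all the way down
```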