Refactor: Format Code with Ruff and Update Deprecated G2PW Link (#2255)
* `ruff check --fix`
* `ruff format --line-length 120 --target-version py39`
* Change the link for the G2PW model
* Update the PyTorch version and Colab
This commit is contained in:
@@ -17,17 +17,25 @@ PP_FIX_DICT_PATH = os.path.join(current_file_path, "polyphonic-fix.rep")
|
||||
|
||||
|
||||
class G2PWPinyin(Pinyin):
|
||||
def __init__(self, model_dir='G2PWModel/', model_source=None,
|
||||
enable_non_tradional_chinese=True,
|
||||
v_to_u=False, neutral_tone_with_five=False, tone_sandhi=False, **kwargs):
|
||||
def __init__(
|
||||
self,
|
||||
model_dir="G2PWModel/",
|
||||
model_source=None,
|
||||
enable_non_tradional_chinese=True,
|
||||
v_to_u=False,
|
||||
neutral_tone_with_five=False,
|
||||
tone_sandhi=False,
|
||||
**kwargs,
|
||||
):
|
||||
self._g2pw = G2PWOnnxConverter(
|
||||
model_dir=model_dir,
|
||||
style='pinyin',
|
||||
style="pinyin",
|
||||
model_source=model_source,
|
||||
enable_non_tradional_chinese=enable_non_tradional_chinese,
|
||||
)
|
||||
self._converter = Converter(
|
||||
self._g2pw, v_to_u=v_to_u,
|
||||
self._g2pw,
|
||||
v_to_u=v_to_u,
|
||||
neutral_tone_with_five=neutral_tone_with_five,
|
||||
tone_sandhi=tone_sandhi,
|
||||
)
|
||||
@@ -37,31 +45,25 @@ class G2PWPinyin(Pinyin):
|
||||
|
||||
|
||||
class Converter(UltimateConverter):
|
||||
def __init__(self, g2pw_instance, v_to_u=False,
|
||||
neutral_tone_with_five=False,
|
||||
tone_sandhi=False, **kwargs):
|
||||
def __init__(self, g2pw_instance, v_to_u=False, neutral_tone_with_five=False, tone_sandhi=False, **kwargs):
|
||||
super(Converter, self).__init__(
|
||||
v_to_u=v_to_u,
|
||||
neutral_tone_with_five=neutral_tone_with_five,
|
||||
tone_sandhi=tone_sandhi, **kwargs)
|
||||
v_to_u=v_to_u, neutral_tone_with_five=neutral_tone_with_five, tone_sandhi=tone_sandhi, **kwargs
|
||||
)
|
||||
|
||||
self._g2pw = g2pw_instance
|
||||
|
||||
def convert(self, words, style, heteronym, errors, strict, **kwargs):
|
||||
pys = []
|
||||
if RE_HANS.match(words):
|
||||
pys = self._to_pinyin(words, style=style, heteronym=heteronym,
|
||||
errors=errors, strict=strict)
|
||||
pys = self._to_pinyin(words, style=style, heteronym=heteronym, errors=errors, strict=strict)
|
||||
post_data = self.post_pinyin(words, heteronym, pys)
|
||||
if post_data is not None:
|
||||
pys = post_data
|
||||
|
||||
pys = self.convert_styles(
|
||||
pys, words, style, heteronym, errors, strict)
|
||||
pys = self.convert_styles(pys, words, style, heteronym, errors, strict)
|
||||
|
||||
else:
|
||||
py = self.handle_nopinyin(words, style=style, errors=errors,
|
||||
heteronym=heteronym, strict=strict)
|
||||
py = self.handle_nopinyin(words, style=style, errors=errors, heteronym=heteronym, strict=strict)
|
||||
if py:
|
||||
pys.extend(py)
|
||||
|
||||
@@ -73,13 +75,11 @@ class Converter(UltimateConverter):
|
||||
g2pw_pinyin = self._g2pw(han)
|
||||
|
||||
if not g2pw_pinyin: # g2pw 不支持的汉字改为使用 pypinyin 原有逻辑
|
||||
return super(Converter, self).convert(
|
||||
han, Style.TONE, heteronym, errors, strict, **kwargs)
|
||||
return super(Converter, self).convert(han, Style.TONE, heteronym, errors, strict, **kwargs)
|
||||
|
||||
for i, item in enumerate(g2pw_pinyin[0]):
|
||||
if item is None: # g2pw 不支持的汉字改为使用 pypinyin 原有逻辑
|
||||
py = super(Converter, self).convert(
|
||||
han[i], Style.TONE, heteronym, errors, strict, **kwargs)
|
||||
py = super(Converter, self).convert(han[i], Style.TONE, heteronym, errors, strict, **kwargs)
|
||||
pinyins.extend(py)
|
||||
else:
|
||||
pinyins.append([to_tone(item)])
|
||||
@@ -104,7 +104,7 @@ def _remove_dup_and_empty(lst_list):
|
||||
if lst:
|
||||
new_lst_list.append(lst)
|
||||
else:
|
||||
new_lst_list.append([''])
|
||||
new_lst_list.append([""])
|
||||
|
||||
return new_lst_list
|
||||
|
||||
@@ -127,17 +127,17 @@ def get_dict():
|
||||
|
||||
def read_dict():
|
||||
polyphonic_dict = {}
|
||||
with open(PP_DICT_PATH,encoding="utf-8") as f:
|
||||
with open(PP_DICT_PATH, encoding="utf-8") as f:
|
||||
line = f.readline()
|
||||
while line:
|
||||
key, value_str = line.split(':')
|
||||
key, value_str = line.split(":")
|
||||
value = eval(value_str.strip())
|
||||
polyphonic_dict[key.strip()] = value
|
||||
line = f.readline()
|
||||
with open(PP_FIX_DICT_PATH,encoding="utf-8") as f:
|
||||
with open(PP_FIX_DICT_PATH, encoding="utf-8") as f:
|
||||
line = f.readline()
|
||||
while line:
|
||||
key, value_str = line.split(':')
|
||||
key, value_str = line.split(":")
|
||||
value = eval(value_str.strip())
|
||||
polyphonic_dict[key.strip()] = value
|
||||
line = f.readline()
|
||||
|
||||
Reference in New Issue
Block a user