Refactor: Format Code with Ruff and Update Deprecated G2PW Link (#2255)
* ruff check --fix
* ruff format --line-length 120 --target-version py39
* Change the link for G2PW Model
* update pytorch version and colab
This commit is contained in:
@@ -18,10 +18,7 @@ from pypinyin.constants import SUPPORT_UCS4
|
||||
|
||||
# 全角半角转换
|
||||
# 英文字符全角 -> 半角映射表 (num: 52)
|
||||
F2H_ASCII_LETTERS = {
|
||||
ord(char) + 65248: ord(char)
|
||||
for char in string.ascii_letters
|
||||
}
|
||||
F2H_ASCII_LETTERS = {ord(char) + 65248: ord(char) for char in string.ascii_letters}
|
||||
|
||||
# 英文字符半角 -> 全角映射表
|
||||
H2F_ASCII_LETTERS = {value: key for key, value in F2H_ASCII_LETTERS.items()}
|
||||
@@ -37,26 +34,29 @@ F2H_PUNCTUATIONS = {ord(char) + 65248: ord(char) for char in string.punctuation}
|
||||
H2F_PUNCTUATIONS = {value: key for key, value in F2H_PUNCTUATIONS.items()}
|
||||
|
||||
# 空格 (num: 1)
|
||||
F2H_SPACE = {'\u3000': ' '}
|
||||
H2F_SPACE = {' ': '\u3000'}
|
||||
F2H_SPACE = {"\u3000": " "}
|
||||
H2F_SPACE = {" ": "\u3000"}
|
||||
|
||||
# 非"有拼音的汉字"的字符串,可用于NSW提取
|
||||
if SUPPORT_UCS4:
|
||||
RE_NSW = re.compile(r'(?:[^'
|
||||
r'\u3007' # 〇
|
||||
r'\u3400-\u4dbf' # CJK扩展A:[3400-4DBF]
|
||||
r'\u4e00-\u9fff' # CJK基本:[4E00-9FFF]
|
||||
r'\uf900-\ufaff' # CJK兼容:[F900-FAFF]
|
||||
r'\U00020000-\U0002A6DF' # CJK扩展B:[20000-2A6DF]
|
||||
r'\U0002A703-\U0002B73F' # CJK扩展C:[2A700-2B73F]
|
||||
r'\U0002B740-\U0002B81D' # CJK扩展D:[2B740-2B81D]
|
||||
r'\U0002F80A-\U0002FA1F' # CJK兼容扩展:[2F800-2FA1F]
|
||||
r'])+')
|
||||
RE_NSW = re.compile(
|
||||
r"(?:[^"
|
||||
r"\u3007" # 〇
|
||||
r"\u3400-\u4dbf" # CJK扩展A:[3400-4DBF]
|
||||
r"\u4e00-\u9fff" # CJK基本:[4E00-9FFF]
|
||||
r"\uf900-\ufaff" # CJK兼容:[F900-FAFF]
|
||||
r"\U00020000-\U0002A6DF" # CJK扩展B:[20000-2A6DF]
|
||||
r"\U0002A703-\U0002B73F" # CJK扩展C:[2A700-2B73F]
|
||||
r"\U0002B740-\U0002B81D" # CJK扩展D:[2B740-2B81D]
|
||||
r"\U0002F80A-\U0002FA1F" # CJK兼容扩展:[2F800-2FA1F]
|
||||
r"])+"
|
||||
)
|
||||
else:
|
||||
RE_NSW = re.compile( # pragma: no cover
|
||||
r'(?:[^'
|
||||
r'\u3007' # 〇
|
||||
r'\u3400-\u4dbf' # CJK扩展A:[3400-4DBF]
|
||||
r'\u4e00-\u9fff' # CJK基本:[4E00-9FFF]
|
||||
r'\uf900-\ufaff' # CJK兼容:[F900-FAFF]
|
||||
r'])+')
|
||||
r"(?:[^"
|
||||
r"\u3007" # 〇
|
||||
r"\u3400-\u4dbf" # CJK扩展A:[3400-4DBF]
|
||||
r"\u4e00-\u9fff" # CJK基本:[4E00-9FFF]
|
||||
r"\uf900-\ufaff" # CJK兼容:[F900-FAFF]
|
||||
r"])+"
|
||||
)
|
||||
|
||||
Reference in New Issue
Block a user