Refactor: Format Code with Ruff and Update Deprecated G2PW Link (#2255)
* ruff check --fix
* ruff format --line-length 120 --target-version py39
* Change the link for the G2PW model
* Update PyTorch version and Colab
@@ -8,19 +8,17 @@ exp_name = os.environ.get("exp_name")
 i_part = os.environ.get("i_part")
 all_parts = os.environ.get("all_parts")
 if "_CUDA_VISIBLE_DEVICES" in os.environ:
     os.environ["CUDA_VISIBLE_DEVICES"] = os.environ["_CUDA_VISIBLE_DEVICES"]
 opt_dir = os.environ.get("opt_dir")
 bert_pretrained_dir = os.environ.get("bert_pretrained_dir")
 import torch
 
 is_half = eval(os.environ.get("is_half", "True")) and torch.cuda.is_available()
-version = os.environ.get('version', None)
-import sys, numpy as np, traceback, pdb
+version = os.environ.get("version", None)
+import traceback
 import os.path
 from glob import glob
 from tqdm import tqdm
 from text.cleaner import clean_text
 from transformers import AutoModelForMaskedLM, AutoTokenizer
 import numpy as np
 from tools.my_utils import clean_path
 
 # inp_text=sys.argv[1]
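For context on the hunk above: the script takes all of its configuration from environment variables rather than CLI arguments. A minimal launcher sketch — every value and the script name below are assumptions for illustration, not part of this commit:

```python
# Hypothetical launcher: all values below are assumptions for illustration.
import os
import subprocess

env = dict(os.environ)
env.update({
    "i_part": "0",            # index of the shard this process handles
    "all_parts": "2",         # total number of parallel shards
    "opt_dir": "logs/exp1",   # hypothetical output directory
    "bert_pretrained_dir": "pretrained_models/chinese-roberta-wwm-ext-large",  # hypothetical
    "is_half": "True",        # note: the script parses this with eval()
    "version": "v2",          # hypothetical version tag
})
subprocess.run(["python", "get_text.py"], env=env, check=True)  # hypothetical script name
```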
@@ -36,13 +34,13 @@ from time import time as ttime
 import shutil
 
 
-def my_save(fea,path):#####fix issue: torch.save doesn't support chinese path
-    dir=os.path.dirname(path)
-    name=os.path.basename(path)
+def my_save(fea, path):  #####fix issue: torch.save doesn't support chinese path
+    dir = os.path.dirname(path)
+    name = os.path.basename(path)
     # tmp_path="%s/%s%s.pth"%(dir,ttime(),i_part)
-    tmp_path="%s%s.pth"%(ttime(),i_part)
-    torch.save(fea,tmp_path)
-    shutil.move(tmp_path,"%s/%s"%(dir,name))
+    tmp_path = "%s%s.pth" % (ttime(), i_part)
+    torch.save(fea, tmp_path)
+    shutil.move(tmp_path, "%s/%s" % (dir, name))
 
 
 txt_path = "%s/2-name2text-%s.txt" % (opt_dir, i_part)
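The `my_save` helper being reformatted here is a workaround for `torch.save` failing on paths containing Chinese characters on some platforms: it saves to an ASCII-only temporary file in the working directory, then moves it into place with `shutil.move`, which handles non-ASCII paths fine. A standalone sketch of the same pattern — simplified, with a uuid-based temp name standing in for the `ttime()`/`i_part` naming used in the diff:

```python
# Sketch of the same workaround, simplified (assumption: uuid-based temp
# name replaces the ttime()/i_part naming from the diff).
import shutil
import uuid

import torch


def save_supporting_unicode_path(obj, path):
    """torch.save via an ASCII-only temp file in the CWD, then move into place.

    Works around torch.save failing on some platforms when `path` contains
    non-ASCII (e.g. Chinese) characters; shutil.move copes with such paths.
    """
    tmp_path = "%s.pth" % uuid.uuid4().hex  # guaranteed ASCII-only filename
    torch.save(obj, tmp_path)
    shutil.move(tmp_path, path)
```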
@@ -56,8 +54,10 @@ if os.path.exists(txt_path) == False:
     # device = "mps"
     else:
         device = "cpu"
-    if os.path.exists(bert_pretrained_dir):...
-    else:raise FileNotFoundError(bert_pretrained_dir)
+    if os.path.exists(bert_pretrained_dir):
+        ...
+    else:
+        raise FileNotFoundError(bert_pretrained_dir)
     tokenizer = AutoTokenizer.from_pretrained(bert_pretrained_dir)
     bert_model = AutoModelForMaskedLM.from_pretrained(bert_pretrained_dir)
     if is_half == True:
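This hunk only unfolds the one-line existence check into a readable block; the point of the check is to fail fast with the offending path rather than a less obvious error from `transformers`. A self-contained sketch of the load path — the model path is hypothetical, and the `.half().to(device)` tail is an assumption inferred from the `if is_half == True:` context line:

```python
import os

import torch
from transformers import AutoModelForMaskedLM, AutoTokenizer

bert_pretrained_dir = "pretrained_models/chinese-roberta-wwm-ext-large"  # hypothetical path
device = "cuda:0" if torch.cuda.is_available() else "cpu"
is_half = torch.cuda.is_available()

# Fail fast with the offending path instead of an opaque downstream error.
if os.path.exists(bert_pretrained_dir):
    ...
else:
    raise FileNotFoundError(bert_pretrained_dir)

tokenizer = AutoTokenizer.from_pretrained(bert_pretrained_dir)
bert_model = AutoModelForMaskedLM.from_pretrained(bert_pretrained_dir)
if is_half:
    bert_model = bert_model.half().to(device)  # assumption: mirrors the surrounding file
else:
    bert_model = bert_model.to(device)
```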
@@ -86,12 +86,10 @@ if os.path.exists(txt_path) == False:
|
||||
def process(data, res):
|
||||
for name, text, lan in data:
|
||||
try:
|
||||
name=clean_path(name)
|
||||
name = clean_path(name)
|
||||
name = os.path.basename(name)
|
||||
print(name)
|
||||
phones, word2ph, norm_text = clean_text(
|
||||
text.replace("%", "-").replace("¥", ","), lan, version
|
||||
)
|
||||
phones, word2ph, norm_text = clean_text(text.replace("%", "-").replace("¥", ","), lan, version)
|
||||
path_bert = "%s/%s.pt" % (bert_dir, name)
|
||||
if os.path.exists(path_bert) == False and lan == "zh":
|
||||
bert_feature = get_bert_feature(norm_text, word2ph)
|
||||
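Besides the spacing fix, the change here is `ruff format --line-length 120` re-joining the `clean_text(...)` call that an 88-column formatter had wrapped. For readers unfamiliar with the `word2ph` value it returns: such a list is commonly used to expand character-level BERT features to phone level, which is presumably what `get_bert_feature(norm_text, word2ph)` relies on. An illustrative sketch — an assumption, not code from this commit:

```python
# Illustrative sketch (assumption): expanding character-level BERT features
# to phone level with a word2ph list.
import torch


def expand_to_phone_level(char_features, word2ph):
    # char_features: (num_chars, hidden); word2ph[i] = number of phones for char i
    repeats = torch.tensor(word2ph)
    return char_features.repeat_interleave(repeats, dim=0)  # (sum(word2ph), hidden)


feats = torch.randn(3, 1024)  # 3 characters, hidden size 1024
print(expand_to_phone_level(feats, [2, 1, 3]).shape)  # torch.Size([6, 1024])
```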
@@ -131,9 +129,7 @@ if os.path.exists(txt_path) == False:
             wav_name, spk_name, language, text = line.split("|")
             # todo.append([name,text,"zh"])
             if language in language_v1_to_language_v2.keys():
-                todo.append(
-                    [wav_name, text, language_v1_to_language_v2.get(language, language)]
-                )
+                todo.append([wav_name, text, language_v1_to_language_v2.get(language, language)])
             else:
                 print(f"\033[33m[Waring] The {language = } of {wav_name} is not supported for training.\033[0m")
         except:
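Same story in this final hunk: at 120 columns the `todo.append(...)` call fits on one line. The `language_v1_to_language_v2.get(language, language)` idiom translates known v1 language codes and passes anything else through unchanged, although the surrounding `in ... .keys()` check means the fallback never actually fires here. A tiny sketch with hypothetical mapping entries:

```python
# Hypothetical mapping entries, for illustration only.
language_v1_to_language_v2 = {"ZH": "zh", "JA": "ja"}

print(language_v1_to_language_v2.get("ZH", "ZH"))  # -> "zh" (known code, translated)
print(language_v1_to_language_v2.get("ko", "ko"))  # -> "ko" (unknown code, passed through)
```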