@@ -18,14 +18,19 @@ import LangSegment, os, re, sys, json
|
||||
import pdb
|
||||
import torch
|
||||
|
||||
if len(sys.argv)==1:sys.argv.append('v1')
|
||||
version=os.environ.get("version","v1")
|
||||
version="v2"if sys.argv[1]=="v2" else version
|
||||
os.environ['version']=version
|
||||
language=os.environ.get("language","auto")
|
||||
language=sys.argv[-1] if len(sys.argv[-1])==5 else language
|
||||
pretrained_sovits_name="GPT_SoVITS/pretrained_models/s2G488k.pth"if version=="v1"else"GPT_SoVITS/pretrained_models/gsv-v2final-pretrained/s2G2333k.pth"
|
||||
pretrained_gpt_name="GPT_SoVITS/pretrained_models/s1bert25hz-2kh-longer-epoch=68e-step=50232.ckpt"if version=="v1"else "GPT_SoVITS/pretrained_models/gsv-v2final-pretrained/s1bert25hz-5kh-longer-epoch=12-step=369668.ckpt"
|
||||
version=os.environ.get("version","v2")
|
||||
pretrained_sovits_name=["GPT_SoVITS/pretrained_models/gsv-v2final-pretrained/s2G2333k.pth", "GPT_SoVITS/pretrained_models/s2G488k.pth"]
|
||||
pretrained_gpt_name=["GPT_SoVITS/pretrained_models/gsv-v2final-pretrained/s1bert25hz-5kh-longer-epoch=12-step=369668.ckpt", "GPT_SoVITS/pretrained_models/s1bert25hz-2kh-longer-epoch=68e-step=50232.ckpt"]
|
||||
|
||||
_ =[[],[]]
|
||||
for i in range(2):
|
||||
if os.path.exists(pretrained_gpt_name[i]):
|
||||
_[0].append(pretrained_gpt_name[i])
|
||||
if os.path.exists(pretrained_sovits_name[i]):
|
||||
_[-1].append(pretrained_sovits_name[i])
|
||||
pretrained_gpt_name,pretrained_sovits_name = _
|
||||
|
||||
|
||||
|
||||
if os.path.exists(f"./weight.json"):
|
||||
pass
|
||||
@@ -39,7 +44,11 @@ with open(f"./weight.json", 'r', encoding="utf-8") as file:
|
||||
"gpt_path", weight_data.get('GPT',{}).get(version,pretrained_gpt_name))
|
||||
sovits_path = os.environ.get(
|
||||
"sovits_path", weight_data.get('SoVITS',{}).get(version,pretrained_sovits_name))
|
||||
|
||||
if isinstance(gpt_path,list):
|
||||
gpt_path = gpt_path[0]
|
||||
if isinstance(sovits_path,list):
|
||||
sovits_path = sovits_path[0]
|
||||
|
||||
# gpt_path = os.environ.get(
|
||||
# "gpt_path", pretrained_gpt_name
|
||||
# )
|
||||
@@ -73,8 +82,10 @@ from text.cleaner import clean_text
|
||||
from time import time as ttime
|
||||
from module.mel_processing import spectrogram_torch
|
||||
from tools.my_utils import load_audio
|
||||
from tools.i18n.i18n import I18nAuto
|
||||
from tools.i18n.i18n import I18nAuto, scan_language_list
|
||||
|
||||
language=os.environ.get("language","auto")
|
||||
language=sys.argv[-1] if sys.argv[-1] in scan_language_list() else language
|
||||
if language != 'auto':
|
||||
i18n = I18nAuto(language=language)
|
||||
else:
|
||||
@@ -87,6 +98,29 @@ if torch.cuda.is_available():
|
||||
else:
|
||||
device = "cpu"
|
||||
|
||||
dict_language_v1 = {
|
||||
i18n("中文"): "all_zh",#全部按中文识别
|
||||
i18n("英文"): "en",#全部按英文识别#######不变
|
||||
i18n("日文"): "all_ja",#全部按日文识别
|
||||
i18n("中英混合"): "zh",#按中英混合识别####不变
|
||||
i18n("日英混合"): "ja",#按日英混合识别####不变
|
||||
i18n("多语种混合"): "auto",#多语种启动切分识别语种
|
||||
}
|
||||
dict_language_v2 = {
|
||||
i18n("中文"): "all_zh",#全部按中文识别
|
||||
i18n("英文"): "en",#全部按英文识别#######不变
|
||||
i18n("日文"): "all_ja",#全部按日文识别
|
||||
i18n("粤语"): "all_yue",#全部按中文识别
|
||||
i18n("韩文"): "all_ko",#全部按韩文识别
|
||||
i18n("中英混合"): "zh",#按中英混合识别####不变
|
||||
i18n("日英混合"): "ja",#按日英混合识别####不变
|
||||
i18n("粤英混合"): "yue",#按粤英混合识别####不变
|
||||
i18n("韩英混合"): "ko",#按韩英混合识别####不变
|
||||
i18n("多语种混合"): "auto",#多语种启动切分识别语种
|
||||
i18n("多语种混合(粤语)"): "auto_yue",#多语种启动切分识别语种
|
||||
}
|
||||
dict_language = dict_language_v1 if version =='v1' else dict_language_v2
|
||||
|
||||
tokenizer = AutoTokenizer.from_pretrained(bert_path)
|
||||
bert_model = AutoModelForMaskedLM.from_pretrained(bert_path)
|
||||
if is_half == True:
|
||||
@@ -146,8 +180,8 @@ else:
|
||||
ssl_model = ssl_model.to(device)
|
||||
|
||||
|
||||
def change_sovits_weights(sovits_path):
|
||||
global vq_model, hps
|
||||
def change_sovits_weights(sovits_path,prompt_language=None,text_language=None):
|
||||
global vq_model, hps, version, dict_language
|
||||
dict_s2 = torch.load(sovits_path, map_location="cpu")
|
||||
hps = dict_s2["config"]
|
||||
hps = DictToAttrRecursive(hps)
|
||||
@@ -156,6 +190,7 @@ def change_sovits_weights(sovits_path):
|
||||
hps.model.version = "v1"
|
||||
else:
|
||||
hps.model.version = "v2"
|
||||
version = hps.model.version
|
||||
# print("sovits版本:",hps.model.version)
|
||||
vq_model = SynthesizerTrn(
|
||||
hps.data.filter_length // 2 + 1,
|
||||
@@ -171,11 +206,25 @@ def change_sovits_weights(sovits_path):
|
||||
vq_model = vq_model.to(device)
|
||||
vq_model.eval()
|
||||
print(vq_model.load_state_dict(dict_s2["weight"], strict=False))
|
||||
dict_language = dict_language_v1 if version =='v1' else dict_language_v2
|
||||
with open("./weight.json")as f:
|
||||
data=f.read()
|
||||
data=json.loads(data)
|
||||
data["SoVITS"][version]=sovits_path
|
||||
data=f.read()
|
||||
data=json.loads(data)
|
||||
data["SoVITS"][version]=sovits_path
|
||||
with open("./weight.json","w")as f:f.write(json.dumps(data))
|
||||
if prompt_language is not None and text_language is not None:
|
||||
if prompt_language in list(dict_language.keys()):
|
||||
prompt_text_update, prompt_language_update = {'__type__':'update'}, {'__type__':'update', 'value':prompt_language}
|
||||
else:
|
||||
prompt_text_update = {'__type__':'update', 'value':''}
|
||||
prompt_language_update = {'__type__':'update', 'value':i18n("中文")}
|
||||
if text_language in list(dict_language.keys()):
|
||||
text_update, text_language_update = {'__type__':'update'}, {'__type__':'update', 'value':text_language}
|
||||
else:
|
||||
text_update = {'__type__':'update', 'value':''}
|
||||
text_language_update = {'__type__':'update', 'value':i18n("中文")}
|
||||
return {'__type__':'update', 'choices':list(dict_language.keys())}, {'__type__':'update', 'choices':list(dict_language.keys())}, prompt_text_update, prompt_language_update, text_update, text_language_update
|
||||
|
||||
|
||||
|
||||
change_sovits_weights(sovits_path)
|
||||
@@ -196,9 +245,9 @@ def change_gpt_weights(gpt_path):
|
||||
total = sum([param.nelement() for param in t2s_model.parameters()])
|
||||
print("Number of parameter: %.2fM" % (total / 1e6))
|
||||
with open("./weight.json")as f:
|
||||
data=f.read()
|
||||
data=json.loads(data)
|
||||
data["GPT"][version]=gpt_path
|
||||
data=f.read()
|
||||
data=json.loads(data)
|
||||
data["GPT"][version]=gpt_path
|
||||
with open("./weight.json","w")as f:f.write(json.dumps(data))
|
||||
|
||||
|
||||
@@ -220,22 +269,6 @@ def get_spepc(hps, filename):
|
||||
)
|
||||
return spec
|
||||
|
||||
|
||||
dict_language = {
|
||||
i18n("中文"): "all_zh",#全部按中文识别
|
||||
i18n("粤语"): "all_yue",#全部按中文识别
|
||||
i18n("英文"): "en",#全部按英文识别#######不变
|
||||
i18n("日文"): "all_ja",#全部按日文识别
|
||||
i18n("韩文"): "all_ko",#全部按韩文识别
|
||||
i18n("中英混合"): "zh",#按中英混合识别####不变
|
||||
i18n("粤英混合"): "yue",#按粤英混合识别####不变
|
||||
i18n("日英混合"): "ja",#按日英混合识别####不变
|
||||
i18n("韩英混合"): "ko",#按韩英混合识别####不变
|
||||
i18n("多语种混合"): "auto",#多语种启动切分识别语种
|
||||
i18n("多语种混合(粤语)"): "auto_yue",#多语种启动切分识别语种
|
||||
}
|
||||
|
||||
|
||||
def clean_text_inf(text, language, version):
|
||||
phones, word2ph, norm_text = clean_text(text, language, version)
|
||||
phones = cleaned_text_to_sequence(phones, version)
|
||||
@@ -363,7 +396,6 @@ def get_tts_wav(ref_wav_path, prompt_text, prompt_language, text, text_language,
|
||||
prompt_language = dict_language[prompt_language]
|
||||
text_language = dict_language[text_language]
|
||||
|
||||
version = vq_model.version
|
||||
|
||||
if not ref_free:
|
||||
prompt_text = prompt_text.strip("\n")
|
||||
@@ -599,29 +631,29 @@ def process_text(texts):
|
||||
|
||||
|
||||
def change_choices():
|
||||
SoVITS_names, GPT_names = get_weights_names()
|
||||
SoVITS_names, GPT_names = get_weights_names(GPT_weight_root, SoVITS_weight_root)
|
||||
return {"choices": sorted(SoVITS_names, key=custom_sort_key), "__type__": "update"}, {"choices": sorted(GPT_names, key=custom_sort_key), "__type__": "update"}
|
||||
|
||||
|
||||
SoVITS_weight_root="SoVITS_weights_v2" if version=='v2' else "SoVITS_weights"
|
||||
GPT_weight_root="GPT_weights_v2" if version=='v2' else "GPT_weights"
|
||||
os.makedirs("SoVITS_weights",exist_ok=True)
|
||||
os.makedirs("GPT_weights",exist_ok=True)
|
||||
os.makedirs("SoVITS_weights_v2",exist_ok=True)
|
||||
os.makedirs("GPT_weights_v2",exist_ok=True)
|
||||
SoVITS_weight_root=["SoVITS_weights_v2","SoVITS_weights"]
|
||||
GPT_weight_root=["GPT_weights_v2","GPT_weights"]
|
||||
for path in SoVITS_weight_root+GPT_weight_root:
|
||||
os.makedirs(path,exist_ok=True)
|
||||
|
||||
|
||||
def get_weights_names():
|
||||
SoVITS_names = [pretrained_sovits_name]
|
||||
for name in os.listdir(SoVITS_weight_root):
|
||||
if name.endswith(".pth"): SoVITS_names.append("%s/%s" % (SoVITS_weight_root, name))
|
||||
GPT_names = [pretrained_gpt_name]
|
||||
for name in os.listdir(GPT_weight_root):
|
||||
if name.endswith(".ckpt"): GPT_names.append("%s/%s" % (GPT_weight_root, name))
|
||||
def get_weights_names(GPT_weight_root, SoVITS_weight_root):
|
||||
SoVITS_names = [i for i in pretrained_sovits_name]
|
||||
for path in SoVITS_weight_root:
|
||||
for name in os.listdir(path):
|
||||
if name.endswith(".pth"): SoVITS_names.append("%s/%s" % (path, name))
|
||||
GPT_names = [i for i in pretrained_gpt_name]
|
||||
for path in GPT_weight_root:
|
||||
for name in os.listdir(path):
|
||||
if name.endswith(".ckpt"): GPT_names.append("%s/%s" % (path, name))
|
||||
return SoVITS_names, GPT_names
|
||||
|
||||
|
||||
SoVITS_names, GPT_names = get_weights_names()
|
||||
SoVITS_names, GPT_names = get_weights_names(GPT_weight_root, SoVITS_weight_root)
|
||||
|
||||
def html_center(text, label='p'):
|
||||
return f"""<div style="text-align: center; margin: 100; padding: 50;">
|
||||
@@ -633,6 +665,7 @@ def html_left(text, label='p'):
|
||||
<{label} style="margin: 0; padding: 0;">{text}</{label}>
|
||||
</div>"""
|
||||
|
||||
|
||||
with gr.Blocks(title="GPT-SoVITS WebUI") as app:
|
||||
gr.Markdown(
|
||||
value=i18n("本软件以MIT协议开源, 作者不对软件具备任何控制力, 使用软件者、传播软件导出的声音者自负全责. <br>如不认可该条款, 则不能使用或引用软件包内任何代码和文件. 详见根目录<b>LICENSE</b>.")
|
||||
@@ -640,21 +673,19 @@ with gr.Blocks(title="GPT-SoVITS WebUI") as app:
|
||||
with gr.Group():
|
||||
gr.Markdown(html_center(i18n("模型切换"),'h3'))
|
||||
with gr.Row():
|
||||
GPT_dropdown = gr.Dropdown(label=i18n("GPT模型列表"), choices=sorted(GPT_names, key=custom_sort_key), value=gpt_path, interactive=True,scale=13)
|
||||
SoVITS_dropdown = gr.Dropdown(label=i18n("SoVITS模型列表"), choices=sorted(SoVITS_names, key=custom_sort_key), value=sovits_path, interactive=True,scale=13)
|
||||
refresh_button = gr.Button(i18n("刷新模型路径"), variant="primary",scale=13)
|
||||
GPT_dropdown = gr.Dropdown(label=i18n("GPT模型列表"), choices=sorted(GPT_names, key=custom_sort_key), value=gpt_path, interactive=True, scale=14)
|
||||
SoVITS_dropdown = gr.Dropdown(label=i18n("SoVITS模型列表"), choices=sorted(SoVITS_names, key=custom_sort_key), value=sovits_path, interactive=True, scale=14)
|
||||
refresh_button = gr.Button(i18n("刷新模型路径"), variant="primary", scale=14)
|
||||
refresh_button.click(fn=change_choices, inputs=[], outputs=[SoVITS_dropdown, GPT_dropdown])
|
||||
SoVITS_dropdown.change(change_sovits_weights, [SoVITS_dropdown], [])
|
||||
GPT_dropdown.change(change_gpt_weights, [GPT_dropdown], [])
|
||||
gr.Markdown(html_center(i18n("*请上传并填写参考信息"),'h3'))
|
||||
with gr.Row():
|
||||
inp_ref = gr.Audio(label=i18n("请上传3~10秒内参考音频,超过会报错!"), type="filepath",scale=13)
|
||||
inp_ref = gr.Audio(label=i18n("请上传3~10秒内参考音频,超过会报错!"), type="filepath", scale=13)
|
||||
with gr.Column(scale=13):
|
||||
ref_text_free = gr.Checkbox(label=i18n("开启无参考文本模式。不填参考文本亦相当于开启。"), value=False, interactive=True, show_label=True)
|
||||
gr.Markdown(html_left(i18n("使用无参考文本模式时建议使用微调的GPT,听不清参考音频说的啥(不晓得写啥)可以开。<br>开启后无视填写的参考文本。")))
|
||||
prompt_text = gr.Textbox(label=i18n("参考音频的文本"), value="", lines=3, max_lines=3)
|
||||
prompt_language = gr.Dropdown(
|
||||
label=i18n("参考音频的语种"), choices=list(dict_language.keys()), value=i18n("中文"),scale=14
|
||||
label=i18n("参考音频的语种"), choices=list(dict_language.keys()), value=i18n("中文"), scale=14
|
||||
)
|
||||
gr.Markdown(html_center(i18n("*请填写需要合成的目标文本和语种模式"),'h3'))
|
||||
with gr.Row():
|
||||
@@ -683,13 +714,15 @@ with gr.Blocks(title="GPT-SoVITS WebUI") as app:
|
||||
# get_phoneme_button = gr.Button(i18n("目标文本转音素"), variant="primary")
|
||||
with gr.Row():
|
||||
inference_button = gr.Button(i18n("合成语音"), variant="primary", size='lg', scale=25)
|
||||
output = gr.Audio(label=i18n("输出的语音"),scale=14)
|
||||
output = gr.Audio(label=i18n("输出的语音"), scale=14)
|
||||
|
||||
inference_button.click(
|
||||
get_tts_wav,
|
||||
[inp_ref, prompt_text, prompt_language, text, text_language, how_to_cut, top_k, top_p, temperature, ref_text_free,speed,if_freeze],
|
||||
[output],
|
||||
)
|
||||
SoVITS_dropdown.change(change_sovits_weights, [SoVITS_dropdown,prompt_language,text_language], [prompt_language,text_language,prompt_text,prompt_language,text,text_language])
|
||||
GPT_dropdown.change(change_gpt_weights, [GPT_dropdown], [])
|
||||
|
||||
# gr.Markdown(value=i18n("文本切分工具。太长的文本合成出来效果不一定好,所以太长建议先切。合成会根据文本的换行分开合成再拼起来。"))
|
||||
# with gr.Row():
|
||||
|
||||
Reference in New Issue
Block a user