Merge branch 'main' into inference_optin

Watchtower-Liu
2024-02-16 17:03:11 +08:00
22 changed files with 260 additions and 46 deletions


@@ -245,7 +245,14 @@ def splite_en_inf(sentence, language):
 def clean_text_inf(text, language):
-    phones, word2ph, norm_text = clean_text(text, language.replace("all_",""))
+    formattext = ""
+    language = language.replace("all_","")
+    for tmp in LangSegment.getTexts(text):
+        if tmp["lang"] == language:
+            formattext += tmp["text"] + " "
+    while "  " in formattext:
+        formattext = formattext.replace("  ", " ")
+    phones, word2ph, norm_text = clean_text(formattext, language)
     phones = cleaned_text_to_sequence(phones)
     return phones, word2ph, norm_text
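
Note: the filtering above relies on LangSegment.getTexts returning a list of dicts with "lang" and "text" keys, exactly as used in the diff. A minimal standalone sketch of that step (the helper name keep_language_only is hypothetical):

import LangSegment

def keep_language_only(text, language):
    # Keep only the segments whose detected language matches the target,
    # then join them with single spaces, mirroring the loop above.
    parts = [tmp["text"] for tmp in LangSegment.getTexts(text) if tmp["lang"] == language]
    return " ".join(p.strip() for p in parts)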
@@ -305,9 +312,8 @@ def nonen_get_bert_inf(text, language):
     print(langlist)
     bert_list = []
     for i in range(len(textlist)):
-        text = textlist[i]
         lang = langlist[i]
-        phones, word2ph, norm_text = clean_text_inf(text, lang)
+        phones, word2ph, norm_text = clean_text_inf(textlist[i], lang)
         bert = get_bert_inf(phones, word2ph, norm_text, lang)
         bert_list.append(bert)
     bert = torch.cat(bert_list, dim=1)
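
Note: each segment contributes one BERT feature tensor, and torch.cat(dim=1) joins them along the phone axis. A minimal sketch with dummy tensors (the feature dimension 1024 and the phone counts are illustrative, not taken from the repo):

import torch

bert_list = [torch.zeros(1024, 7), torch.zeros(1024, 12)]  # one (features, phones) tensor per segment
bert = torch.cat(bert_list, dim=1)                         # -> shape (1024, 19)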
@@ -359,7 +365,7 @@ def merge_short_text_in_array(texts, threshold):
             result[len(result) - 1] += text
     return result
 
-def get_tts_wav(ref_wav_path, prompt_text, prompt_language, text, text_language, how_to_cut=i18n("不切")):
+def get_tts_wav(ref_wav_path, prompt_text, prompt_language, text, text_language, how_to_cut=i18n("不切"), top_k=20, top_p=0.6, temperature=0.6):
     t0 = ttime()
     prompt_language = dict_language[prompt_language]
     text_language = dict_language[text_language]
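
Note: with the new signature, top_k, top_p and temperature are accepted per call instead of top_k being read from config["inference"]. A hypothetical call (the path, texts and language labels are placeholders; the language arguments must be keys of dict_language):

audio_generator = get_tts_wav(
    "ref.wav",                  # placeholder reference audio path
    "reference transcript",     # placeholder prompt text
    i18n("中文"),                # prompt_language
    "text to synthesize",       # placeholder target text
    i18n("中文"),                # text_language
    top_k=20,
    top_p=0.6,
    temperature=0.6,
)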
@@ -439,7 +445,9 @@ def get_tts_wav(ref_wav_path, prompt_text, prompt_language, text, text_language,
                 None,
                 bert,
                 # prompt_phone_len=ph_offset,
-                top_k=config["inference"]["top_k"],
+                top_k=top_k,
+                top_p=top_p,
+                temperature=temperature,
                 early_stop_num=hz * max_sec,
             )
         t3 = ttime()
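
Note: the three values are forwarded to the autoregressive sampler. The sketch below is not the repo's infer_panel implementation, only an illustration of what the three knobs control when sampling one token from a vector of logits:

import torch

def sample_next_token(logits, top_k=20, top_p=0.6, temperature=0.6):
    logits = logits / temperature                    # lower temperature sharpens the distribution
    topk_vals, topk_idx = torch.topk(logits, top_k)  # keep only the k best-scoring tokens
    probs = torch.softmax(topk_vals, dim=-1)
    cumulative = torch.cumsum(probs, dim=-1)         # topk output is already sorted descending
    keep = cumulative - probs < top_p                # nucleus cut; the first token always survives
    probs = probs * keep
    probs = probs / probs.sum()
    choice = torch.multinomial(probs, 1)             # sample among the surviving candidates
    return topk_idx[choice]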
@@ -616,6 +624,10 @@ with gr.Blocks(title="GPT-SoVITS WebUI") as app:
                 value=i18n("凑四句一切"),
                 interactive=True,
             )
+            with gr.Row():
+                top_k = gr.Slider(minimum=1,maximum=100,step=1,label=i18n("top_k"),value=5,interactive=True)
+                top_p = gr.Slider(minimum=0,maximum=1,step=0.05,label=i18n("top_p"),value=1,interactive=True)
+                temperature = gr.Slider(minimum=0,maximum=1,step=0.05,label=i18n("temperature"),value=1,interactive=True)
             inference_button = gr.Button(i18n("合成语音"), variant="primary")
             output = gr.Audio(label=i18n("输出的语音"))
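
Note: for the sliders to take effect they also need to be passed to the synthesis callback. A sketch of that wiring, where the other component names (inp_ref, prompt_text, prompt_language, text, text_language, how_to_cut) are assumed to match the existing WebUI inputs:

inference_button.click(
    get_tts_wav,
    [inp_ref, prompt_text, prompt_language, text, text_language, how_to_cut, top_k, top_p, temperature],
    [output],
)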