Fixed a bug in the t2s model when no prompt input is given    GPT_SoVITS/AR/models/t2s_model.py

Added some new features and fixed some bugs    GPT_SoVITS/TTS_infer_pack/TTS.py
	Improved the web UI layout    GPT_SoVITS/inference_webui.py
chasonjiang
2024-03-10 01:20:42 +08:00
parent 2fe3207d71
commit ed2ffe1356
3 changed files with 194 additions and 101 deletions

GPT_SoVITS/AR/models/t2s_model.py

@@ -549,7 +549,6 @@ class Text2SemanticDecoder(nn.Module):
         y_list = [None]*y.shape[0]
         batch_idx_map = list(range(y.shape[0]))
         idx_list = [None]*y.shape[0]
-        cache_y_emb = y_emb
         for idx in tqdm(range(1500)):
             if idx == 0:
                 xy_dec, k_cache, v_cache = self.t2s_transformer.process_prompt(xy_pos, xy_attn_mask)
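The removed line above was the only place `cache_y_emb` was assigned; with it gone, the decoder simply recomputes the embedding of the newest token from `y` at every step (see the hunk at line 614 below), which appears to be how the no-prompt case mentioned in the commit message is handled. A minimal sketch of that per-step recomputation, with hypothetical sizes and a plain `nn.Embedding` standing in for `ar_audio_embedding`:

```python
import torch
import torch.nn as nn

# Hypothetical sizes, for illustration only.
vocab_size, embed_dim, batch = 1025, 512, 2
ar_audio_embedding = nn.Embedding(vocab_size, embed_dim)

# y holds the semantic tokens generated so far, shape (batch, t).
y = torch.randint(0, vocab_size, (batch, 4))

# Rather than carrying a separately cached prompt embedding across steps,
# embed only the newest token each iteration; this depends only on the
# current contents of y, so it works whether or not a prompt was supplied.
y_emb = ar_audio_embedding(y[:, -1:])  # shape: (batch, 1, embed_dim)
print(y_emb.shape)
```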
@@ -589,8 +588,6 @@ class Text2SemanticDecoder(nn.Module):
             if reserved_idx_of_batch_for_y is not None:
                 # index = torch.LongTensor(batch_idx_map).to(y.device)
                 y = torch.index_select(y, dim=0, index=reserved_idx_of_batch_for_y)
-                if cache_y_emb is not None:
-                    cache_y_emb = torch.index_select(cache_y_emb, dim=0, index=reserved_idx_of_batch_for_y)
                 if k_cache is not None :
                     for i in range(len(k_cache)):
                         k_cache[i] = torch.index_select(k_cache[i], dim=0, index=reserved_idx_of_batch_for_y)
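The surviving branch above is the batch-pruning pattern used during batched decoding: once some sequences finish, `torch.index_select` along dim 0 keeps only the still-active rows of `y` and of each layer's key/value cache, and the deleted `cache_y_emb` branch no longer needs the same treatment. A runnable sketch of that pruning, with an assumed (batch, seq, dim) cache layout that may differ from the real one:

```python
import torch

# Hypothetical shapes; the real cache layout in t2s_model.py may differ.
batch, t, dim, layers = 4, 10, 512, 3
y = torch.randint(0, 1025, (batch, t))
k_cache = [torch.randn(batch, t, dim) for _ in range(layers)]
v_cache = [torch.randn(batch, t, dim) for _ in range(layers)]

# Suppose sequences 1 and 3 just emitted EOS; keep only rows 0 and 2.
reserved_idx_of_batch_for_y = torch.tensor([0, 2])

y = torch.index_select(y, dim=0, index=reserved_idx_of_batch_for_y)
for i in range(len(k_cache)):
    k_cache[i] = torch.index_select(k_cache[i], dim=0, index=reserved_idx_of_batch_for_y)
    v_cache[i] = torch.index_select(v_cache[i], dim=0, index=reserved_idx_of_batch_for_y)

print(y.shape, k_cache[0].shape)  # torch.Size([2, 10]) torch.Size([2, 10, 512])
```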
@@ -617,8 +614,8 @@ class Text2SemanticDecoder(nn.Module):
             ####################### update next step ###################################
             y_emb = self.ar_audio_embedding(y[:, -1:])
-            xy_pos = y_emb * self.ar_audio_position.x_scale + self.ar_audio_position.alpha * self.ar_audio_position.pe[:, y_len + idx]
+            xy_pos = y_emb * self.ar_audio_position.x_scale + self.ar_audio_position.alpha * self.ar_audio_position.pe[:, y_len + idx].to( dtype= y_emb.dtype,device=y_emb.device)
             if (None in idx_list):
                 for i in range(x.shape[0]):
                     if idx_list[i] is None:
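The replaced line in this hunk adds a `.to(dtype=..., device=...)` cast on the sliced positional-encoding buffer before it is combined with the new token embedding. The likely motivation: under half-precision inference a float32 `pe` buffer would otherwise promote `xy_pos` back to float32 (and a buffer left on another device would raise a device-mismatch error). A small sketch with stand-in tensors (the scale/alpha factors are omitted for brevity):

```python
import torch

# Stand-ins for ar_audio_position.pe and the new token embedding.
pe = torch.randn(1, 4000, 512)                       # buffer kept in float32
y_emb = torch.randn(2, 1, 512, dtype=torch.float16)  # model running in fp16
y_len, idx = 30, 5

# Without the cast, fp16 + fp32 promotes the result to float32.
promoted = y_emb + pe[:, y_len + idx]
print(promoted.dtype)  # torch.float32

# Casting the slice keeps xy_pos in the model's dtype and on its device.
xy_pos = y_emb + pe[:, y_len + idx].to(dtype=y_emb.dtype, device=y_emb.device)
print(xy_pos.dtype)    # torch.float16
```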