Add files via upload

This commit is contained in:
RVC-Boss
2024-01-16 17:38:48 +08:00
committed by GitHub
parent 143d32f621
commit 41ca6028d6
65 changed files with 139856 additions and 0 deletions

View File

@@ -0,0 +1,6 @@
from . import cnhubert, whisper_enc
# Registry of content-feature backends, keyed by the name used to select them.
content_module_map = dict(cnhubert=cnhubert, whisper=whisper_enc)

View File

@@ -0,0 +1,97 @@
import time
import librosa
import torch
import torch.nn.functional as F
import soundfile as sf
import logging
logging.getLogger("numba").setLevel(logging.WARNING)
from transformers import (
Wav2Vec2FeatureExtractor,
HubertModel,
Wav2Vec2Model,
)
import utils
import torch.nn as nn
# Location of the pretrained checkpoint handed to ``from_pretrained``. It is
# read when CNHubert is instantiated, so it must be assigned before that.
cnhubert_base_path = None


class CNHubert(nn.Module):
    """Wrap a ``HubertModel`` together with its ``Wav2Vec2FeatureExtractor``.

    Both components are loaded from the module-level ``cnhubert_base_path``.

    Raises:
        ValueError: if ``cnhubert_base_path`` is still ``None`` at
            construction time.
    """

    def __init__(self):
        # Fail early with an actionable message instead of letting
        # ``from_pretrained(None)`` fail somewhere inside the library.
        if cnhubert_base_path is None:
            raise ValueError(
                "cnhubert_base_path is not set; assign the checkpoint path "
                "before constructing CNHubert"
            )
        super().__init__()
        self.model = HubertModel.from_pretrained(cnhubert_base_path)
        self.feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained(
            cnhubert_base_path
        )

    def forward(self, x):
        """Return the model's ``last_hidden_state`` for the waveform ``x``.

        ``x`` is declared to the feature extractor as 16 kHz audio.
        NOTE(review): presumably a 1-D tensor of samples — confirm with callers.
        """
        # The extractor builds fresh tensors, so move them back to the
        # device the input came from before running the model.
        input_values = self.feature_extractor(
            x, return_tensors="pt", sampling_rate=16000
        ).input_values.to(x.device)
        feats = self.model(input_values)["last_hidden_state"]
        return feats
# class CNHubertLarge(nn.Module):
# def __init__(self):
# super().__init__()
# self.model = HubertModel.from_pretrained("/data/docker/liujing04/gpt-vits/chinese-hubert-large")
# self.feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained("/data/docker/liujing04/gpt-vits/chinese-hubert-large")
# def forward(self, x):
# input_values = self.feature_extractor(x, return_tensors="pt", sampling_rate=16000).input_values.to(x.device)
# feats = self.model(input_values)["last_hidden_state"]
# return feats
#
# class CVec(nn.Module):
# def __init__(self):
# super().__init__()
# self.model = HubertModel.from_pretrained("/data/docker/liujing04/vc-webui-big/hubert_base")
# self.feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained("/data/docker/liujing04/vc-webui-big/hubert_base")
# def forward(self, x):
# input_values = self.feature_extractor(x, return_tensors="pt", sampling_rate=16000).input_values.to(x.device)
# feats = self.model(input_values)["last_hidden_state"]
# return feats
#
# class cnw2v2base(nn.Module):
# def __init__(self):
# super().__init__()
# self.model = Wav2Vec2Model.from_pretrained("/data/docker/liujing04/gpt-vits/chinese-wav2vec2-base")
# self.feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained("/data/docker/liujing04/gpt-vits/chinese-wav2vec2-base")
# def forward(self, x):
# input_values = self.feature_extractor(x, return_tensors="pt", sampling_rate=16000).input_values.to(x.device)
# feats = self.model(input_values)["last_hidden_state"]
# return feats
def get_model():
    """Build a CNHubert instance and return it switched to evaluation mode."""
    # nn.Module.eval() returns the module itself, so the call can be chained.
    return CNHubert().eval()
# def get_large_model():
# model = CNHubertLarge()
# model.eval()
# return model
#
# def get_model_cvec():
# model = CVec()
# model.eval()
# return model
#
# def get_model_cnw2v2base():
# model = cnw2v2base()
# model.eval()
# return model
def get_content(hmodel, wav_16k_tensor):
    """Run ``hmodel`` on the waveform without gradient tracking.

    Returns the model output with dimensions 1 and 2 swapped.
    NOTE(review): presumably (batch, frames, dim) -> (batch, dim, frames) —
    confirm against the model's output layout.
    """
    with torch.no_grad():
        hidden = hmodel(wav_16k_tensor)
    return torch.transpose(hidden, 1, 2)
if __name__ == '__main__':
    # Manual smoke test: load the model, extract features from one local
    # recording resampled to 16 kHz, and print the resulting tensor shape.
    hubert = get_model()
    src_path = "/Users/Shared/原音频2.wav"
    audio_16k = utils.load_wav_to_torch_and_resample(src_path, 16000)
    print(get_content(hubert, audio_16k).shape)

View File

@@ -0,0 +1,22 @@
import torch
def get_model():
    """Load the Whisper "small" checkpoint on the CPU and return its encoder."""
    # Imported lazily so that importing this module does not require whisper.
    import whisper

    return whisper.load_model("small", device='cpu').encoder
def get_content(model=None, wav_16k_tensor=None):
    """Extract Whisper encoder features for a single waveform.

    Args:
        model: the encoder returned by ``get_model()``; its parameters decide
            which device the computation runs on.
        wav_16k_tensor: audio samples passed to ``log_mel_spectrogram``.
            NOTE(review): presumably a 1-D 16 kHz waveform — confirm.

    Returns:
        The encoder output for the first batch item, cut to half the number
        of mel frames and with dimensions 1 and 2 swapped.

    Raises:
        AssertionError: if the mel spectrogram has 3000 frames or more.
    """
    from whisper import log_mel_spectrogram, pad_or_trim

    max_frames = 3000  # fixed encoder input length used for padding below

    dev = next(model.parameters()).device
    mel = log_mel_spectrogram(wav_16k_tensor).to(dev)
    n_frames = mel.shape[-1]
    # Raised explicitly rather than via ``assert`` so the limit still holds
    # under ``python -O``; otherwise over-long audio would be silently cut.
    # The exception type and message are unchanged for existing callers.
    if n_frames >= max_frames:
        raise AssertionError("输入音频过长只允许输入30以内音频")
    # Only the leading half-rate frames are kept; the rest comes from padding.
    # NOTE(review): the //2 presumably mirrors the encoder's downsampling — confirm.
    feature_len = n_frames // 2
    with torch.no_grad():
        encoded = model(pad_or_trim(mel, max_frames).unsqueeze(0))
        feature = encoded[:1, :feature_len, :].transpose(1, 2)
    return feature