Introduce Docker and Windows CI Workflow, Pre-commit Formatting, and Language Resource Auto-Download (#2351)
* Docker Auto-Build Workflow * Rename * Update * Fix Bugs * Disable Progress Bar When workflows triggered * Fix Wget * Fix Bugs * Fix Bugs * Update Wget * Update Workflows * Accelerate Docker Image Building * Fix Install.sh * Add Skip-Check For Action Runner * Fix Dockerfile * . * . * . * . * Delete File in Runner * Add Sort * Delete More Files * Delete More * . * . * . * Add Pre-Commit Hook Update Docker * Add Code Spell Check * [pre-commit.ci] trigger * [pre-commit.ci] trigger * [pre-commit.ci] trigger * Fix Bugs * . * Disable Progress Bar and Logs while using GitHub Actions * . * . * Fix Bugs * update conda * fix bugs * Fix Bugs * fix bugs * . * . * Quiet Installation * fix bugs * . * fix bug * . * Fix pre-commit.ci and Docker * fix bugs * . * Update Docker & Pre-Commit * fix bugs * Update Req * Update Req * Update OpenCC * update precommit * . * Update .pre-commit-config.yaml * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Update Docs and fix bugs * Fix \ * Fix MacOS * . * test * . * Add Tag Alias * . * fix bugs * fix bugs * make image smaller * update pre-commit config * . * . * fix bugs * use miniconda * Fix Wrong Path * . * debug * debug * revert * Fix Bugs * Update Docs, Add Dict Auto Download in install.sh * update docker_build * Update Docs for Install.sh * update docker docs about architecture * Add Xcode-Commandline-Tool Installation * Update Docs 1. Add Missing VC17 2. Modified the Order of FFmpeg Installation and Requirements Installation 3. Remove Duplicate FFmpeg * Fix Wrong Cuda Version * Update TESTED ENV * Add PYTHONNOUSERSITE(-s) * Fix Wrapper * Update install.sh For Robustness * Ignore .git * Preload CUDNN For Ctranslate2 * Remove Gradio Warnings * Update Colab * Fix OpenCC Problems * Update Win DLL Strategy * Fix Onnxruntime-gpu NVRTC Error * Fix Path Problems * Add Windows Packages Workflow * WIP * WIP * WIP * WIP * WIP * WIP * . 
* WIP * WIP * WIP * WIP * WIP * WIP * WIP * WIP * WIP * WIP * WIP * WIP * WIP * WIP * WIP * WIP * WIP * WIP * WIP * WIP * WIP * WIP * WIP * WIP * WIP * WIP * WIP * WIP * WIP * WIP * WIP * WIP * WIP * WIP * WIP * WIP * WIP * Fix Path * Fix Path * Enable Logging * Set 7-Zip compression level to maximum (-mx=9) * Use Multithread in ONNX Session * Fix Tag Bugs * Add Time * Add Time * Add Time * Compress More * Copy DLL to Solve VC Runtime DLL Missing Issues * Expose FFmpeg Errors, Copy Only Part of Visual C++ Runtime * Update build_windows_packages.ps1 * Update build_windows_packages.ps1 * Update build_windows_packages.ps1 * Update build_windows_packages.ps1 * WIP * WIP * WIP * Update build_windows_packages.ps1 * Update install.sh * Update build_windows_packages.ps1 * Update docker-publish.yaml * Update install.sh * Update Dockerfile * Update docker_build.sh * Update miniconda_install.sh * Update README.md * Update README.md * Update README.md * Update README.md * Update README.md * Update README.md * Update Colab-WebUI.ipynb * Update Colab-Inference.ipynb * Update docker-compose.yaml * 更新 build_windows_packages.ps1 * Update install.sh --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
This commit is contained in:
@@ -10,6 +10,7 @@ from faster_whisper import WhisperModel
|
||||
from tqdm import tqdm
|
||||
|
||||
from tools.asr.config import check_fw_local_models
|
||||
from tools.my_utils import load_cudnn
|
||||
|
||||
# fmt: off
|
||||
language_code_list = [
|
||||
@@ -93,6 +94,8 @@ def execute_asr(input_folder, output_folder, model_size, language, precision):
|
||||
return output_file_path
|
||||
|
||||
|
||||
load_cudnn()
|
||||
|
||||
if __name__ == "__main__":
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument(
|
||||
|
||||
@@ -1,11 +1,15 @@
|
||||
import ctypes
|
||||
import os
|
||||
import traceback
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
import ffmpeg
|
||||
import numpy as np
|
||||
import gradio as gr
|
||||
from tools.i18n.i18n import I18nAuto
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
|
||||
from tools.i18n.i18n import I18nAuto
|
||||
|
||||
i18n = I18nAuto(language=os.environ.get("language", "Auto"))
|
||||
|
||||
|
||||
@@ -15,7 +19,7 @@ def load_audio(file, sr):
|
||||
# This launches a subprocess to decode audio while down-mixing and resampling as necessary.
|
||||
# Requires the ffmpeg CLI and `ffmpeg-python` package to be installed.
|
||||
file = clean_path(file) # 防止小白拷路径头尾带了空格和"和回车
|
||||
if os.path.exists(file) == False:
|
||||
if os.path.exists(file) is False:
|
||||
raise RuntimeError("You input a wrong audio path that does not exists, please fix it!")
|
||||
out, _ = (
|
||||
ffmpeg.input(file, threads=0)
|
||||
@@ -23,7 +27,11 @@ def load_audio(file, sr):
|
||||
.run(cmd=["ffmpeg", "-nostdin"], capture_stdout=True, capture_stderr=True)
|
||||
)
|
||||
except Exception:
|
||||
traceback.print_exc()
|
||||
out, _ = (
|
||||
ffmpeg.input(file, threads=0)
|
||||
.output("-", format="f32le", acodec="pcm_f32le", ac=1, ar=sr)
|
||||
.run(cmd=["ffmpeg", "-nostdin"], capture_stdout=True)
|
||||
) # Expose the Error
|
||||
raise RuntimeError(i18n("音频加载失败"))
|
||||
|
||||
return np.frombuffer(out, np.float32).flatten()
|
||||
@@ -127,3 +135,97 @@ def check_details(path_list=None, is_train=False, is_dataset_processing=False):
|
||||
...
|
||||
else:
|
||||
gr.Warning(i18n("缺少语义数据集"))
|
||||
|
||||
|
||||
def load_cudnn():
    """Preload the cuDNN CNN shared libraries found next to the torch package.

    When ``torch.cuda.is_available()`` is false, only an informational
    message is printed.

    * ``win32``: the ``lib`` directory inside the torch package is registered
      with ``os.add_dll_directory`` and every ``cudnn_cnn*.dll`` in it is
      loaded by base name.
    * ``linux``: every ``libcudnn_cnn*.so*`` under ``nvidia/cudnn/lib`` in the
      directory that contains the torch package is loaded with
      ``RTLD_GLOBAL``.
    * Any other platform: nothing is done.

    Missing directories or libraries are reported with ``print`` and end the
    function early; an ``OSError`` raised while loading one library is
    reported and the remaining libraries are still attempted.
    """
    import torch

    if not torch.cuda.is_available():
        print("[INFO] CUDA is not available, skipping cuDNN setup.")
        return

    platform = sys.platform

    if platform == "win32":
        lib_root = Path(torch.__file__).parent / "lib"
        if not lib_root.exists():
            print(f"[WARNING] Torch lib directory not found: {lib_root}")
            return

        os.add_dll_directory(str(lib_root))
        print(f"[INFO] Added DLL directory: {lib_root}")

        candidates = sorted(lib_root.glob("cudnn_cnn*.dll"))
        if not candidates:
            print(f"[ERROR] No cudnn_cnn*.dll found in {lib_root}")
            return

        for candidate in candidates:
            # Loaded by base name: the directory was registered just above.
            base_name = os.path.basename(candidate)
            try:
                ctypes.CDLL(base_name)
                print(f"[INFO] Loaded: {base_name}")
            except OSError as err:
                print(f"[WARNING] Failed to load {base_name}: {err}")
        return

    if platform != "linux":
        return

    package_parent = Path(torch.__file__).resolve().parents[1]
    cudnn_root = package_parent / "nvidia" / "cudnn" / "lib"
    if not cudnn_root.exists():
        print(f"[ERROR] cudnn dir not found: {cudnn_root}")
        return

    candidates = sorted(cudnn_root.glob("libcudnn_cnn*.so*"))
    if not candidates:
        print(f"[ERROR] No libcudnn_cnn*.so* found in {cudnn_root}")
        return

    for candidate in candidates:
        try:
            ctypes.CDLL(candidate, mode=ctypes.RTLD_GLOBAL)  # type: ignore
            print(f"[INFO] Loaded: {candidate}")
        except OSError as err:
            print(f"[WARNING] Failed to load {candidate}: {err}")
|
||||
|
||||
|
||||
def load_nvrtc():
    """Preload the NVRTC shared libraries found next to the torch package.

    When ``torch.cuda.is_available()`` is false, only an informational
    message is printed.

    * ``win32``: the ``lib`` directory inside the torch package is registered
      with ``os.add_dll_directory`` and every ``nvrtc*.dll`` in it is loaded
      by base name.
    * ``linux``: every ``libnvrtc*.so*`` under ``nvidia/cuda_nvrtc/lib`` in
      the directory that contains the torch package is loaded with
      ``RTLD_GLOBAL``.
    * Any other platform: nothing is done.

    Missing directories or libraries are reported with ``print`` and end the
    function early; an ``OSError`` raised while loading one library is
    reported and the remaining libraries are still attempted.
    """
    import torch

    if not torch.cuda.is_available():
        print("[INFO] CUDA is not available, skipping nvrtc setup.")
        return

    platform = sys.platform

    if platform == "win32":
        lib_root = Path(torch.__file__).parent / "lib"
        if not lib_root.exists():
            print(f"[WARNING] Torch lib directory not found: {lib_root}")
            return

        os.add_dll_directory(str(lib_root))
        print(f"[INFO] Added DLL directory: {lib_root}")

        candidates = sorted(lib_root.glob("nvrtc*.dll"))
        if not candidates:
            print(f"[ERROR] No nvrtc*.dll found in {lib_root}")
            return

        for candidate in candidates:
            # Loaded by base name: the directory was registered just above.
            base_name = os.path.basename(candidate)
            try:
                ctypes.CDLL(base_name)
                print(f"[INFO] Loaded: {base_name}")
            except OSError as err:
                print(f"[WARNING] Failed to load {base_name}: {err}")
        return

    if platform != "linux":
        return

    package_parent = Path(torch.__file__).resolve().parents[1]
    nvrtc_root = package_parent / "nvidia" / "cuda_nvrtc" / "lib"
    if not nvrtc_root.exists():
        print(f"[ERROR] nvrtc dir not found: {nvrtc_root}")
        return

    candidates = sorted(nvrtc_root.glob("libnvrtc*.so*"))
    if not candidates:
        print(f"[ERROR] No libnvrtc*.so* found in {nvrtc_root}")
        return

    for candidate in candidates:
        try:
            ctypes.CDLL(candidate, mode=ctypes.RTLD_GLOBAL)  # type: ignore
            print(f"[INFO] Loaded: {candidate}")
        except OSError as err:
            print(f"[WARNING] Failed to load {candidate}: {err}")
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
import argparse
|
||||
import os
|
||||
import copy
|
||||
import json
|
||||
import os
|
||||
import uuid
|
||||
|
||||
try:
|
||||
@@ -11,8 +11,8 @@ try:
|
||||
except:
|
||||
...
|
||||
|
||||
import librosa
|
||||
import gradio as gr
|
||||
import librosa
|
||||
import numpy as np
|
||||
import soundfile
|
||||
|
||||
@@ -303,7 +303,7 @@ if __name__ == "__main__":
|
||||
|
||||
set_global(args.load_json, args.load_list, args.json_key_text, args.json_key_path, args.g_batch)
|
||||
|
||||
with gr.Blocks() as demo:
|
||||
with gr.Blocks(analytics_enabled=False) as demo:
|
||||
with gr.Row():
|
||||
btn_change_index = gr.Button("Change Index")
|
||||
btn_submit_change = gr.Button("Submit Text")
|
||||
|
||||
@@ -32,18 +32,10 @@ def make_pair(mix_dir, inst_dir):
|
||||
input_exts = [".wav", ".m4a", ".mp3", ".mp4", ".flac"]
|
||||
|
||||
X_list = sorted(
|
||||
[
|
||||
os.path.join(mix_dir, fname)
|
||||
for fname in os.listdir(mix_dir)
|
||||
if os.path.splitext(fname)[1] in input_exts
|
||||
]
|
||||
[os.path.join(mix_dir, fname) for fname in os.listdir(mix_dir) if os.path.splitext(fname)[1] in input_exts]
|
||||
)
|
||||
y_list = sorted(
|
||||
[
|
||||
os.path.join(inst_dir, fname)
|
||||
for fname in os.listdir(inst_dir)
|
||||
if os.path.splitext(fname)[1] in input_exts
|
||||
]
|
||||
[os.path.join(inst_dir, fname) for fname in os.listdir(inst_dir) if os.path.splitext(fname)[1] in input_exts]
|
||||
)
|
||||
|
||||
filelist = list(zip(X_list, y_list))
|
||||
@@ -65,14 +57,10 @@ def train_val_split(dataset_dir, split_mode, val_rate, val_filelist):
|
||||
train_filelist = filelist[:-val_size]
|
||||
val_filelist = filelist[-val_size:]
|
||||
else:
|
||||
train_filelist = [
|
||||
pair for pair in filelist if list(pair) not in val_filelist
|
||||
]
|
||||
train_filelist = [pair for pair in filelist if list(pair) not in val_filelist]
|
||||
elif split_mode == "subdirs":
|
||||
if len(val_filelist) != 0:
|
||||
raise ValueError(
|
||||
"The `val_filelist` option is not available in `subdirs` mode"
|
||||
)
|
||||
raise ValueError("The `val_filelist` option is not available in `subdirs` mode")
|
||||
|
||||
train_filelist = make_pair(
|
||||
os.path.join(dataset_dir, "training/mixtures"),
|
||||
@@ -91,9 +79,7 @@ def augment(X, y, reduction_rate, reduction_mask, mixup_rate, mixup_alpha):
|
||||
perm = np.random.permutation(len(X))
|
||||
for i, idx in enumerate(tqdm(perm)):
|
||||
if np.random.uniform() < reduction_rate:
|
||||
y[idx] = spec_utils.reduce_vocal_aggressively(
|
||||
X[idx], y[idx], reduction_mask
|
||||
)
|
||||
y[idx] = spec_utils.reduce_vocal_aggressively(X[idx], y[idx], reduction_mask)
|
||||
|
||||
if np.random.uniform() < 0.5:
|
||||
# swap channel
|
||||
@@ -152,9 +138,7 @@ def make_training_set(filelist, cropsize, patches, sr, hop_length, n_fft, offset
|
||||
|
||||
def make_validation_set(filelist, cropsize, sr, hop_length, n_fft, offset):
|
||||
patch_list = []
|
||||
patch_dir = "cs{}_sr{}_hl{}_nf{}_of{}".format(
|
||||
cropsize, sr, hop_length, n_fft, offset
|
||||
)
|
||||
patch_dir = "cs{}_sr{}_hl{}_nf{}_of{}".format(cropsize, sr, hop_length, n_fft, offset)
|
||||
os.makedirs(patch_dir, exist_ok=True)
|
||||
|
||||
for i, (X_path, y_path) in enumerate(tqdm(filelist)):
|
||||
|
||||
@@ -63,9 +63,7 @@ class Encoder(nn.Module):
|
||||
|
||||
|
||||
class Decoder(nn.Module):
|
||||
def __init__(
|
||||
self, nin, nout, ksize=3, stride=1, pad=1, activ=nn.ReLU, dropout=False
|
||||
):
|
||||
def __init__(self, nin, nout, ksize=3, stride=1, pad=1, activ=nn.ReLU, dropout=False):
|
||||
super(Decoder, self).__init__()
|
||||
self.conv = Conv2DBNActiv(nin, nout, ksize, 1, pad, activ=activ)
|
||||
self.dropout = nn.Dropout2d(0.1) if dropout else None
|
||||
@@ -91,24 +89,14 @@ class ASPPModule(nn.Module):
|
||||
Conv2DBNActiv(nin, nin, 1, 1, 0, activ=activ),
|
||||
)
|
||||
self.conv2 = Conv2DBNActiv(nin, nin, 1, 1, 0, activ=activ)
|
||||
self.conv3 = SeperableConv2DBNActiv(
|
||||
nin, nin, 3, 1, dilations[0], dilations[0], activ=activ
|
||||
)
|
||||
self.conv4 = SeperableConv2DBNActiv(
|
||||
nin, nin, 3, 1, dilations[1], dilations[1], activ=activ
|
||||
)
|
||||
self.conv5 = SeperableConv2DBNActiv(
|
||||
nin, nin, 3, 1, dilations[2], dilations[2], activ=activ
|
||||
)
|
||||
self.bottleneck = nn.Sequential(
|
||||
Conv2DBNActiv(nin * 5, nout, 1, 1, 0, activ=activ), nn.Dropout2d(0.1)
|
||||
)
|
||||
self.conv3 = SeperableConv2DBNActiv(nin, nin, 3, 1, dilations[0], dilations[0], activ=activ)
|
||||
self.conv4 = SeperableConv2DBNActiv(nin, nin, 3, 1, dilations[1], dilations[1], activ=activ)
|
||||
self.conv5 = SeperableConv2DBNActiv(nin, nin, 3, 1, dilations[2], dilations[2], activ=activ)
|
||||
self.bottleneck = nn.Sequential(Conv2DBNActiv(nin * 5, nout, 1, 1, 0, activ=activ), nn.Dropout2d(0.1))
|
||||
|
||||
def forward(self, x):
|
||||
_, _, h, w = x.size()
|
||||
feat1 = F.interpolate(
|
||||
self.conv1(x), size=(h, w), mode="bilinear", align_corners=True
|
||||
)
|
||||
feat1 = F.interpolate(self.conv1(x), size=(h, w), mode="bilinear", align_corners=True)
|
||||
feat2 = self.conv2(x)
|
||||
feat3 = self.conv3(x)
|
||||
feat4 = self.conv4(x)
|
||||
|
||||
@@ -63,9 +63,7 @@ class Encoder(nn.Module):
|
||||
|
||||
|
||||
class Decoder(nn.Module):
|
||||
def __init__(
|
||||
self, nin, nout, ksize=3, stride=1, pad=1, activ=nn.ReLU, dropout=False
|
||||
):
|
||||
def __init__(self, nin, nout, ksize=3, stride=1, pad=1, activ=nn.ReLU, dropout=False):
|
||||
super(Decoder, self).__init__()
|
||||
self.conv = Conv2DBNActiv(nin, nout, ksize, 1, pad, activ=activ)
|
||||
self.dropout = nn.Dropout2d(0.1) if dropout else None
|
||||
@@ -91,24 +89,14 @@ class ASPPModule(nn.Module):
|
||||
Conv2DBNActiv(nin, nin, 1, 1, 0, activ=activ),
|
||||
)
|
||||
self.conv2 = Conv2DBNActiv(nin, nin, 1, 1, 0, activ=activ)
|
||||
self.conv3 = SeperableConv2DBNActiv(
|
||||
nin, nin, 3, 1, dilations[0], dilations[0], activ=activ
|
||||
)
|
||||
self.conv4 = SeperableConv2DBNActiv(
|
||||
nin, nin, 3, 1, dilations[1], dilations[1], activ=activ
|
||||
)
|
||||
self.conv5 = SeperableConv2DBNActiv(
|
||||
nin, nin, 3, 1, dilations[2], dilations[2], activ=activ
|
||||
)
|
||||
self.bottleneck = nn.Sequential(
|
||||
Conv2DBNActiv(nin * 5, nout, 1, 1, 0, activ=activ), nn.Dropout2d(0.1)
|
||||
)
|
||||
self.conv3 = SeperableConv2DBNActiv(nin, nin, 3, 1, dilations[0], dilations[0], activ=activ)
|
||||
self.conv4 = SeperableConv2DBNActiv(nin, nin, 3, 1, dilations[1], dilations[1], activ=activ)
|
||||
self.conv5 = SeperableConv2DBNActiv(nin, nin, 3, 1, dilations[2], dilations[2], activ=activ)
|
||||
self.bottleneck = nn.Sequential(Conv2DBNActiv(nin * 5, nout, 1, 1, 0, activ=activ), nn.Dropout2d(0.1))
|
||||
|
||||
def forward(self, x):
|
||||
_, _, h, w = x.size()
|
||||
feat1 = F.interpolate(
|
||||
self.conv1(x), size=(h, w), mode="bilinear", align_corners=True
|
||||
)
|
||||
feat1 = F.interpolate(self.conv1(x), size=(h, w), mode="bilinear", align_corners=True)
|
||||
feat2 = self.conv2(x)
|
||||
feat3 = self.conv3(x)
|
||||
feat4 = self.conv4(x)
|
||||
|
||||
@@ -63,9 +63,7 @@ class Encoder(nn.Module):
|
||||
|
||||
|
||||
class Decoder(nn.Module):
|
||||
def __init__(
|
||||
self, nin, nout, ksize=3, stride=1, pad=1, activ=nn.ReLU, dropout=False
|
||||
):
|
||||
def __init__(self, nin, nout, ksize=3, stride=1, pad=1, activ=nn.ReLU, dropout=False):
|
||||
super(Decoder, self).__init__()
|
||||
self.conv = Conv2DBNActiv(nin, nout, ksize, 1, pad, activ=activ)
|
||||
self.dropout = nn.Dropout2d(0.1) if dropout else None
|
||||
@@ -91,24 +89,14 @@ class ASPPModule(nn.Module):
|
||||
Conv2DBNActiv(nin, nin, 1, 1, 0, activ=activ),
|
||||
)
|
||||
self.conv2 = Conv2DBNActiv(nin, nin, 1, 1, 0, activ=activ)
|
||||
self.conv3 = SeperableConv2DBNActiv(
|
||||
nin, nin, 3, 1, dilations[0], dilations[0], activ=activ
|
||||
)
|
||||
self.conv4 = SeperableConv2DBNActiv(
|
||||
nin, nin, 3, 1, dilations[1], dilations[1], activ=activ
|
||||
)
|
||||
self.conv5 = SeperableConv2DBNActiv(
|
||||
nin, nin, 3, 1, dilations[2], dilations[2], activ=activ
|
||||
)
|
||||
self.bottleneck = nn.Sequential(
|
||||
Conv2DBNActiv(nin * 5, nout, 1, 1, 0, activ=activ), nn.Dropout2d(0.1)
|
||||
)
|
||||
self.conv3 = SeperableConv2DBNActiv(nin, nin, 3, 1, dilations[0], dilations[0], activ=activ)
|
||||
self.conv4 = SeperableConv2DBNActiv(nin, nin, 3, 1, dilations[1], dilations[1], activ=activ)
|
||||
self.conv5 = SeperableConv2DBNActiv(nin, nin, 3, 1, dilations[2], dilations[2], activ=activ)
|
||||
self.bottleneck = nn.Sequential(Conv2DBNActiv(nin * 5, nout, 1, 1, 0, activ=activ), nn.Dropout2d(0.1))
|
||||
|
||||
def forward(self, x):
|
||||
_, _, h, w = x.size()
|
||||
feat1 = F.interpolate(
|
||||
self.conv1(x), size=(h, w), mode="bilinear", align_corners=True
|
||||
)
|
||||
feat1 = F.interpolate(self.conv1(x), size=(h, w), mode="bilinear", align_corners=True)
|
||||
feat2 = self.conv2(x)
|
||||
feat3 = self.conv3(x)
|
||||
feat4 = self.conv4(x)
|
||||
|
||||
@@ -63,9 +63,7 @@ class Encoder(nn.Module):
|
||||
|
||||
|
||||
class Decoder(nn.Module):
|
||||
def __init__(
|
||||
self, nin, nout, ksize=3, stride=1, pad=1, activ=nn.ReLU, dropout=False
|
||||
):
|
||||
def __init__(self, nin, nout, ksize=3, stride=1, pad=1, activ=nn.ReLU, dropout=False):
|
||||
super(Decoder, self).__init__()
|
||||
self.conv = Conv2DBNActiv(nin, nout, ksize, 1, pad, activ=activ)
|
||||
self.dropout = nn.Dropout2d(0.1) if dropout else None
|
||||
@@ -91,30 +89,16 @@ class ASPPModule(nn.Module):
|
||||
Conv2DBNActiv(nin, nin, 1, 1, 0, activ=activ),
|
||||
)
|
||||
self.conv2 = Conv2DBNActiv(nin, nin, 1, 1, 0, activ=activ)
|
||||
self.conv3 = SeperableConv2DBNActiv(
|
||||
nin, nin, 3, 1, dilations[0], dilations[0], activ=activ
|
||||
)
|
||||
self.conv4 = SeperableConv2DBNActiv(
|
||||
nin, nin, 3, 1, dilations[1], dilations[1], activ=activ
|
||||
)
|
||||
self.conv5 = SeperableConv2DBNActiv(
|
||||
nin, nin, 3, 1, dilations[2], dilations[2], activ=activ
|
||||
)
|
||||
self.conv6 = SeperableConv2DBNActiv(
|
||||
nin, nin, 3, 1, dilations[2], dilations[2], activ=activ
|
||||
)
|
||||
self.conv7 = SeperableConv2DBNActiv(
|
||||
nin, nin, 3, 1, dilations[2], dilations[2], activ=activ
|
||||
)
|
||||
self.bottleneck = nn.Sequential(
|
||||
Conv2DBNActiv(nin * 7, nout, 1, 1, 0, activ=activ), nn.Dropout2d(0.1)
|
||||
)
|
||||
self.conv3 = SeperableConv2DBNActiv(nin, nin, 3, 1, dilations[0], dilations[0], activ=activ)
|
||||
self.conv4 = SeperableConv2DBNActiv(nin, nin, 3, 1, dilations[1], dilations[1], activ=activ)
|
||||
self.conv5 = SeperableConv2DBNActiv(nin, nin, 3, 1, dilations[2], dilations[2], activ=activ)
|
||||
self.conv6 = SeperableConv2DBNActiv(nin, nin, 3, 1, dilations[2], dilations[2], activ=activ)
|
||||
self.conv7 = SeperableConv2DBNActiv(nin, nin, 3, 1, dilations[2], dilations[2], activ=activ)
|
||||
self.bottleneck = nn.Sequential(Conv2DBNActiv(nin * 7, nout, 1, 1, 0, activ=activ), nn.Dropout2d(0.1))
|
||||
|
||||
def forward(self, x):
|
||||
_, _, h, w = x.size()
|
||||
feat1 = F.interpolate(
|
||||
self.conv1(x), size=(h, w), mode="bilinear", align_corners=True
|
||||
)
|
||||
feat1 = F.interpolate(self.conv1(x), size=(h, w), mode="bilinear", align_corners=True)
|
||||
feat2 = self.conv2(x)
|
||||
feat3 = self.conv3(x)
|
||||
feat4 = self.conv4(x)
|
||||
|
||||
@@ -63,9 +63,7 @@ class Encoder(nn.Module):
|
||||
|
||||
|
||||
class Decoder(nn.Module):
|
||||
def __init__(
|
||||
self, nin, nout, ksize=3, stride=1, pad=1, activ=nn.ReLU, dropout=False
|
||||
):
|
||||
def __init__(self, nin, nout, ksize=3, stride=1, pad=1, activ=nn.ReLU, dropout=False):
|
||||
super(Decoder, self).__init__()
|
||||
self.conv = Conv2DBNActiv(nin, nout, ksize, 1, pad, activ=activ)
|
||||
self.dropout = nn.Dropout2d(0.1) if dropout else None
|
||||
@@ -91,30 +89,16 @@ class ASPPModule(nn.Module):
|
||||
Conv2DBNActiv(nin, nin, 1, 1, 0, activ=activ),
|
||||
)
|
||||
self.conv2 = Conv2DBNActiv(nin, nin, 1, 1, 0, activ=activ)
|
||||
self.conv3 = SeperableConv2DBNActiv(
|
||||
nin, nin, 3, 1, dilations[0], dilations[0], activ=activ
|
||||
)
|
||||
self.conv4 = SeperableConv2DBNActiv(
|
||||
nin, nin, 3, 1, dilations[1], dilations[1], activ=activ
|
||||
)
|
||||
self.conv5 = SeperableConv2DBNActiv(
|
||||
nin, nin, 3, 1, dilations[2], dilations[2], activ=activ
|
||||
)
|
||||
self.conv6 = SeperableConv2DBNActiv(
|
||||
nin, nin, 3, 1, dilations[2], dilations[2], activ=activ
|
||||
)
|
||||
self.conv7 = SeperableConv2DBNActiv(
|
||||
nin, nin, 3, 1, dilations[2], dilations[2], activ=activ
|
||||
)
|
||||
self.bottleneck = nn.Sequential(
|
||||
Conv2DBNActiv(nin * 7, nout, 1, 1, 0, activ=activ), nn.Dropout2d(0.1)
|
||||
)
|
||||
self.conv3 = SeperableConv2DBNActiv(nin, nin, 3, 1, dilations[0], dilations[0], activ=activ)
|
||||
self.conv4 = SeperableConv2DBNActiv(nin, nin, 3, 1, dilations[1], dilations[1], activ=activ)
|
||||
self.conv5 = SeperableConv2DBNActiv(nin, nin, 3, 1, dilations[2], dilations[2], activ=activ)
|
||||
self.conv6 = SeperableConv2DBNActiv(nin, nin, 3, 1, dilations[2], dilations[2], activ=activ)
|
||||
self.conv7 = SeperableConv2DBNActiv(nin, nin, 3, 1, dilations[2], dilations[2], activ=activ)
|
||||
self.bottleneck = nn.Sequential(Conv2DBNActiv(nin * 7, nout, 1, 1, 0, activ=activ), nn.Dropout2d(0.1))
|
||||
|
||||
def forward(self, x):
|
||||
_, _, h, w = x.size()
|
||||
feat1 = F.interpolate(
|
||||
self.conv1(x), size=(h, w), mode="bilinear", align_corners=True
|
||||
)
|
||||
feat1 = F.interpolate(self.conv1(x), size=(h, w), mode="bilinear", align_corners=True)
|
||||
feat2 = self.conv2(x)
|
||||
feat3 = self.conv3(x)
|
||||
feat4 = self.conv4(x)
|
||||
|
||||
@@ -63,9 +63,7 @@ class Encoder(nn.Module):
|
||||
|
||||
|
||||
class Decoder(nn.Module):
|
||||
def __init__(
|
||||
self, nin, nout, ksize=3, stride=1, pad=1, activ=nn.ReLU, dropout=False
|
||||
):
|
||||
def __init__(self, nin, nout, ksize=3, stride=1, pad=1, activ=nn.ReLU, dropout=False):
|
||||
super(Decoder, self).__init__()
|
||||
self.conv = Conv2DBNActiv(nin, nout, ksize, 1, pad, activ=activ)
|
||||
self.dropout = nn.Dropout2d(0.1) if dropout else None
|
||||
@@ -91,30 +89,16 @@ class ASPPModule(nn.Module):
|
||||
Conv2DBNActiv(nin, nin, 1, 1, 0, activ=activ),
|
||||
)
|
||||
self.conv2 = Conv2DBNActiv(nin, nin, 1, 1, 0, activ=activ)
|
||||
self.conv3 = SeperableConv2DBNActiv(
|
||||
nin, nin, 3, 1, dilations[0], dilations[0], activ=activ
|
||||
)
|
||||
self.conv4 = SeperableConv2DBNActiv(
|
||||
nin, nin, 3, 1, dilations[1], dilations[1], activ=activ
|
||||
)
|
||||
self.conv5 = SeperableConv2DBNActiv(
|
||||
nin, nin, 3, 1, dilations[2], dilations[2], activ=activ
|
||||
)
|
||||
self.conv6 = SeperableConv2DBNActiv(
|
||||
nin, nin, 3, 1, dilations[2], dilations[2], activ=activ
|
||||
)
|
||||
self.conv7 = SeperableConv2DBNActiv(
|
||||
nin, nin, 3, 1, dilations[2], dilations[2], activ=activ
|
||||
)
|
||||
self.bottleneck = nn.Sequential(
|
||||
Conv2DBNActiv(nin * 7, nout, 1, 1, 0, activ=activ), nn.Dropout2d(0.1)
|
||||
)
|
||||
self.conv3 = SeperableConv2DBNActiv(nin, nin, 3, 1, dilations[0], dilations[0], activ=activ)
|
||||
self.conv4 = SeperableConv2DBNActiv(nin, nin, 3, 1, dilations[1], dilations[1], activ=activ)
|
||||
self.conv5 = SeperableConv2DBNActiv(nin, nin, 3, 1, dilations[2], dilations[2], activ=activ)
|
||||
self.conv6 = SeperableConv2DBNActiv(nin, nin, 3, 1, dilations[2], dilations[2], activ=activ)
|
||||
self.conv7 = SeperableConv2DBNActiv(nin, nin, 3, 1, dilations[2], dilations[2], activ=activ)
|
||||
self.bottleneck = nn.Sequential(Conv2DBNActiv(nin * 7, nout, 1, 1, 0, activ=activ), nn.Dropout2d(0.1))
|
||||
|
||||
def forward(self, x):
|
||||
_, _, h, w = x.size()
|
||||
feat1 = F.interpolate(
|
||||
self.conv1(x), size=(h, w), mode="bilinear", align_corners=True
|
||||
)
|
||||
feat1 = F.interpolate(self.conv1(x), size=(h, w), mode="bilinear", align_corners=True)
|
||||
feat2 = self.conv2(x)
|
||||
feat3 = self.conv3(x)
|
||||
feat4 = self.conv4(x)
|
||||
|
||||
@@ -40,9 +40,7 @@ class Encoder(nn.Module):
|
||||
|
||||
|
||||
class Decoder(nn.Module):
|
||||
def __init__(
|
||||
self, nin, nout, ksize=3, stride=1, pad=1, activ=nn.ReLU, dropout=False
|
||||
):
|
||||
def __init__(self, nin, nout, ksize=3, stride=1, pad=1, activ=nn.ReLU, dropout=False):
|
||||
super(Decoder, self).__init__()
|
||||
self.conv1 = Conv2DBNActiv(nin, nout, ksize, 1, pad, activ=activ)
|
||||
# self.conv2 = Conv2DBNActiv(nout, nout, ksize, 1, pad, activ=activ)
|
||||
@@ -72,23 +70,15 @@ class ASPPModule(nn.Module):
|
||||
Conv2DBNActiv(nin, nout, 1, 1, 0, activ=activ),
|
||||
)
|
||||
self.conv2 = Conv2DBNActiv(nin, nout, 1, 1, 0, activ=activ)
|
||||
self.conv3 = Conv2DBNActiv(
|
||||
nin, nout, 3, 1, dilations[0], dilations[0], activ=activ
|
||||
)
|
||||
self.conv4 = Conv2DBNActiv(
|
||||
nin, nout, 3, 1, dilations[1], dilations[1], activ=activ
|
||||
)
|
||||
self.conv5 = Conv2DBNActiv(
|
||||
nin, nout, 3, 1, dilations[2], dilations[2], activ=activ
|
||||
)
|
||||
self.conv3 = Conv2DBNActiv(nin, nout, 3, 1, dilations[0], dilations[0], activ=activ)
|
||||
self.conv4 = Conv2DBNActiv(nin, nout, 3, 1, dilations[1], dilations[1], activ=activ)
|
||||
self.conv5 = Conv2DBNActiv(nin, nout, 3, 1, dilations[2], dilations[2], activ=activ)
|
||||
self.bottleneck = Conv2DBNActiv(nout * 5, nout, 1, 1, 0, activ=activ)
|
||||
self.dropout = nn.Dropout2d(0.1) if dropout else None
|
||||
|
||||
def forward(self, x):
|
||||
_, _, h, w = x.size()
|
||||
feat1 = F.interpolate(
|
||||
self.conv1(x), size=(h, w), mode="bilinear", align_corners=True
|
||||
)
|
||||
feat1 = F.interpolate(self.conv1(x), size=(h, w), mode="bilinear", align_corners=True)
|
||||
feat2 = self.conv2(x)
|
||||
feat3 = self.conv3(x)
|
||||
feat4 = self.conv4(x)
|
||||
@@ -106,12 +96,8 @@ class LSTMModule(nn.Module):
|
||||
def __init__(self, nin_conv, nin_lstm, nout_lstm):
|
||||
super(LSTMModule, self).__init__()
|
||||
self.conv = Conv2DBNActiv(nin_conv, 1, 1, 1, 0)
|
||||
self.lstm = nn.LSTM(
|
||||
input_size=nin_lstm, hidden_size=nout_lstm // 2, bidirectional=True
|
||||
)
|
||||
self.dense = nn.Sequential(
|
||||
nn.Linear(nout_lstm, nin_lstm), nn.BatchNorm1d(nin_lstm), nn.ReLU()
|
||||
)
|
||||
self.lstm = nn.LSTM(input_size=nin_lstm, hidden_size=nout_lstm // 2, bidirectional=True)
|
||||
self.dense = nn.Sequential(nn.Linear(nout_lstm, nin_lstm), nn.BatchNorm1d(nin_lstm), nn.ReLU())
|
||||
|
||||
def forward(self, x):
|
||||
N, _, nbins, nframes = x.size()
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
import json
|
||||
import os
|
||||
import pathlib
|
||||
|
||||
default_param = {}
|
||||
@@ -48,9 +47,7 @@ class ModelParameters(object):
|
||||
import zipfile
|
||||
|
||||
with zipfile.ZipFile(config_path, "r") as zip:
|
||||
self.param = json.loads(
|
||||
zip.read("param.json"), object_pairs_hook=int_keys
|
||||
)
|
||||
self.param = json.loads(zip.read("param.json"), object_pairs_hook=int_keys)
|
||||
elif ".json" == pathlib.Path(config_path).suffix:
|
||||
with open(config_path, "r") as f:
|
||||
self.param = json.loads(f.read(), object_pairs_hook=int_keys)
|
||||
@@ -65,5 +62,5 @@ class ModelParameters(object):
|
||||
"stereo_n",
|
||||
"reverse",
|
||||
]:
|
||||
if not k in self.param:
|
||||
if k not in self.param:
|
||||
self.param[k] = False
|
||||
|
||||
@@ -3,8 +3,6 @@ import torch
|
||||
import torch.nn.functional as F
|
||||
from torch import nn
|
||||
|
||||
from . import spec_utils
|
||||
|
||||
|
||||
class BaseASPPNet(nn.Module):
|
||||
def __init__(self, nin, ch, dilations=(4, 8, 16)):
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
import numpy as np
|
||||
import torch
|
||||
import torch.nn.functional as F
|
||||
from torch import nn
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
import numpy as np
|
||||
import torch
|
||||
import torch.nn.functional as F
|
||||
from torch import nn
|
||||
|
||||
@@ -6,9 +6,7 @@ from . import layers_new
|
||||
|
||||
|
||||
class BaseNet(nn.Module):
|
||||
def __init__(
|
||||
self, nin, nout, nin_lstm, nout_lstm, dilations=((4, 2), (8, 4), (12, 6))
|
||||
):
|
||||
def __init__(self, nin, nout, nin_lstm, nout_lstm, dilations=((4, 2), (8, 4), (12, 6))):
|
||||
super(BaseNet, self).__init__()
|
||||
self.enc1 = layers_new.Conv2DBNActiv(nin, nout, 3, 1, 1)
|
||||
self.enc2 = layers_new.Encoder(nout, nout * 2, 3, 2, 1)
|
||||
@@ -56,21 +54,15 @@ class CascadedNet(nn.Module):
|
||||
layers_new.Conv2DBNActiv(nout // 2, nout // 4, 1, 1, 0),
|
||||
)
|
||||
|
||||
self.stg1_high_band_net = BaseNet(
|
||||
2, nout // 4, self.nin_lstm // 2, nout_lstm // 2
|
||||
)
|
||||
self.stg1_high_band_net = BaseNet(2, nout // 4, self.nin_lstm // 2, nout_lstm // 2)
|
||||
|
||||
self.stg2_low_band_net = nn.Sequential(
|
||||
BaseNet(nout // 4 + 2, nout, self.nin_lstm // 2, nout_lstm),
|
||||
layers_new.Conv2DBNActiv(nout, nout // 2, 1, 1, 0),
|
||||
)
|
||||
self.stg2_high_band_net = BaseNet(
|
||||
nout // 4 + 2, nout // 2, self.nin_lstm // 2, nout_lstm // 2
|
||||
)
|
||||
self.stg2_high_band_net = BaseNet(nout // 4 + 2, nout // 2, self.nin_lstm // 2, nout_lstm // 2)
|
||||
|
||||
self.stg3_full_band_net = BaseNet(
|
||||
3 * nout // 4 + 2, nout, self.nin_lstm, nout_lstm
|
||||
)
|
||||
self.stg3_full_band_net = BaseNet(3 * nout // 4 + 2, nout, self.nin_lstm, nout_lstm)
|
||||
|
||||
self.out = nn.Conv2d(nout, 2, 1, bias=False)
|
||||
self.aux_out = nn.Conv2d(3 * nout // 4, 2, 1, bias=False)
|
||||
|
||||
@@ -27,9 +27,7 @@ def crop_center(h1, h2):
|
||||
return h1
|
||||
|
||||
|
||||
def wave_to_spectrogram(
|
||||
wave, hop_length, n_fft, mid_side=False, mid_side_b2=False, reverse=False
|
||||
):
|
||||
def wave_to_spectrogram(wave, hop_length, n_fft, mid_side=False, mid_side_b2=False, reverse=False):
|
||||
if reverse:
|
||||
wave_left = np.flip(np.asfortranarray(wave[0]))
|
||||
wave_right = np.flip(np.asfortranarray(wave[1]))
|
||||
@@ -43,7 +41,7 @@ def wave_to_spectrogram(
|
||||
wave_left = np.asfortranarray(wave[0])
|
||||
wave_right = np.asfortranarray(wave[1])
|
||||
|
||||
spec_left = librosa.stft(wave_left, n_fft=n_fft, hop_length=hop_length)
|
||||
spec_left = librosa.stft(wave_left, n_fft=n_fft, hop_length=hop_length)
|
||||
spec_right = librosa.stft(wave_right, n_fft=n_fft, hop_length=hop_length)
|
||||
|
||||
spec = np.asfortranarray([spec_left, spec_right])
|
||||
@@ -51,9 +49,7 @@ def wave_to_spectrogram(
|
||||
return spec
|
||||
|
||||
|
||||
def wave_to_spectrogram_mt(
|
||||
wave, hop_length, n_fft, mid_side=False, mid_side_b2=False, reverse=False
|
||||
):
|
||||
def wave_to_spectrogram_mt(wave, hop_length, n_fft, mid_side=False, mid_side_b2=False, reverse=False):
|
||||
import threading
|
||||
|
||||
if reverse:
|
||||
@@ -103,21 +99,13 @@ def combine_spectrograms(specs, mp):
|
||||
raise ValueError("Too much bins")
|
||||
|
||||
# lowpass fiter
|
||||
if (
|
||||
mp.param["pre_filter_start"] > 0
|
||||
): # and mp.param['band'][bands_n]['res_type'] in ['scipy', 'polyphase']:
|
||||
if mp.param["pre_filter_start"] > 0: # and mp.param['band'][bands_n]['res_type'] in ['scipy', 'polyphase']:
|
||||
if bands_n == 1:
|
||||
spec_c = fft_lp_filter(
|
||||
spec_c, mp.param["pre_filter_start"], mp.param["pre_filter_stop"]
|
||||
)
|
||||
spec_c = fft_lp_filter(spec_c, mp.param["pre_filter_start"], mp.param["pre_filter_stop"])
|
||||
else:
|
||||
gp = 1
|
||||
for b in range(
|
||||
mp.param["pre_filter_start"] + 1, mp.param["pre_filter_stop"]
|
||||
):
|
||||
g = math.pow(
|
||||
10, -(b - mp.param["pre_filter_start"]) * (3.5 - gp) / 20.0
|
||||
)
|
||||
for b in range(mp.param["pre_filter_start"] + 1, mp.param["pre_filter_stop"]):
|
||||
g = math.pow(10, -(b - mp.param["pre_filter_start"]) * (3.5 - gp) / 20.0)
|
||||
gp = g
|
||||
spec_c[:, b, :] *= g
|
||||
|
||||
@@ -189,9 +177,7 @@ def mask_silence(mag, ref, thres=0.2, min_range=64, fade_size=32):
|
||||
else:
|
||||
e += fade_size
|
||||
|
||||
mag[:, :, s + fade_size : e - fade_size] += ref[
|
||||
:, :, s + fade_size : e - fade_size
|
||||
]
|
||||
mag[:, :, s + fade_size : e - fade_size] += ref[:, :, s + fade_size : e - fade_size]
|
||||
old_e = e
|
||||
|
||||
return mag
|
||||
@@ -207,9 +193,7 @@ def cache_or_load(mix_path, inst_path, mp):
|
||||
mix_basename = os.path.splitext(os.path.basename(mix_path))[0]
|
||||
inst_basename = os.path.splitext(os.path.basename(inst_path))[0]
|
||||
|
||||
cache_dir = "mph{}".format(
|
||||
hashlib.sha1(json.dumps(mp.param, sort_keys=True).encode("utf-8")).hexdigest()
|
||||
)
|
||||
cache_dir = "mph{}".format(hashlib.sha1(json.dumps(mp.param, sort_keys=True).encode("utf-8")).hexdigest())
|
||||
mix_cache_dir = os.path.join("cache", cache_dir)
|
||||
inst_cache_dir = os.path.join("cache", cache_dir)
|
||||
|
||||
@@ -230,31 +214,27 @@ def cache_or_load(mix_path, inst_path, mp):
|
||||
|
||||
if d == len(mp.param["band"]): # high-end band
|
||||
X_wave[d], _ = librosa.load(
|
||||
mix_path,
|
||||
sr = bp["sr"],
|
||||
mono = False,
|
||||
dtype = np.float32,
|
||||
res_type = bp["res_type"]
|
||||
mix_path, sr=bp["sr"], mono=False, dtype=np.float32, res_type=bp["res_type"]
|
||||
)
|
||||
y_wave[d], _ = librosa.load(
|
||||
inst_path,
|
||||
sr = bp["sr"],
|
||||
mono = False,
|
||||
dtype = np.float32,
|
||||
res_type = bp["res_type"],
|
||||
sr=bp["sr"],
|
||||
mono=False,
|
||||
dtype=np.float32,
|
||||
res_type=bp["res_type"],
|
||||
)
|
||||
else: # lower bands
|
||||
X_wave[d] = librosa.resample(
|
||||
X_wave[d + 1],
|
||||
orig_sr = mp.param["band"][d + 1]["sr"],
|
||||
target_sr = bp["sr"],
|
||||
res_type = bp["res_type"],
|
||||
orig_sr=mp.param["band"][d + 1]["sr"],
|
||||
target_sr=bp["sr"],
|
||||
res_type=bp["res_type"],
|
||||
)
|
||||
y_wave[d] = librosa.resample(
|
||||
y_wave[d + 1],
|
||||
orig_sr = mp.param["band"][d + 1]["sr"],
|
||||
target_sr = bp["sr"],
|
||||
res_type = bp["res_type"],
|
||||
orig_sr=mp.param["band"][d + 1]["sr"],
|
||||
target_sr=bp["sr"],
|
||||
res_type=bp["res_type"],
|
||||
)
|
||||
|
||||
X_wave[d], y_wave[d] = align_wave_head_and_tail(X_wave[d], y_wave[d])
|
||||
@@ -302,9 +282,7 @@ def spectrogram_to_wave(spec, hop_length, mid_side, mid_side_b2, reverse):
|
||||
if reverse:
|
||||
return np.asfortranarray([np.flip(wave_left), np.flip(wave_right)])
|
||||
elif mid_side:
|
||||
return np.asfortranarray(
|
||||
[np.add(wave_left, wave_right / 2), np.subtract(wave_left, wave_right / 2)]
|
||||
)
|
||||
return np.asfortranarray([np.add(wave_left, wave_right / 2), np.subtract(wave_left, wave_right / 2)])
|
||||
elif mid_side_b2:
|
||||
return np.asfortranarray(
|
||||
[
|
||||
@@ -326,9 +304,7 @@ def spectrogram_to_wave_mt(spec, hop_length, mid_side, reverse, mid_side_b2):
|
||||
global wave_left
|
||||
wave_left = librosa.istft(**kwargs)
|
||||
|
||||
thread = threading.Thread(
|
||||
target=run_thread, kwargs={"stft_matrix": spec_left, "hop_length": hop_length}
|
||||
)
|
||||
thread = threading.Thread(target=run_thread, kwargs={"stft_matrix": spec_left, "hop_length": hop_length})
|
||||
thread.start()
|
||||
wave_right = librosa.istft(spec_right, hop_length=hop_length)
|
||||
thread.join()
|
||||
@@ -336,9 +312,7 @@ def spectrogram_to_wave_mt(spec, hop_length, mid_side, reverse, mid_side_b2):
|
||||
if reverse:
|
||||
return np.asfortranarray([np.flip(wave_left), np.flip(wave_right)])
|
||||
elif mid_side:
|
||||
return np.asfortranarray(
|
||||
[np.add(wave_left, wave_right / 2), np.subtract(wave_left, wave_right / 2)]
|
||||
)
|
||||
return np.asfortranarray([np.add(wave_left, wave_right / 2), np.subtract(wave_left, wave_right / 2)])
|
||||
elif mid_side_b2:
|
||||
return np.asfortranarray(
|
||||
[
|
||||
@@ -357,21 +331,15 @@ def cmb_spectrogram_to_wave(spec_m, mp, extra_bins_h=None, extra_bins=None):
|
||||
|
||||
for d in range(1, bands_n + 1):
|
||||
bp = mp.param["band"][d]
|
||||
spec_s = np.ndarray(
|
||||
shape=(2, bp["n_fft"] // 2 + 1, spec_m.shape[2]), dtype=complex
|
||||
)
|
||||
spec_s = np.ndarray(shape=(2, bp["n_fft"] // 2 + 1, spec_m.shape[2]), dtype=complex)
|
||||
h = bp["crop_stop"] - bp["crop_start"]
|
||||
spec_s[:, bp["crop_start"] : bp["crop_stop"], :] = spec_m[
|
||||
:, offset : offset + h, :
|
||||
]
|
||||
spec_s[:, bp["crop_start"] : bp["crop_stop"], :] = spec_m[:, offset : offset + h, :]
|
||||
|
||||
offset += h
|
||||
if d == bands_n: # higher
|
||||
if extra_bins_h: # if --high_end_process bypass
|
||||
max_bin = bp["n_fft"] // 2
|
||||
spec_s[:, max_bin - extra_bins_h : max_bin, :] = extra_bins[
|
||||
:, :extra_bins_h, :
|
||||
]
|
||||
spec_s[:, max_bin - extra_bins_h : max_bin, :] = extra_bins[:, :extra_bins_h, :]
|
||||
if bp["hpf_start"] > 0:
|
||||
spec_s = fft_hp_filter(spec_s, bp["hpf_start"], bp["hpf_stop"] - 1)
|
||||
if bands_n == 1:
|
||||
@@ -405,9 +373,9 @@ def cmb_spectrogram_to_wave(spec_m, mp, extra_bins_h=None, extra_bins=None):
|
||||
mp.param["mid_side_b2"],
|
||||
mp.param["reverse"],
|
||||
),
|
||||
orig_sr = bp["sr"],
|
||||
target_sr = sr,
|
||||
res_type = "sinc_fastest",
|
||||
orig_sr=bp["sr"],
|
||||
target_sr=sr,
|
||||
res_type="sinc_fastest",
|
||||
)
|
||||
else: # mid
|
||||
spec_s = fft_hp_filter(spec_s, bp["hpf_start"], bp["hpf_stop"] - 1)
|
||||
@@ -456,10 +424,7 @@ def mirroring(a, spec_m, input_high_end, mp):
|
||||
np.abs(
|
||||
spec_m[
|
||||
:,
|
||||
mp.param["pre_filter_start"]
|
||||
- 10
|
||||
- input_high_end.shape[1] : mp.param["pre_filter_start"]
|
||||
- 10,
|
||||
mp.param["pre_filter_start"] - 10 - input_high_end.shape[1] : mp.param["pre_filter_start"] - 10,
|
||||
:,
|
||||
]
|
||||
),
|
||||
@@ -467,19 +432,14 @@ def mirroring(a, spec_m, input_high_end, mp):
|
||||
)
|
||||
mirror = mirror * np.exp(1.0j * np.angle(input_high_end))
|
||||
|
||||
return np.where(
|
||||
np.abs(input_high_end) <= np.abs(mirror), input_high_end, mirror
|
||||
)
|
||||
return np.where(np.abs(input_high_end) <= np.abs(mirror), input_high_end, mirror)
|
||||
|
||||
if "mirroring2" == a:
|
||||
mirror = np.flip(
|
||||
np.abs(
|
||||
spec_m[
|
||||
:,
|
||||
mp.param["pre_filter_start"]
|
||||
- 10
|
||||
- input_high_end.shape[1] : mp.param["pre_filter_start"]
|
||||
- 10,
|
||||
mp.param["pre_filter_start"] - 10 - input_high_end.shape[1] : mp.param["pre_filter_start"] - 10,
|
||||
:,
|
||||
]
|
||||
),
|
||||
@@ -528,7 +488,6 @@ def istft(spec, hl):
|
||||
|
||||
if __name__ == "__main__":
|
||||
import argparse
|
||||
import sys
|
||||
import time
|
||||
|
||||
import cv2
|
||||
@@ -573,10 +532,10 @@ if __name__ == "__main__":
|
||||
if d == len(mp.param["band"]): # high-end band
|
||||
wave[d], _ = librosa.load(
|
||||
args.input[i],
|
||||
sr = bp["sr"],
|
||||
mono = False,
|
||||
dtype = np.float32,
|
||||
res_type = bp["res_type"],
|
||||
sr=bp["sr"],
|
||||
mono=False,
|
||||
dtype=np.float32,
|
||||
res_type=bp["res_type"],
|
||||
)
|
||||
|
||||
if len(wave[d].shape) == 1: # mono to stereo
|
||||
@@ -584,9 +543,9 @@ if __name__ == "__main__":
|
||||
else: # lower bands
|
||||
wave[d] = librosa.resample(
|
||||
wave[d + 1],
|
||||
orig_sr = mp.param["band"][d + 1]["sr"],
|
||||
target_sr = bp["sr"],
|
||||
res_type = bp["res_type"],
|
||||
orig_sr=mp.param["band"][d + 1]["sr"],
|
||||
target_sr=bp["sr"],
|
||||
res_type=bp["res_type"],
|
||||
)
|
||||
|
||||
spec[d] = wave_to_spectrogram(
|
||||
|
||||
@@ -27,9 +27,7 @@ def inference(X_spec, device, model, aggressiveness, data):
|
||||
data : dic configs
|
||||
"""
|
||||
|
||||
def _execute(
|
||||
X_mag_pad, roi_size, n_window, device, model, aggressiveness, is_half=True
|
||||
):
|
||||
def _execute(X_mag_pad, roi_size, n_window, device, model, aggressiveness, is_half=True):
|
||||
model.eval()
|
||||
with torch.no_grad():
|
||||
preds = []
|
||||
@@ -39,9 +37,7 @@ def inference(X_spec, device, model, aggressiveness, data):
|
||||
total_iterations = sum(iterations)
|
||||
for i in tqdm(range(n_window)):
|
||||
start = i * roi_size
|
||||
X_mag_window = X_mag_pad[
|
||||
None, :, :, start : start + data["window_size"]
|
||||
]
|
||||
X_mag_window = X_mag_pad[None, :, :, start : start + data["window_size"]]
|
||||
X_mag_window = torch.from_numpy(X_mag_window)
|
||||
if is_half:
|
||||
X_mag_window = X_mag_window.half()
|
||||
@@ -76,9 +72,7 @@ def inference(X_spec, device, model, aggressiveness, data):
|
||||
is_half = True
|
||||
else:
|
||||
is_half = False
|
||||
pred = _execute(
|
||||
X_mag_pad, roi_size, n_window, device, model, aggressiveness, is_half
|
||||
)
|
||||
pred = _execute(X_mag_pad, roi_size, n_window, device, model, aggressiveness, is_half)
|
||||
pred = pred[:, :, :n_frame]
|
||||
|
||||
if data["tta"]:
|
||||
@@ -88,9 +82,7 @@ def inference(X_spec, device, model, aggressiveness, data):
|
||||
|
||||
X_mag_pad = np.pad(X_mag_pre, ((0, 0), (0, 0), (pad_l, pad_r)), mode="constant")
|
||||
|
||||
pred_tta = _execute(
|
||||
X_mag_pad, roi_size, n_window, device, model, aggressiveness, is_half
|
||||
)
|
||||
pred_tta = _execute(X_mag_pad, roi_size, n_window, device, model, aggressiveness, is_half)
|
||||
pred_tta = pred_tta[:, :, roi_size // 2 :]
|
||||
pred_tta = pred_tta[:, :, :n_frame]
|
||||
|
||||
|
||||
@@ -1,26 +1,22 @@
|
||||
import logging
|
||||
import os
|
||||
import traceback
|
||||
|
||||
import gradio as gr
|
||||
import logging
|
||||
|
||||
from tools.i18n.i18n import I18nAuto
|
||||
from tools.my_utils import clean_path
|
||||
|
||||
i18n = I18nAuto()
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
import sys
|
||||
|
||||
import ffmpeg
|
||||
import torch
|
||||
import sys
|
||||
from bsroformer import Roformer_Loader
|
||||
from mdxnet import MDXNetDereverb
|
||||
from vr import AudioPre, AudioPreDeEcho
|
||||
from bsroformer import Roformer_Loader
|
||||
|
||||
try:
|
||||
import gradio.analytics as analytics
|
||||
|
||||
analytics.version_check = lambda: None
|
||||
except:
|
||||
...
|
||||
|
||||
weight_uvr5_root = "tools/uvr5/uvr5_weights"
|
||||
uvr5_names = []
|
||||
@@ -129,7 +125,7 @@ def uvr(model_name, inp_root, save_root_vocal, paths, save_root_ins, agg, format
|
||||
yield "\n".join(infos)
|
||||
|
||||
|
||||
with gr.Blocks(title="UVR5 WebUI") as app:
|
||||
with gr.Blocks(title="UVR5 WebUI", analytics_enabled=False) as app:
|
||||
gr.Markdown(
|
||||
value=i18n("本软件以MIT协议开源, 作者不对软件具备任何控制力, 使用软件者、传播软件导出的声音者自负全责.")
|
||||
+ "<br>"
|
||||
|
||||
Reference in New Issue
Block a user