Merge branch 'main' into main

2024-01-29 23:25:48 +08:00
parent 21c0ab1233 5afe04cf3a
commit d98dd2dd26
51 changed files with 5441 additions and 1101 deletions
@@ -1,4 +1,4 @@
-import platform
+import platform,os,traceback
 import ffmpeg
 import numpy as np

@@ -9,12 +9,17 @@ def load_audio(file, sr):
        # This launches a subprocess to decode audio while down-mixing and resampling as necessary.
        # Requires the ffmpeg CLI and `ffmpeg-python` package to be installed.
        file = clean_path(file)  # 防止小白拷路径头尾带了空格和"和回车
+        if os.path.exists(file) == False:
+            raise RuntimeError(
+                "You input a wrong audio path that does not exists, please fix it!"
+            )
        out, _ = (
            ffmpeg.input(file, threads=0)
            .output("-", format="f32le", acodec="pcm_f32le", ac=1, ar=sr)
            .run(cmd=["ffmpeg", "-nostdin"], capture_stdout=True, capture_stderr=True)
        )
    except Exception as e:
+        traceback.print_exc()
        raise RuntimeError(f"Failed to load audio: {e}")

    return np.frombuffer(out, np.float32).flatten()
@@ -110,6 +110,7 @@ def b_submit_change(*text_list):

 def b_delete_audio(*checkbox_list):
    global g_data_json, g_index, g_max_json_index
+    b_save_file()
    change = False
    for i, checkbox in reversed(list(enumerate(checkbox_list))):
        if g_index + i < len(g_data_json):
@@ -121,8 +122,8 @@ def b_delete_audio(*checkbox_list):
    if g_index > g_max_json_index:
        g_index = g_max_json_index
        g_index = g_index if g_index >= 0 else 0
-    # if change:
-    #     b_save_file()
+    if change:
+        b_save_file()
    # return gr.Slider(value=g_index, maximum=(g_max_json_index if g_max_json_index>=0 else 0)), *b_change_index(g_index, g_batch)
    return {"value":g_index,"__type__":"update","maximum":(g_max_json_index if g_max_json_index>=0 else 0)},*b_change_index(g_index, g_batch)

@@ -172,6 +173,7 @@ def b_audio_split(audio_breakpoint, *checkbox_list):

 def b_merge_audio(interval_r, *checkbox_list):
    global g_data_json , g_max_json_index
+    b_save_file()
    checked_index = []
    audios_path = []
    audios_text = []
@@ -110,6 +110,9 @@ class AudioPre:
        y_spec_m = pred * X_phase
        v_spec_m = X_spec_m - y_spec_m

+        if is_hp3 == True:
+            ins_root,vocal_root = vocal_root,ins_root
+
        if ins_root is not None:
            if self.data["high_end_process"].startswith("mirroring"):
                input_high_end_ = spec_utils.mirroring(
@@ -5,7 +5,8 @@ from tools.i18n.i18n import I18nAuto
 i18n = I18nAuto()

 logger = logging.getLogger(__name__)
-import ffmpeg
+import librosa
+import soundfile as sf
 import torch
 import sys
 from mdxnet import MDXNetDereverb
@@ -51,52 +52,55 @@ def uvr(model_name, inp_root, save_root_vocal, paths, save_root_ins, agg, format
            paths = [path.name for path in paths]
        for path in paths:
            inp_path = os.path.join(inp_root, path)
-            need_reformat = 1
-            done = 0
+            if(os.path.isfile(inp_path)==False):continue
            try:
-                info = ffmpeg.probe(inp_path, cmd="ffprobe")
-                if (
-                    info["streams"][0]["channels"] == 2
-                    and info["streams"][0]["sample_rate"] == "44100"
-                ):
-                    need_reformat = 0
-                    pre_fun._path_audio_(
-                        inp_path, save_root_ins, save_root_vocal, format0, is_hp3=is_hp3
+                done = 0
+                try:
+                    y, sr = librosa.load(inp_path, sr=None)
+                    info = sf.info(inp_path)
+                    channels = info.channels
+                    if channels == 2 and sr == 44100:
+                        need_reformat = 0
+                        pre_fun._path_audio_(
+                            inp_path, save_root_ins, save_root_vocal, format0, is_hp3=is_hp3
+                        )
+                        done = 1
+                    else:
+                        need_reformat = 1
+                except:
+                    need_reformat = 1
+                    traceback.print_exc()
+                if need_reformat == 1:
+                    tmp_path = "%s/%s.reformatted.wav" % (
+                        os.path.join(os.environ["TEMP"]),
+                        os.path.basename(inp_path),
                    )
-                    done = 1
-            except:
-                need_reformat = 1
-                traceback.print_exc()
-            if need_reformat == 1:
-                tmp_path = "%s/%s.reformatted.wav" % (
-                    os.path.join(os.environ["TEMP"]),
-                    os.path.basename(inp_path),
-                )
-                os.system(
-                    "ffmpeg -i %s -vn -acodec pcm_s16le -ac 2 -ar 44100 %s -y"
-                    % (inp_path, tmp_path)
-                )
-                inp_path = tmp_path
-            try:
-                if done == 0:
-                    pre_fun._path_audio_(
-                        inp_path, save_root_ins, save_root_vocal, format0
-                    )
-                infos.append("%s->Success" % (os.path.basename(inp_path)))
-                yield "\n".join(infos)
-            except:
+                    y_resampled = librosa.resample(y, sr, 44100)
+                    sf.write(tmp_path, y_resampled, 44100, "PCM_16")
+                    inp_path = tmp_path
                try:
                    if done == 0:
                        pre_fun._path_audio_(
-                            inp_path, save_root_ins, save_root_vocal, format0
+                            inp_path, save_root_ins, save_root_vocal, format0, is_hp3=is_hp3
                        )
                    infos.append("%s->Success" % (os.path.basename(inp_path)))
                    yield "\n".join(infos)
                except:
-                    infos.append(
-                        "%s->%s" % (os.path.basename(inp_path), traceback.format_exc())
-                    )
-                    yield "\n".join(infos)
+                    try:
+                        if done == 0:
+                            pre_fun._path_audio_(
+                                inp_path, save_root_ins, save_root_vocal, format0, is_hp3=is_hp3
+                            )
+                        infos.append("%s->Success" % (os.path.basename(inp_path)))
+                        yield "\n".join(infos)
+                    except:
+                        infos.append(
+                            "%s->%s" % (os.path.basename(inp_path), traceback.format_exc())
+                        )
+                        yield "\n".join(infos)
+            except:
+                infos.append("Oh my god. %s->%s"%(os.path.basename(inp_path), traceback.format_exc()))
+                yield "\n".join(infos)
    except:
        infos.append(traceback.format_exc())
        yield "\n".join(infos)
@@ -116,10 +120,10 @@ def uvr(model_name, inp_root, save_root_vocal, paths, save_root_ins, agg, format
    yield "\n".join(infos)


-with gr.Blocks(title="RVC WebUI") as app:
+with gr.Blocks(title="UVR5 WebUI") as app:
    gr.Markdown(
        value=
-            "本软件以MIT协议开源, 作者不对软件具备任何控制力, 使用软件者、传播软件导出的声音者自负全责. <br>如不认可该条款, 则不能使用或引用软件包内任何代码和文件. 详见根目录<b>LICENSE</b>."
+            i18n("本软件以MIT协议开源, 作者不对软件具备任何控制力, 使用软件者、传播软件导出的声音者自负全责. <br>如不认可该条款, 则不能使用或引用软件包内任何代码和文件. 详见根目录<b>LICENSE</b>.")
    )
    with gr.Tabs():
        with gr.TabItem(i18n("伴奏人声分离&去混响&去回声")):
@@ -144,7 +148,7 @@ with gr.Blocks(title="RVC WebUI") as app:
                            minimum=0,
                            maximum=20,
                            step=1,
-                            label="人声提取激进程度",
+                            label=i18n("人声提取激进程度"),
                            value=10,
                            interactive=True,
                            visible=False,  # 先不开放调整
@@ -183,4 +187,4 @@ app.queue(concurrency_count=511, max_size=1022).launch(
    share=is_share,
    server_port=webui_port_uvr5,
    quiet=True,
-)
+)