Refactor: Format Code with Ruff and Update Deprecated G2PW Link (#2255)
* ruff check --fix
* ruff format --line-length 120 --target-version py39
* Change the link for the G2PW model
* Update PyTorch version and Colab
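Most of the hunks below are mechanical `ruff format` rewrites: with a 120-column limit, call sites that were previously wrapped for a shorter limit collapse back onto one line. A hypothetical before/after of that transformation (the names here are illustrative, not from this repo):

```python
# Before: wrapped to satisfy a shorter line limit
model = Model(
    hidden_dim=512, num_layers=8
).cuda()

# After `ruff format --line-length 120`: the call fits in 120 columns, so it becomes one line
model = Model(hidden_dim=512, num_layers=8).cuda()
```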
@@ -3,6 +3,7 @@
 
 import os
 import sys
 
 # to import modules from parent_dir
 parent_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
 sys.path.append(parent_dir)
@@ -24,14 +25,10 @@ def test_anti_alias_activation():
     data = torch.rand((10, 10, 200), device="cuda")
 
     # Check activations.Snake cuda vs. torch
-    fused_anti_alias_activation = activation1d.Activation1d(
-        activation=Snake(10), fused=True
-    ).cuda()
+    fused_anti_alias_activation = activation1d.Activation1d(activation=Snake(10), fused=True).cuda()
     fused_activation_output = fused_anti_alias_activation(data)
 
-    torch_anti_alias_activation = activation1d.Activation1d(
-        activation=Snake(10), fused=False
-    ).cuda()
+    torch_anti_alias_activation = activation1d.Activation1d(activation=Snake(10), fused=False).cuda()
     torch_activation_output = torch_anti_alias_activation(data)
 
     test_result = (fused_activation_output - torch_activation_output).abs()
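This hunk and the SnakeBeta hunk below follow the same fused-vs-eager comparison pattern: push identical random input through the fused CUDA module and its plain-PyTorch twin, then inspect the elementwise difference. A self-contained sketch of that pattern, reusing the test's own imports (`Snake`, `activation1d`); the tolerance here is illustrative, not the repo's threshold:

```python
import torch

from activations import Snake
from alias_free_activation.cuda import activation1d


def compare_fused_vs_torch(channels: int = 10, tol: float = 1e-3) -> float:
    # Same random input through both the fused CUDA path and the eager path.
    data = torch.rand((10, channels, 200), device="cuda")
    fused = activation1d.Activation1d(activation=Snake(channels), fused=True).cuda()
    eager = activation1d.Activation1d(activation=Snake(channels), fused=False).cuda()
    diff = (fused(data) - eager(data)).abs().max().item()
    assert diff <= tol, f"fused vs. torch mismatch: {diff}"
    return diff
```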
@@ -3,6 +3,7 @@
 
 import os
 import sys
 
 # to import modules from parent_dir
 parent_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
 sys.path.append(parent_dir)
@@ -24,14 +25,10 @@ def test_anti_alias_activation():
     data = torch.rand((10, 10, 200), device="cuda")
 
     # Check activations, Snake CUDA vs. Torch
-    fused_anti_alias_activation = activation1d.Activation1d(
-        activation=SnakeBeta(10), fused=True
-    ).cuda()
+    fused_anti_alias_activation = activation1d.Activation1d(activation=SnakeBeta(10), fused=True).cuda()
     fused_activation_output = fused_anti_alias_activation(data)
 
-    torch_anti_alias_activation = activation1d.Activation1d(
-        activation=SnakeBeta(10), fused=False
-    ).cuda()
+    torch_anti_alias_activation = activation1d.Activation1d(activation=SnakeBeta(10), fused=False).cuda()
     torch_activation_output = torch_anti_alias_activation(data)
 
     test_result = (fused_activation_output - torch_activation_output).abs()
@@ -57,7 +54,6 @@ def test_anti_alias_activation():
     )
 
 
-
 if __name__ == "__main__":
     from alias_free_activation.cuda import load
 
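The hunk above only tightens blank lines around the `__main__` block, but it shows how the test is launched: `alias_free_activation.cuda.load` JIT-compiles the fused CUDA extension before the test body runs. A sketch of that entry point; the `load.load()` call and the final test invocation are assumptions based on the import, not shown in this hunk:

```python
if __name__ == "__main__":
    from alias_free_activation.cuda import load

    load.load()  # assumed: JIT-build the fused anti-alias CUDA extension
    test_anti_alias_activation()  # assumed entry call
```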
@@ -42,9 +42,7 @@ def generate_soundwave(duration=5.0, sr=24000):
 
 
 def get_mel(x, h):
-    return mel_spectrogram(
-        x, h.n_fft, h.num_mels, h.sampling_rate, h.hop_size, h.win_size, h.fmin, h.fmax
-    )
+    return mel_spectrogram(x, h.n_fft, h.num_mels, h.sampling_rate, h.hop_size, h.win_size, h.fmin, h.fmax)
 
 
 def load_checkpoint(filepath, device):
@@ -56,9 +54,7 @@ def load_checkpoint(filepath, device):
 
 
 if __name__ == "__main__":
-    parser = argparse.ArgumentParser(
-        description="Test script to check CUDA kernel correctness."
-    )
+    parser = argparse.ArgumentParser(description="Test script to check CUDA kernel correctness.")
     parser.add_argument(
         "--checkpoint_file",
         type=str,
@@ -91,27 +87,25 @@ if __name__ == "__main__":
     # define number of samples and length of mel frame to benchmark
     num_sample = 10
     num_mel_frame = 16384
 
     # CUDA kernel correctness check
     diff = 0.0
     for i in tqdm(range(num_sample)):
         # Random mel
         data = torch.rand((1, h.num_mels, num_mel_frame), device="cuda")
 
         with torch.inference_mode():
             audio_original = generator_original(data)
 
         with torch.inference_mode():
             audio_cuda_kernel = generator_cuda_kernel(data)
 
         # Both outputs should be (almost) the same
         test_result = (audio_original - audio_cuda_kernel).abs()
         diff += test_result.mean(dim=-1).item()
 
     diff /= num_sample
-    if (
-        diff <= 2e-3
-    ):  # We can expect a small difference (~1e-3) which does not affect perceptual quality
+    if diff <= 2e-3:  # We can expect a small difference (~1e-3) which does not affect perceptual quality
         print(
             f"\n[Success] test CUDA fused vs. plain torch BigVGAN inference"
             f"\n > mean_difference={diff}"
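The pass criterion in this hunk accumulates a per-sample mean absolute error and compares the average against 2e-3. A standalone version of that reduction; shapes are illustrative and assume single-channel `(1, 1, T)` audio, where `.mean(dim=-1)` leaves a single-element tensor that `.item()` can unwrap:

```python
import torch


def mean_abs_diff(a: torch.Tensor, b: torch.Tensor) -> float:
    # Mean absolute error over the time axis; (1, 1, T) -> (1, 1) -> float.
    return (a - b).abs().mean(dim=-1).item()


x = torch.zeros(1, 1, 100)
y = torch.full((1, 1, 100), 1e-4)
assert mean_abs_diff(x, y) <= 2e-3  # same tolerance as the check above
```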
@@ -125,9 +119,9 @@ if __name__ == "__main__":
             f"\n > fused_values={audio_cuda_kernel[-1][-1][-30:].tolist()}, "
             f"\n > torch_values={audio_original[-1][-1][-30:].tolist()}"
         )
 
     del data, audio_original, audio_cuda_kernel
 
     # Variables for tracking total time and VRAM usage
     toc_total_original = 0
     toc_total_cuda_kernel = 0
@@ -145,10 +139,10 @@ if __name__ == "__main__":
             audio_original = generator_original(data)
         torch.cuda.synchronize()
         toc = time() - tic
         toc_total_original += toc
 
         vram_used_original_total += torch.cuda.max_memory_allocated(device="cuda")
 
         del data, audio_original
         torch.cuda.empty_cache()
 
@@ -163,11 +157,11 @@ if __name__ == "__main__":
         torch.cuda.synchronize()
         toc = time() - tic
         toc_total_cuda_kernel += toc
 
         audio_length_total += audio_cuda_kernel.shape[-1]
 
         vram_used_cuda_kernel_total += torch.cuda.max_memory_allocated(device="cuda")
 
         del data, audio_cuda_kernel
         torch.cuda.empty_cache()
 
@@ -175,8 +169,8 @@ if __name__ == "__main__":
     audio_second = audio_length_total / h.sampling_rate
     khz_original = audio_length_total / toc_total_original / 1000
     khz_cuda_kernel = audio_length_total / toc_total_cuda_kernel / 1000
-    vram_used_original_gb = vram_used_original_total / num_sample / (1024 ** 3)
-    vram_used_cuda_kernel_gb = vram_used_cuda_kernel_total / num_sample / (1024 ** 3)
+    vram_used_original_gb = vram_used_original_total / num_sample / (1024**3)
+    vram_used_cuda_kernel_gb = vram_used_cuda_kernel_total / num_sample / (1024**3)
 
     # Print results
     print(
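The substantive change in this last hunk is purely cosmetic: `ruff format` hugs `**` when both operands are simple, which cannot change the result because `**` binds tighter than `/` either way. A quick sanity check of the GiB arithmetic:

```python
# 1024**3 is one GiB in bytes, with or without spaces around the operator
assert 1024 ** 3 == 1024**3 == 1_073_741_824

vram_total = 3 * 1024**3  # hypothetical: 3 GiB allocated across all samples
print(vram_total / 10 / (1024**3))  # per-sample GiB for num_sample=10 -> 0.3
```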