Refactor: Format Code with Ruff and Update Deprecated G2PW Link (#2255)
* ruff check --fix
* ruff format --line-length 120 --target-version py39
* Change the link for the G2PW model
* Update PyTorch version and Colab
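Most of the hunks below are mechanical `ruff format` rewrites: with a 120-column limit, call sites that were previously wrapped for a shorter limit collapse back onto one line. A hypothetical before/after of that transformation (the names here are illustrative, not from this repo):

```python
# Before: wrapped to satisfy a shorter line limit
model = Model(
    hidden_dim=512, num_layers=8
).cuda()

# After `ruff format --line-length 120`: the call fits in 120 columns, so it becomes one line
model = Model(hidden_dim=512, num_layers=8).cuda()
```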
@@ -3,6 +3,7 @@
 
 import os
 import sys
 
 # to import modules from parent_dir
 parent_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
 sys.path.append(parent_dir)
@@ -24,14 +25,10 @@ def test_anti_alias_activation():
     data = torch.rand((10, 10, 200), device="cuda")
 
     # Check activations.Snake cuda vs. torch
-    fused_anti_alias_activation = activation1d.Activation1d(
-        activation=Snake(10), fused=True
-    ).cuda()
+    fused_anti_alias_activation = activation1d.Activation1d(activation=Snake(10), fused=True).cuda()
     fused_activation_output = fused_anti_alias_activation(data)
 
-    torch_anti_alias_activation = activation1d.Activation1d(
-        activation=Snake(10), fused=False
-    ).cuda()
+    torch_anti_alias_activation = activation1d.Activation1d(activation=Snake(10), fused=False).cuda()
     torch_activation_output = torch_anti_alias_activation(data)
 
     test_result = (fused_activation_output - torch_activation_output).abs()
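This hunk and the SnakeBeta hunk below follow the same fused-vs-eager comparison pattern: push identical random input through the fused CUDA module and its plain-PyTorch twin, then inspect the elementwise difference. A self-contained sketch of that pattern, reusing the test's own imports (`Snake`, `activation1d`); the tolerance here is illustrative, not the repo's threshold:

```python
import torch

from activations import Snake
from alias_free_activation.cuda import activation1d


def compare_fused_vs_torch(channels: int = 10, tol: float = 1e-3) -> float:
    # Same random input through both the fused CUDA path and the eager path.
    data = torch.rand((10, channels, 200), device="cuda")
    fused = activation1d.Activation1d(activation=Snake(channels), fused=True).cuda()
    eager = activation1d.Activation1d(activation=Snake(channels), fused=False).cuda()
    diff = (fused(data) - eager(data)).abs().max().item()
    assert diff <= tol, f"fused vs. torch mismatch: {diff}"
    return diff
```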
@@ -3,6 +3,7 @@
 
 import os
 import sys
 
 # to import modules from parent_dir
 parent_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
 sys.path.append(parent_dir)
@@ -24,14 +25,10 @@ def test_anti_alias_activation():
     data = torch.rand((10, 10, 200), device="cuda")
 
     # Check activations, Snake CUDA vs. Torch
-    fused_anti_alias_activation = activation1d.Activation1d(
-        activation=SnakeBeta(10), fused=True
-    ).cuda()
+    fused_anti_alias_activation = activation1d.Activation1d(activation=SnakeBeta(10), fused=True).cuda()
     fused_activation_output = fused_anti_alias_activation(data)
 
-    torch_anti_alias_activation = activation1d.Activation1d(
-        activation=SnakeBeta(10), fused=False
-    ).cuda()
+    torch_anti_alias_activation = activation1d.Activation1d(activation=SnakeBeta(10), fused=False).cuda()
     torch_activation_output = torch_anti_alias_activation(data)
 
     test_result = (fused_activation_output - torch_activation_output).abs()
@@ -57,7 +54,6 @@ def test_anti_alias_activation():
     )
 
 
-
 if __name__ == "__main__":
     from alias_free_activation.cuda import load
 
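The hunk above only tightens blank lines around the `__main__` block, but it shows how the test is launched: `alias_free_activation.cuda.load` JIT-compiles the fused CUDA extension before the test body runs. A sketch of that entry point; the `load.load()` call and the final test invocation are assumptions based on the import, not shown in this hunk:

```python
if __name__ == "__main__":
    from alias_free_activation.cuda import load

    load.load()  # assumed: JIT-build the fused anti-alias CUDA extension
    test_anti_alias_activation()  # assumed entry call
```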
@@ -42,9 +42,7 @@ def generate_soundwave(duration=5.0, sr=24000):
 
 
 def get_mel(x, h):
-    return mel_spectrogram(
-        x, h.n_fft, h.num_mels, h.sampling_rate, h.hop_size, h.win_size, h.fmin, h.fmax
-    )
+    return mel_spectrogram(x, h.n_fft, h.num_mels, h.sampling_rate, h.hop_size, h.win_size, h.fmin, h.fmax)
 
 
 def load_checkpoint(filepath, device):
@@ -56,9 +54,7 @@ def load_checkpoint(filepath, device):
 
 
 if __name__ == "__main__":
-    parser = argparse.ArgumentParser(
-        description="Test script to check CUDA kernel correctness."
-    )
+    parser = argparse.ArgumentParser(description="Test script to check CUDA kernel correctness.")
     parser.add_argument(
         "--checkpoint_file",
         type=str,
@@ -91,27 +87,25 @@ if __name__ == "__main__":
     # define number of samples and length of mel frame to benchmark
     num_sample = 10
     num_mel_frame = 16384
 
     # CUDA kernel correctness check
     diff = 0.0
     for i in tqdm(range(num_sample)):
         # Random mel
         data = torch.rand((1, h.num_mels, num_mel_frame), device="cuda")
 
         with torch.inference_mode():
             audio_original = generator_original(data)
 
         with torch.inference_mode():
             audio_cuda_kernel = generator_cuda_kernel(data)
 
         # Both outputs should be (almost) the same
         test_result = (audio_original - audio_cuda_kernel).abs()
         diff += test_result.mean(dim=-1).item()
 
     diff /= num_sample
-    if (
-        diff <= 2e-3
-    ):  # We can expect a small difference (~1e-3) which does not affect perceptual quality
+    if diff <= 2e-3:  # We can expect a small difference (~1e-3) which does not affect perceptual quality
         print(
             f"\n[Success] test CUDA fused vs. plain torch BigVGAN inference"
             f"\n > mean_difference={diff}"
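The pass criterion in this hunk accumulates a per-sample mean absolute error and compares the average against 2e-3. A standalone version of that reduction; shapes are illustrative and assume single-channel `(1, 1, T)` audio, where `.mean(dim=-1)` leaves a single-element tensor that `.item()` can unwrap:

```python
import torch


def mean_abs_diff(a: torch.Tensor, b: torch.Tensor) -> float:
    # Mean absolute error over the time axis; (1, 1, T) -> (1, 1) -> float.
    return (a - b).abs().mean(dim=-1).item()


x = torch.zeros(1, 1, 100)
y = torch.full((1, 1, 100), 1e-4)
assert mean_abs_diff(x, y) <= 2e-3  # same tolerance as the check above
```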
@@ -125,9 +119,9 @@ if __name__ == "__main__":
             f"\n > fused_values={audio_cuda_kernel[-1][-1][-30:].tolist()}, "
             f"\n > torch_values={audio_original[-1][-1][-30:].tolist()}"
         )
 
     del data, audio_original, audio_cuda_kernel
 
     # Variables for tracking total time and VRAM usage
     toc_total_original = 0
     toc_total_cuda_kernel = 0
@@ -145,10 +139,10 @@ if __name__ == "__main__":
             audio_original = generator_original(data)
         torch.cuda.synchronize()
         toc = time() - tic
         toc_total_original += toc
 
         vram_used_original_total += torch.cuda.max_memory_allocated(device="cuda")
 
         del data, audio_original
         torch.cuda.empty_cache()
 
@@ -163,11 +157,11 @@ if __name__ == "__main__":
         torch.cuda.synchronize()
         toc = time() - tic
         toc_total_cuda_kernel += toc
 
         audio_length_total += audio_cuda_kernel.shape[-1]
 
         vram_used_cuda_kernel_total += torch.cuda.max_memory_allocated(device="cuda")
 
         del data, audio_cuda_kernel
         torch.cuda.empty_cache()
 
@@ -175,8 +169,8 @@ if __name__ == "__main__":
     audio_second = audio_length_total / h.sampling_rate
     khz_original = audio_length_total / toc_total_original / 1000
     khz_cuda_kernel = audio_length_total / toc_total_cuda_kernel / 1000
-    vram_used_original_gb = vram_used_original_total / num_sample / (1024 ** 3)
-    vram_used_cuda_kernel_gb = vram_used_cuda_kernel_total / num_sample / (1024 ** 3)
+    vram_used_original_gb = vram_used_original_total / num_sample / (1024**3)
+    vram_used_cuda_kernel_gb = vram_used_cuda_kernel_total / num_sample / (1024**3)
 
     # Print results
     print(
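The substantive change in this last hunk is purely cosmetic: `ruff format` hugs `**` when both operands are simple, which cannot change the result because `**` binds tighter than `/` either way. A quick sanity check of the GiB arithmetic:

```python
# 1024**3 is one GiB in bytes, with or without spaces around the operator
assert 1024 ** 3 == 1024**3 == 1_073_741_824

vram_total = 3 * 1024**3  # hypothetical: 3 GiB allocated across all samples
print(vram_total / 10 / (1024**3))  # per-sample GiB for num_sample=10 -> 0.3
```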