Refactor: Format Code with Ruff and Update Deprecated G2PW Link (#2255)

* ruff check --fix

* ruff format --line-length 120 --target-version py39

* Update the deprecated download link for the G2PW model

* Update the PyTorch version and Colab notebook
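For reproducibility, the same formatter settings can also be pinned in the project configuration instead of being passed as CLI flags each time. A minimal pyproject.toml sketch matching the flags above (the repository's actual ruff configuration is not shown in this diff, so treat this as an assumption):

[tool.ruff]
# Mirrors `ruff format --line-length 120 --target-version py39`
line-length = 120
target-version = "py39"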
Author: XXXXRT666
Date: 2025-04-07 09:42:47 +01:00
Committed by: GitHub
Parent: 9da7e17efe
Commit: 53cac93589

132 changed files with 8185 additions and 6648 deletions


@@ -86,9 +86,7 @@ def mel_spectrogram(
     key = f"{n_fft}_{num_mels}_{sampling_rate}_{hop_size}_{win_size}_{fmin}_{fmax}_{device}"
     if key not in mel_basis_cache:
-        mel = librosa_mel_fn(
-            sr=sampling_rate, n_fft=n_fft, n_mels=num_mels, fmin=fmin, fmax=fmax
-        )
+        mel = librosa_mel_fn(sr=sampling_rate, n_fft=n_fft, n_mels=num_mels, fmin=fmin, fmax=fmax)
         mel_basis_cache[key] = torch.from_numpy(mel).float().to(device)
         hann_window_cache[key] = torch.hann_window(win_size).to(device)
@@ -96,9 +94,7 @@ def mel_spectrogram(
     hann_window = hann_window_cache[key]

     padding = (n_fft - hop_size) // 2
-    y = torch.nn.functional.pad(
-        y.unsqueeze(1), (padding, padding), mode="reflect"
-    ).squeeze(1)
+    y = torch.nn.functional.pad(y.unsqueeze(1), (padding, padding), mode="reflect").squeeze(1)

     spec = torch.stft(
         y,
@@ -150,17 +146,13 @@ def get_dataset_filelist(a):
     with open(a.input_training_file, "r", encoding="utf-8") as fi:
         training_files = [
-            os.path.join(a.input_wavs_dir, x.split("|")[0] + ".wav")
-            for x in fi.read().split("\n")
-            if len(x) > 0
+            os.path.join(a.input_wavs_dir, x.split("|")[0] + ".wav") for x in fi.read().split("\n") if len(x) > 0
         ]
         print(f"first training file: {training_files[0]}")

     with open(a.input_validation_file, "r", encoding="utf-8") as fi:
         validation_files = [
-            os.path.join(a.input_wavs_dir, x.split("|")[0] + ".wav")
-            for x in fi.read().split("\n")
-            if len(x) > 0
+            os.path.join(a.input_wavs_dir, x.split("|")[0] + ".wav") for x in fi.read().split("\n") if len(x) > 0
         ]
         print(f"first validation file: {validation_files[0]}")
@@ -171,9 +163,7 @@ def get_dataset_filelist(a):
                 for x in fi.read().split("\n")
                 if len(x) > 0
             ]
-            print(
-                f"first unseen {i}th validation fileset: {unseen_validation_files[0]}"
-            )
+            print(f"first unseen {i}th validation fileset: {unseen_validation_files[0]}")
             list_unseen_validation_files.append(unseen_validation_files)

     return training_files, validation_files, list_unseen_validation_files
@@ -227,13 +217,9 @@ class MelDataset(torch.utils.data.Dataset):
print("[INFO] checking dataset integrity...")
for i in tqdm(range(len(self.audio_files))):
assert os.path.exists(
self.audio_files[i]
), f"{self.audio_files[i]} not found"
assert os.path.exists(self.audio_files[i]), f"{self.audio_files[i]} not found"
def __getitem__(
self, index: int
) -> Tuple[torch.Tensor, torch.Tensor, str, torch.Tensor]:
def __getitem__(self, index: int) -> Tuple[torch.Tensor, torch.Tensor, str, torch.Tensor]:
try:
filename = self.audio_files[index]
@@ -248,17 +234,12 @@ class MelDataset(torch.utils.data.Dataset):
             # Obtain randomized audio chunk
             if source_sampling_rate != self.sampling_rate:
                 # Adjust segment size to crop if the source sr is different
-                target_segment_size = math.ceil(
-                    self.segment_size
-                    * (source_sampling_rate / self.sampling_rate)
-                )
+                target_segment_size = math.ceil(self.segment_size * (source_sampling_rate / self.sampling_rate))
             else:
                 target_segment_size = self.segment_size

             # Compute upper bound index for the random chunk
-            random_chunk_upper_bound = max(
-                0, audio.shape[0] - target_segment_size
-            )
+            random_chunk_upper_bound = max(0, audio.shape[0] - target_segment_size)

             # Crop or pad audio to obtain random chunk with target_segment_size
             if audio.shape[0] >= target_segment_size:
@@ -318,9 +299,9 @@ class MelDataset(torch.utils.data.Dataset):
             else:
                 # For fine-tuning, assert that the waveform is in the defined sampling_rate
                 # Fine-tuning won't support on-the-fly resampling to be fool-proof (the dataset should have been prepared properly)
-                assert (
-                    source_sampling_rate == self.sampling_rate
-                ), f"For fine_tuning, waveform must be in the spcified sampling rate {self.sampling_rate}, got {source_sampling_rate}"
+                assert source_sampling_rate == self.sampling_rate, (
+                    f"For fine_tuning, waveform must be in the spcified sampling rate {self.sampling_rate}, got {source_sampling_rate}"
+                )

             # Cast ndarray to torch tensor
             audio = torch.FloatTensor(audio)
@@ -346,20 +327,14 @@ class MelDataset(torch.utils.data.Dataset):
                     mel = mel[:, :, mel_start : mel_start + frames_per_seg]
                     audio = audio[
                         :,
-                        mel_start
-                        * self.hop_size : (mel_start + frames_per_seg)
-                        * self.hop_size,
+                        mel_start * self.hop_size : (mel_start + frames_per_seg) * self.hop_size,
                     ]

                 # Pad pre-computed mel and audio to match length to ensuring fine-tuning without error.
                 # NOTE: this may introduce a single-frame misalignment of the <pre-computed mel, audio>
                 # To remove possible misalignment, it is recommended to prepare the <pre-computed mel, audio> pair where the audio length is the integer multiple of self.hop_size
-                mel = torch.nn.functional.pad(
-                    mel, (0, frames_per_seg - mel.size(2)), "constant"
-                )
-                audio = torch.nn.functional.pad(
-                    audio, (0, self.segment_size - audio.size(1)), "constant"
-                )
+                mel = torch.nn.functional.pad(mel, (0, frames_per_seg - mel.size(2)), "constant")
+                audio = torch.nn.functional.pad(audio, (0, self.segment_size - audio.size(1)), "constant")

             # Compute mel_loss used by spectral regression objective. Uses self.fmax_loss instead (usually None)
             mel_loss = mel_spectrogram(
@@ -376,9 +351,10 @@ class MelDataset(torch.utils.data.Dataset):
             # Shape sanity checks
             assert (
-                audio.shape[1] == mel.shape[2] * self.hop_size
-                and audio.shape[1] == mel_loss.shape[2] * self.hop_size
-            ), f"Audio length must be mel frame length * hop_size. Got audio shape {audio.shape} mel shape {mel.shape} mel_loss shape {mel_loss.shape}"
+                audio.shape[1] == mel.shape[2] * self.hop_size and audio.shape[1] == mel_loss.shape[2] * self.hop_size
+            ), (
+                f"Audio length must be mel frame length * hop_size. Got audio shape {audio.shape} mel shape {mel.shape} mel_loss shape {mel_loss.shape}"
+            )

             return (mel.squeeze(), audio.squeeze(0), filename, mel_loss.squeeze())
@@ -387,9 +363,7 @@ class MelDataset(torch.utils.data.Dataset):
             if self.fine_tuning:
                 raise e  # Terminate training if it is fine-tuning. The dataset should have been prepared properly.
             else:
-                print(
-                    f"[WARNING] Failed to load waveform, skipping! filename: {filename} Error: {e}"
-                )
+                print(f"[WARNING] Failed to load waveform, skipping! filename: {filename} Error: {e}")
                 return self[random.randrange(len(self))]

     def __len__(self):
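The hunks above repeatedly rely on the invariant that the shape assertions check: each mel frame accounts for exactly hop_size audio samples, so audio length must equal mel frames * hop_size. A small self-contained sketch of that arithmetic, using hypothetical values (this is not code from the repository; real values come from the training config):

import math

# Hypothetical values for illustration only.
hop_size = 256                # audio samples per mel frame
segment_size = 8192           # audio samples per training chunk (a multiple of hop_size)
sampling_rate = 24000         # rate expected by the model
source_sampling_rate = 44100  # rate of the raw file on disk

# As in the diff: when the source rate differs, crop a proportionally larger
# chunk so that resampling it down yields roughly segment_size samples.
target_segment_size = math.ceil(segment_size * (source_sampling_rate / sampling_rate))
print(target_segment_size)  # 15053

# The invariant asserted in __getitem__: one hop per mel frame.
frames_per_seg = segment_size // hop_size
assert segment_size == frames_per_seg * hop_size  # holds: 8192 == 32 * 256

# An audio length that is NOT a multiple of hop_size is what the NOTE comment
# warns about: the pre-computed mel can end up one frame off.
audio_len = 8200
print(audio_len % hop_size)  # 8 leftover samples -> potential one-frame misalignment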