Add files via upload

2024-01-16 17:38:48 +08:00
parent 143d32f621
commit 41ca6028d6
65 changed files with 139856 additions and 0 deletions
--- a/GPT_SoVITS/configs/s1.yaml
+++ b/GPT_SoVITS/configs/s1.yaml
@@ -0,0 +1,31 @@
+train:  # training-loop settings for the base s1 config
+  seed: 1234
+  epochs: 300
+  batch_size: 8
+  gradient_accumulation: 4
+  save_every_n_epoch: 1
+  precision: '16-mixed'  # was bare 16; now spelled like s1big/s1big2/s1longer
+  gradient_clip: 1.0
+optimizer:  # looks like a warmup-then-decay LR schedule -- TODO confirm
+  lr: 0.01
+  lr_init: 0.00001
+  lr_end: 0.0001
+  warmup_steps: 2000
+  decay_steps: 40000
+data:
+  max_eval_sample: 8
+  max_sec: 54  # presumably a per-sample duration cap in seconds -- confirm
+  num_workers: 1
+  pad_val: 1024  # same value as model.EOS
+model:
+  vocab_size: 1025
+  phoneme_vocab_size: 512
+  embedding_dim: 512
+  hidden_dim: 512
+  head: 16
+  linear_units: 2048
+  n_layer: 12
+  dropout: 0
+  EOS: 1024  # keep in sync with data.pad_val
+inference:
+  top_k: 5
--- a/GPT_SoVITS/configs/s1big.yaml
+++ b/GPT_SoVITS/configs/s1big.yaml
@@ -0,0 +1,31 @@
+train:  # training-loop settings for the wider (1024-dim) s1 variant
+  seed: 1234
+  epochs: 300
+  batch_size: 8
+  gradient_accumulation: 4
+  save_every_n_epoch: 1
+  precision: '16-mixed'  # quoted so it is unambiguously a string
+  gradient_clip: 1.0
+optimizer:  # looks like a warmup-then-decay LR schedule -- TODO confirm
+  lr: 0.01
+  lr_init: 0.00001
+  lr_end: 0.0001
+  warmup_steps: 2000
+  decay_steps: 40000
+data:
+  max_eval_sample: 8
+  max_sec: 54  # presumably a per-sample duration cap in seconds -- confirm
+  num_workers: 1
+  pad_val: 1024  # same value as model.EOS
+model:
+  vocab_size: 1025
+  phoneme_vocab_size: 512
+  embedding_dim: 1024
+  hidden_dim: 1024
+  head: 16
+  linear_units: 2048
+  n_layer: 16
+  dropout: 0
+  EOS: 1024  # keep in sync with data.pad_val
+inference:
+  top_k: 5
--- a/GPT_SoVITS/configs/s1big2.yaml
+++ b/GPT_SoVITS/configs/s1big2.yaml
@@ -0,0 +1,31 @@
+train:  # training-loop settings for the wide-but-shallow (6-layer) s1 variant
+  seed: 1234
+  epochs: 300
+  batch_size: 12
+  gradient_accumulation: 4
+  save_every_n_epoch: 1
+  precision: '16-mixed'  # quoted so it is unambiguously a string
+  gradient_clip: 1.0
+optimizer:  # looks like a warmup-then-decay LR schedule -- TODO confirm
+  lr: 0.01
+  lr_init: 0.00001
+  lr_end: 0.0001
+  warmup_steps: 2000
+  decay_steps: 40000
+data:
+  max_eval_sample: 8
+  max_sec: 54  # presumably a per-sample duration cap in seconds -- confirm
+  num_workers: 1
+  pad_val: 1024  # same value as model.EOS
+model:
+  vocab_size: 1025
+  phoneme_vocab_size: 512
+  embedding_dim: 1024
+  hidden_dim: 1024
+  head: 16
+  linear_units: 2048
+  n_layer: 6
+  dropout: 0
+  EOS: 1024  # keep in sync with data.pad_val
+inference:
+  top_k: 5
--- a/GPT_SoVITS/configs/s1longer.yaml
+++ b/GPT_SoVITS/configs/s1longer.yaml
@@ -0,0 +1,31 @@
+train:  # training-loop settings for the deeper (24-layer) s1 variant
+  seed: 1234
+  epochs: 20
+  batch_size: 8  # NOTE(review): no gradient_accumulation key here, unlike s1.yaml -- confirm the default
+  save_every_n_epoch: 1
+  precision: '16-mixed'  # quoted so it is unambiguously a string
+  gradient_clip: 1.0
+optimizer:  # looks like a warmup-then-decay LR schedule -- TODO confirm
+  lr: 0.01
+  lr_init: 0.00001
+  lr_end: 0.0001
+  warmup_steps: 2000
+  decay_steps: 40000
+data:
+  max_eval_sample: 8
+  max_sec: 54  # presumably a per-sample duration cap in seconds -- confirm
+  num_workers: 4
+  pad_val: 1024  # same value as model.EOS
+model:
+  vocab_size: 1025
+  phoneme_vocab_size: 512
+  embedding_dim: 512
+  hidden_dim: 512
+  head: 16
+  linear_units: 2048
+  n_layer: 24
+  dropout: 0
+  EOS: 1024  # keep in sync with data.pad_val
+  random_bert: 0
+inference:
+  top_k: 5
--- a/GPT_SoVITS/configs/s1mq.yaml
+++ b/GPT_SoVITS/configs/s1mq.yaml
@@ -0,0 +1,77 @@
+train:
+  seed: 1234
+  epochs: 100
+  batch_size: 6  # NOTE(review): model.batch_size is 200.0 -- confirm which one is read
+  gradient_accumulation: 4
+  save_every_n_epoch: 1
+  precision: 32  # NOTE(review): model.precision is 'fp16' -- confirm which one is read
+  gradient_clip: 1.0
+optimizer:
+  lr: 0.01  # NOTE(review): model.lr is 0.0001 -- confirm which one is read
+  lr_init: 0.00001
+  lr_end: 0.0001
+  warmup_steps: 2000
+  decay_steps: 40000
+data:
+  max_eval_sample: 8
+  max_sec: 40
+  num_workers: 1
+  pad_val: 1024  # original note: same as EOS in model; this file's model section has no EOS key
+model:
+  saving_path: 'ckpt/'
+  resume_checkpoint: null
+  vocoder_config_path: 'quantizer/new_ckpt/config.json'
+  vocoder_ckpt_path: 'quantizer/new_ckpt/g_00600000'
+  datadir: '/home/liweiche/GigaSpeech/wavs'  # machine-specific absolute path
+  metapath: '/home/liweiche/GigaSpeech/train2.json'  # machine-specific absolute path
+  val_metapath: '/home/liweiche/GigaSpeech/dev2.json'  # machine-specific absolute path
+  sampledir: 'logs/'
+  pretrained_path: null
+  lr: 0.0001
+  batch_size: 200.0
+  train_bucket_size: 8192
+  training_step: 800000
+  optim_flat_percent: 0.0
+  warmup_step: 50
+  adam_beta1: 0.9
+  adam_beta2: 0.98
+  ffd_size: 3072
+  hidden_size: 768
+  enc_nlayers: 6
+  dec_nlayers: 6
+  nheads: 12
+  ar_layer: 4
+  ar_ffd_size: 1024
+  ar_hidden_size: 256
+  ar_nheads: 4
+  aligner_softmax_temp: 1.0
+  layer_norm_eps: 0.00001
+  speaker_embed_dropout: 0.05
+  label_smoothing: 0.0
+  val_check_interval: 5000
+  check_val_every_n_epoch: 1
+  precision: 'fp16'
+  nworkers: 16
+  distributed: true
+  accelerator: 'ddp'
+  version: null
+  accumulate_grad_batches: 1
+  use_repetition_token: true
+  use_repetition_gating: false
+  repetition_penalty: 1.0
+  sampling_temperature: 1.0
+  top_k: -1  # NOTE(review): inference.top_k is 5 -- confirm which one is read
+  min_top_k: 3
+  top_p: 0.8
+  sample_num: 4
+  length_penalty_max_length: 15000
+  length_penalty_max_prob: 0.95
+  max_input_length: 2048
+  max_output_length: 2000
+  sample_rate: 16000
+  n_codes: 1024
+  n_cluster_groups: 1
+  phone_context_window: 4
+  phoneset_size: 1000
+inference:
+  top_k: 5
--- a/GPT_SoVITS/configs/s2.json
+++ b/GPT_SoVITS/configs/s2.json
@@ -0,0 +1,90 @@
+{
+  "train": {
+    "log_interval": 100,
+    "eval_interval": 500,
+    "seed": 1234,
+    "epochs": 100,
+    "learning_rate": 0.0001,
+    "betas": [
+      0.8,
+      0.99
+    ],
+    "eps": 1e-09,
+    "batch_size": 32,
+    "fp16_run": true,
+    "lr_decay": 0.999875,
+    "segment_size": 20480,
+    "init_lr_ratio": 1,
+    "warmup_epochs": 0,
+    "c_mel": 45,
+    "c_kl": 1.0,
+    "text_low_lr_rate": 0.4
+  },
+  "data": {
+    "max_wav_value": 32768.0,
+    "sampling_rate": 32000,
+    "filter_length": 2048,
+    "hop_length": 640,
+    "win_length": 2048,
+    "n_mel_channels": 128,
+    "mel_fmin": 0.0,
+    "mel_fmax": null,
+    "add_blank": true,
+    "n_speakers": 300,
+    "cleaned_text": true
+  },
+  "model": {
+    "inter_channels": 192,
+    "hidden_channels": 192,
+    "filter_channels": 768,
+    "n_heads": 2,
+    "n_layers": 6,
+    "kernel_size": 3,
+    "p_dropout": 0.1,
+    "resblock": "1",
+    "resblock_kernel_sizes": [
+      3,
+      7,
+      11
+    ],
+    "resblock_dilation_sizes": [
+      [
+        1,
+        3,
+        5
+      ],
+      [
+        1,
+        3,
+        5
+      ],
+      [
+        1,
+        3,
+        5
+      ]
+    ],
+    "upsample_rates": [
+      10,
+      8,
+      2,
+      2,
+      2
+    ],
+    "upsample_initial_channel": 512,
+    "upsample_kernel_sizes": [
+      16,
+      16,
+      8,
+      2,
+      2
+    ],
+    "n_layers_q": 3,
+    "use_spectral_norm": false,
+    "gin_channels": 512,
+    "semantic_frame_rate": "25hz",
+    "freeze_quantizer": true
+  },
+  "s2_ckpt_dir": "logs/s2/big2k1",
+  "content_module": "cnhubert"
+}
--- a/GPT_SoVITS/configs/train.yaml
+++ b/GPT_SoVITS/configs/train.yaml
@@ -0,0 +1,32 @@
+gpu:
+  n_card: 1
+  n_process_per_card: 2
+io:  # NOTE(review): keys after text_path sit under train: in the s1*.yaml configs -- confirm the reader
+  text_path: 'D:\RVC1006\GPT-SoVITS\GPT_SoVITS'  # machine-specific Windows path; single quotes keep backslashes literal
+  save_every_n_epoch: 1
+  precision: '16-mixed'  # quoted so it is unambiguously a string
+  gradient_clip: 1.0
+optimizer:  # looks like a warmup-then-decay LR schedule -- TODO confirm
+  lr: 0.01
+  lr_init: 0.00001
+  lr_end: 0.0001
+  warmup_steps: 2000
+  decay_steps: 40000
+data:
+  max_eval_sample: 8
+  max_sec: 54  # presumably a per-sample duration cap in seconds -- confirm
+  num_workers: 1
+  pad_val: 1024  # same value as model.EOS
+model:
+  vocab_size: 1025
+  phoneme_vocab_size: 512
+  embedding_dim: 512
+  hidden_dim: 512
+  head: 16
+  linear_units: 2048
+  n_layer: 24
+  dropout: 0
+  EOS: 1024  # keep in sync with data.pad_val
+  random_bert: 0
+inference:
+  top_k: 5