Add files via upload
This commit is contained in:
31
GPT_SoVITS/configs/s1.yaml
Normal file
31
GPT_SoVITS/configs/s1.yaml
Normal file
@@ -0,0 +1,31 @@
|
||||
train:
|
||||
seed: 1234
|
||||
epochs: 300
|
||||
batch_size: 8
|
||||
gradient_accumulation: 4
|
||||
save_every_n_epoch: 1
|
||||
precision: 16
|
||||
gradient_clip: 1.0
|
||||
optimizer:
|
||||
lr: 0.01
|
||||
lr_init: 0.00001
|
||||
lr_end: 0.0001
|
||||
warmup_steps: 2000
|
||||
decay_steps: 40000
|
||||
data:
|
||||
max_eval_sample: 8
|
||||
max_sec: 54
|
||||
num_workers: 1
|
||||
pad_val: 1024 # same as EOS in model
|
||||
model:
|
||||
vocab_size: 1025
|
||||
phoneme_vocab_size: 512
|
||||
embedding_dim: 512
|
||||
hidden_dim: 512
|
||||
head: 16
|
||||
linear_units: 2048
|
||||
n_layer: 12
|
||||
dropout: 0
|
||||
EOS: 1024
|
||||
inference:
|
||||
top_k: 5
|
||||
31
GPT_SoVITS/configs/s1big.yaml
Normal file
31
GPT_SoVITS/configs/s1big.yaml
Normal file
@@ -0,0 +1,31 @@
|
||||
train:
|
||||
seed: 1234
|
||||
epochs: 300
|
||||
batch_size: 8
|
||||
gradient_accumulation: 4
|
||||
save_every_n_epoch: 1
|
||||
precision: 16-mixed
|
||||
gradient_clip: 1.0
|
||||
optimizer:
|
||||
lr: 0.01
|
||||
lr_init: 0.00001
|
||||
lr_end: 0.0001
|
||||
warmup_steps: 2000
|
||||
decay_steps: 40000
|
||||
data:
|
||||
max_eval_sample: 8
|
||||
max_sec: 54
|
||||
num_workers: 1
|
||||
pad_val: 1024 # same as EOS in model
|
||||
model:
|
||||
vocab_size: 1025
|
||||
phoneme_vocab_size: 512
|
||||
embedding_dim: 1024
|
||||
hidden_dim: 1024
|
||||
head: 16
|
||||
linear_units: 2048
|
||||
n_layer: 16
|
||||
dropout: 0
|
||||
EOS: 1024
|
||||
inference:
|
||||
top_k: 5
|
||||
31
GPT_SoVITS/configs/s1big2.yaml
Normal file
31
GPT_SoVITS/configs/s1big2.yaml
Normal file
@@ -0,0 +1,31 @@
|
||||
train:
|
||||
seed: 1234
|
||||
epochs: 300
|
||||
batch_size: 12
|
||||
gradient_accumulation: 4
|
||||
save_every_n_epoch: 1
|
||||
precision: 16-mixed
|
||||
gradient_clip: 1.0
|
||||
optimizer:
|
||||
lr: 0.01
|
||||
lr_init: 0.00001
|
||||
lr_end: 0.0001
|
||||
warmup_steps: 2000
|
||||
decay_steps: 40000
|
||||
data:
|
||||
max_eval_sample: 8
|
||||
max_sec: 54
|
||||
num_workers: 1
|
||||
pad_val: 1024 # same as EOS in model
|
||||
model:
|
||||
vocab_size: 1025
|
||||
phoneme_vocab_size: 512
|
||||
embedding_dim: 1024
|
||||
hidden_dim: 1024
|
||||
head: 16
|
||||
linear_units: 2048
|
||||
n_layer: 6
|
||||
dropout: 0
|
||||
EOS: 1024
|
||||
inference:
|
||||
top_k: 5
|
||||
31
GPT_SoVITS/configs/s1longer.yaml
Normal file
31
GPT_SoVITS/configs/s1longer.yaml
Normal file
@@ -0,0 +1,31 @@
|
||||
train:
|
||||
seed: 1234
|
||||
epochs: 20
|
||||
batch_size: 8
|
||||
save_every_n_epoch: 1
|
||||
precision: 16-mixed
|
||||
gradient_clip: 1.0
|
||||
optimizer:
|
||||
lr: 0.01
|
||||
lr_init: 0.00001
|
||||
lr_end: 0.0001
|
||||
warmup_steps: 2000
|
||||
decay_steps: 40000
|
||||
data:
|
||||
max_eval_sample: 8
|
||||
max_sec: 54
|
||||
num_workers: 4
|
||||
pad_val: 1024 # same as EOS in model
|
||||
model:
|
||||
vocab_size: 1025
|
||||
phoneme_vocab_size: 512
|
||||
embedding_dim: 512
|
||||
hidden_dim: 512
|
||||
head: 16
|
||||
linear_units: 2048
|
||||
n_layer: 24
|
||||
dropout: 0
|
||||
EOS: 1024
|
||||
random_bert: 0
|
||||
inference:
|
||||
top_k: 5
|
||||
77
GPT_SoVITS/configs/s1mq.yaml
Normal file
77
GPT_SoVITS/configs/s1mq.yaml
Normal file
@@ -0,0 +1,77 @@
|
||||
train:
|
||||
seed: 1234
|
||||
epochs: 100
|
||||
batch_size: 6
|
||||
gradient_accumulation: 4
|
||||
save_every_n_epoch: 1
|
||||
precision: 32
|
||||
gradient_clip: 1.0
|
||||
optimizer:
|
||||
lr: 0.01
|
||||
lr_init: 0.00001
|
||||
lr_end: 0.0001
|
||||
warmup_steps: 2000
|
||||
decay_steps: 40000
|
||||
data:
|
||||
max_eval_sample: 8
|
||||
max_sec: 40
|
||||
num_workers: 1
|
||||
pad_val: 1024 # same as EOS in model
|
||||
model:
|
||||
saving_path: "ckpt/"
|
||||
resume_checkpoint: null
|
||||
vocoder_config_path: "quantizer/new_ckpt/config.json"
|
||||
vocoder_ckpt_path: "quantizer/new_ckpt/g_00600000"
|
||||
datadir: "/home/liweiche/GigaSpeech/wavs"
|
||||
metapath: "/home/liweiche/GigaSpeech/train2.json"
|
||||
val_metapath: "/home/liweiche/GigaSpeech/dev2.json"
|
||||
sampledir: "logs/"
|
||||
pretrained_path: null
|
||||
lr: 0.0001
|
||||
batch_size: 200.0
|
||||
train_bucket_size: 8192
|
||||
training_step: 800000
|
||||
optim_flat_percent: 0.0
|
||||
warmup_step: 50
|
||||
adam_beta1: 0.9
|
||||
adam_beta2: 0.98
|
||||
ffd_size: 3072
|
||||
hidden_size: 768
|
||||
enc_nlayers: 6
|
||||
dec_nlayers: 6
|
||||
nheads: 12
|
||||
ar_layer: 4
|
||||
ar_ffd_size: 1024
|
||||
ar_hidden_size: 256
|
||||
ar_nheads: 4
|
||||
aligner_softmax_temp: 1.0
|
||||
layer_norm_eps: 0.00001
|
||||
speaker_embed_dropout: 0.05
|
||||
label_smoothing: 0.0
|
||||
val_check_interval: 5000
|
||||
check_val_every_n_epoch: 1
|
||||
precision: "fp16"
|
||||
nworkers: 16
|
||||
distributed: true
|
||||
accelerator: "ddp"
|
||||
version: null
|
||||
accumulate_grad_batches: 1
|
||||
use_repetition_token: true
|
||||
use_repetition_gating: false
|
||||
repetition_penalty: 1.0
|
||||
sampling_temperature: 1.0
|
||||
top_k: -1
|
||||
min_top_k: 3
|
||||
top_p: 0.8
|
||||
sample_num: 4
|
||||
length_penalty_max_length: 15000
|
||||
length_penalty_max_prob: 0.95
|
||||
max_input_length: 2048
|
||||
max_output_length: 2000
|
||||
sample_rate: 16000
|
||||
n_codes: 1024
|
||||
n_cluster_groups: 1
|
||||
phone_context_window: 4
|
||||
phoneset_size: 1000
|
||||
inference:
|
||||
top_k: 5
|
||||
90
GPT_SoVITS/configs/s2.json
Normal file
90
GPT_SoVITS/configs/s2.json
Normal file
@@ -0,0 +1,90 @@
|
||||
{
|
||||
"train": {
|
||||
"log_interval": 100,
|
||||
"eval_interval": 500,
|
||||
"seed": 1234,
|
||||
"epochs": 100,
|
||||
"learning_rate": 0.0001,
|
||||
"betas": [
|
||||
0.8,
|
||||
0.99
|
||||
],
|
||||
"eps": 1e-09,
|
||||
"batch_size": 32,
|
||||
"fp16_run": true,
|
||||
"lr_decay": 0.999875,
|
||||
"segment_size": 20480,
|
||||
"init_lr_ratio": 1,
|
||||
"warmup_epochs": 0,
|
||||
"c_mel": 45,
|
||||
"c_kl": 1.0,
|
||||
"text_low_lr_rate": 0.4
|
||||
},
|
||||
"data": {
|
||||
"max_wav_value": 32768.0,
|
||||
"sampling_rate": 32000,
|
||||
"filter_length": 2048,
|
||||
"hop_length": 640,
|
||||
"win_length": 2048,
|
||||
"n_mel_channels": 128,
|
||||
"mel_fmin": 0.0,
|
||||
"mel_fmax": null,
|
||||
"add_blank": true,
|
||||
"n_speakers": 300,
|
||||
"cleaned_text": true
|
||||
},
|
||||
"model": {
|
||||
"inter_channels": 192,
|
||||
"hidden_channels": 192,
|
||||
"filter_channels": 768,
|
||||
"n_heads": 2,
|
||||
"n_layers": 6,
|
||||
"kernel_size": 3,
|
||||
"p_dropout": 0.1,
|
||||
"resblock": "1",
|
||||
"resblock_kernel_sizes": [
|
||||
3,
|
||||
7,
|
||||
11
|
||||
],
|
||||
"resblock_dilation_sizes": [
|
||||
[
|
||||
1,
|
||||
3,
|
||||
5
|
||||
],
|
||||
[
|
||||
1,
|
||||
3,
|
||||
5
|
||||
],
|
||||
[
|
||||
1,
|
||||
3,
|
||||
5
|
||||
]
|
||||
],
|
||||
"upsample_rates": [
|
||||
10,
|
||||
8,
|
||||
2,
|
||||
2,
|
||||
2
|
||||
],
|
||||
"upsample_initial_channel": 512,
|
||||
"upsample_kernel_sizes": [
|
||||
16,
|
||||
16,
|
||||
8,
|
||||
2,
|
||||
2
|
||||
],
|
||||
"n_layers_q": 3,
|
||||
"use_spectral_norm": false,
|
||||
"gin_channels": 512,
|
||||
"semantic_frame_rate": "25hz",
|
||||
"freeze_quantizer": true
|
||||
},
|
||||
"s2_ckpt_dir": "logs/s2/big2k1",
|
||||
"content_module": "cnhubert"
|
||||
}
|
||||
32
GPT_SoVITS/configs/train.yaml
Normal file
32
GPT_SoVITS/configs/train.yaml
Normal file
@@ -0,0 +1,32 @@
|
||||
gpu:
|
||||
n_card: 1
|
||||
n_process_per_card: 2
|
||||
io:
|
||||
text_path: D:\RVC1006\GPT-SoVITS\GPT_SoVITS
|
||||
save_every_n_epoch: 1
|
||||
precision: 16-mixed
|
||||
gradient_clip: 1.0
|
||||
optimizer:
|
||||
lr: 0.01
|
||||
lr_init: 0.00001
|
||||
lr_end: 0.0001
|
||||
warmup_steps: 2000
|
||||
decay_steps: 40000
|
||||
data:
|
||||
max_eval_sample: 8
|
||||
max_sec: 54
|
||||
num_workers: 1
|
||||
pad_val: 1024 # same as EOS in model
|
||||
model:
|
||||
vocab_size: 1025
|
||||
phoneme_vocab_size: 512
|
||||
embedding_dim: 512
|
||||
hidden_dim: 512
|
||||
head: 16
|
||||
linear_units: 2048
|
||||
n_layer: 24
|
||||
dropout: 0
|
||||
EOS: 1024
|
||||
random_bert: 0
|
||||
inference:
|
||||
top_k: 5
|
||||
Reference in New Issue
Block a user