more code refactor

Blaise
2024-01-16 17:14:18 +01:00
parent 0d92575115
commit 0d3d47f3c3
44 changed files with 4516 additions and 2623 deletions

View File

@@ -1,31 +1,31 @@
train:
  seed: 1234
  epochs: 300
  batch_size: 8
  gradient_accumulation: 4
  save_every_n_epoch: 1
  precision: 16
  gradient_clip: 1.0
optimizer:
  lr: 0.01
  lr_init: 0.00001
  lr_end: 0.0001
  warmup_steps: 2000
  decay_steps: 40000
data:
  max_eval_sample: 8
  max_sec: 54
  num_workers: 1
  pad_val: 1024 # same as EOS in model
model:
  vocab_size: 1025
  phoneme_vocab_size: 512
  embedding_dim: 512
  hidden_dim: 512
  head: 16
  linear_units: 2048
  n_layer: 12
  dropout: 0
  EOS: 1024
inference:
  top_k: 5

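For reference, a config like the one above can be parsed with PyYAML into a nested dict. A minimal sketch; the file name and loader here are illustrative, not part of this commit:

```python
import yaml  # pip install pyyaml

def load_config(path: str) -> dict:
    """Parse a training config such as the YAML above into a nested dict."""
    with open(path, "r", encoding="utf-8") as f:
        return yaml.safe_load(f)

# Hypothetical usage; the path is an assumption, not from this commit.
cfg = load_config("configs/s1.yaml")
print(cfg["train"]["batch_size"])       # 8
print(cfg["model"]["embedding_dim"])    # 512
```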
View File

@@ -1,31 +1,31 @@
train:
  seed: 1234
  epochs: 300
  batch_size: 8
  gradient_accumulation: 4
  save_every_n_epoch: 1
  precision: 16-mixed
  gradient_clip: 1.0
optimizer:
  lr: 0.01
  lr_init: 0.00001
  lr_end: 0.0001
  warmup_steps: 2000
  decay_steps: 40000
data:
  max_eval_sample: 8
  max_sec: 54
  num_workers: 1
  pad_val: 1024 # same as EOS in model
model:
  vocab_size: 1025
  phoneme_vocab_size: 512
  embedding_dim: 1024
  hidden_dim: 1024
  head: 16
  linear_units: 2048
  n_layer: 16
  dropout: 0
  EOS: 1024
inference:
  top_k: 5

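The optimizer block in these configs implies a warmup-then-decay schedule: from lr_init up to lr over warmup_steps, then down to lr_end over decay_steps. A sketch of one plausible shape, assuming linear warmup and cosine decay; the repo's actual scheduler may differ:

```python
import math

def lr_at(step: int, lr: float = 0.01, lr_init: float = 1e-5,
          lr_end: float = 1e-4, warmup_steps: int = 2000,
          decay_steps: int = 40000) -> float:
    """Linear warmup from lr_init to lr, then cosine decay to lr_end."""
    if step < warmup_steps:
        return lr_init + (lr - lr_init) * step / warmup_steps
    t = min((step - warmup_steps) / decay_steps, 1.0)
    return lr_end + (lr - lr_end) * 0.5 * (1.0 + math.cos(math.pi * t))

print(lr_at(0), lr_at(2000), lr_at(42000))  # 1e-05 0.01 0.0001
```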
View File

@@ -1,31 +1,31 @@
train:
  seed: 1234
  epochs: 300
  batch_size: 12
  gradient_accumulation: 4
  save_every_n_epoch: 1
  precision: 16-mixed
  gradient_clip: 1.0
optimizer:
  lr: 0.01
  lr_init: 0.00001
  lr_end: 0.0001
  warmup_steps: 2000
  decay_steps: 40000
data:
  max_eval_sample: 8
  max_sec: 54
  num_workers: 1
  pad_val: 1024 # same as EOS in model
model:
  vocab_size: 1025
  phoneme_vocab_size: 512
  embedding_dim: 1024
  hidden_dim: 1024
  head: 16
  linear_units: 2048
  n_layer: 6
  dropout: 0
  EOS: 1024
inference:
  top_k: 5

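The three variants above trade depth against width (12 layers at dim 512, 16 at 1024, 6 at 1024). A back-of-the-envelope size comparison, assuming a standard transformer with 4·h² attention weights and 2·h·ffn feed-forward weights per layer (biases and norms ignored); these numbers are an estimate, not from the commit:

```python
def approx_params(h: int, layers: int, ffn: int = 2048,
                  vocab: int = 1025, phoneme_vocab: int = 512) -> int:
    """Rough transformer size: embeddings + per-layer attn/FFN + output head."""
    per_layer = 4 * h * h + 2 * h * ffn
    embeds = (vocab + phoneme_vocab) * h
    head = h * vocab
    return layers * per_layer + embeds + head

for name, h, n in [("12x512", 512, 12), ("16x1024", 1024, 16), ("6x1024", 1024, 6)]:
    print(name, f"~{approx_params(h, n) / 1e6:.1f}M")  # ~39.1M, ~136.8M, ~53.0M
```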
View File

@@ -1,31 +1,31 @@
train:
  seed: 1234
  epochs: 20
  batch_size: 8
  save_every_n_epoch: 1
  precision: 16-mixed
  gradient_clip: 1.0
optimizer:
  lr: 0.01
  lr_init: 0.00001
  lr_end: 0.0001
  warmup_steps: 2000
  decay_steps: 40000
data:
  max_eval_sample: 8
  max_sec: 54
  num_workers: 4
  pad_val: 1024 # same as EOS in model
model:
  vocab_size: 1025
  phoneme_vocab_size: 512
  embedding_dim: 512
  hidden_dim: 512
  head: 16
  linear_units: 2048
  n_layer: 24
  dropout: 0
  EOS: 1024
  random_bert: 0
inference:
  top_k: 5

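Each of these stage-1 configs decodes with inference top_k: 5. A minimal sketch of top-k sampling in PyTorch, illustrative rather than the repo's exact decoding code:

```python
import torch

def sample_top_k(logits: torch.Tensor, k: int = 5) -> int:
    """Keep the k highest logits, renormalize, and sample one token id."""
    topk_vals, topk_idx = torch.topk(logits, k)
    probs = torch.softmax(topk_vals, dim=-1)
    choice = torch.multinomial(probs, num_samples=1)
    return topk_idx[choice].item()

logits = torch.randn(1025)        # vocab_size from the configs above
print(sample_top_k(logits, k=5))  # a token id in [0, 1024]
```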
View File

@@ -1,77 +1,77 @@
train:
  seed: 1234
  epochs: 100
  batch_size: 6
  gradient_accumulation: 4
  save_every_n_epoch: 1
  precision: 32
  gradient_clip: 1.0
optimizer:
  lr: 0.01
  lr_init: 0.00001
  lr_end: 0.0001
  warmup_steps: 2000
  decay_steps: 40000
data:
  max_eval_sample: 8
  max_sec: 40
  num_workers: 1
  pad_val: 1024 # same as EOS in model
model:
  saving_path: "ckpt/"
  resume_checkpoint: null
  vocoder_config_path: "quantizer/new_ckpt/config.json"
  vocoder_ckpt_path: "quantizer/new_ckpt/g_00600000"
  datadir: "/home/liweiche/GigaSpeech/wavs"
  metapath: "/home/liweiche/GigaSpeech/train2.json"
  val_metapath: "/home/liweiche/GigaSpeech/dev2.json"
  sampledir: "logs/"
  pretrained_path: null
  lr: 0.0001
  batch_size: 200.0
  train_bucket_size: 8192
  training_step: 800000
  optim_flat_percent: 0.0
  warmup_step: 50
  adam_beta1: 0.9
  adam_beta2: 0.98
  ffd_size: 3072
  hidden_size: 768
  enc_nlayers: 6
  dec_nlayers: 6
  nheads: 12
  ar_layer: 4
  ar_ffd_size: 1024
  ar_hidden_size: 256
  ar_nheads: 4
  aligner_softmax_temp: 1.0
  layer_norm_eps: 0.00001
  speaker_embed_dropout: 0.05
  label_smoothing: 0.0
  val_check_interval: 5000
  check_val_every_n_epoch: 1
  precision: "fp16"
  nworkers: 16
  distributed: true
  accelerator: "ddp"
  version: null
  accumulate_grad_batches: 1
  use_repetition_token: true
  use_repetition_gating: false
  repetition_penalty: 1.0
  sampling_temperature: 1.0
  top_k: -1
  min_top_k: 3
  top_p: 0.8
  sample_num: 4
  length_penalty_max_length: 15000
  length_penalty_max_prob: 0.95
  max_input_length: 2048
  max_output_length: 2000
  sample_rate: 16000
  n_codes: 1024
  n_cluster_groups: 1
  phone_context_window: 4
  phoneset_size: 1000
inference:
  top_k: 5

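This config samples with top_p: 0.8 alongside a min_top_k: 3 floor. A sketch of nucleus sampling with such a floor, assuming min_top_k means "never prune below the 3 most likely tokens"; that reading is an interpretation, not confirmed by this commit:

```python
import torch

def sample_top_p(logits: torch.Tensor, top_p: float = 0.8,
                 min_top_k: int = 3, temperature: float = 1.0) -> int:
    """Nucleus sampling: keep the smallest prefix of tokens whose cumulative
    probability stays within top_p, but never fewer than min_top_k tokens."""
    probs = torch.softmax(logits / temperature, dim=-1)
    sorted_probs, sorted_idx = torch.sort(probs, descending=True)
    cumulative = torch.cumsum(sorted_probs, dim=-1)
    keep = cumulative <= top_p
    keep[:min_top_k] = True                      # enforce the min_top_k floor
    kept = sorted_probs * keep
    choice = torch.multinomial(kept / kept.sum(), num_samples=1)
    return sorted_idx[choice].item()

print(sample_top_p(torch.randn(1024)))  # n_codes = 1024 in this config
```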
View File

@@ -1,32 +1,32 @@
gpu:
  n_card: 1
  n_process_per_card: 2
io:
  text_path: D:\RVC1006\GPT-SoVITS\GPT_SoVITS
  save_every_n_epoch: 1
  precision: 16-mixed
  gradient_clip: 1.0
optimizer:
  lr: 0.01
  lr_init: 0.00001
  lr_end: 0.0001
  warmup_steps: 2000
  decay_steps: 40000
data:
  max_eval_sample: 8
  max_sec: 54
  num_workers: 1
  pad_val: 1024 # same as EOS in model
model:
  vocab_size: 1025
  phoneme_vocab_size: 512
  embedding_dim: 512
  hidden_dim: 512
  head: 16
  linear_units: 2048
  n_layer: 24
  dropout: 0
  EOS: 1024
  random_bert: 0
inference:
  top_k: 5
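
The gpu block implies n_card × n_process_per_card parallel workers (here 1 × 2 = 2). A sketch of how such a setting might map to process ranks; a hypothetical helper, not code from this commit:

```python
N_CARD, N_PROC = 1, 2  # from the gpu block above

def world_size() -> int:
    """Total worker processes implied by the gpu block."""
    return N_CARD * N_PROC

# Map a flat rank to (card, local process), e.g. for sharding the input text
for rank in range(world_size()):
    card, local = divmod(rank, N_PROC)
    print(f"rank {rank}: card {card}, local process {local}")
```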