# Mirror of https://www.modelscope.cn/speech_tts/speech_sambert-hifigan_tts_chuangirl_Sichuan_16k.git
# Synced 2026-04-02 18:42:52 +08:00
# 80 lines, 2.4 KiB, YAML
# Training configuration (model_type: sambert).
# NOTE(review): the nesting below was reconstructed from a copy whose
# indentation had been stripped; it follows the alphabetical key order
# visible at each level. Confirm against the upstream file before relying
# on it.

# Loss terms; both are enabled and use loss_type "mae".
Loss:
  MelReconLoss:
    enable: true
    params: {loss_type: mae}
  ProsodyReconLoss:
    enable: true
    params: {loss_type: mae}

Model:
  KanTtsSAMBERT:
    optimizer:
      params:
        betas: [0.9, 0.98]
        eps: 1.0e-09
        lr: 0.001
        weight_decay: 0.0
      type: Adam
    # Model hyper-parameters, grouped by key prefix
    # (decoder_*, dur_pred_*, encoder_*, postnet_*, predictor_*).
    params:
      MAS: false
      decoder_attention_dropout: 0.1
      decoder_dropout: 0.1
      decoder_ffn_inner_dim: 1024
      decoder_num_heads: 8
      decoder_num_layers: 12
      decoder_num_units: 128
      decoder_prenet_units: [256, 256]
      decoder_relu_dropout: 0.1
      dur_pred_lstm_units: 128
      dur_pred_prenet_units: [128, 128]
      embedding_dim: 512
      emotion_units: 32
      encoder_attention_dropout: 0.1
      encoder_dropout: 0.1
      encoder_ffn_inner_dim: 1024
      encoder_num_heads: 8
      encoder_num_layers: 8
      encoder_num_units: 128
      encoder_projection_units: 32
      encoder_relu_dropout: 0.1
      max_len: 800
      num_mels: 80
      outputs_per_step: 3
      postnet_dropout: 0.1
      postnet_ffn_inner_dim: 512
      postnet_filter_size: 41
      postnet_fsmn_num_layers: 4
      postnet_lstm_units: 128
      postnet_num_memory_units: 256
      postnet_shift: 17
      predictor_dropout: 0.1
      predictor_ffn_inner_dim: 256
      predictor_filter_size: 41
      predictor_fsmn_num_layers: 3
      predictor_lstm_units: 128
      predictor_num_memory_units: 128
      predictor_shift: 0
      speaker_units: 32
    scheduler:
      params: {warmup_steps: 4000}
      type: NoamLR

allow_cache: true

# Written in block style; the original was a flow mapping wrapped over
# four lines. Keys and values are unchanged.
audio_config:
  fmax: 8000.0
  fmin: 0.0
  hop_length: 200
  max_norm: 1.0
  min_level_db: -100.0
  n_fft: 2048
  n_mels: 80
  norm_type: mean_std
  num_workers: 16
  phone_level_feature: true
  preemphasize: false
  ref_level_db: 20
  sampling_rate: 16000
  symmetric: false
  trim_silence: true
  trim_silence_threshold_db: 60
  wav_normalize: true
  win_length: 1000

batch_size: 32
create_time: '2022-12-26 11:05:43'
eval_interval_steps: 10000
# Quoted so the hash always loads as a string.
git_revision_hash: '388243c0c173756d1eb34783c02cec4c302cdc25'
grad_norm: 1.0

linguistic_unit:
  cleaners: english_cleaners
  language: Sichuan
  # Comma-separated list kept as a single quoted string, as in the original.
  lfeat_type_list: 'sy,tone,syllable_flag,word_segment,emo_category,speaker_category'
  speaker_list: F7

log_interval_steps: 1000
model_type: sambert
num_save_intermediate_results: 4
num_workers: 4
pin_memory: false
remove_short_samples: false
save_interval_steps: 20000
train_max_steps: 1000000