This commit is contained in:
jiaqi.sjq
2022-12-14 16:34:44 +08:00
parent 517f2712ec
commit d551729afa
53 changed files with 2136 additions and 0 deletions

BIN
voices/zhizhe_emo/am/ckpt/checkpoint_0.pth (Stored with Git LFS) Normal file

Binary file not shown.

View File

@ -0,0 +1,105 @@
model_type: sambert
Model:
#########################################################
# SAMBERT NETWORK ARCHITECTURE SETTING #
#########################################################
KanTtsSAMBERT:
params:
max_len: 800
embedding_dim: 512
encoder_num_layers: 8
encoder_num_heads: 8
encoder_num_units: 128
encoder_ffn_inner_dim: 1024
encoder_dropout: 0.1
encoder_attention_dropout: 0.1
encoder_relu_dropout: 0.1
encoder_projection_units: 32
speaker_units: 32
emotion_units: 32
predictor_filter_size: 41
predictor_fsmn_num_layers: 3
predictor_num_memory_units: 128
predictor_ffn_inner_dim: 256
predictor_dropout: 0.1
predictor_shift: 0
predictor_lstm_units: 128
dur_pred_prenet_units: [128, 128]
dur_pred_lstm_units: 128
decoder_prenet_units: [256, 256]
decoder_num_layers: 12
decoder_num_heads: 8
decoder_num_units: 128
decoder_ffn_inner_dim: 1024
decoder_dropout: 0.1
decoder_attention_dropout: 0.1
decoder_relu_dropout: 0.1
outputs_per_step: 3
num_mels: 80
postnet_filter_size: 41
postnet_fsmn_num_layers: 4
postnet_num_memory_units: 256
postnet_ffn_inner_dim: 512
postnet_dropout: 0.1
postnet_shift: 17
postnet_lstm_units: 128
MAS: False
optimizer:
type: Adam
params:
lr: 0.001
betas: [0.9, 0.98]
eps: 1.0e-9
weight_decay: 0.0
scheduler:
type: NoamLR
params:
warmup_steps: 4000
linguistic_unit:
cleaners: english_cleaners
lfeat_type_list: sy,tone,syllable_flag,word_segment,emo_category,speaker_category
speaker_list: F7,F74,FBYN,FRXL,M7,xiaoyu
####################################################
# LOSS SETTING #
####################################################
Loss:
MelReconLoss:
enable: True
params:
loss_type: mae
ProsodyReconLoss:
enable: True
params:
loss_type: mae
###########################################################
# DATA LOADER SETTING #
###########################################################
batch_size: 32
pin_memory: False
num_workers: 4 # FIXME: set > 0 may stuck on macos
remove_short_samples: False
allow_cache: True
grad_norm: 1.0
###########################################################
# INTERVAL SETTING #
###########################################################
train_max_steps: 1000000 # Number of training steps.
save_interval_steps: 20000 # Interval steps to save checkpoint.
eval_interval_steps: 10000 # Interval steps to evaluate the network.
log_interval_steps: 1000 # Interval steps to record the training log.
###########################################################
# OTHER SETTING #
###########################################################
num_save_intermediate_results: 4 # Number of results to be saved as intermediate results.

BIN
voices/zhizhe_emo/am/pytorch_model.bin (Stored with Git LFS) Normal file

Binary file not shown.