update

2026-04-03 02:42:54 +08:00 · 2022-12-14 16:34:44 +08:00
parent 517f2712ec
commit d551729afa
53 changed files with 2136 additions and 0 deletions
--- a/voices/zhibei_emo/am/config.yaml
+++ b/voices/zhibei_emo/am/config.yaml
@@ -0,0 +1,105 @@
+model_type: sambert  # NOTE(review): presumably selects which Model entry is built; confirm in the loader
+Model:  # model definitions; this file defines a single entry, KanTtsSAMBERT
+#########################################################
+#         SAMBERT NETWORK ARCHITECTURE SETTING          #
+#########################################################
+  KanTtsSAMBERT:  # groups the params, optimizer and scheduler sections for this model
+    params:  # NOTE: children here use a 4-space indent step; the rest of the file uses 2
+        max_len: 800  # NOTE(review): unit (tokens vs. frames) is not stated in this file; confirm
+
+        embedding_dim: 512 
+        encoder_num_layers: 8
+        encoder_num_heads: 8
+        encoder_num_units: 128
+        encoder_ffn_inner_dim: 1024
+        encoder_dropout: 0.1
+        encoder_attention_dropout: 0.1
+        encoder_relu_dropout: 0.1
+        encoder_projection_units: 32
+
+        speaker_units: 32  # presumably the width used for the speaker_category feature in lfeat_type_list; confirm
+        emotion_units: 32  # presumably the width used for the emo_category feature in lfeat_type_list; confirm
+
+        predictor_filter_size: 41
+        predictor_fsmn_num_layers: 3
+        predictor_num_memory_units: 128
+        predictor_ffn_inner_dim: 256
+        predictor_dropout: 0.1
+        predictor_shift: 0
+        predictor_lstm_units: 128
+        dur_pred_prenet_units: [128, 128]
+        dur_pred_lstm_units: 128
+
+        decoder_prenet_units: [256, 256]
+        decoder_num_layers: 12
+        decoder_num_heads: 8
+        decoder_num_units: 128
+        decoder_ffn_inner_dim: 1024
+        decoder_dropout: 0.1
+        decoder_attention_dropout: 0.1
+        decoder_relu_dropout: 0.1
+
+        outputs_per_step: 3  # presumably mel frames emitted per decoder step; confirm
+        num_mels: 80  # presumably the number of mel bins per frame; confirm
+
+        postnet_filter_size: 41
+        postnet_fsmn_num_layers: 4
+        postnet_num_memory_units: 256
+        postnet_ffn_inner_dim: 512
+        postnet_dropout: 0.1
+        postnet_shift: 17  # NOTE(review): magic value (predictor_shift is 0); rationale is not visible in this file
+        postnet_lstm_units: 128
+        MAS: False  # NOTE(review): acronym is not expanded in this file; capitalized boolean (canonical YAML spelling is false)
+
+    optimizer:  # sibling of params under KanTtsSAMBERT
+      type: Adam
+      params:
+        lr: 0.001
+        betas: [0.9, 0.98]
+        eps: 1.0e-9  # decimal point plus signed exponent keeps this a float under YAML 1.1 loaders
+        weight_decay: 0.0
+    scheduler:  # sibling of params and optimizer under KanTtsSAMBERT
+      type: NoamLR
+      params:
+        warmup_steps: 4000
+
+linguistic_unit:  # list-like values below are single comma-separated strings, not YAML sequences
+  cleaners: english_cleaners
+  lfeat_type_list: sy,tone,syllable_flag,word_segment,emo_category,speaker_category  # six names in one plain scalar; presumably split on "," by the consumer; confirm
+  speaker_list: F7,F74,FBYN,FRXL,M7,xiaoyu  # six speaker IDs in one plain scalar, no spaces after the commas
+####################################################
+#                   LOSS SETTING                   #
+####################################################
+Loss:  # two entries, each with an enable flag and a loss_type parameter
+  MelReconLoss:
+    enable: True  # capitalized boolean; canonical YAML spelling is true
+    params:
+      loss_type: mae  # presumably mean absolute error; confirm in the loss implementation
+
+  ProsodyReconLoss:
+    enable: True  # capitalized boolean; canonical YAML spelling is true
+    params:
+      loss_type: mae  # same loss_type value as MelReconLoss
+
+###########################################################
+#                  DATA LOADER SETTING                    #
+###########################################################
+batch_size: 32               # presumably samples per training batch; confirm
+pin_memory: False            # presumably forwarded to the data loader; confirm
+num_workers: 4  # FIXME: values > 0 may hang on macOS (note that the value set here is > 0)
+remove_short_samples: False  # NOTE(review): the length threshold is not defined in this file
+allow_cache: True            # NOTE(review): what gets cached is not visible in this file
+grad_norm: 1.0  # presumably a gradient-clipping threshold rather than a loader option; confirm
+
+###########################################################
+#                    INTERVAL SETTING                     #
+###########################################################
+train_max_steps: 1000000     # Total number of training steps.
+save_interval_steps: 20000   # Steps between checkpoint saves.
+eval_interval_steps: 10000   # Steps between evaluation runs.
+log_interval_steps: 1000     # Steps between training-log records.
+
+###########################################################
+#                     OTHER SETTING                       #
+###########################################################
+num_save_intermediate_results: 4  # How many intermediate results to save; when they are saved is not specified in this file.