Merge branch 'pytorch_am'

2026-05-19 21:22:53 +08:00 · 2022-10-17 19:52:46 +08:00
parent 6aa160ec99 d64726031f
commit 156431071b
2 changed files with 69 additions and 67 deletions
--- a/configuration.json
+++ b/configuration.json
@ -1,87 +1,89 @@
 {
-  "framework": "tensorflow",
+  "framework": "pytorch",
  "task" : "text-to-speech",
  "model" : {
    "type" : "sambert-hifigan",
    "lang_type" : "zhcn",
    "sample_rate" : 16000,
    "am": {
-      "cleaners":"english_cleaners",
+      "am": {
-
+        "max_len": 800,
      "num_mels":80,
      "sample_rate":16000,
      "frame_shift_ms":12.5,
        "embedding_dim": 512, 
      "encoder_n_conv_layers":3,
      "encoder_filters":256,
      "encoder_kernel_size":5,
        "encoder_num_layers": 8,
      "encoder_num_units":128,
        "encoder_num_heads": 8,
        "encoder_num_units": 128,
        "encoder_ffn_inner_dim": 1024,
        "encoder_dropout": 0.1,
        "encoder_attention_dropout": 0.1,
        "encoder_relu_dropout": 0.1,
        "encoder_projection_units": 32,
        "speaker_units": 32,
        "emotion_units": 32,
        "predictor_filter_size": 41,
        "predictor_fsmn_num_layers": 3,
      "predictor_dnn_num_layers":0,
        "predictor_num_memory_units": 128,
        "predictor_ffn_inner_dim": 256,
        "predictor_dropout": 0.1,
        "predictor_shift": 0,
      "predictor_prenet_units":[128, 128],
        "predictor_lstm_units": 128,
        "dur_pred_prenet_units": [128, 128],
        "dur_pred_lstm_units": 128,
-      "prenet_units":[256, 256],
+        "decoder_prenet_units": [256, 256],
      "prenet_proj_units":128,
        "decoder_num_layers": 12,
      "decoder_num_units":128,
        "decoder_num_heads": 8,
        "decoder_num_units": 128,
        "decoder_ffn_inner_dim": 1024,
        "decoder_dropout": 0.1,
        "decoder_attention_dropout": 0.1,
        "decoder_relu_dropout": 0.1,
        "outputs_per_step": 3,
        "num_mels": 80,
        "postnet_filter_size": 41,
        "postnet_fsmn_num_layers": 4,
      "postnet_dnn_num_layers":0,
        "postnet_num_memory_units": 256,
        "postnet_ffn_inner_dim": 512,
        "postnet_dropout": 0.1,
        "postnet_shift": 17,
-      "postnet_lstm_units":128, 
+        "postnet_lstm_units": 128
      },
-      "dur_scale":1.0,
+      "audio": {
          "frame_shift_ms": 12.5
      },
      "linguistic_unit": {
        "cleaners": "english_cleaners",
        "lfeat_type_list": "sy,tone,syllable_flag,word_segment,emo_category,speaker_category",
        "sy": "dict/sy_dict.txt",
        "tone": "dict/tone_dict.txt",
        "syllable_flag": "dict/syllable_flag_dict.txt",
        "word_segment": "dict/word_segment_dict.txt",
        "emo_category": "dict/emo_category_dict.txt",
        "speaker_category": "dict/speaker_dict.txt"
      },
      "num_gpus": 1,
      "batch_size": 32,
-      "adam_beta1":0.9,
+      "group_size": 1024,
-      "adam_beta2":0.999,
+      "learning_rate": 0.001,
-      "initial_learning_rate":0.002,
+      "adam_b1": 0.9,
-      "decay_learning_rate":true,
+      "adam_b2": 0.98,
-      "use_cmudict":false,                 
+      "seed": 1234,
-      "lfeat_type_list":"sy,tone,syllable_flag,word_segment,emo_category,speaker",
+      "num_workers": 4,
-      "guided_attention":false,
+      "dist_config": {
-      "guided_attention_2g_squared":0.08,
+          "dist_backend": "nccl",
-      "guided_attention_loss_weight":1.0,
+          "dist_url": "tcp://localhost:11111",
-
+          "world_size": 1
-      "free_run":false,
+      }
      "X_band_width":40,
      "H_band_width":40,
      "max_len":900
    },
    "vocoder" : {
      "resblock": "1",
--- a/voices.zip
+++ b/voices.zip