{
  "framework": "tensorflow",
  "task": "text-to-speech",
  "model": {
    "type": "sambert-hifigan",
    "lang_type": "zhcn",
    "sample_rate": 16000,
    "am": {
      "cleaners": "english_cleaners",
      "num_mels": 80,
      "sample_rate": 16000,
      "frame_shift_ms": 12.5,
      "embedding_dim": 512,
      "encoder_n_conv_layers": 3,
      "encoder_filters": 256,
      "encoder_kernel_size": 5,
      "encoder_num_layers": 8,
      "encoder_num_units": 128,
      "encoder_num_heads": 8,
      "encoder_ffn_inner_dim": 1024,
      "encoder_dropout": 0.1,
      "encoder_attention_dropout": 0.1,
      "encoder_relu_dropout": 0.1,
      "encoder_projection_units": 32,
      "predictor_filter_size": 41,
      "predictor_fsmn_num_layers": 3,
      "predictor_dnn_num_layers": 0,
      "predictor_num_memory_units": 128,
      "predictor_ffn_inner_dim": 256,
      "predictor_dropout": 0.1,
      "predictor_shift": 0,
      "predictor_prenet_units": [128, 128],
      "predictor_lstm_units": 128,
      "prenet_units": [256, 256],
      "prenet_proj_units": 128,
      "decoder_num_layers": 12,
      "decoder_num_units": 128,
      "decoder_num_heads": 8,
      "decoder_ffn_inner_dim": 1024,
      "decoder_dropout": 0.1,
      "decoder_attention_dropout": 0.1,
      "decoder_relu_dropout": 0.1,
      "outputs_per_step": 3,
      "postnet_filter_size": 41,
      "postnet_fsmn_num_layers": 4,
      "postnet_dnn_num_layers": 0,
      "postnet_num_memory_units": 256,
      "postnet_ffn_inner_dim": 512,
      "postnet_dropout": 0.1,
      "postnet_shift": 17,
      "postnet_lstm_units": 128,
      "dur_scale": 1.0,
      "batch_size": 32,
      "adam_beta1": 0.9,
      "adam_beta2": 0.999,
      "initial_learning_rate": 0.002,
      "decay_learning_rate": true,
      "use_cmudict": false,
      "lfeat_type_list": "sy,tone,syllable_flag,word_segment,emo_category,speaker",
      "guided_attention": false,
      "guided_attention_2g_squared": 0.08,
      "guided_attention_loss_weight": 1.0,
      "free_run": false,
      "X_band_width": 40,
      "H_band_width": 40,
      "max_len": 900
    },
    "vocoder": {
      "resblock": "1",
      "num_gpus": 1,
      "batch_size": 16,
      "learning_rate": 0.0002,
      "adam_b1": 0.8,
      "adam_b2": 0.99,
      "lr_decay": 0.999,
      "seed": 1234,
      "upsample_rates": [10, 5, 2, 2],
      "upsample_kernel_sizes": [20, 11, 4, 4],
      "upsample_initial_channel": 256,
      "resblock_kernel_sizes": [3, 7, 11],
      "resblock_dilation_sizes": [
        [1, 3, 5, 7],
        [1, 3, 5, 7],
        [1, 3, 5, 7]
      ],
      "segment_size": 6400,
      "num_mels": 80,
      "num_freq": 1025,
      "n_fft": 2048,
      "hop_size": 200,
      "win_size": 1000,
      "sampling_rate": 16000,
      "fmin": 0,
      "fmax": 8000,
      "fmax_for_loss": null,
      "num_workers": 4,
      "dist_config": {
        "dist_backend": "nccl",
        "dist_url": "tcp://localhost:54312",
        "world_size": 1
      }
    }
  },
  "pipeline": {
    "type": "sambert-hifigan-tts"
  }
}