mirror of
https://www.modelscope.cn/iic/speech_sambert-hifigan_tts_zh-cn_16k.git
synced 2026-04-02 18:32:53 +08:00
Merge branch 'pytorch_am'
This commit is contained in:
@ -1,87 +1,89 @@
|
|||||||
{
|
{
|
||||||
"framework": "tensorflow",
|
"framework": "pytorch",
|
||||||
"task" : "text-to-speech",
|
"task" : "text-to-speech",
|
||||||
"model" : {
|
"model" : {
|
||||||
"type" : "sambert-hifigan",
|
"type" : "sambert-hifigan",
|
||||||
"lang_type" : "zhcn",
|
"lang_type" : "zhcn",
|
||||||
"sample_rate" : 16000,
|
"sample_rate" : 16000,
|
||||||
"am": {
|
"am": {
|
||||||
"cleaners":"english_cleaners",
|
"am": {
|
||||||
|
"max_len": 800,
|
||||||
|
|
||||||
"num_mels":80,
|
"embedding_dim": 512,
|
||||||
"sample_rate":16000,
|
"encoder_num_layers": 8,
|
||||||
"frame_shift_ms":12.5,
|
"encoder_num_heads": 8,
|
||||||
|
"encoder_num_units": 128,
|
||||||
|
"encoder_ffn_inner_dim": 1024,
|
||||||
|
"encoder_dropout": 0.1,
|
||||||
|
"encoder_attention_dropout": 0.1,
|
||||||
|
"encoder_relu_dropout": 0.1,
|
||||||
|
"encoder_projection_units": 32,
|
||||||
|
|
||||||
|
"speaker_units": 32,
|
||||||
|
"emotion_units": 32,
|
||||||
|
|
||||||
"embedding_dim":512,
|
"predictor_filter_size": 41,
|
||||||
"encoder_n_conv_layers":3,
|
"predictor_fsmn_num_layers": 3,
|
||||||
"encoder_filters":256,
|
"predictor_num_memory_units": 128,
|
||||||
"encoder_kernel_size":5,
|
"predictor_ffn_inner_dim": 256,
|
||||||
|
"predictor_dropout": 0.1,
|
||||||
|
"predictor_shift": 0,
|
||||||
|
"predictor_lstm_units": 128,
|
||||||
|
"dur_pred_prenet_units": [128, 128],
|
||||||
|
"dur_pred_lstm_units": 128,
|
||||||
|
|
||||||
"encoder_num_layers":8,
|
"decoder_prenet_units": [256, 256],
|
||||||
"encoder_num_units":128,
|
"decoder_num_layers": 12,
|
||||||
"encoder_num_heads":8,
|
"decoder_num_heads": 8,
|
||||||
"encoder_ffn_inner_dim":1024,
|
"decoder_num_units": 128,
|
||||||
"encoder_dropout":0.1,
|
"decoder_ffn_inner_dim": 1024,
|
||||||
"encoder_attention_dropout":0.1,
|
"decoder_dropout": 0.1,
|
||||||
"encoder_relu_dropout":0.1,
|
"decoder_attention_dropout": 0.1,
|
||||||
"encoder_projection_units":32,
|
"decoder_relu_dropout": 0.1,
|
||||||
|
|
||||||
"predictor_filter_size":41,
|
"outputs_per_step": 3,
|
||||||
"predictor_fsmn_num_layers":3,
|
"num_mels": 80,
|
||||||
"predictor_dnn_num_layers":0,
|
|
||||||
"predictor_num_memory_units":128,
|
|
||||||
"predictor_ffn_inner_dim":256,
|
|
||||||
"predictor_dropout":0.1,
|
|
||||||
"predictor_shift":0,
|
|
||||||
|
|
||||||
"predictor_prenet_units":[128, 128],
|
"postnet_filter_size": 41,
|
||||||
"predictor_lstm_units":128,
|
"postnet_fsmn_num_layers": 4,
|
||||||
|
"postnet_num_memory_units": 256,
|
||||||
|
"postnet_ffn_inner_dim": 512,
|
||||||
|
"postnet_dropout": 0.1,
|
||||||
|
"postnet_shift": 17,
|
||||||
|
"postnet_lstm_units": 128
|
||||||
|
|
||||||
"prenet_units":[256, 256],
|
},
|
||||||
"prenet_proj_units":128,
|
|
||||||
|
|
||||||
"decoder_num_layers":12,
|
"audio": {
|
||||||
"decoder_num_units":128,
|
"frame_shift_ms": 12.5
|
||||||
"decoder_num_heads":8,
|
},
|
||||||
"decoder_ffn_inner_dim":1024,
|
|
||||||
"decoder_dropout":0.1,
|
|
||||||
"decoder_attention_dropout":0.1,
|
|
||||||
"decoder_relu_dropout":0.1,
|
|
||||||
|
|
||||||
"outputs_per_step":3,
|
"linguistic_unit": {
|
||||||
|
"cleaners": "english_cleaners",
|
||||||
|
"lfeat_type_list": "sy,tone,syllable_flag,word_segment,emo_category,speaker_category",
|
||||||
|
"sy": "dict/sy_dict.txt",
|
||||||
|
"tone": "dict/tone_dict.txt",
|
||||||
|
"syllable_flag": "dict/syllable_flag_dict.txt",
|
||||||
|
"word_segment": "dict/word_segment_dict.txt",
|
||||||
|
"emo_category": "dict/emo_category_dict.txt",
|
||||||
|
"speaker_category": "dict/speaker_dict.txt"
|
||||||
|
},
|
||||||
|
|
||||||
"postnet_filter_size":41,
|
"num_gpus": 1,
|
||||||
"postnet_fsmn_num_layers":4,
|
"batch_size": 32,
|
||||||
"postnet_dnn_num_layers":0,
|
"group_size": 1024,
|
||||||
"postnet_num_memory_units":256,
|
"learning_rate": 0.001,
|
||||||
"postnet_ffn_inner_dim":512,
|
"adam_b1": 0.9,
|
||||||
"postnet_dropout":0.1,
|
"adam_b2": 0.98,
|
||||||
"postnet_shift":17,
|
"seed": 1234,
|
||||||
"postnet_lstm_units":128,
|
|
||||||
|
|
||||||
|
"num_workers": 4,
|
||||||
|
|
||||||
"dur_scale":1.0,
|
"dist_config": {
|
||||||
|
"dist_backend": "nccl",
|
||||||
"batch_size":32,
|
"dist_url": "tcp://localhost:11111",
|
||||||
"adam_beta1":0.9,
|
"world_size": 1
|
||||||
"adam_beta2":0.999,
|
}
|
||||||
"initial_learning_rate":0.002,
|
|
||||||
"decay_learning_rate":true,
|
|
||||||
"use_cmudict":false,
|
|
||||||
|
|
||||||
"lfeat_type_list":"sy,tone,syllable_flag,word_segment,emo_category,speaker",
|
|
||||||
|
|
||||||
"guided_attention":false,
|
|
||||||
"guided_attention_2g_squared":0.08,
|
|
||||||
"guided_attention_loss_weight":1.0,
|
|
||||||
|
|
||||||
"free_run":false,
|
|
||||||
|
|
||||||
"X_band_width":40,
|
|
||||||
"H_band_width":40,
|
|
||||||
|
|
||||||
"max_len":900
|
|
||||||
},
|
},
|
||||||
"vocoder" : {
|
"vocoder" : {
|
||||||
"resblock": "1",
|
"resblock": "1",
|
||||||
|
|||||||
BIN
voices.zip
(Stored with Git LFS)
BIN
voices.zip
(Stored with Git LFS)
Binary file not shown.
Reference in New Issue
Block a user