From 9ccdc580f304710734f3a86a90b43c35361b178f Mon Sep 17 00:00:00 2001 From: "jiaqi.sjq" Date: Wed, 4 Jan 2023 15:49:38 +0800 Subject: [PATCH] update --- configuration.json | 123 +-- voices.zip | 3 - voices/F7/am/ckpt/checkpoint_980000.pth | 3 + voices/F7/am/config.yaml | 79 ++ voices/F7/audio_config.yaml | 27 + voices/F7/dict/Sichuan/En2ChPhoneMap.txt | 2 + voices/F7/dict/Sichuan/PhoneSet.xml | 984 +++++++++++++++++++++++ voices/F7/dict/Sichuan/PosSet.xml | 147 ++++ voices/F7/dict/Sichuan/py2phoneMap.txt | 551 +++++++++++++ voices/F7/dict/Sichuan/tonelist.txt | 7 + voices/F7/voc/ckpt/checkpoint_340000.pth | 3 + voices/F7/voc/config.yaml | 131 +++ voices/voices.json | 5 + 13 files changed, 1942 insertions(+), 123 deletions(-) delete mode 100644 voices.zip create mode 100644 voices/F7/am/ckpt/checkpoint_980000.pth create mode 100644 voices/F7/am/config.yaml create mode 100644 voices/F7/audio_config.yaml create mode 100644 voices/F7/dict/Sichuan/En2ChPhoneMap.txt create mode 100644 voices/F7/dict/Sichuan/PhoneSet.xml create mode 100644 voices/F7/dict/Sichuan/PosSet.xml create mode 100644 voices/F7/dict/Sichuan/py2phoneMap.txt create mode 100644 voices/F7/dict/Sichuan/tonelist.txt create mode 100644 voices/F7/voc/ckpt/checkpoint_340000.pth create mode 100644 voices/F7/voc/config.yaml create mode 100644 voices/voices.json diff --git a/configuration.json b/configuration.json index c8a1fc5..19569e2 100644 --- a/configuration.json +++ b/configuration.json @@ -1,127 +1,10 @@ { - "framework": "Tensorflow", + "framework": "pytorch", "task" : "text-to-speech", "model" : { "type" : "sambert-hifigan", - "lang_type" : "zhcn", - "sample_rate" : 16000, - "am": { - "am": { - "max_len": 800, - - "embedding_dim": 512, - "encoder_num_layers": 8, - "encoder_num_heads": 8, - "encoder_num_units": 128, - "encoder_ffn_inner_dim": 1024, - "encoder_dropout": 0.1, - "encoder_attention_dropout": 0.1, - "encoder_relu_dropout": 0.1, - "encoder_projection_units": 32, - - "speaker_units": 32, - "emotion_units": 32, - - "predictor_filter_size": 41, - "predictor_fsmn_num_layers": 3, - "predictor_num_memory_units": 128, - "predictor_ffn_inner_dim": 256, - "predictor_dropout": 0.1, - "predictor_shift": 0, - "predictor_lstm_units": 128, - "dur_pred_prenet_units": [128, 128], - "dur_pred_lstm_units": 128, - - "decoder_prenet_units": [256, 256], - "decoder_num_layers": 12, - "decoder_num_heads": 8, - "decoder_num_units": 128, - "decoder_ffn_inner_dim": 1024, - "decoder_dropout": 0.1, - "decoder_attention_dropout": 0.1, - "decoder_relu_dropout": 0.1, - - "outputs_per_step": 3, - "num_mels": 80, - - "postnet_filter_size": 41, - "postnet_fsmn_num_layers": 4, - "postnet_num_memory_units": 256, - "postnet_ffn_inner_dim": 512, - "postnet_dropout": 0.1, - "postnet_shift": 17, - "postnet_lstm_units": 128 - }, - - "audio": { - "frame_shift_ms": 12.5 - }, - - "linguistic_unit": { - "cleaners": "english_cleaners", - "lfeat_type_list": "sy,tone,syllable_flag,word_segment,emo_category,speaker_category", - "sy": "dict/sy_dict.txt", - "tone": "dict/tone_dict.txt", - "syllable_flag": "dict/syllable_flag_dict.txt", - "word_segment": "dict/word_segment_dict.txt", - "emo_category": "dict/emo_category_dict.txt", - "speaker_category": "dict/speaker_dict.txt" - }, - - "num_gpus": 1, - "batch_size": 32, - "group_size": 1024, - "learning_rate": 0.001, - "adam_b1": 0.9, - "adam_b2": 0.98, - "seed": 1234, - - "num_workers": 4, - - "dist_config": { - "dist_backend": "nccl", - "dist_url": "tcp://localhost:11111", - "world_size": 1 - } - - }, - "vocoder" : { - "resblock": "1", - "num_gpus": 1, - "batch_size": 16, - "learning_rate": 0.0002, - "adam_b1": 0.8, - "adam_b2": 0.99, - "lr_decay": 0.999, - "seed": 1234, - - "upsample_rates": [10,5,2,2], - "upsample_kernel_sizes": [20,10,4,4], - "upsample_initial_channel": 256, - "resblock_kernel_sizes": [3,7,11], - "resblock_dilation_sizes": [[1,3,5,7], [1,3,5,7], [1,3,5,7]], - - "segment_size": 6400, - "num_mels": 80, - "num_freq": 1025, - "n_fft": 2048, - "hop_size": 200, - "win_size": 1000, - - "sampling_rate": 16000, - - "fmin": 0, - "fmax": 8000, - "fmax_for_loss": null, - - "num_workers": 4, - - "dist_config": { - "dist_backend": "nccl", - "dist_url": "tcp://localhost:54312", - "world_size": 1 - } - } + "lang_type" : "sichuan", + "sample_rate" : 16000 }, "pipeline": { "type": "sambert-hifigan-tts" diff --git a/voices.zip b/voices.zip deleted file mode 100644 index 7c5f091..0000000 --- a/voices.zip +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:4e198a316c6235d15286639fbec1a73668d9957a46279e4e39661c69d6a9f41f -size 89586408 diff --git a/voices/F7/am/ckpt/checkpoint_980000.pth b/voices/F7/am/ckpt/checkpoint_980000.pth new file mode 100644 index 0000000..a7feb62 --- /dev/null +++ b/voices/F7/am/ckpt/checkpoint_980000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5fc4e9e6baa9a4d1db663183003ff568a28d0e89c05b296e4a83ad4ca7102b36 +size 149428316 diff --git a/voices/F7/am/config.yaml b/voices/F7/am/config.yaml new file mode 100644 index 0000000..48becac --- /dev/null +++ b/voices/F7/am/config.yaml @@ -0,0 +1,79 @@ +Loss: + MelReconLoss: + enable: true + params: {loss_type: mae} + ProsodyReconLoss: + enable: true + params: {loss_type: mae} +Model: + KanTtsSAMBERT: + optimizer: + params: + betas: [0.9, 0.98] + eps: 1.0e-09 + lr: 0.001 + weight_decay: 0.0 + type: Adam + params: + MAS: false + decoder_attention_dropout: 0.1 + decoder_dropout: 0.1 + decoder_ffn_inner_dim: 1024 + decoder_num_heads: 8 + decoder_num_layers: 12 + decoder_num_units: 128 + decoder_prenet_units: [256, 256] + decoder_relu_dropout: 0.1 + dur_pred_lstm_units: 128 + dur_pred_prenet_units: [128, 128] + embedding_dim: 512 + emotion_units: 32 + encoder_attention_dropout: 0.1 + encoder_dropout: 0.1 + encoder_ffn_inner_dim: 1024 + encoder_num_heads: 8 + encoder_num_layers: 8 + encoder_num_units: 128 + encoder_projection_units: 32 + encoder_relu_dropout: 0.1 + max_len: 800 + num_mels: 80 + outputs_per_step: 3 + postnet_dropout: 0.1 + postnet_ffn_inner_dim: 512 + postnet_filter_size: 41 + postnet_fsmn_num_layers: 4 + postnet_lstm_units: 128 + postnet_num_memory_units: 256 + postnet_shift: 17 + predictor_dropout: 0.1 + predictor_ffn_inner_dim: 256 + predictor_filter_size: 41 + predictor_fsmn_num_layers: 3 + predictor_lstm_units: 128 + predictor_num_memory_units: 128 + predictor_shift: 0 + speaker_units: 32 + scheduler: + params: {warmup_steps: 4000} + type: NoamLR +allow_cache: true +audio_config: {fmax: 8000.0, fmin: 0.0, hop_length: 200, max_norm: 1.0, min_level_db: -100.0, + n_fft: 2048, n_mels: 80, norm_type: mean_std, num_workers: 16, phone_level_feature: true, + preemphasize: false, ref_level_db: 20, sampling_rate: 16000, symmetric: false, trim_silence: true, + trim_silence_threshold_db: 60, wav_normalize: true, win_length: 1000} +batch_size: 32 +create_time: '2022-12-26 11:05:43' +eval_interval_steps: 10000 +git_revision_hash: 388243c0c173756d1eb34783c02cec4c302cdc25 +grad_norm: 1.0 +linguistic_unit: {cleaners: english_cleaners, language: Sichuan, lfeat_type_list: 'sy,tone,syllable_flag,word_segment,emo_category,speaker_category', + speaker_list: F7} +log_interval_steps: 1000 +model_type: sambert +num_save_intermediate_results: 4 +num_workers: 4 +pin_memory: false +remove_short_samples: false +save_interval_steps: 20000 +train_max_steps: 1000000 diff --git a/voices/F7/audio_config.yaml b/voices/F7/audio_config.yaml new file mode 100644 index 0000000..233817c --- /dev/null +++ b/voices/F7/audio_config.yaml @@ -0,0 +1,27 @@ +# Audio processing configs + +audio_config: + # Preprocess + wav_normalize: True + trim_silence: True + trim_silence_threshold_db: 60 + preemphasize: False + + # Feature extraction + sampling_rate: 16000 + hop_length: 200 + win_length: 1000 + n_fft: 2048 + n_mels: 80 + fmin: 0.0 + fmax: 8000.0 + phone_level_feature: True + + # Normalization + norm_type: "mean_std" # "mean_std" or "global" + max_norm: 1.0 + symmetric: False + min_level_db: -100.0 + ref_level_db: 20 + + num_workers: 16 diff --git a/voices/F7/dict/Sichuan/En2ChPhoneMap.txt b/voices/F7/dict/Sichuan/En2ChPhoneMap.txt new file mode 100644 index 0000000..db04f11 --- /dev/null +++ b/voices/F7/dict/Sichuan/En2ChPhoneMap.txt @@ -0,0 +1,2 @@ +wu w +yi y diff --git a/voices/F7/dict/Sichuan/PhoneSet.xml b/voices/F7/dict/Sichuan/PhoneSet.xml new file mode 100644 index 0000000..3d3b763 --- /dev/null +++ b/voices/F7/dict/Sichuan/PhoneSet.xml @@ -0,0 +1,984 @@ + + + + 0 + a_c + vowel + final + voiced + low + open + + + 1 + ai_c + vowel + final + voiced + low + open + + + 2 + an_c + vowel + final + voiced + low + open + + + 3 + ang_c + vowel + final + voiced + low + open + + + 4 + ao_c + vowel + final + voiced + low + open + + + 5 + b_c + vowel + initial + unvoiced + low + open + + + 6 + c_c + vowel + initial + unvoiced + low + open + + + 7 + ch_c + vowel + initial + unvoiced + low + open + + + 8 + d_c + vowel + initial + unvoiced + low + open + + + 9 + e_c + vowel + final + voiced + low + open + + + 10 + ei_c + vowel + final + voiced + low + open + + + 11 + en_c + vowel + final + voiced + low + open + + + 12 + eng_c + vowel + final + voiced + doublelips + stop + + + 13 + er_c + vowel + final + voiced + doublelips + stop + + + 14 + f_c + vowel + initial + unvoiced + doublelips + stop + + + 15 + g_c + vowel + initial + unvoiced + fronttongue + affricative + + + 16 + h_c + vowel + initial + unvoiced + backtongue + affricative + + + 17 + i_c + vowel + final + voiced + backtongue + affricative + + + 18 + ia_c + vowel + final + voiced + fronttongue + affricative + + + 19 + ian_c + vowel + final + voiced + centraltongue + stop + + + 20 + iang_c + vowel + final + voiced + centraltongue + stop + + + 21 + iao_c + vowel + final + voiced + centraltongue + stop + + + 22 + ie_c + vowel + final + voiced + middle + open + + + 23 + ih_c + vowel + final + voiced + middle + open + + + 24 + ii_c + vowel + final + voiced + middle + open + + + 25 + in_c + vowel + final + voiced + middle + open + + + 26 + ing_c + vowel + final + voiced + middle + open + + + 27 + ioo_c + vowel + final + voiced + middle + open + + + 28 + iong_c + vowel + final + voiced + middle + open + + + 29 + iou_c + vowel + final + voiced + middle + open + + + 30 + j_c + vowel + initial + unvoiced + middle + open + + + 31 + k_c + vowel + initial + unvoiced + middle + open + + + 32 + l_c + vowel + initial + voiced + middle + open + + + 33 + m_c + vowel + initial + voiced + middle + open + + + 34 + n_c + vowel + initial + voiced + middle + open + + + 35 + o_c + vowel + final + voiced + middle + open + + + 36 + ong_c + vowel + final + voiced + middle + open + + + 37 + ou_c + vowel + final + voiced + middle + open + + + 38 + p_c + vowel + initial + unvoiced + liptooth + fricative + + + 39 + q_c + vowel + initial + unvoiced + liptooth + fricative + + + 40 + r_c + vowel + initial + voiced + velar + stop + + + 41 + s_c + vowel + initial + unvoiced + low + open + + + 42 + sh_c + vowel + initial + unvoiced + middle + open + + + 43 + t_c + vowel + initial + unvoiced + middle + open + + + 44 + u_c + vowel + final + voiced + velar + stop + + + 45 + ua_c + vowel + final + voiced + velar + fricative + + + 46 + uai_c + vowel + final + voiced + velar + fricative + + + 47 + uan_c + vowel + final + voiced + high + close + + + 48 + uang_c + vowel + final + voiced + high + close + + + 49 + uei_c + vowel + final + voiced + high + close + + + 50 + uen_c + vowel + final + voiced + high + open + + + 51 + ueng_c + vowel + final + voiced + high + open + + + 52 + uo_c + vowel + final + voiced + high + open + + + 53 + v_c + vowel + final + voiced + high + open + + + 54 + van_c + vowel + final + voiced + high + open + + + 55 + ve_c + vowel + final + voiced + high + open + + + 56 + vn_c + vowel + final + voiced + high + open + + + 57 + xx_c + vowel + initial + unvoiced + high + close + + + 58 + z_c + vowel + initial + unvoiced + high + close + + + 59 + zh_c + vowel + initial + unvoiced + high + close + + + 60 + w_c + consonant + initial + unvoiced + high + close + + + 61 + y_c + consonant + initial + unvoiced + high + close + + + 62 + ng_c + consonant + initial + voiced + centraltongue + lateral + + + 63 + iai_c + consonant + final + voiced + centraltongue + lateral + + + 64 + io_c + consonant + final + voiced + centraltongue + lateral + + + 65 + ue_c + vowel + final + voiced + fronttongue + affricative + + + 66 + ga + consonant + initial + voiced + centraltongue + lateral + + + 67 + ge + consonant + initial + voiced + centraltongue + lateral + + + 68 + go + consonant + initial + voiced + centraltongue + lateral + + + 69 + aa + vowel + final + voiced + fronttongue + affricative + + + 70 + ae + vowel + final + voiced + fronttongue + affricative + + + 71 + ah + vowel + final + voiced + fronttongue + affricative + + + 72 + ao + vowel + final + voiced + fronttongue + affricative + + + 73 + aw + vowel + final + voiced + fronttongue + affricative + + + 74 + ay + vowel + final + voiced + fronttongue + affricative + + + 75 + b + consonant + initial + unvoiced + doublelips + stop + + + 76 + ch + consonant + initial + unvoiced + backtongue + affricative + + + 77 + d + consonant + initial + unvoiced + centraltongue + stop + + + 78 + dh + consonant + initial + unvoiced + fronttongue + affricative + + + 79 + eh + vowel + final + voiced + fronttongue + affricative + + + 80 + er + vowel + final + voiced + fronttongue + affricative + + + 81 + ey + vowel + final + voiced + fronttongue + affricative + + + 82 + f + consonant + initial + unvoiced + liptooth + fricative + + + 83 + g + consonant + initial + unvoiced + velar + stop + + + 84 + hh + consonant + initial + unvoiced + fronttongue + affricative + + + 85 + ih + vowel + final + voiced + fronttongue + affricative + + + 86 + iy + vowel + final + voiced + fronttongue + affricative + + + 87 + jh + consonant + initial + unvoiced + fronttongue + affricative + + + 88 + k + consonant + initial + unvoiced + velar + stop + + + 89 + l + consonant + initial + voiced + centraltongue + lateral + + + 90 + m + consonant + initial + voiced + doublelips + nasal + + + 91 + n + consonant + initial + voiced + centraltongue + nasal + + + 92 + ng + consonant + final + voiced + fronttongue + affricative + + + 93 + ow + vowel + final + voiced + fronttongue + affricative + + + 94 + oy + vowel + final + voiced + fronttongue + affricative + + + 95 + p + consonant + initial + unvoiced + doublelips + stop + + + 96 + r + consonant + initial + voiced + backtongue + fricative + + + 97 + s + consonant + initial + unvoiced + fronttongue + fricative + + + 98 + sh + consonant + initial + unvoiced + backtongue + fricative + + + 99 + t + consonant + initial + unvoiced + centraltongue + stop + + + 100 + th + consonant + initial + unvoiced + fronttongue + affricative + + + 101 + uh + vowel + final + voiced + fronttongue + affricative + + + 102 + uw + vowel + final + voiced + fronttongue + affricative + + + 103 + v + consonant + initial + unvoiced + fronttongue + affricative + + + 104 + w + consonant + initial + unvoiced + fronttongue + affricative + + + 105 + y + consonant + final + voiced + fronttongue + affricative + + + 106 + z + consonant + initial + unvoiced + fronttongue + affricative + + + 107 + zh + consonant + initial + unvoiced + backtongue + affricative + + + 146 + pau + consonant + initial + unvoiced + high + close + + diff --git a/voices/F7/dict/Sichuan/PosSet.xml b/voices/F7/dict/Sichuan/PosSet.xml new file mode 100644 index 0000000..92d12dd --- /dev/null +++ b/voices/F7/dict/Sichuan/PosSet.xml @@ -0,0 +1,147 @@ + + + + 1 + a + todo + + + 2 + b + todo + + + 3 + c + todo + + + 4 + d + todo + + + 5 + e + todo + + + 6 + f + todo + + + 7 + g + todo + + + 8 + gb + todo + + + + + 9 + h + todo + + + 10 + i + todo + + + 11 + j + todo + + + 12 + k + todo + + + 13 + l + todo + + + 14 + m + todo + + + 15 + n + todo + + + 16 + nz + todo + + + + + 17 + o + todo + + + 18 + p + todo + + + 19 + q + todo + + + 20 + r + todo + + + 21 + s + todo + + + 22 + t + todo + + + 23 + u + todo + + + 24 + v + todo + + + 25 + w + todo + + + 26 + x + todo + + + 27 + y + todo + + + 28 + z + todo + + diff --git a/voices/F7/dict/Sichuan/py2phoneMap.txt b/voices/F7/dict/Sichuan/py2phoneMap.txt new file mode 100644 index 0000000..bb0b671 --- /dev/null +++ b/voices/F7/dict/Sichuan/py2phoneMap.txt @@ -0,0 +1,551 @@ +a ga a_c +ai ga ai_c +an ga an_c +ao ga ao_c +e ge e_c +er ge er_c +o go o_c +ong go ong_c +ba b_c a_c +bai b_c ai_c +ban b_c an_c +bang b_c ang_c +bao b_c ao_c +be b_c e_c +bei b_c ei_c +ben b_c en_c +bi b_c i_c +bia b_c ia_c +bian b_c ian_c +biao b_c iao_c +bie b_c ie_c +bin b_c in_c +bo b_c o_c +bong b_c ong_c +bu b_c u_c +ca c_c a_c +cai c_c ai_c +can c_c an_c +cang c_c ang_c +cao c_c ao_c +ce c_c e_c +cen c_c en_c +ceng c_c eng_c +ci c_c ii_c +co c_c o_c +cong c_c ong_c +cou c_c ou_c +cu c_c u_c +cuai c_c uai_c +cuan c_c uan_c +cuang c_c uang_c +cui c_c uei_c +cun c_c uen_c +da d_c a_c +dai d_c ai_c +dan d_c an_c +dang d_c ang_c +dao d_c ao_c +de d_c e_c +dei d_c ei_c +den d_c en_c +deng d_c eng_c +di d_c i_c +dian d_c ian_c +diao d_c iao_c +die d_c ie_c +din d_c in_c +ding d_c ing_c +diu d_c iou_c +do d_c o_c +dong d_c ong_c +dou d_c ou_c +du d_c u_c +duan d_c uan_c +dui d_c uei_c +dun d_c uen_c +fa f_c a_c +fai f_c ai_c +fan f_c an_c +fang f_c ang_c +fei f_c ei_c +fen f_c en_c +feng f_c eng_c +fong f_c ong_c +fu f_c u_c +ga g_c a_c +gai g_c ai_c +gan g_c an_c +gang g_c ang_c +gao g_c ao_c +gua g_c ua_c +ge g_c e_c +gen g_c en_c +go g_c o_c +gon g_c iai_c +gong g_c ong_c +gou g_c ou_c +gu g_c u_c +guai g_c uai_c +guan g_c uan_c +guang g_c uang_c +gui g_c uei_c +gun g_c uen_c +ha h_c a_c +hai h_c ai_c +han h_c an_c +hang h_c ang_c +hao h_c ao_c +he h_c e_c +hen h_c en_c +ho h_c o_c +hong h_c ong_c +hou h_c ou_c +hu h_c u_c +hua h_c ua_c +huai h_c uai_c +huan h_c uan_c +huang h_c uang_c +hui h_c uei_c +hun h_c uen_c +huo h_c uo_c +ji j_c i_c +jia j_c ia_c +jiai j_c ia_c +jian j_c ian_c +jiang j_c iang_c +jiao j_c iao_c +jie j_c ie_c +jin j_c in_c +jiu j_c iou_c +ju j_c u_c +juan j_c van_c +jue j_c ve_c +juo j_c uo_c +ka k_c a_c +kai k_c ai_c +kan k_c an_c +kang k_c ang_c +kao k_c ao_c +ke k_c e_c +ken k_c en_c +ko k_c o_c +kong k_c ong_c +kou k_c ou_c +ku k_c u_c +kua k_c ua_c +kuai k_c uai_c +kuan k_c uan_c +kuang k_c uang_c +kue k_c ve_c +kui k_c uei_c +kun k_c uen_c +la l_c a_c +na n_c a_c +lai l_c ai_c +nai n_c ai_c +lan l_c an_c +nan n_c an_c +lang l_c ang_c +nang n_c ang_c +lao l_c ao_c +nao n_c ao_c +len l_c en_c +nen n_c en_c +li l_c i_c +ni n_c i_c +lian l_c ian_c +nian n_c ian_c +liang l_c iang_c +niang n_c iang_c +liao l_c iao_c +niao n_c iao_c +lie l_c ie_c +nie n_c ie_c +lin l_c in_c +nin n_c in_c +liu l_c iou_c +niu n_c iou_c +lo l_c o_c +no n_c o_c +long l_c ong_c +nong n_c ong_c +lou l_c ou_c +nou n_c ou_c +lu l_c u_c +nu n_c u_c +luan l_c uan_c +nuan n_c uan_c +lue l_c ve_c +nue n_c ve_c +lui l_c uei_c +nui n_c uei_c +lun l_c uen_c +nun n_c uen_c +luo l_c uo_c +nuo n_c uo_c +lv l_c v_c +nv n_c v_c +ma m_c a_c +mai m_c ai_c +man m_c an_c +mang m_c ang_c +mao m_c ao_c +me m_c e_c +mei m_c ei_c +men m_c en_c +meng m_c eng_c +mi m_c i_c +mian m_c ian_c +miao m_c iao_c +mie m_c ie_c +min m_c in_c +mo m_c o_c +mong m_c ong_c +mu m_c u_c +ne n_c e_c +nei n_c ei_c +pa p_c a_c +pai p_c ai_c +pan p_c an_c +pang p_c ang_c +pao p_c ao_c +pe p_c e_c +pei p_c ei_c +pen p_c en_c +peng p_c eng_c +pi p_c i_c +pian p_c ian_c +piao p_c iao_c +pie p_c ie_c +pin p_c in_c +po p_c o_c +pong p_c ong_c +pu p_c u_c +qi q_c i_c +qia q_c ia_c +qian q_c ian_c +qiang q_c iang_c +qiao q_c iao_c +qie q_c ie_c +qin q_c in_c +qing q_c ing_c +qiong q_c iong_c +qiu q_c iou_c +qu q_c u_c +quan q_c van_c +que q_c ve_c +qun q_c vn_c +quo q_c uo_c +ran r_c an_c +rang r_c ang_c +rao r_c ao_c +re r_c e_c +ren r_c en_c +ri r_c ih_c +rong r_c ong_c +rou r_c ou_c +ru r_c u_c +rua r_c ua_c +ruan r_c uan_c +sa s_c a_c +sai s_c ai_c +san s_c an_c +sang s_c ang_c +sao s_c ao_c +se s_c e_c +sen s_c en_c +si s_c ii_c +so s_c o_c +song s_c ong_c +sou s_c ou_c +su s_c u_c +sua s_c ua_c +suai s_c uai_c +suan s_c uan_c +suang s_c uang_c +sui s_c uei_c +sun s_c uen_c +ta t_c a_c +tai t_c ai_c +tan t_c an_c +tang t_c ang_c +tao t_c ao_c +ten t_c en_c +ti t_c i_c +tian t_c ian_c +tiao t_c iao_c +tie t_c ie_c +tin t_c in_c +to t_c o_c +tong t_c ong_c +tou t_c ou_c +tu t_c u_c +tuan t_c uan_c +tui t_c uei_c +tuo t_c uo_c +wa w_c a_c +wai w_c ai_c +wan w_c an_c +wang w_c ang_c +wei w_c ei_c +wen w_c en_c +wo w_c o_c +wu w_c u_c +xi xx_c i_c +xia xx_c ia_c +xian xx_c ian_c +xiang xx_c iang_c +xiao xx_c iao_c +xie xx_c ie_c +xin xx_c in_c +xing xx_c ing_c +xiong xx_c iong_c +xiu xx_c iou_c +xu xx_c u_c +xuan xx_c van_c +xue xx_c ve_c +xun xx_c vn_c +ya y_c a_c +yan y_c an_c +yang y_c ang_c +yao y_c ao_c +ye y_c e_c +yi y_c i_c +yin y_c in_c +yo y_c o_c +yong y_c ong_c +you y_c ou_c +yu y_c u_c +yuan y_c van_c +yue y_c ve_c +yun y_c vn_c +yuo y_c uo_c +za z_c a_c +zai z_c ai_c +zan z_c an_c +zang z_c ang_c +zao z_c ao_c +ze z_c e_c +zei z_c ei_c +zen z_c en_c +zi z_c ii_c +zo z_c o_c +zong z_c ong_c +zou z_c ou_c +zu z_c u_c +zua z_c ua_c +zuai z_c uai_c +zuan z_c uan_c +zuang z_c uang_c +zui z_c uei_c +zuo z_c uo_c +bing b_c ing_c +cer c_c er_c +ei ge ei_c +en ge en_c +fou f_c ou_c +gei g_c ei_c +geng g_c eng_c +heng h_c eng_c +huar h_c ua_c +huei h_c uei_c +jing j_c ing_c +jo j_c o_c +keng k_c eng_c +kuei k_c uei_c +le l_c e_c +leng l_c eng_c +neng n_c eng_c +ling l_c ing_c +ning n_c ing_c +ming m_c ing_c +nar n_c a_c +ngai ng_c ai_c +ngan ng_c an_c +ngao ng_c ao_c +ngen ng_c en_c +ngo ng_c o_c +xou xx_c ou_c +ping p_c ing_c +reng r_c eng_c +ro r_c o_c +run r_c uen_c +sei s_c ei_c +seng s_c eng_c +te t_c e_c +teng t_c eng_c +ting t_c ing_c +tun t_c uen_c +wong w_c ong_c +ying y_c ing_c +zeng z_c eng_c +zun z_c uen_c +ang ga ang_c +ou go ou_c +banr b_c an_c +benr b_c en_c +bianr b_c ian_c +dianr d_c ian_c +dunr d_c uen_c +fenr f_c en_c +fo f_c o_c +fur f_c u_c +gunr g_c uen_c +guo g_c uo_c +hair h_c ai_c +har h_c a_c +hei h_c ei_c +huir h_c uei_c +jianr j_c ian_c +jingr j_c ing_c +jiong j_c iong_c +kanr k_c an_c +kei k_c ei_c +kuo k_c uo_c +lar l_c a_c +lei l_c ei_c +lianr l_c ian_c +nianr n_c ian_c +luei l_c uei_c +nuei n_c uei_c +maor m_c ao_c +menr m_c en_c +mou m_c ou_c +nga ng_c a_c +ngang ng_c ang_c +ngei ng_c ei_c +nger ng_c er_c +ngong ng_c ong_c +ngou ng_c ou_c +ningr n_c ing_c +niur n_c iou_c +nvr n_c v_c +qio q_c io_c +qo q_c o_c +rui r_c uei_c +sengr s_c eng_c +ter t_c er_c +tour t_c ou_c +wanr w_c an_c +war w_c a_c +weng w_c eng_c +wenr w_c en_c +xingr xx_c ing_c +xo xx_c o_c +yangr y_c ang_c +yanr y_c an_c +yar y_c a_c +yuanr y_c van_c +yuer y_c ve_c +zeir z_c ei_c +zer z_c er_c +jun j_c vn_c +beir b_c ei_c +cei c_c ei_c +dengr d_c eng_c +far f_c a_c +genr g_c en_c +hor h_c o_c +kor k_c o_c +miu m_c iou_c +nia n_c ia_c +penr p_c en_c +xianr xx_c ian_c +gue g_c ve_c +hue h_c ve_c +bangr b_c ang_c +baor b_c ao_c +bar b_c a_c +bingr b_c ing_c +cangr c_c ang_c +car c_c a_c +cengr c_c eng_c +cuanr c_c uan_c +cuir c_c uei_c +cunr c_c uen_c +danr d_c an_c +dar d_c a_c +dour d_c ou_c +duir d_c uei_c +feir f_c ei_c +fengr f_c eng_c +ganr g_c an_c +gaor g_c ao_c +gar g_c a_c +gengr g_c eng_c +gor g_c o_c +gour g_c ou_c +guanr g_c uan_c +guar g_c ua_c +hanr h_c an_c +hunr h_c uen_c +hur h_c u_c +jiaor j_c iao_c +jiar j_c ia_c +juanr j_c van_c +junr j_c vn_c +kar k_c a_c +kour k_c ou_c +kuair k_c uai_c +laor l_c ao_c +naor n_c ao_c +leir l_c ei_c +neir n_c ei_c +liur l_c iou_c +lur l_c u_c +nur n_c u_c +mianr m_c ian_c +miaor m_c iao_c +mingr m_c ing_c +minr m_c in_c +mur m_c u_c +nge ng_c e_c +niaor n_c iao_c +or go o_c +pair p_c ai_c +paor p_c ao_c +pianr p_c ian_c +piaor p_c iao_c +pon p_c iai_c +pur p_c u_c +qianr q_c ian_c +qir q_c i_c +qiur q_c iou_c +quanr q_c van_c +rei r_c ei_c +ruo r_c uo_c +sir s_c ii_c +sour s_c ou_c +sunr s_c uen_c +suo s_c uo_c +tair t_c ai_c +tanr t_c an_c +tei t_c ei_c +tianr t_c ian_c +tir t_c i_c +wangr w_c ang_c +weir w_c ei_c +xiar xx_c ia_c +yei y_c ei_c +yingr y_c ing_c +zengr z_c eng_c +zir z_c ii_c +zuanr z_c uan_c +zuir z_c uei_c +zur z_c u_c +beng b_c eng_c +cua c_c ua_c +dia d_c ia_c +duo d_c uo_c +eng ge eng_c +pou p_c ou_c +xuo xx_c uo_c +shao sh_c ao_c +zhen zh_c en_c +shi sh_c i_c +zhe zh_c e_c +lia l_c ia_c +hiang h_c iang_c +cuo c_c uo_c +ngeng ng_c eng_c diff --git a/voices/F7/dict/Sichuan/tonelist.txt b/voices/F7/dict/Sichuan/tonelist.txt new file mode 100644 index 0000000..7741d23 --- /dev/null +++ b/voices/F7/dict/Sichuan/tonelist.txt @@ -0,0 +1,7 @@ +1 + +4 +2 +3 +5 +0 diff --git a/voices/F7/voc/ckpt/checkpoint_340000.pth b/voices/F7/voc/ckpt/checkpoint_340000.pth new file mode 100644 index 0000000..e936f4e --- /dev/null +++ b/voices/F7/voc/ckpt/checkpoint_340000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0231b7e43162142b6bec4ce0ca147e6e4e355154e27f79cf8e26527c905683a6 +size 907676870 diff --git a/voices/F7/voc/config.yaml b/voices/F7/voc/config.yaml new file mode 100644 index 0000000..4b59b91 --- /dev/null +++ b/voices/F7/voc/config.yaml @@ -0,0 +1,131 @@ +Loss: + discriminator_adv_loss: + enable: true + params: {average_by_discriminators: false} + weights: 1.0 + feat_match_loss: + enable: true + params: {average_by_discriminators: false, average_by_layers: false} + weights: 2.0 + generator_adv_loss: + enable: true + params: {average_by_discriminators: false} + weights: 1.0 + mel_loss: + enable: true + params: {fft_size: 2048, fmax: 8000, fmin: 0, fs: 16000, hop_size: 200, log_base: null, + num_mels: 80, win_length: 1000, window: hann} + weights: 45.0 + stft_loss: {enable: false} + subband_stft_loss: + enable: false + params: + fft_sizes: [384, 683, 171] + hop_sizes: [35, 75, 15] + win_lengths: [150, 300, 60] + window: hann_window +Model: + Generator: + optimizer: + params: + betas: [0.5, 0.9] + lr: 0.0002 + weight_decay: 0.0 + type: Adam + params: + bias: true + causal: false + channels: 256 + in_channels: 80 + kernel_size: 7 + nonlinear_activation: LeakyReLU + nonlinear_activation_params: {negative_slope: 0.1} + out_channels: 1 + resblock_dilations: + - [1, 3, 5, 7] + - [1, 3, 5, 7] + - [1, 3, 5, 7] + resblock_kernel_sizes: [3, 7, 11] + upsample_kernal_sizes: [20, 11, 4, 4] + upsample_scales: [10, 5, 2, 2] + use_weight_norm: true + scheduler: + params: + gamma: 0.5 + milestones: [200000, 400000, 600000, 800000] + type: MultiStepLR + MultiPeriodDiscriminator: + optimizer: + params: + betas: [0.5, 0.9] + lr: 0.0002 + weight_decay: 0.0 + type: Adam + params: + discriminator_params: + bias: true + channels: 32 + downsample_scales: [3, 3, 3, 3, 1] + in_channels: 1 + kernel_sizes: [5, 3] + max_downsample_channels: 1024 + nonlinear_activation: LeakyReLU + nonlinear_activation_params: {negative_slope: 0.1} + out_channels: 1 + use_spectral_norm: false + periods: [2, 3, 5, 7, 11] + scheduler: + params: + gamma: 0.5 + milestones: [200000, 400000, 600000, 800000] + type: MultiStepLR + MultiScaleDiscriminator: + optimizer: + params: + betas: [0.5, 0.9] + lr: 0.0002 + weight_decay: 0.0 + type: Adam + params: + discriminator_params: + bias: true + channels: 128 + downsample_scales: [4, 4, 4, 4, 1] + in_channels: 1 + kernel_sizes: [15, 41, 5, 3] + max_downsample_channels: 1024 + max_groups: 16 + nonlinear_activation: LeakyReLU + nonlinear_activation_params: {negative_slope: 0.1} + out_channels: 1 + downsample_pooling: DWT + downsample_pooling_params: {kernel_size: 4, padding: 2, stride: 2} + follow_official_norm: true + scales: 3 + scheduler: + params: + gamma: 0.5 + milestones: [200000, 400000, 600000, 800000] + type: MultiStepLR +allow_cache: true +audio_config: {fmax: 8000.0, fmin: 0.0, hop_length: 200, max_norm: 1.0, min_level_db: -100.0, + n_fft: 2048, n_mels: 80, norm_type: mean_std, num_workers: 16, phone_level_feature: true, + preemphasize: false, ref_level_db: 20, sampling_rate: 16000, symmetric: false, trim_silence: true, + trim_silence_threshold_db: 60, wav_normalize: true, win_length: 1000} +batch_max_steps: 9600 +batch_size: 16 +create_time: '2022-12-26 11:11:35' +discriminator_grad_norm: -1 +discriminator_train_start_steps: 0 +eval_interval_steps: 10000 +generator_grad_norm: -1 +generator_train_start_steps: 1 +git_revision_hash: 388243c0c173756d1eb34783c02cec4c302cdc25 +log_interval_steps: 1000 +model_type: hifigan +num_save_intermediate_results: 4 +num_workers: 2 +pin_memory: true +remove_short_samples: false +save_interval_steps: 20000 +train_max_steps: 2500000 diff --git a/voices/voices.json b/voices/voices.json new file mode 100644 index 0000000..f37e630 --- /dev/null +++ b/voices/voices.json @@ -0,0 +1,5 @@ +{ + "voices": [ + "F7" + ] +}