mirror of
https://www.modelscope.cn/iic/speech_sambert-hifigan_tts_zh-cn_16k.git
synced 2026-04-03 02:42:54 +08:00
Merge branch 'finetune'
This commit is contained in:
8
voices/voices.json
Normal file
8
voices/voices.json
Normal file
@ -0,0 +1,8 @@
|
||||
{
|
||||
"voices": [
|
||||
"zhitian_emo",
|
||||
"zhibei_emo",
|
||||
"zhizhe_emo",
|
||||
"zhiyan_emo"
|
||||
]
|
||||
}
|
||||
BIN
voices/zhibei_emo/am/ckpt/checkpoint_0.pth
(Stored with Git LFS)
Normal file
BIN
voices/zhibei_emo/am/ckpt/checkpoint_0.pth
(Stored with Git LFS)
Normal file
Binary file not shown.
105
voices/zhibei_emo/am/config.yaml
Normal file
105
voices/zhibei_emo/am/config.yaml
Normal file
@ -0,0 +1,105 @@
|
||||
model_type: sambert
|
||||
Model:
|
||||
#########################################################
|
||||
# SAMBERT NETWORK ARCHITECTURE SETTING #
|
||||
#########################################################
|
||||
KanTtsSAMBERT:
|
||||
params:
|
||||
max_len: 800
|
||||
|
||||
embedding_dim: 512
|
||||
encoder_num_layers: 8
|
||||
encoder_num_heads: 8
|
||||
encoder_num_units: 128
|
||||
encoder_ffn_inner_dim: 1024
|
||||
encoder_dropout: 0.1
|
||||
encoder_attention_dropout: 0.1
|
||||
encoder_relu_dropout: 0.1
|
||||
encoder_projection_units: 32
|
||||
|
||||
speaker_units: 32
|
||||
emotion_units: 32
|
||||
|
||||
predictor_filter_size: 41
|
||||
predictor_fsmn_num_layers: 3
|
||||
predictor_num_memory_units: 128
|
||||
predictor_ffn_inner_dim: 256
|
||||
predictor_dropout: 0.1
|
||||
predictor_shift: 0
|
||||
predictor_lstm_units: 128
|
||||
dur_pred_prenet_units: [128, 128]
|
||||
dur_pred_lstm_units: 128
|
||||
|
||||
decoder_prenet_units: [256, 256]
|
||||
decoder_num_layers: 12
|
||||
decoder_num_heads: 8
|
||||
decoder_num_units: 128
|
||||
decoder_ffn_inner_dim: 1024
|
||||
decoder_dropout: 0.1
|
||||
decoder_attention_dropout: 0.1
|
||||
decoder_relu_dropout: 0.1
|
||||
|
||||
outputs_per_step: 3
|
||||
num_mels: 80
|
||||
|
||||
postnet_filter_size: 41
|
||||
postnet_fsmn_num_layers: 4
|
||||
postnet_num_memory_units: 256
|
||||
postnet_ffn_inner_dim: 512
|
||||
postnet_dropout: 0.1
|
||||
postnet_shift: 17
|
||||
postnet_lstm_units: 128
|
||||
MAS: False
|
||||
|
||||
optimizer:
|
||||
type: Adam
|
||||
params:
|
||||
lr: 0.001
|
||||
betas: [0.9, 0.98]
|
||||
eps: 1.0e-9
|
||||
weight_decay: 0.0
|
||||
scheduler:
|
||||
type: NoamLR
|
||||
params:
|
||||
warmup_steps: 4000
|
||||
|
||||
linguistic_unit:
|
||||
cleaners: english_cleaners
|
||||
lfeat_type_list: sy,tone,syllable_flag,word_segment,emo_category,speaker_category
|
||||
speaker_list: F7,F74,FBYN,FRXL,M7,xiaoyu
|
||||
####################################################
|
||||
# LOSS SETTING #
|
||||
####################################################
|
||||
Loss:
|
||||
MelReconLoss:
|
||||
enable: True
|
||||
params:
|
||||
loss_type: mae
|
||||
|
||||
ProsodyReconLoss:
|
||||
enable: True
|
||||
params:
|
||||
loss_type: mae
|
||||
|
||||
###########################################################
|
||||
# DATA LOADER SETTING #
|
||||
###########################################################
|
||||
batch_size: 32
|
||||
pin_memory: False
|
||||
num_workers: 4 # FIXME: setting this > 0 may hang on macOS
|
||||
remove_short_samples: False
|
||||
allow_cache: True
|
||||
grad_norm: 1.0
|
||||
|
||||
###########################################################
|
||||
# INTERVAL SETTING #
|
||||
###########################################################
|
||||
train_max_steps: 1000000 # Number of training steps.
|
||||
save_interval_steps: 20000 # Interval steps to save checkpoint.
|
||||
eval_interval_steps: 10000 # Interval steps to evaluate the network.
|
||||
log_interval_steps: 1000 # Interval steps to record the training log.
|
||||
|
||||
###########################################################
|
||||
# OTHER SETTING #
|
||||
###########################################################
|
||||
num_save_intermediate_results: 4 # Number of results to be saved as intermediate results.
|
||||
BIN
voices/zhibei_emo/am/pytorch_model.bin
(Stored with Git LFS)
Normal file
BIN
voices/zhibei_emo/am/pytorch_model.bin
(Stored with Git LFS)
Normal file
Binary file not shown.
27
voices/zhibei_emo/audio_config.yaml
Normal file
27
voices/zhibei_emo/audio_config.yaml
Normal file
@ -0,0 +1,27 @@
|
||||
# Audio processing configs
|
||||
|
||||
audio_config:
|
||||
# Preprocess
|
||||
wav_normalize: True
|
||||
trim_silence: True
|
||||
trim_silence_threshold_db: 60
|
||||
preemphasize: False
|
||||
|
||||
# Feature extraction
|
||||
sampling_rate: 16000
|
||||
hop_length: 200
|
||||
win_length: 1000
|
||||
n_fft: 2048
|
||||
n_mels: 80
|
||||
fmin: 0.0
|
||||
fmax: 8000.0
|
||||
phone_level_feature: True
|
||||
|
||||
# Normalization
|
||||
norm_type: "mean_std" # "mean_std" or "global"
|
||||
max_norm: 1.0
|
||||
symmetric: False
|
||||
min_level_db: -100.0
|
||||
ref_level_db: 20
|
||||
|
||||
num_workers: 16
|
||||
2
voices/zhibei_emo/dict/PinYin/En2ChPhoneMap.txt
Normal file
2
voices/zhibei_emo/dict/PinYin/En2ChPhoneMap.txt
Normal file
@ -0,0 +1,2 @@
|
||||
wu w
|
||||
yi y
|
||||
1263
voices/zhibei_emo/dict/PinYin/PhoneSet.xml
Normal file
1263
voices/zhibei_emo/dict/PinYin/PhoneSet.xml
Normal file
File diff suppressed because it is too large
Load Diff
147
voices/zhibei_emo/dict/PinYin/PosSet.xml
Normal file
147
voices/zhibei_emo/dict/PinYin/PosSet.xml
Normal file
@ -0,0 +1,147 @@
|
||||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<posSet xmlns="http://schemas.alibaba-inc.com/tts">
|
||||
<pos>
|
||||
<id>1</id>
|
||||
<name>a</name>
|
||||
<desc>todo</desc>
|
||||
</pos>
|
||||
<pos>
|
||||
<id>2</id>
|
||||
<name>b</name>
|
||||
<desc>todo</desc>
|
||||
</pos>
|
||||
<pos>
|
||||
<id>3</id>
|
||||
<name>c</name>
|
||||
<desc>todo</desc>
|
||||
</pos>
|
||||
<pos>
|
||||
<id>4</id>
|
||||
<name>d</name>
|
||||
<desc>todo</desc>
|
||||
</pos>
|
||||
<pos>
|
||||
<id>5</id>
|
||||
<name>e</name>
|
||||
<desc>todo</desc>
|
||||
</pos>
|
||||
<pos>
|
||||
<id>6</id>
|
||||
<name>f</name>
|
||||
<desc>todo</desc>
|
||||
</pos>
|
||||
<pos>
|
||||
<id>7</id>
|
||||
<name>g</name>
|
||||
<desc>todo</desc>
|
||||
<sub>
|
||||
<pos>
|
||||
<id>8</id>
|
||||
<name>gb</name>
|
||||
<desc>todo</desc>
|
||||
</pos>
|
||||
</sub>
|
||||
</pos>
|
||||
<pos>
|
||||
<id>9</id>
|
||||
<name>h</name>
|
||||
<desc>todo</desc>
|
||||
</pos>
|
||||
<pos>
|
||||
<id>10</id>
|
||||
<name>i</name>
|
||||
<desc>todo</desc>
|
||||
</pos>
|
||||
<pos>
|
||||
<id>11</id>
|
||||
<name>j</name>
|
||||
<desc>todo</desc>
|
||||
</pos>
|
||||
<pos>
|
||||
<id>12</id>
|
||||
<name>k</name>
|
||||
<desc>todo</desc>
|
||||
</pos>
|
||||
<pos>
|
||||
<id>13</id>
|
||||
<name>l</name>
|
||||
<desc>todo</desc>
|
||||
</pos>
|
||||
<pos>
|
||||
<id>14</id>
|
||||
<name>m</name>
|
||||
<desc>todo</desc>
|
||||
</pos>
|
||||
<pos>
|
||||
<id>15</id>
|
||||
<name>n</name>
|
||||
<desc>todo</desc>
|
||||
<sub>
|
||||
<pos>
|
||||
<id>16</id>
|
||||
<name>nz</name>
|
||||
<desc>todo</desc>
|
||||
</pos>
|
||||
</sub>
|
||||
</pos>
|
||||
<pos>
|
||||
<id>17</id>
|
||||
<name>o</name>
|
||||
<desc>todo</desc>
|
||||
</pos>
|
||||
<pos>
|
||||
<id>18</id>
|
||||
<name>p</name>
|
||||
<desc>todo</desc>
|
||||
</pos>
|
||||
<pos>
|
||||
<id>19</id>
|
||||
<name>q</name>
|
||||
<desc>todo</desc>
|
||||
</pos>
|
||||
<pos>
|
||||
<id>20</id>
|
||||
<name>r</name>
|
||||
<desc>todo</desc>
|
||||
</pos>
|
||||
<pos>
|
||||
<id>21</id>
|
||||
<name>s</name>
|
||||
<desc>todo</desc>
|
||||
</pos>
|
||||
<pos>
|
||||
<id>22</id>
|
||||
<name>t</name>
|
||||
<desc>todo</desc>
|
||||
</pos>
|
||||
<pos>
|
||||
<id>23</id>
|
||||
<name>u</name>
|
||||
<desc>todo</desc>
|
||||
</pos>
|
||||
<pos>
|
||||
<id>24</id>
|
||||
<name>v</name>
|
||||
<desc>todo</desc>
|
||||
</pos>
|
||||
<pos>
|
||||
<id>25</id>
|
||||
<name>w</name>
|
||||
<desc>todo</desc>
|
||||
</pos>
|
||||
<pos>
|
||||
<id>26</id>
|
||||
<name>x</name>
|
||||
<desc>todo</desc>
|
||||
</pos>
|
||||
<pos>
|
||||
<id>27</id>
|
||||
<name>y</name>
|
||||
<desc>todo</desc>
|
||||
</pos>
|
||||
<pos>
|
||||
<id>28</id>
|
||||
<name>z</name>
|
||||
<desc>todo</desc>
|
||||
</pos>
|
||||
</posSet>
|
||||
661
voices/zhibei_emo/dict/PinYin/py2phoneMap.txt
Normal file
661
voices/zhibei_emo/dict/PinYin/py2phoneMap.txt
Normal file
@ -0,0 +1,661 @@
|
||||
a ga a_c
|
||||
ai ga ai_c
|
||||
an ga an_c
|
||||
ang ga ang_c
|
||||
ao ga ao_c
|
||||
ba b_c a_c
|
||||
bai b_c ai_c
|
||||
ban b_c an_c
|
||||
bang b_c ang_c
|
||||
bao b_c ao_c
|
||||
bei b_c ei_c
|
||||
ben b_c en_c
|
||||
beng b_c eng_c
|
||||
bi b_c i_c
|
||||
bian b_c ian_c
|
||||
biao b_c iao_c
|
||||
bie b_c ie_c
|
||||
bin b_c in_c
|
||||
bing b_c ing_c
|
||||
bo b_c o_c
|
||||
bu b_c u_c
|
||||
ca c_c a_c
|
||||
cai c_c ai_c
|
||||
can c_c an_c
|
||||
cang c_c ang_c
|
||||
cao c_c ao_c
|
||||
ce c_c e_c
|
||||
cen c_c en_c
|
||||
ceng c_c eng_c
|
||||
cha ch_c a_c
|
||||
chai ch_c ai_c
|
||||
chan ch_c an_c
|
||||
chang ch_c ang_c
|
||||
chao ch_c ao_c
|
||||
che ch_c e_c
|
||||
chen ch_c en_c
|
||||
cheng ch_c eng_c
|
||||
chi ch_c ih_c
|
||||
chong ch_c ong_c
|
||||
chou ch_c ou_c
|
||||
chu ch_c u_c
|
||||
chua ch_c ua_c
|
||||
chuai ch_c uai_c
|
||||
chuan ch_c uan_c
|
||||
chuang ch_c uang_c
|
||||
chui ch_c uei_c
|
||||
chun ch_c uen_c
|
||||
chuo ch_c uo_c
|
||||
ci c_c ii_c
|
||||
cong c_c ong_c
|
||||
cou c_c ou_c
|
||||
cu c_c u_c
|
||||
cuan c_c uan_c
|
||||
cui c_c uei_c
|
||||
cun c_c uen_c
|
||||
cuo c_c uo_c
|
||||
da d_c a_c
|
||||
dai d_c ai_c
|
||||
dan d_c an_c
|
||||
dang d_c ang_c
|
||||
dao d_c ao_c
|
||||
de d_c e_c
|
||||
dei d_c ei_c
|
||||
den d_c en_c
|
||||
deng d_c eng_c
|
||||
di d_c i_c
|
||||
dia d_c ia_c
|
||||
dian d_c ian_c
|
||||
diao d_c iao_c
|
||||
die d_c ie_c
|
||||
ding d_c ing_c
|
||||
diu d_c iou_c
|
||||
dong d_c ong_c
|
||||
dou d_c ou_c
|
||||
du d_c u_c
|
||||
duan d_c uan_c
|
||||
dui d_c uei_c
|
||||
dun d_c uen_c
|
||||
duo d_c uo_c
|
||||
e ge e_c
|
||||
ei ge ei_c
|
||||
en ge en_c
|
||||
eng ge eng_c
|
||||
er ge er_c
|
||||
fa f_c a_c
|
||||
fan f_c an_c
|
||||
fang f_c ang_c
|
||||
fei f_c ei_c
|
||||
fen f_c en_c
|
||||
feng f_c eng_c
|
||||
fo f_c o_c
|
||||
fou f_c ou_c
|
||||
fu f_c u_c
|
||||
ga g_c a_c
|
||||
gai g_c ai_c
|
||||
gan g_c an_c
|
||||
gang g_c ang_c
|
||||
gao g_c ao_c
|
||||
ge g_c e_c
|
||||
gei g_c ei_c
|
||||
gen g_c en_c
|
||||
geng g_c eng_c
|
||||
gong g_c ong_c
|
||||
gou g_c ou_c
|
||||
gu g_c u_c
|
||||
gua g_c ua_c
|
||||
guai g_c uai_c
|
||||
guan g_c uan_c
|
||||
guang g_c uang_c
|
||||
gui g_c uei_c
|
||||
gun g_c uen_c
|
||||
guo g_c uo_c
|
||||
ha h_c a_c
|
||||
hai h_c ai_c
|
||||
han h_c an_c
|
||||
hang h_c ang_c
|
||||
hao h_c ao_c
|
||||
he h_c e_c
|
||||
hei h_c ei_c
|
||||
hen h_c en_c
|
||||
heng h_c eng_c
|
||||
hong h_c ong_c
|
||||
hou h_c ou_c
|
||||
hu h_c u_c
|
||||
hua h_c ua_c
|
||||
huai h_c uai_c
|
||||
huan h_c uan_c
|
||||
huang h_c uang_c
|
||||
hui h_c uei_c
|
||||
hun h_c uen_c
|
||||
huo h_c uo_c
|
||||
ji j_c i_c
|
||||
jia j_c ia_c
|
||||
jian j_c ian_c
|
||||
jiang j_c iang_c
|
||||
jiao j_c iao_c
|
||||
jie j_c ie_c
|
||||
jin j_c in_c
|
||||
jing j_c ing_c
|
||||
jiong j_c iong_c
|
||||
jiu j_c iou_c
|
||||
jv j_c v_c
|
||||
jvan j_c van_c
|
||||
jve j_c ve_c
|
||||
jvn j_c vn_c
|
||||
ka k_c a_c
|
||||
kai k_c ai_c
|
||||
kan k_c an_c
|
||||
kang k_c ang_c
|
||||
kao k_c ao_c
|
||||
ke k_c e_c
|
||||
kei k_c ei_c
|
||||
ken k_c en_c
|
||||
keng k_c eng_c
|
||||
kong k_c ong_c
|
||||
kou k_c ou_c
|
||||
ku k_c u_c
|
||||
kua k_c ua_c
|
||||
kuai k_c uai_c
|
||||
kuan k_c uan_c
|
||||
kuang k_c uang_c
|
||||
kui k_c uei_c
|
||||
kun k_c uen_c
|
||||
kuo k_c uo_c
|
||||
la l_c a_c
|
||||
lai l_c ai_c
|
||||
lan l_c an_c
|
||||
lang l_c ang_c
|
||||
lao l_c ao_c
|
||||
le l_c e_c
|
||||
lei l_c ei_c
|
||||
leng l_c eng_c
|
||||
li l_c i_c
|
||||
lia l_c ia_c
|
||||
lian l_c ian_c
|
||||
liang l_c iang_c
|
||||
liao l_c iao_c
|
||||
lie l_c ie_c
|
||||
lin l_c in_c
|
||||
ling l_c ing_c
|
||||
liu l_c iou_c
|
||||
lo l_c o_c
|
||||
long l_c ong_c
|
||||
lou l_c ou_c
|
||||
lu l_c u_c
|
||||
luan l_c uan_c
|
||||
lun l_c uen_c
|
||||
luo l_c uo_c
|
||||
lv l_c v_c
|
||||
lve l_c ve_c
|
||||
ma m_c a_c
|
||||
mai m_c ai_c
|
||||
man m_c an_c
|
||||
mang m_c ang_c
|
||||
mao m_c ao_c
|
||||
me m_c e_c
|
||||
mei m_c ei_c
|
||||
men m_c en_c
|
||||
meng m_c eng_c
|
||||
mi m_c i_c
|
||||
mian m_c ian_c
|
||||
miao m_c iao_c
|
||||
mie m_c ie_c
|
||||
min m_c in_c
|
||||
ming m_c ing_c
|
||||
miu m_c iou_c
|
||||
mo m_c o_c
|
||||
mou m_c ou_c
|
||||
mu m_c u_c
|
||||
na n_c a_c
|
||||
nai n_c ai_c
|
||||
nan n_c an_c
|
||||
nang n_c ang_c
|
||||
nao n_c ao_c
|
||||
ne n_c e_c
|
||||
nei n_c ei_c
|
||||
nen n_c en_c
|
||||
neng n_c eng_c
|
||||
ni n_c i_c
|
||||
nian n_c ian_c
|
||||
niang n_c iang_c
|
||||
niao n_c iao_c
|
||||
nie n_c ie_c
|
||||
nin n_c in_c
|
||||
ning n_c ing_c
|
||||
niu n_c iou_c
|
||||
nong n_c ong_c
|
||||
nou n_c ou_c
|
||||
nu n_c u_c
|
||||
nuan n_c uan_c
|
||||
nun n_c uen_c
|
||||
nuo n_c uo_c
|
||||
nv n_c v_c
|
||||
nve n_c ve_c
|
||||
o go o_c
|
||||
ou go ou_c
|
||||
pa p_c a_c
|
||||
pai p_c ai_c
|
||||
pan p_c an_c
|
||||
pang p_c ang_c
|
||||
pao p_c ao_c
|
||||
pei p_c ei_c
|
||||
pen p_c en_c
|
||||
peng p_c eng_c
|
||||
pi p_c i_c
|
||||
pian p_c ian_c
|
||||
piao p_c iao_c
|
||||
pie p_c ie_c
|
||||
pin p_c in_c
|
||||
ping p_c ing_c
|
||||
po p_c o_c
|
||||
pou p_c ou_c
|
||||
pu p_c u_c
|
||||
qi q_c i_c
|
||||
qia q_c ia_c
|
||||
qian q_c ian_c
|
||||
qiang q_c iang_c
|
||||
qiao q_c iao_c
|
||||
qie q_c ie_c
|
||||
qin q_c in_c
|
||||
qing q_c ing_c
|
||||
qiong q_c iong_c
|
||||
qiu q_c iou_c
|
||||
qv q_c v_c
|
||||
qvan q_c van_c
|
||||
qve q_c ve_c
|
||||
qvn q_c vn_c
|
||||
ran r_c an_c
|
||||
rang r_c ang_c
|
||||
rao r_c ao_c
|
||||
re r_c e_c
|
||||
ren r_c en_c
|
||||
reng r_c eng_c
|
||||
ri r_c ih_c
|
||||
rong r_c ong_c
|
||||
rou r_c ou_c
|
||||
ru r_c u_c
|
||||
ruan r_c uan_c
|
||||
rui r_c uei_c
|
||||
run r_c uen_c
|
||||
ruo r_c uo_c
|
||||
sa s_c a_c
|
||||
sai s_c ai_c
|
||||
san s_c an_c
|
||||
sang s_c ang_c
|
||||
sao s_c ao_c
|
||||
se s_c e_c
|
||||
sen s_c en_c
|
||||
seng s_c eng_c
|
||||
sha sh_c a_c
|
||||
shai sh_c ai_c
|
||||
shan sh_c an_c
|
||||
shang sh_c ang_c
|
||||
shao sh_c ao_c
|
||||
she sh_c e_c
|
||||
shei sh_c ei_c
|
||||
shen sh_c en_c
|
||||
sheng sh_c eng_c
|
||||
shi sh_c ih_c
|
||||
shou sh_c ou_c
|
||||
shu sh_c u_c
|
||||
shua sh_c ua_c
|
||||
shuai sh_c uai_c
|
||||
shuan sh_c uan_c
|
||||
shuang sh_c uang_c
|
||||
shui sh_c uei_c
|
||||
shun sh_c uen_c
|
||||
shuo sh_c uo_c
|
||||
si s_c ii_c
|
||||
song s_c ong_c
|
||||
sou s_c ou_c
|
||||
su s_c u_c
|
||||
suan s_c uan_c
|
||||
sui s_c uei_c
|
||||
sun s_c uen_c
|
||||
suo s_c uo_c
|
||||
ta t_c a_c
|
||||
tai t_c ai_c
|
||||
tan t_c an_c
|
||||
tang t_c ang_c
|
||||
tao t_c ao_c
|
||||
te t_c e_c
|
||||
tei t_c ei_c
|
||||
teng t_c eng_c
|
||||
ti t_c i_c
|
||||
tian t_c ian_c
|
||||
tiao t_c iao_c
|
||||
tie t_c ie_c
|
||||
ting t_c ing_c
|
||||
tong t_c ong_c
|
||||
tou t_c ou_c
|
||||
tu t_c u_c
|
||||
tuan t_c uan_c
|
||||
tui t_c uei_c
|
||||
tun t_c uen_c
|
||||
tuo t_c uo_c
|
||||
wa w_c a_c
|
||||
wai w_c ai_c
|
||||
wan w_c an_c
|
||||
wang w_c ang_c
|
||||
wei w_c ei_c
|
||||
wen w_c en_c
|
||||
weng w_c eng_c
|
||||
wo w_c o_c
|
||||
wu w_c u_c
|
||||
xi xx_c i_c
|
||||
xia xx_c ia_c
|
||||
xian xx_c ian_c
|
||||
xiang xx_c iang_c
|
||||
xiao xx_c iao_c
|
||||
xie xx_c ie_c
|
||||
xin xx_c in_c
|
||||
xing xx_c ing_c
|
||||
xiong xx_c iong_c
|
||||
xiu xx_c iou_c
|
||||
xv xx_c v_c
|
||||
xvan xx_c van_c
|
||||
xve xx_c ve_c
|
||||
xvn xx_c vn_c
|
||||
ya y_c a_c
|
||||
yan y_c an_c
|
||||
yang y_c ang_c
|
||||
yao y_c ao_c
|
||||
ye y_c e_c
|
||||
yi y_c i_c
|
||||
yin y_c in_c
|
||||
ying y_c ing_c
|
||||
yo y_c o_c
|
||||
yong y_c ong_c
|
||||
you y_c ou_c
|
||||
yv y_c v_c
|
||||
yvan y_c van_c
|
||||
yve y_c ve_c
|
||||
yvn y_c vn_c
|
||||
za z_c a_c
|
||||
zai z_c ai_c
|
||||
zan z_c an_c
|
||||
zang z_c ang_c
|
||||
zao z_c ao_c
|
||||
ze z_c e_c
|
||||
zei z_c ei_c
|
||||
zen z_c en_c
|
||||
zeng z_c eng_c
|
||||
zha zh_c a_c
|
||||
zhai zh_c ai_c
|
||||
zhan zh_c an_c
|
||||
zhang zh_c ang_c
|
||||
zhao zh_c ao_c
|
||||
zhe zh_c e_c
|
||||
zhei zh_c ei_c
|
||||
zhen zh_c en_c
|
||||
zheng zh_c eng_c
|
||||
zhi zh_c ih_c
|
||||
zhong zh_c ong_c
|
||||
zhou zh_c ou_c
|
||||
zhu zh_c u_c
|
||||
zhua zh_c ua_c
|
||||
zhuai zh_c uai_c
|
||||
zhuan zh_c uan_c
|
||||
zhuang zh_c uang_c
|
||||
zhui zh_c uei_c
|
||||
zhun zh_c uen_c
|
||||
zhuo zh_c uo_c
|
||||
zi z_c ii_c
|
||||
zong z_c ong_c
|
||||
zou z_c ou_c
|
||||
zu z_c u_c
|
||||
zuan z_c uan_c
|
||||
zui z_c uei_c
|
||||
zun z_c uen_c
|
||||
zuo z_c uo_c
|
||||
bangr b_c angr_c
|
||||
banr b_c anr_c
|
||||
baor b_c aor_c
|
||||
bar b_c ar_c
|
||||
beir b_c eir_c
|
||||
bengr b_c engr_c
|
||||
benr b_c enr_c
|
||||
bianr b_c ianr_c
|
||||
biaor b_c iaor_c
|
||||
bingr b_c ingr_c
|
||||
bir b_c ir_c
|
||||
bor b_c or_c
|
||||
bur b_c ur_c
|
||||
caor c_c aor_c
|
||||
car c_c ar_c
|
||||
changr ch_c angr_c
|
||||
chaor ch_c aor_c
|
||||
char ch_c ar_c
|
||||
chengr ch_c engr_c
|
||||
cher ch_c er_c
|
||||
chir ch_c ihr_c
|
||||
chongr ch_c ongr_c
|
||||
chour ch_c our_c
|
||||
chuangr ch_c uangr_c
|
||||
chuanr ch_c uanr_c
|
||||
chuir ch_c ueir_c
|
||||
chunr ch_c uenr_c
|
||||
chuor ch_c uor_c
|
||||
chur ch_c ur_c
|
||||
cir c_c iir_c
|
||||
congr c_c ongr_c
|
||||
cuir c_c ueir_c
|
||||
cunr c_c uenr_c
|
||||
cuor c_c uor_c
|
||||
dair d_c air_c
|
||||
danr d_c anr_c
|
||||
dangr d_c angr_c
|
||||
daor d_c aor_c
|
||||
dengr d_c engr_c
|
||||
dianr d_c ianr_c
|
||||
diaor d_c iaor_c
|
||||
dier d_c ier_c
|
||||
dingr d_c ingr_c
|
||||
dir d_c ir_c
|
||||
dongr d_c ongr_c
|
||||
dour d_c our_c
|
||||
duanr d_c uanr_c
|
||||
duir d_c ueir_c
|
||||
dunr d_c uenr_c
|
||||
duor d_c uor_c
|
||||
dur d_c ur_c
|
||||
fangr f_c angr_c
|
||||
fanr f_c anr_c
|
||||
far f_c ar_c
|
||||
fengr f_c engr_c
|
||||
fenr f_c enr_c
|
||||
fur f_c ur_c
|
||||
gair g_c air_c
|
||||
ganr g_c anr_c
|
||||
gaor g_c aor_c
|
||||
gengr g_c engr_c
|
||||
genr g_c enr_c
|
||||
ger g_c er_c
|
||||
gongr g_c ongr_c
|
||||
gour g_c our_c
|
||||
guair g_c uair_c
|
||||
guanr g_c uanr_c
|
||||
guar g_c uar_c
|
||||
guir g_c ueir_c
|
||||
gunr g_c uenr_c
|
||||
guor g_c uor_c
|
||||
gur g_c ur_c
|
||||
hair h_c air_c
|
||||
hanr h_c anr_c
|
||||
haor h_c aor_c
|
||||
heir h_c eir_c
|
||||
her h_c er_c
|
||||
hour h_c our_c
|
||||
huanr h_c uanr_c
|
||||
huangr h_c uangr_c
|
||||
huar h_c uar_c
|
||||
huir h_c ueir_c
|
||||
hunr h_c uenr_c
|
||||
huor h_c uor_c
|
||||
hur h_c ur_c
|
||||
jianr j_c ianr_c
|
||||
jiaor j_c iaor_c
|
||||
jiar j_c iar_c
|
||||
jier j_c ier_c
|
||||
jingr j_c ingr_c
|
||||
jinr j_c inr_c
|
||||
jir j_c ir_c
|
||||
jiur j_c iour_c
|
||||
jvanr j_c vanr_c
|
||||
jver j_c ver_c
|
||||
jvnr j_c vnr_c
|
||||
kair k_c air_c
|
||||
kanr k_c anr_c
|
||||
kaor k_c aor_c
|
||||
kengr k_c engr_c
|
||||
ker k_c er_c
|
||||
kongr k_c ongr_c
|
||||
kour k_c our_c
|
||||
kuair k_c uair_c
|
||||
kuangr k_c uangr_c
|
||||
kuanr k_c uanr_c
|
||||
kunr k_c uenr_c
|
||||
lanr l_c anr_c
|
||||
laor l_c aor_c
|
||||
lar l_c ar_c
|
||||
leir l_c eir_c
|
||||
lengr l_c engr_c
|
||||
ler l_c er_c
|
||||
liangr l_c iangr_c
|
||||
lianr l_c ianr_c
|
||||
liaor l_c iaor_c
|
||||
liar l_c iar_c
|
||||
lingr l_c ingr_c
|
||||
lir l_c ir_c
|
||||
liur l_c iour_c
|
||||
lour l_c our_c
|
||||
luor l_c uor_c
|
||||
lunr l_c uenr_c
|
||||
lur l_c ur_c
|
||||
lvr l_c vr_c
|
||||
mair m_c air_c
|
||||
manr m_c anr_c
|
||||
mangr m_c angr_c
|
||||
maor m_c aor_c
|
||||
mar m_c ar_c
|
||||
meir m_c eir_c
|
||||
menr m_c enr_c
|
||||
mianr m_c ianr_c
|
||||
miaor m_c iaor_c
|
||||
mingr m_c ingr_c
|
||||
mir m_c ir_c
|
||||
mor m_c or_c
|
||||
naor n_c aor_c
|
||||
nar n_c ar_c
|
||||
niangr n_c iangr_c
|
||||
nianr n_c ianr_c
|
||||
niaor n_c iaor_c
|
||||
ningr n_c ingr_c
|
||||
nir n_c ir_c
|
||||
niur n_c iour_c
|
||||
nvr n_c vr_c
|
||||
pair p_c air_c
|
||||
pangr p_c angr_c
|
||||
panr p_c anr_c
|
||||
paor p_c aor_c
|
||||
penr p_c enr_c
|
||||
pianr p_c ianr_c
|
||||
piaor p_c iaor_c
|
||||
pier p_c ier_c
|
||||
pingr p_c ingr_c
|
||||
pir p_c ir_c
|
||||
por p_c or_c
|
||||
pur p_c ur_c
|
||||
qianr q_c ianr_c
|
||||
qiaor q_c iaor_c
|
||||
qingr q_c ingr_c
|
||||
qir q_c ir_c
|
||||
qiur q_c iour_c
|
||||
qvanr q_c vanr_c
|
||||
qvnr q_c vnr_c
|
||||
qvr q_c vr_c
|
||||
sar s_c ar_c
|
||||
rangr r_c angr_c
|
||||
renr r_c enr_c
|
||||
sair s_c air_c
|
||||
sanr s_c anr_c
|
||||
shair sh_c air_c
|
||||
shaor sh_c aor_c
|
||||
shengr sh_c engr_c
|
||||
shenr sh_c enr_c
|
||||
shir sh_c ihr_c
|
||||
shuair sh_c uair_c
|
||||
shour sh_c our_c
|
||||
shuar sh_c uar_c
|
||||
shuir sh_c ueir_c
|
||||
shunr sh_c uenr_c
|
||||
shuor sh_c uor_c
|
||||
shur sh_c ur_c
|
||||
sir s_c iir_c
|
||||
suir s_c ueir_c
|
||||
sunr s_c uenr_c
|
||||
tair t_c air_c
|
||||
tangr t_c angr_c
|
||||
tanr t_c anr_c
|
||||
taor t_c aor_c
|
||||
ter t_c er_c
|
||||
tianr t_c ianr_c
|
||||
tiaor t_c iaor_c
|
||||
tir t_c ir_c
|
||||
tingr t_c ingr_c
|
||||
tongr t_c ongr_c
|
||||
tour t_c our_c
|
||||
tuanr t_c uanr_c
|
||||
tuir t_c ueir_c
|
||||
tuor t_c uor_c
|
||||
tur t_c ur_c
|
||||
wanr w_c anr_c
|
||||
war w_c ar_c
|
||||
weir w_c eir_c
|
||||
wenr w_c enr_c
|
||||
wengr w_c engr_c
|
||||
wor w_c or_c
|
||||
wur w_c ur_c
|
||||
xiangr xx_c iangr_c
|
||||
xianr xx_c ianr_c
|
||||
xiar xx_c iar_c
|
||||
xier xx_c ier_c
|
||||
xingr xx_c ingr_c
|
||||
xir xx_c ir_c
|
||||
xinr xx_c inr_c
|
||||
xiongr xx_c iongr_c
|
||||
xiur xx_c iour_c
|
||||
yangr y_c angr_c
|
||||
yanr y_c anr_c
|
||||
yaor y_c aor_c
|
||||
yar y_c ar_c
|
||||
yer y_c er_c
|
||||
yingr y_c ingr_c
|
||||
yinr y_c inr_c
|
||||
yir y_c ir_c
|
||||
your y_c our_c
|
||||
yvanr y_c vanr_c
|
||||
zair z_c air_c
|
||||
yvr y_c vr_c
|
||||
yver y_c ver_c
|
||||
zaor z_c aor_c
|
||||
zar z_c ar_c
|
||||
zhangr zh_c angr_c
|
||||
zhanr zh_c anr_c
|
||||
zhaor zh_c aor_c
|
||||
zhar zh_c ar_c
|
||||
zhenr zh_c enr_c
|
||||
zher zh_c er_c
|
||||
zhir zh_c ihr_c
|
||||
zhongr zh_c ongr_c
|
||||
zhour zh_c our_c
|
||||
zhuar zh_c uar_c
|
||||
zhuanr zh_c uanr_c
|
||||
zhunr zh_c uenr_c
|
||||
zhuor zh_c uor_c
|
||||
zhur zh_c ur_c
|
||||
zir z_c iir_c
|
||||
zuanr z_c uanr_c
|
||||
zuir z_c ueir_c
|
||||
zuor z_c uor_c
|
||||
7
voices/zhibei_emo/dict/PinYin/tonelist.txt
Normal file
7
voices/zhibei_emo/dict/PinYin/tonelist.txt
Normal file
@ -0,0 +1,7 @@
|
||||
1
|
||||
|
||||
4
|
||||
2
|
||||
3
|
||||
5
|
||||
0
|
||||
33
voices/zhibei_emo/dict/emo_category_dict.txt
Executable file
33
voices/zhibei_emo/dict/emo_category_dict.txt
Executable file
@ -0,0 +1,33 @@
|
||||
emotion_none
|
||||
emotion_neutral
|
||||
emotion_angry
|
||||
emotion_disgust
|
||||
emotion_fear
|
||||
emotion_happy
|
||||
emotion_sad
|
||||
emotion_surprise
|
||||
emotion_calm
|
||||
emotion_gentle
|
||||
emotion_relax
|
||||
emotion_lyrical
|
||||
emotion_serious
|
||||
emotion_disgruntled
|
||||
emotion_satisfied
|
||||
emotion_disappointed
|
||||
emotion_excited
|
||||
emotion_anxiety
|
||||
emotion_jealousy
|
||||
emotion_hate
|
||||
emotion_pity
|
||||
emotion_pleasure
|
||||
emotion_arousal
|
||||
emotion_dominance
|
||||
emotion_placeholder1
|
||||
emotion_placeholder2
|
||||
emotion_placeholder3
|
||||
emotion_placeholder4
|
||||
emotion_placeholder5
|
||||
emotion_placeholder6
|
||||
emotion_placeholder7
|
||||
emotion_placeholder8
|
||||
emotion_placeholder9
|
||||
6
voices/zhibei_emo/dict/speaker_dict.txt
Executable file
6
voices/zhibei_emo/dict/speaker_dict.txt
Executable file
@ -0,0 +1,6 @@
|
||||
F7
|
||||
F74
|
||||
FBYN
|
||||
FRXL
|
||||
M7
|
||||
xiaoyu
|
||||
144
voices/zhibei_emo/dict/sy_dict.txt
Executable file
144
voices/zhibei_emo/dict/sy_dict.txt
Executable file
@ -0,0 +1,144 @@
|
||||
a_c
|
||||
ai_c
|
||||
an_c
|
||||
ang_c
|
||||
ao_c
|
||||
b_c
|
||||
c_c
|
||||
ch_c
|
||||
d_c
|
||||
e_c
|
||||
ei_c
|
||||
en_c
|
||||
eng_c
|
||||
er_c
|
||||
f_c
|
||||
g_c
|
||||
h_c
|
||||
i_c
|
||||
ia_c
|
||||
ian_c
|
||||
iang_c
|
||||
iao_c
|
||||
ie_c
|
||||
ih_c
|
||||
ii_c
|
||||
in_c
|
||||
ing_c
|
||||
io_c
|
||||
iong_c
|
||||
iou_c
|
||||
j_c
|
||||
k_c
|
||||
l_c
|
||||
m_c
|
||||
n_c
|
||||
o_c
|
||||
ong_c
|
||||
ou_c
|
||||
p_c
|
||||
q_c
|
||||
r_c
|
||||
s_c
|
||||
sh_c
|
||||
t_c
|
||||
u_c
|
||||
ua_c
|
||||
uai_c
|
||||
uan_c
|
||||
uang_c
|
||||
uei_c
|
||||
uen_c
|
||||
ueng_c
|
||||
uo_c
|
||||
v_c
|
||||
van_c
|
||||
ve_c
|
||||
vn_c
|
||||
xx_c
|
||||
z_c
|
||||
zh_c
|
||||
w_c
|
||||
y_c
|
||||
ga
|
||||
ge
|
||||
go
|
||||
aa
|
||||
ae
|
||||
ah
|
||||
ao
|
||||
aw
|
||||
ay
|
||||
b
|
||||
ch
|
||||
d
|
||||
dh
|
||||
eh
|
||||
er
|
||||
ey
|
||||
f
|
||||
g
|
||||
hh
|
||||
ih
|
||||
iy
|
||||
jh
|
||||
k
|
||||
l
|
||||
m
|
||||
n
|
||||
ng
|
||||
ow
|
||||
oy
|
||||
p
|
||||
r
|
||||
s
|
||||
sh
|
||||
t
|
||||
th
|
||||
uh
|
||||
uw
|
||||
v
|
||||
w
|
||||
y
|
||||
z
|
||||
zh
|
||||
air_c
|
||||
angr_c
|
||||
anr_c
|
||||
aor_c
|
||||
ar_c
|
||||
eir_c
|
||||
engr_c
|
||||
enr_c
|
||||
iangr_c
|
||||
ianr_c
|
||||
iaor_c
|
||||
iar_c
|
||||
ier_c
|
||||
ihr_c
|
||||
iir_c
|
||||
ingr_c
|
||||
inr_c
|
||||
iongr_c
|
||||
iour_c
|
||||
ir_c
|
||||
ongr_c
|
||||
or_c
|
||||
our_c
|
||||
uair_c
|
||||
uangr_c
|
||||
uanr_c
|
||||
uar_c
|
||||
ueir_c
|
||||
uenr_c
|
||||
uor_c
|
||||
ur_c
|
||||
vanr_c
|
||||
ver_c
|
||||
vnr_c
|
||||
vr_c
|
||||
pau
|
||||
#1
|
||||
#2
|
||||
#3
|
||||
#4
|
||||
5
voices/zhibei_emo/dict/syllable_flag_dict.txt
Executable file
5
voices/zhibei_emo/dict/syllable_flag_dict.txt
Executable file
@ -0,0 +1,5 @@
|
||||
s_begin
|
||||
s_end
|
||||
s_none
|
||||
s_both
|
||||
s_middle
|
||||
7
voices/zhibei_emo/dict/tone_dict.txt
Executable file
7
voices/zhibei_emo/dict/tone_dict.txt
Executable file
@ -0,0 +1,7 @@
|
||||
tone1
|
||||
tone_none
|
||||
tone4
|
||||
tone2
|
||||
tone3
|
||||
tone5
|
||||
tone0
|
||||
5
voices/zhibei_emo/dict/word_segment_dict.txt
Executable file
5
voices/zhibei_emo/dict/word_segment_dict.txt
Executable file
@ -0,0 +1,5 @@
|
||||
word_begin
|
||||
word_end
|
||||
word_middle
|
||||
word_both
|
||||
word_none
|
||||
BIN
voices/zhibei_emo/voc/ckpt/checkpoint_0.pth
(Stored with Git LFS)
Normal file
BIN
voices/zhibei_emo/voc/ckpt/checkpoint_0.pth
(Stored with Git LFS)
Normal file
Binary file not shown.
BIN
voices/zhibei_emo/voc/ckpt/checkpoint_1.pth
(Stored with Git LFS)
Normal file
BIN
voices/zhibei_emo/voc/ckpt/checkpoint_1.pth
(Stored with Git LFS)
Normal file
Binary file not shown.
188
voices/zhibei_emo/voc/config.yaml
Normal file
188
voices/zhibei_emo/voc/config.yaml
Normal file
@ -0,0 +1,188 @@
|
||||
model_type: hifigan
|
||||
Model:
|
||||
###########################################################
|
||||
# GENERATOR NETWORK ARCHITECTURE SETTING #
|
||||
###########################################################
|
||||
Generator:
|
||||
params:
|
||||
in_channels: 80
|
||||
out_channels: 1
|
||||
channels: 256
|
||||
kernel_size: 7
|
||||
upsample_scales: [10, 5, 2, 2]
|
||||
upsample_kernal_sizes: [20, 11, 4, 4]
|
||||
resblock_kernel_sizes: [3, 7, 11]
|
||||
resblock_dilations:
|
||||
- [1, 3, 5, 7]
|
||||
- [1, 3, 5, 7]
|
||||
- [1, 3, 5, 7]
|
||||
bias: true
|
||||
causal: true
|
||||
nonlinear_activation: "LeakyReLU"
|
||||
nonlinear_activation_params:
|
||||
negative_slope: 0.1
|
||||
use_weight_norm: true
|
||||
optimizer:
|
||||
type: Adam
|
||||
params:
|
||||
lr: 2.0e-4
|
||||
betas: [0.5, 0.9]
|
||||
weight_decay: 0.0
|
||||
scheduler:
|
||||
type: MultiStepLR
|
||||
params:
|
||||
gamma: 0.5
|
||||
milestones:
|
||||
- 200000
|
||||
- 400000
|
||||
- 600000
|
||||
- 800000
|
||||
|
||||
###########################################################
|
||||
# DISCRIMINATOR NETWORK ARCHITECTURE SETTING #
|
||||
###########################################################
|
||||
MultiScaleDiscriminator:
|
||||
params:
|
||||
scales: 3
|
||||
downsample_pooling: "DWT"
|
||||
downsample_pooling_params:
|
||||
kernel_size: 4
|
||||
stride: 2
|
||||
padding: 2
|
||||
discriminator_params:
|
||||
in_channels: 1
|
||||
out_channels: 1
|
||||
kernel_sizes: [15, 41, 5, 3]
|
||||
channels: 128
|
||||
max_downsample_channels: 1024
|
||||
max_groups: 16
|
||||
bias: true
|
||||
downsample_scales: [4, 4, 4, 4, 1]
|
||||
nonlinear_activation: "LeakyReLU"
|
||||
nonlinear_activation_params:
|
||||
negative_slope: 0.1
|
||||
follow_official_norm: true
|
||||
optimizer:
|
||||
type: Adam
|
||||
params:
|
||||
lr: 2.0e-4
|
||||
betas: [0.5, 0.9]
|
||||
weight_decay: 0.0
|
||||
scheduler:
|
||||
type: MultiStepLR
|
||||
params:
|
||||
gamma: 0.5
|
||||
milestones:
|
||||
- 200000
|
||||
- 400000
|
||||
- 600000
|
||||
- 800000
|
||||
|
||||
MultiPeriodDiscriminator:
|
||||
params:
|
||||
periods: [2, 3, 5, 7, 11]
|
||||
discriminator_params:
|
||||
in_channels: 1
|
||||
out_channels: 1
|
||||
kernel_sizes: [5, 3]
|
||||
channels: 32
|
||||
downsample_scales: [3, 3, 3, 3, 1]
|
||||
max_downsample_channels: 1024
|
||||
bias: true
|
||||
nonlinear_activation: "LeakyReLU"
|
||||
nonlinear_activation_params:
|
||||
negative_slope: 0.1
|
||||
use_spectral_norm: false
|
||||
optimizer:
|
||||
type: Adam
|
||||
params:
|
||||
lr: 2.0e-4
|
||||
betas: [0.5, 0.9]
|
||||
weight_decay: 0.0
|
||||
scheduler:
|
||||
type: MultiStepLR
|
||||
params:
|
||||
gamma: 0.5
|
||||
milestones:
|
||||
- 200000
|
||||
- 400000
|
||||
- 600000
|
||||
- 800000
|
||||
|
||||
####################################################
|
||||
# LOSS SETTING #
|
||||
####################################################
|
||||
Loss:
|
||||
generator_adv_loss:
|
||||
enable: True
|
||||
params:
|
||||
average_by_discriminators: False
|
||||
weights: 1.0
|
||||
|
||||
discriminator_adv_loss:
|
||||
enable: True
|
||||
params:
|
||||
average_by_discriminators: False
|
||||
weights: 1.0
|
||||
|
||||
stft_loss:
|
||||
enable: False # Whether to use multi-resolution STFT loss.
|
||||
|
||||
mel_loss:
|
||||
enable: True
|
||||
params:
|
||||
fs: 16000
|
||||
fft_size: 2048
|
||||
hop_size: 200
|
||||
win_length: 1000
|
||||
window: "hann"
|
||||
num_mels: 80
|
||||
fmin: 0
|
||||
fmax: 8000
|
||||
log_base: null
|
||||
weights: 45.0
|
||||
|
||||
subband_stft_loss:
|
||||
enable: False
|
||||
params:
|
||||
fft_sizes: [384, 683, 171] # List of FFT sizes for STFT-based loss.
|
||||
hop_sizes: [35, 75, 15] # List of hop sizes for STFT-based loss.
|
||||
win_lengths: [150, 300, 60] # List of window lengths for STFT-based loss.
|
||||
window: "hann_window" # Window function for STFT-based loss
|
||||
|
||||
feat_match_loss:
|
||||
enable: True
|
||||
params:
|
||||
average_by_discriminators: false
|
||||
average_by_layers: false
|
||||
weights: 2.0
|
||||
|
||||
|
||||
###########################################################
|
||||
# DATA LOADER SETTING #
|
||||
###########################################################
|
||||
batch_size: 16
|
||||
batch_max_steps: 9600 # Length of each audio in batch. Make sure it is divisible by hop_size.
|
||||
pin_memory: True
|
||||
num_workers: 2 # FIXME: setting this > 0 may hang on macOS
|
||||
remove_short_samples: False
|
||||
allow_cache: True
|
||||
|
||||
generator_grad_norm: -1
|
||||
|
||||
discriminator_grad_norm: -1
|
||||
|
||||
###########################################################
|
||||
# INTERVAL SETTING #
|
||||
###########################################################
|
||||
generator_train_start_steps: 1 # Number of steps to start to train generator.
|
||||
discriminator_train_start_steps: 0 # Number of steps to start to train discriminator.
|
||||
train_max_steps: 2500000 # Number of training steps.
|
||||
save_interval_steps: 20000 # Interval steps to save checkpoint.
|
||||
eval_interval_steps: 10000 # Interval steps to evaluate the network.
|
||||
log_interval_steps: 1000 # Interval steps to record the training log.
|
||||
|
||||
###########################################################
|
||||
# OTHER SETTING #
|
||||
###########################################################
|
||||
num_save_intermediate_results: 4 # Number of results to be saved as intermediate results.
|
||||
BIN
voices/zhibei_emo/vocoder/pytorch_model.bin
(Stored with Git LFS)
Normal file
BIN
voices/zhibei_emo/vocoder/pytorch_model.bin
(Stored with Git LFS)
Normal file
Binary file not shown.
BIN
voices/zhitian_emo/am/ckpt/checkpoint_0.pth
(Stored with Git LFS)
Normal file
BIN
voices/zhitian_emo/am/ckpt/checkpoint_0.pth
(Stored with Git LFS)
Normal file
Binary file not shown.
105
voices/zhitian_emo/am/config.yaml
Normal file
105
voices/zhitian_emo/am/config.yaml
Normal file
@ -0,0 +1,105 @@
|
||||
model_type: sambert
|
||||
Model:
|
||||
#########################################################
|
||||
# SAMBERT NETWORK ARCHITECTURE SETTING #
|
||||
#########################################################
|
||||
KanTtsSAMBERT:
|
||||
params:
|
||||
max_len: 800
|
||||
|
||||
embedding_dim: 512
|
||||
encoder_num_layers: 8
|
||||
encoder_num_heads: 8
|
||||
encoder_num_units: 128
|
||||
encoder_ffn_inner_dim: 1024
|
||||
encoder_dropout: 0.1
|
||||
encoder_attention_dropout: 0.1
|
||||
encoder_relu_dropout: 0.1
|
||||
encoder_projection_units: 32
|
||||
|
||||
speaker_units: 32
|
||||
emotion_units: 32
|
||||
|
||||
predictor_filter_size: 41
|
||||
predictor_fsmn_num_layers: 3
|
||||
predictor_num_memory_units: 128
|
||||
predictor_ffn_inner_dim: 256
|
||||
predictor_dropout: 0.1
|
||||
predictor_shift: 0
|
||||
predictor_lstm_units: 128
|
||||
dur_pred_prenet_units: [128, 128]
|
||||
dur_pred_lstm_units: 128
|
||||
|
||||
decoder_prenet_units: [256, 256]
|
||||
decoder_num_layers: 12
|
||||
decoder_num_heads: 8
|
||||
decoder_num_units: 128
|
||||
decoder_ffn_inner_dim: 1024
|
||||
decoder_dropout: 0.1
|
||||
decoder_attention_dropout: 0.1
|
||||
decoder_relu_dropout: 0.1
|
||||
|
||||
outputs_per_step: 3
|
||||
num_mels: 80
|
||||
|
||||
postnet_filter_size: 41
|
||||
postnet_fsmn_num_layers: 4
|
||||
postnet_num_memory_units: 256
|
||||
postnet_ffn_inner_dim: 512
|
||||
postnet_dropout: 0.1
|
||||
postnet_shift: 17
|
||||
postnet_lstm_units: 128
|
||||
MAS: False
|
||||
|
||||
optimizer:
|
||||
type: Adam
|
||||
params:
|
||||
lr: 0.001
|
||||
betas: [0.9, 0.98]
|
||||
eps: 1.0e-9
|
||||
weight_decay: 0.0
|
||||
scheduler:
|
||||
type: NoamLR
|
||||
params:
|
||||
warmup_steps: 4000
|
||||
|
||||
linguistic_unit:
|
||||
cleaners: english_cleaners
|
||||
lfeat_type_list: sy,tone,syllable_flag,word_segment,emo_category,speaker_category
|
||||
speaker_list: F7,F74,FBYN,FRXL,M7,xiaoyu
|
||||
####################################################
|
||||
# LOSS SETTING #
|
||||
####################################################
|
||||
Loss:
|
||||
MelReconLoss:
|
||||
enable: True
|
||||
params:
|
||||
loss_type: mae
|
||||
|
||||
ProsodyReconLoss:
|
||||
enable: True
|
||||
params:
|
||||
loss_type: mae
|
||||
|
||||
###########################################################
|
||||
# DATA LOADER SETTING #
|
||||
###########################################################
|
||||
batch_size: 32
|
||||
pin_memory: False
|
||||
num_workers: 4 # FIXME: setting this > 0 may hang on macOS
|
||||
remove_short_samples: False
|
||||
allow_cache: True
|
||||
grad_norm: 1.0
|
||||
|
||||
###########################################################
|
||||
# INTERVAL SETTING #
|
||||
###########################################################
|
||||
train_max_steps: 1000000 # Number of training steps.
|
||||
save_interval_steps: 20000 # Interval steps to save checkpoint.
|
||||
eval_interval_steps: 10000 # Interval steps to evaluate the network.
|
||||
log_interval_steps: 1000 # Interval steps to record the training log.
|
||||
|
||||
###########################################################
|
||||
# OTHER SETTING #
|
||||
###########################################################
|
||||
num_save_intermediate_results: 4 # Number of results to be saved as intermediate results.
|
||||
BIN
voices/zhitian_emo/am/pytorch_model.bin
(Stored with Git LFS)
Normal file
BIN
voices/zhitian_emo/am/pytorch_model.bin
(Stored with Git LFS)
Normal file
Binary file not shown.
27
voices/zhitian_emo/audio_config.yaml
Normal file
27
voices/zhitian_emo/audio_config.yaml
Normal file
@ -0,0 +1,27 @@
|
||||
# Audio processing configs
|
||||
|
||||
audio_config:
|
||||
# Preprocess
|
||||
wav_normalize: True
|
||||
trim_silence: True
|
||||
trim_silence_threshold_db: 60
|
||||
preemphasize: False
|
||||
|
||||
# Feature extraction
|
||||
sampling_rate: 16000
|
||||
hop_length: 200
|
||||
win_length: 1000
|
||||
n_fft: 2048
|
||||
n_mels: 80
|
||||
fmin: 0.0
|
||||
fmax: 8000.0
|
||||
phone_level_feature: True
|
||||
|
||||
# Normalization
|
||||
norm_type: "mean_std" # "mean_std" or "global"
|
||||
max_norm: 1.0
|
||||
symmetric: False
|
||||
min_level_db: -100.0
|
||||
ref_level_db: 20
|
||||
|
||||
num_workers: 16
|
||||
2
voices/zhitian_emo/dict/PinYin/En2ChPhoneMap.txt
Normal file
2
voices/zhitian_emo/dict/PinYin/En2ChPhoneMap.txt
Normal file
@ -0,0 +1,2 @@
|
||||
wu w
|
||||
yi y
|
||||
1263
voices/zhitian_emo/dict/PinYin/PhoneSet.xml
Normal file
1263
voices/zhitian_emo/dict/PinYin/PhoneSet.xml
Normal file
File diff suppressed because it is too large
Load Diff
147
voices/zhitian_emo/dict/PinYin/PosSet.xml
Normal file
147
voices/zhitian_emo/dict/PinYin/PosSet.xml
Normal file
@ -0,0 +1,147 @@
|
||||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<posSet xmlns="http://schemas.alibaba-inc.com/tts">
|
||||
<pos>
|
||||
<id>1</id>
|
||||
<name>a</name>
|
||||
<desc>todo</desc>
|
||||
</pos>
|
||||
<pos>
|
||||
<id>2</id>
|
||||
<name>b</name>
|
||||
<desc>todo</desc>
|
||||
</pos>
|
||||
<pos>
|
||||
<id>3</id>
|
||||
<name>c</name>
|
||||
<desc>todo</desc>
|
||||
</pos>
|
||||
<pos>
|
||||
<id>4</id>
|
||||
<name>d</name>
|
||||
<desc>todo</desc>
|
||||
</pos>
|
||||
<pos>
|
||||
<id>5</id>
|
||||
<name>e</name>
|
||||
<desc>todo</desc>
|
||||
</pos>
|
||||
<pos>
|
||||
<id>6</id>
|
||||
<name>f</name>
|
||||
<desc>todo</desc>
|
||||
</pos>
|
||||
<pos>
|
||||
<id>7</id>
|
||||
<name>g</name>
|
||||
<desc>todo</desc>
|
||||
<sub>
|
||||
<pos>
|
||||
<id>8</id>
|
||||
<name>gb</name>
|
||||
<desc>todo</desc>
|
||||
</pos>
|
||||
</sub>
|
||||
</pos>
|
||||
<pos>
|
||||
<id>9</id>
|
||||
<name>h</name>
|
||||
<desc>todo</desc>
|
||||
</pos>
|
||||
<pos>
|
||||
<id>10</id>
|
||||
<name>i</name>
|
||||
<desc>todo</desc>
|
||||
</pos>
|
||||
<pos>
|
||||
<id>11</id>
|
||||
<name>j</name>
|
||||
<desc>todo</desc>
|
||||
</pos>
|
||||
<pos>
|
||||
<id>12</id>
|
||||
<name>k</name>
|
||||
<desc>todo</desc>
|
||||
</pos>
|
||||
<pos>
|
||||
<id>13</id>
|
||||
<name>l</name>
|
||||
<desc>todo</desc>
|
||||
</pos>
|
||||
<pos>
|
||||
<id>14</id>
|
||||
<name>m</name>
|
||||
<desc>todo</desc>
|
||||
</pos>
|
||||
<pos>
|
||||
<id>15</id>
|
||||
<name>n</name>
|
||||
<desc>todo</desc>
|
||||
<sub>
|
||||
<pos>
|
||||
<id>16</id>
|
||||
<name>nz</name>
|
||||
<desc>todo</desc>
|
||||
</pos>
|
||||
</sub>
|
||||
</pos>
|
||||
<pos>
|
||||
<id>17</id>
|
||||
<name>o</name>
|
||||
<desc>todo</desc>
|
||||
</pos>
|
||||
<pos>
|
||||
<id>18</id>
|
||||
<name>p</name>
|
||||
<desc>todo</desc>
|
||||
</pos>
|
||||
<pos>
|
||||
<id>19</id>
|
||||
<name>q</name>
|
||||
<desc>todo</desc>
|
||||
</pos>
|
||||
<pos>
|
||||
<id>20</id>
|
||||
<name>r</name>
|
||||
<desc>todo</desc>
|
||||
</pos>
|
||||
<pos>
|
||||
<id>21</id>
|
||||
<name>s</name>
|
||||
<desc>todo</desc>
|
||||
</pos>
|
||||
<pos>
|
||||
<id>22</id>
|
||||
<name>t</name>
|
||||
<desc>todo</desc>
|
||||
</pos>
|
||||
<pos>
|
||||
<id>23</id>
|
||||
<name>u</name>
|
||||
<desc>todo</desc>
|
||||
</pos>
|
||||
<pos>
|
||||
<id>24</id>
|
||||
<name>v</name>
|
||||
<desc>todo</desc>
|
||||
</pos>
|
||||
<pos>
|
||||
<id>25</id>
|
||||
<name>w</name>
|
||||
<desc>todo</desc>
|
||||
</pos>
|
||||
<pos>
|
||||
<id>26</id>
|
||||
<name>x</name>
|
||||
<desc>todo</desc>
|
||||
</pos>
|
||||
<pos>
|
||||
<id>27</id>
|
||||
<name>y</name>
|
||||
<desc>todo</desc>
|
||||
</pos>
|
||||
<pos>
|
||||
<id>28</id>
|
||||
<name>z</name>
|
||||
<desc>todo</desc>
|
||||
</pos>
|
||||
</posSet>
|
||||
661
voices/zhitian_emo/dict/PinYin/py2phoneMap.txt
Normal file
661
voices/zhitian_emo/dict/PinYin/py2phoneMap.txt
Normal file
@ -0,0 +1,661 @@
|
||||
a ga a_c
|
||||
ai ga ai_c
|
||||
an ga an_c
|
||||
ang ga ang_c
|
||||
ao ga ao_c
|
||||
ba b_c a_c
|
||||
bai b_c ai_c
|
||||
ban b_c an_c
|
||||
bang b_c ang_c
|
||||
bao b_c ao_c
|
||||
bei b_c ei_c
|
||||
ben b_c en_c
|
||||
beng b_c eng_c
|
||||
bi b_c i_c
|
||||
bian b_c ian_c
|
||||
biao b_c iao_c
|
||||
bie b_c ie_c
|
||||
bin b_c in_c
|
||||
bing b_c ing_c
|
||||
bo b_c o_c
|
||||
bu b_c u_c
|
||||
ca c_c a_c
|
||||
cai c_c ai_c
|
||||
can c_c an_c
|
||||
cang c_c ang_c
|
||||
cao c_c ao_c
|
||||
ce c_c e_c
|
||||
cen c_c en_c
|
||||
ceng c_c eng_c
|
||||
cha ch_c a_c
|
||||
chai ch_c ai_c
|
||||
chan ch_c an_c
|
||||
chang ch_c ang_c
|
||||
chao ch_c ao_c
|
||||
che ch_c e_c
|
||||
chen ch_c en_c
|
||||
cheng ch_c eng_c
|
||||
chi ch_c ih_c
|
||||
chong ch_c ong_c
|
||||
chou ch_c ou_c
|
||||
chu ch_c u_c
|
||||
chua ch_c ua_c
|
||||
chuai ch_c uai_c
|
||||
chuan ch_c uan_c
|
||||
chuang ch_c uang_c
|
||||
chui ch_c uei_c
|
||||
chun ch_c uen_c
|
||||
chuo ch_c uo_c
|
||||
ci c_c ii_c
|
||||
cong c_c ong_c
|
||||
cou c_c ou_c
|
||||
cu c_c u_c
|
||||
cuan c_c uan_c
|
||||
cui c_c uei_c
|
||||
cun c_c uen_c
|
||||
cuo c_c uo_c
|
||||
da d_c a_c
|
||||
dai d_c ai_c
|
||||
dan d_c an_c
|
||||
dang d_c ang_c
|
||||
dao d_c ao_c
|
||||
de d_c e_c
|
||||
dei d_c ei_c
|
||||
den d_c en_c
|
||||
deng d_c eng_c
|
||||
di d_c i_c
|
||||
dia d_c ia_c
|
||||
dian d_c ian_c
|
||||
diao d_c iao_c
|
||||
die d_c ie_c
|
||||
ding d_c ing_c
|
||||
diu d_c iou_c
|
||||
dong d_c ong_c
|
||||
dou d_c ou_c
|
||||
du d_c u_c
|
||||
duan d_c uan_c
|
||||
dui d_c uei_c
|
||||
dun d_c uen_c
|
||||
duo d_c uo_c
|
||||
e ge e_c
|
||||
ei ge ei_c
|
||||
en ge en_c
|
||||
eng ge eng_c
|
||||
er ge er_c
|
||||
fa f_c a_c
|
||||
fan f_c an_c
|
||||
fang f_c ang_c
|
||||
fei f_c ei_c
|
||||
fen f_c en_c
|
||||
feng f_c eng_c
|
||||
fo f_c o_c
|
||||
fou f_c ou_c
|
||||
fu f_c u_c
|
||||
ga g_c a_c
|
||||
gai g_c ai_c
|
||||
gan g_c an_c
|
||||
gang g_c ang_c
|
||||
gao g_c ao_c
|
||||
ge g_c e_c
|
||||
gei g_c ei_c
|
||||
gen g_c en_c
|
||||
geng g_c eng_c
|
||||
gong g_c ong_c
|
||||
gou g_c ou_c
|
||||
gu g_c u_c
|
||||
gua g_c ua_c
|
||||
guai g_c uai_c
|
||||
guan g_c uan_c
|
||||
guang g_c uang_c
|
||||
gui g_c uei_c
|
||||
gun g_c uen_c
|
||||
guo g_c uo_c
|
||||
ha h_c a_c
|
||||
hai h_c ai_c
|
||||
han h_c an_c
|
||||
hang h_c ang_c
|
||||
hao h_c ao_c
|
||||
he h_c e_c
|
||||
hei h_c ei_c
|
||||
hen h_c en_c
|
||||
heng h_c eng_c
|
||||
hong h_c ong_c
|
||||
hou h_c ou_c
|
||||
hu h_c u_c
|
||||
hua h_c ua_c
|
||||
huai h_c uai_c
|
||||
huan h_c uan_c
|
||||
huang h_c uang_c
|
||||
hui h_c uei_c
|
||||
hun h_c uen_c
|
||||
huo h_c uo_c
|
||||
ji j_c i_c
|
||||
jia j_c ia_c
|
||||
jian j_c ian_c
|
||||
jiang j_c iang_c
|
||||
jiao j_c iao_c
|
||||
jie j_c ie_c
|
||||
jin j_c in_c
|
||||
jing j_c ing_c
|
||||
jiong j_c iong_c
|
||||
jiu j_c iou_c
|
||||
jv j_c v_c
|
||||
jvan j_c van_c
|
||||
jve j_c ve_c
|
||||
jvn j_c vn_c
|
||||
ka k_c a_c
|
||||
kai k_c ai_c
|
||||
kan k_c an_c
|
||||
kang k_c ang_c
|
||||
kao k_c ao_c
|
||||
ke k_c e_c
|
||||
kei k_c ei_c
|
||||
ken k_c en_c
|
||||
keng k_c eng_c
|
||||
kong k_c ong_c
|
||||
kou k_c ou_c
|
||||
ku k_c u_c
|
||||
kua k_c ua_c
|
||||
kuai k_c uai_c
|
||||
kuan k_c uan_c
|
||||
kuang k_c uang_c
|
||||
kui k_c uei_c
|
||||
kun k_c uen_c
|
||||
kuo k_c uo_c
|
||||
la l_c a_c
|
||||
lai l_c ai_c
|
||||
lan l_c an_c
|
||||
lang l_c ang_c
|
||||
lao l_c ao_c
|
||||
le l_c e_c
|
||||
lei l_c ei_c
|
||||
leng l_c eng_c
|
||||
li l_c i_c
|
||||
lia l_c ia_c
|
||||
lian l_c ian_c
|
||||
liang l_c iang_c
|
||||
liao l_c iao_c
|
||||
lie l_c ie_c
|
||||
lin l_c in_c
|
||||
ling l_c ing_c
|
||||
liu l_c iou_c
|
||||
lo l_c o_c
|
||||
long l_c ong_c
|
||||
lou l_c ou_c
|
||||
lu l_c u_c
|
||||
luan l_c uan_c
|
||||
lun l_c uen_c
|
||||
luo l_c uo_c
|
||||
lv l_c v_c
|
||||
lve l_c ve_c
|
||||
ma m_c a_c
|
||||
mai m_c ai_c
|
||||
man m_c an_c
|
||||
mang m_c ang_c
|
||||
mao m_c ao_c
|
||||
me m_c e_c
|
||||
mei m_c ei_c
|
||||
men m_c en_c
|
||||
meng m_c eng_c
|
||||
mi m_c i_c
|
||||
mian m_c ian_c
|
||||
miao m_c iao_c
|
||||
mie m_c ie_c
|
||||
min m_c in_c
|
||||
ming m_c ing_c
|
||||
miu m_c iou_c
|
||||
mo m_c o_c
|
||||
mou m_c ou_c
|
||||
mu m_c u_c
|
||||
na n_c a_c
|
||||
nai n_c ai_c
|
||||
nan n_c an_c
|
||||
nang n_c ang_c
|
||||
nao n_c ao_c
|
||||
ne n_c e_c
|
||||
nei n_c ei_c
|
||||
nen n_c en_c
|
||||
neng n_c eng_c
|
||||
ni n_c i_c
|
||||
nian n_c ian_c
|
||||
niang n_c iang_c
|
||||
niao n_c iao_c
|
||||
nie n_c ie_c
|
||||
nin n_c in_c
|
||||
ning n_c ing_c
|
||||
niu n_c iou_c
|
||||
nong n_c ong_c
|
||||
nou n_c ou_c
|
||||
nu n_c u_c
|
||||
nuan n_c uan_c
|
||||
nun n_c uen_c
|
||||
nuo n_c uo_c
|
||||
nv n_c v_c
|
||||
nve n_c ve_c
|
||||
o go o_c
|
||||
ou go ou_c
|
||||
pa p_c a_c
|
||||
pai p_c ai_c
|
||||
pan p_c an_c
|
||||
pang p_c ang_c
|
||||
pao p_c ao_c
|
||||
pei p_c ei_c
|
||||
pen p_c en_c
|
||||
peng p_c eng_c
|
||||
pi p_c i_c
|
||||
pian p_c ian_c
|
||||
piao p_c iao_c
|
||||
pie p_c ie_c
|
||||
pin p_c in_c
|
||||
ping p_c ing_c
|
||||
po p_c o_c
|
||||
pou p_c ou_c
|
||||
pu p_c u_c
|
||||
qi q_c i_c
|
||||
qia q_c ia_c
|
||||
qian q_c ian_c
|
||||
qiang q_c iang_c
|
||||
qiao q_c iao_c
|
||||
qie q_c ie_c
|
||||
qin q_c in_c
|
||||
qing q_c ing_c
|
||||
qiong q_c iong_c
|
||||
qiu q_c iou_c
|
||||
qv q_c v_c
|
||||
qvan q_c van_c
|
||||
qve q_c ve_c
|
||||
qvn q_c vn_c
|
||||
ran r_c an_c
|
||||
rang r_c ang_c
|
||||
rao r_c ao_c
|
||||
re r_c e_c
|
||||
ren r_c en_c
|
||||
reng r_c eng_c
|
||||
ri r_c ih_c
|
||||
rong r_c ong_c
|
||||
rou r_c ou_c
|
||||
ru r_c u_c
|
||||
ruan r_c uan_c
|
||||
rui r_c uei_c
|
||||
run r_c uen_c
|
||||
ruo r_c uo_c
|
||||
sa s_c a_c
|
||||
sai s_c ai_c
|
||||
san s_c an_c
|
||||
sang s_c ang_c
|
||||
sao s_c ao_c
|
||||
se s_c e_c
|
||||
sen s_c en_c
|
||||
seng s_c eng_c
|
||||
sha sh_c a_c
|
||||
shai sh_c ai_c
|
||||
shan sh_c an_c
|
||||
shang sh_c ang_c
|
||||
shao sh_c ao_c
|
||||
she sh_c e_c
|
||||
shei sh_c ei_c
|
||||
shen sh_c en_c
|
||||
sheng sh_c eng_c
|
||||
shi sh_c ih_c
|
||||
shou sh_c ou_c
|
||||
shu sh_c u_c
|
||||
shua sh_c ua_c
|
||||
shuai sh_c uai_c
|
||||
shuan sh_c uan_c
|
||||
shuang sh_c uang_c
|
||||
shui sh_c uei_c
|
||||
shun sh_c uen_c
|
||||
shuo sh_c uo_c
|
||||
si s_c ii_c
|
||||
song s_c ong_c
|
||||
sou s_c ou_c
|
||||
su s_c u_c
|
||||
suan s_c uan_c
|
||||
sui s_c uei_c
|
||||
sun s_c uen_c
|
||||
suo s_c uo_c
|
||||
ta t_c a_c
|
||||
tai t_c ai_c
|
||||
tan t_c an_c
|
||||
tang t_c ang_c
|
||||
tao t_c ao_c
|
||||
te t_c e_c
|
||||
tei t_c ei_c
|
||||
teng t_c eng_c
|
||||
ti t_c i_c
|
||||
tian t_c ian_c
|
||||
tiao t_c iao_c
|
||||
tie t_c ie_c
|
||||
ting t_c ing_c
|
||||
tong t_c ong_c
|
||||
tou t_c ou_c
|
||||
tu t_c u_c
|
||||
tuan t_c uan_c
|
||||
tui t_c uei_c
|
||||
tun t_c uen_c
|
||||
tuo t_c uo_c
|
||||
wa w_c a_c
|
||||
wai w_c ai_c
|
||||
wan w_c an_c
|
||||
wang w_c ang_c
|
||||
wei w_c ei_c
|
||||
wen w_c en_c
|
||||
weng w_c eng_c
|
||||
wo w_c o_c
|
||||
wu w_c u_c
|
||||
xi xx_c i_c
|
||||
xia xx_c ia_c
|
||||
xian xx_c ian_c
|
||||
xiang xx_c iang_c
|
||||
xiao xx_c iao_c
|
||||
xie xx_c ie_c
|
||||
xin xx_c in_c
|
||||
xing xx_c ing_c
|
||||
xiong xx_c iong_c
|
||||
xiu xx_c iou_c
|
||||
xv xx_c v_c
|
||||
xvan xx_c van_c
|
||||
xve xx_c ve_c
|
||||
xvn xx_c vn_c
|
||||
ya y_c a_c
|
||||
yan y_c an_c
|
||||
yang y_c ang_c
|
||||
yao y_c ao_c
|
||||
ye y_c e_c
|
||||
yi y_c i_c
|
||||
yin y_c in_c
|
||||
ying y_c ing_c
|
||||
yo y_c o_c
|
||||
yong y_c ong_c
|
||||
you y_c ou_c
|
||||
yv y_c v_c
|
||||
yvan y_c van_c
|
||||
yve y_c ve_c
|
||||
yvn y_c vn_c
|
||||
za z_c a_c
|
||||
zai z_c ai_c
|
||||
zan z_c an_c
|
||||
zang z_c ang_c
|
||||
zao z_c ao_c
|
||||
ze z_c e_c
|
||||
zei z_c ei_c
|
||||
zen z_c en_c
|
||||
zeng z_c eng_c
|
||||
zha zh_c a_c
|
||||
zhai zh_c ai_c
|
||||
zhan zh_c an_c
|
||||
zhang zh_c ang_c
|
||||
zhao zh_c ao_c
|
||||
zhe zh_c e_c
|
||||
zhei zh_c ei_c
|
||||
zhen zh_c en_c
|
||||
zheng zh_c eng_c
|
||||
zhi zh_c ih_c
|
||||
zhong zh_c ong_c
|
||||
zhou zh_c ou_c
|
||||
zhu zh_c u_c
|
||||
zhua zh_c ua_c
|
||||
zhuai zh_c uai_c
|
||||
zhuan zh_c uan_c
|
||||
zhuang zh_c uang_c
|
||||
zhui zh_c uei_c
|
||||
zhun zh_c uen_c
|
||||
zhuo zh_c uo_c
|
||||
zi z_c ii_c
|
||||
zong z_c ong_c
|
||||
zou z_c ou_c
|
||||
zu z_c u_c
|
||||
zuan z_c uan_c
|
||||
zui z_c uei_c
|
||||
zun z_c uen_c
|
||||
zuo z_c uo_c
|
||||
bangr b_c angr_c
|
||||
banr b_c anr_c
|
||||
baor b_c aor_c
|
||||
bar b_c ar_c
|
||||
beir b_c eir_c
|
||||
bengr b_c engr_c
|
||||
benr b_c enr_c
|
||||
bianr b_c ianr_c
|
||||
biaor b_c iaor_c
|
||||
bingr b_c ingr_c
|
||||
bir b_c ir_c
|
||||
bor b_c or_c
|
||||
bur b_c ur_c
|
||||
caor c_c aor_c
|
||||
car c_c ar_c
|
||||
changr ch_c angr_c
|
||||
chaor ch_c aor_c
|
||||
char ch_c ar_c
|
||||
chengr ch_c engr_c
|
||||
cher ch_c er_c
|
||||
chir ch_c ihr_c
|
||||
chongr ch_c ongr_c
|
||||
chour ch_c our_c
|
||||
chuangr ch_c uangr_c
|
||||
chuanr ch_c uanr_c
|
||||
chuir ch_c ueir_c
|
||||
chunr ch_c uenr_c
|
||||
chuor ch_c uor_c
|
||||
chur ch_c ur_c
|
||||
cir c_c iir_c
|
||||
congr c_c ongr_c
|
||||
cuir c_c ueir_c
|
||||
cunr c_c uenr_c
|
||||
cuor c_c uor_c
|
||||
dair d_c air_c
|
||||
danr d_c anr_c
|
||||
dangr d_c angr_c
|
||||
daor d_c aor_c
|
||||
dengr d_c engr_c
|
||||
dianr d_c ianr_c
|
||||
diaor d_c iaor_c
|
||||
dier d_c ier_c
|
||||
dingr d_c ingr_c
|
||||
dir d_c ir_c
|
||||
dongr d_c ongr_c
|
||||
dour d_c our_c
|
||||
duanr d_c uanr_c
|
||||
duir d_c ueir_c
|
||||
dunr d_c uenr_c
|
||||
duor d_c uor_c
|
||||
dur d_c ur_c
|
||||
fangr f_c angr_c
|
||||
fanr f_c anr_c
|
||||
far f_c ar_c
|
||||
fengr f_c engr_c
|
||||
fenr f_c enr_c
|
||||
fur f_c ur_c
|
||||
gair g_c air_c
|
||||
ganr g_c anr_c
|
||||
gaor g_c aor_c
|
||||
gengr g_c engr_c
|
||||
genr g_c enr_c
|
||||
ger g_c er_c
|
||||
gongr g_c ongr_c
|
||||
gour g_c our_c
|
||||
guair g_c uair_c
|
||||
guanr g_c uanr_c
|
||||
guar g_c uar_c
|
||||
guir g_c ueir_c
|
||||
gunr g_c uenr_c
|
||||
guor g_c uor_c
|
||||
gur g_c ur_c
|
||||
hair h_c air_c
|
||||
hanr h_c anr_c
|
||||
haor h_c aor_c
|
||||
heir h_c eir_c
|
||||
her h_c er_c
|
||||
hour h_c our_c
|
||||
huanr h_c uanr_c
|
||||
huangr h_c uangr_c
|
||||
huar h_c uar_c
|
||||
huir h_c ueir_c
|
||||
hunr h_c uenr_c
|
||||
huor h_c uor_c
|
||||
hur h_c ur_c
|
||||
jianr j_c ianr_c
|
||||
jiaor j_c iaor_c
|
||||
jiar j_c iar_c
|
||||
jier j_c ier_c
|
||||
jingr j_c ingr_c
|
||||
jinr j_c inr_c
|
||||
jir j_c ir_c
|
||||
jiur j_c iour_c
|
||||
jvanr j_c vanr_c
|
||||
jver j_c ver_c
|
||||
jvnr j_c vnr_c
|
||||
kair k_c air_c
|
||||
kanr k_c anr_c
|
||||
kaor k_c aor_c
|
||||
kengr k_c engr_c
|
||||
ker k_c er_c
|
||||
kongr k_c ongr_c
|
||||
kour k_c our_c
|
||||
kuair k_c uair_c
|
||||
kuangr k_c uangr_c
|
||||
kuanr k_c uanr_c
|
||||
kunr k_c uenr_c
|
||||
lanr l_c anr_c
|
||||
laor l_c aor_c
|
||||
lar l_c ar_c
|
||||
leir l_c eir_c
|
||||
lengr l_c engr_c
|
||||
ler l_c er_c
|
||||
liangr l_c iangr_c
|
||||
lianr l_c ianr_c
|
||||
liaor l_c iaor_c
|
||||
liar l_c iar_c
|
||||
lingr l_c ingr_c
|
||||
lir l_c ir_c
|
||||
liur l_c iour_c
|
||||
lour l_c our_c
|
||||
luor l_c uor_c
|
||||
lunr l_c uenr_c
|
||||
lur l_c ur_c
|
||||
lvr l_c vr_c
|
||||
mair m_c air_c
|
||||
manr m_c anr_c
|
||||
mangr m_c angr_c
|
||||
maor m_c aor_c
|
||||
mar m_c ar_c
|
||||
meir m_c eir_c
|
||||
menr m_c enr_c
|
||||
mianr m_c ianr_c
|
||||
miaor m_c iaor_c
|
||||
mingr m_c ingr_c
|
||||
mir m_c ir_c
|
||||
mor m_c or_c
|
||||
naor n_c aor_c
|
||||
nar n_c ar_c
|
||||
niangr n_c iangr_c
|
||||
nianr n_c ianr_c
|
||||
niaor n_c iaor_c
|
||||
ningr n_c ingr_c
|
||||
nir n_c ir_c
|
||||
niur n_c iour_c
|
||||
nvr n_c vr_c
|
||||
pair p_c air_c
|
||||
pangr p_c angr_c
|
||||
panr p_c anr_c
|
||||
paor p_c aor_c
|
||||
penr p_c enr_c
|
||||
pianr p_c ianr_c
|
||||
piaor p_c iaor_c
|
||||
pier p_c ier_c
|
||||
pingr p_c ingr_c
|
||||
pir p_c ir_c
|
||||
por p_c or_c
|
||||
pur p_c ur_c
|
||||
qianr q_c ianr_c
|
||||
qiaor q_c iaor_c
|
||||
qingr q_c ingr_c
|
||||
qir q_c ir_c
|
||||
qiur q_c iour_c
|
||||
qvanr q_c vanr_c
|
||||
qvnr q_c vnr_c
|
||||
qvr q_c vr_c
|
||||
sar s_c ar_c
|
||||
rangr r_c angr_c
|
||||
renr r_c enr_c
|
||||
sair s_c air_c
|
||||
sanr s_c anr_c
|
||||
shair sh_c air_c
|
||||
shaor sh_c aor_c
|
||||
shengr sh_c engr_c
|
||||
shenr sh_c enr_c
|
||||
shir sh_c ihr_c
|
||||
shuair sh_c uair_c
|
||||
shour sh_c our_c
|
||||
shuar sh_c uar_c
|
||||
shuir sh_c ueir_c
|
||||
shunr sh_c uenr_c
|
||||
shuor sh_c uor_c
|
||||
shur sh_c ur_c
|
||||
sir s_c iir_c
|
||||
suir s_c ueir_c
|
||||
sunr s_c uenr_c
|
||||
tair t_c air_c
|
||||
tangr t_c angr_c
|
||||
tanr t_c anr_c
|
||||
taor t_c aor_c
|
||||
ter t_c er_c
|
||||
tianr t_c ianr_c
|
||||
tiaor t_c iaor_c
|
||||
tir t_c ir_c
|
||||
tingr t_c ingr_c
|
||||
tongr t_c ongr_c
|
||||
tour t_c our_c
|
||||
tuanr t_c uanr_c
|
||||
tuir t_c ueir_c
|
||||
tuor t_c uor_c
|
||||
tur t_c ur_c
|
||||
wanr w_c anr_c
|
||||
war w_c ar_c
|
||||
weir w_c eir_c
|
||||
wenr w_c enr_c
|
||||
wengr w_c engr_c
|
||||
wor w_c or_c
|
||||
wur w_c ur_c
|
||||
xiangr xx_c iangr_c
|
||||
xianr xx_c ianr_c
|
||||
xiar xx_c iar_c
|
||||
xier xx_c ier_c
|
||||
xingr xx_c ingr_c
|
||||
xir xx_c ir_c
|
||||
xinr xx_c inr_c
|
||||
xiongr xx_c iongr_c
|
||||
xiur xx_c iour_c
|
||||
yangr y_c angr_c
|
||||
yanr y_c anr_c
|
||||
yaor y_c aor_c
|
||||
yar y_c ar_c
|
||||
yer y_c er_c
|
||||
yingr y_c ingr_c
|
||||
yinr y_c inr_c
|
||||
yir y_c ir_c
|
||||
your y_c our_c
|
||||
yvanr y_c vanr_c
|
||||
zair z_c air_c
|
||||
yvr y_c vr_c
|
||||
yver y_c ver_c
|
||||
zaor z_c aor_c
|
||||
zar z_c ar_c
|
||||
zhangr zh_c angr_c
|
||||
zhanr zh_c anr_c
|
||||
zhaor zh_c aor_c
|
||||
zhar zh_c ar_c
|
||||
zhenr zh_c enr_c
|
||||
zher zh_c er_c
|
||||
zhir zh_c ihr_c
|
||||
zhongr zh_c ongr_c
|
||||
zhour zh_c our_c
|
||||
zhuar zh_c uar_c
|
||||
zhuanr zh_c uanr_c
|
||||
zhunr zh_c uenr_c
|
||||
zhuor zh_c uor_c
|
||||
zhur zh_c ur_c
|
||||
zir z_c iir_c
|
||||
zuanr z_c uanr_c
|
||||
zuir z_c ueir_c
|
||||
zuor z_c uor_c
|
||||
7
voices/zhitian_emo/dict/PinYin/tonelist.txt
Normal file
7
voices/zhitian_emo/dict/PinYin/tonelist.txt
Normal file
@ -0,0 +1,7 @@
|
||||
1
|
||||
|
||||
4
|
||||
2
|
||||
3
|
||||
5
|
||||
0
|
||||
33
voices/zhitian_emo/dict/emo_category_dict.txt
Executable file
33
voices/zhitian_emo/dict/emo_category_dict.txt
Executable file
@ -0,0 +1,33 @@
|
||||
emotion_none
|
||||
emotion_neutral
|
||||
emotion_angry
|
||||
emotion_disgust
|
||||
emotion_fear
|
||||
emotion_happy
|
||||
emotion_sad
|
||||
emotion_surprise
|
||||
emotion_calm
|
||||
emotion_gentle
|
||||
emotion_relax
|
||||
emotion_lyrical
|
||||
emotion_serious
|
||||
emotion_disgruntled
|
||||
emotion_satisfied
|
||||
emotion_disappointed
|
||||
emotion_excited
|
||||
emotion_anxiety
|
||||
emotion_jealousy
|
||||
emotion_hate
|
||||
emotion_pity
|
||||
emotion_pleasure
|
||||
emotion_arousal
|
||||
emotion_dominance
|
||||
emotion_placeholder1
|
||||
emotion_placeholder2
|
||||
emotion_placeholder3
|
||||
emotion_placeholder4
|
||||
emotion_placeholder5
|
||||
emotion_placeholder6
|
||||
emotion_placeholder7
|
||||
emotion_placeholder8
|
||||
emotion_placeholder9
|
||||
6
voices/zhitian_emo/dict/speaker_dict.txt
Executable file
6
voices/zhitian_emo/dict/speaker_dict.txt
Executable file
@ -0,0 +1,6 @@
|
||||
F7
|
||||
F74
|
||||
FBYN
|
||||
FRXL
|
||||
M7
|
||||
xiaoyu
|
||||
144
voices/zhitian_emo/dict/sy_dict.txt
Executable file
144
voices/zhitian_emo/dict/sy_dict.txt
Executable file
@ -0,0 +1,144 @@
|
||||
a_c
|
||||
ai_c
|
||||
an_c
|
||||
ang_c
|
||||
ao_c
|
||||
b_c
|
||||
c_c
|
||||
ch_c
|
||||
d_c
|
||||
e_c
|
||||
ei_c
|
||||
en_c
|
||||
eng_c
|
||||
er_c
|
||||
f_c
|
||||
g_c
|
||||
h_c
|
||||
i_c
|
||||
ia_c
|
||||
ian_c
|
||||
iang_c
|
||||
iao_c
|
||||
ie_c
|
||||
ih_c
|
||||
ii_c
|
||||
in_c
|
||||
ing_c
|
||||
io_c
|
||||
iong_c
|
||||
iou_c
|
||||
j_c
|
||||
k_c
|
||||
l_c
|
||||
m_c
|
||||
n_c
|
||||
o_c
|
||||
ong_c
|
||||
ou_c
|
||||
p_c
|
||||
q_c
|
||||
r_c
|
||||
s_c
|
||||
sh_c
|
||||
t_c
|
||||
u_c
|
||||
ua_c
|
||||
uai_c
|
||||
uan_c
|
||||
uang_c
|
||||
uei_c
|
||||
uen_c
|
||||
ueng_c
|
||||
uo_c
|
||||
v_c
|
||||
van_c
|
||||
ve_c
|
||||
vn_c
|
||||
xx_c
|
||||
z_c
|
||||
zh_c
|
||||
w_c
|
||||
y_c
|
||||
ga
|
||||
ge
|
||||
go
|
||||
aa
|
||||
ae
|
||||
ah
|
||||
ao
|
||||
aw
|
||||
ay
|
||||
b
|
||||
ch
|
||||
d
|
||||
dh
|
||||
eh
|
||||
er
|
||||
ey
|
||||
f
|
||||
g
|
||||
hh
|
||||
ih
|
||||
iy
|
||||
jh
|
||||
k
|
||||
l
|
||||
m
|
||||
n
|
||||
ng
|
||||
ow
|
||||
oy
|
||||
p
|
||||
r
|
||||
s
|
||||
sh
|
||||
t
|
||||
th
|
||||
uh
|
||||
uw
|
||||
v
|
||||
w
|
||||
y
|
||||
z
|
||||
zh
|
||||
air_c
|
||||
angr_c
|
||||
anr_c
|
||||
aor_c
|
||||
ar_c
|
||||
eir_c
|
||||
engr_c
|
||||
enr_c
|
||||
iangr_c
|
||||
ianr_c
|
||||
iaor_c
|
||||
iar_c
|
||||
ier_c
|
||||
ihr_c
|
||||
iir_c
|
||||
ingr_c
|
||||
inr_c
|
||||
iongr_c
|
||||
iour_c
|
||||
ir_c
|
||||
ongr_c
|
||||
or_c
|
||||
our_c
|
||||
uair_c
|
||||
uangr_c
|
||||
uanr_c
|
||||
uar_c
|
||||
ueir_c
|
||||
uenr_c
|
||||
uor_c
|
||||
ur_c
|
||||
vanr_c
|
||||
ver_c
|
||||
vnr_c
|
||||
vr_c
|
||||
pau
|
||||
#1
|
||||
#2
|
||||
#3
|
||||
#4
|
||||
5
voices/zhitian_emo/dict/syllable_flag_dict.txt
Executable file
5
voices/zhitian_emo/dict/syllable_flag_dict.txt
Executable file
@ -0,0 +1,5 @@
|
||||
s_begin
|
||||
s_end
|
||||
s_none
|
||||
s_both
|
||||
s_middle
|
||||
7
voices/zhitian_emo/dict/tone_dict.txt
Executable file
7
voices/zhitian_emo/dict/tone_dict.txt
Executable file
@ -0,0 +1,7 @@
|
||||
tone1
|
||||
tone_none
|
||||
tone4
|
||||
tone2
|
||||
tone3
|
||||
tone5
|
||||
tone0
|
||||
5
voices/zhitian_emo/dict/word_segment_dict.txt
Executable file
5
voices/zhitian_emo/dict/word_segment_dict.txt
Executable file
@ -0,0 +1,5 @@
|
||||
word_begin
|
||||
word_end
|
||||
word_middle
|
||||
word_both
|
||||
word_none
|
||||
BIN
voices/zhitian_emo/voc/ckpt/checkpoint_0.pth
(Stored with Git LFS)
Normal file
BIN
voices/zhitian_emo/voc/ckpt/checkpoint_0.pth
(Stored with Git LFS)
Normal file
Binary file not shown.
BIN
voices/zhitian_emo/voc/ckpt/checkpoint_1.pth
(Stored with Git LFS)
Normal file
BIN
voices/zhitian_emo/voc/ckpt/checkpoint_1.pth
(Stored with Git LFS)
Normal file
Binary file not shown.
188
voices/zhitian_emo/voc/config.yaml
Normal file
188
voices/zhitian_emo/voc/config.yaml
Normal file
@ -0,0 +1,188 @@
|
||||
model_type: hifigan
|
||||
Model:
|
||||
###########################################################
|
||||
# GENERATOR NETWORK ARCHITECTURE SETTING #
|
||||
###########################################################
|
||||
Generator:
|
||||
params:
|
||||
in_channels: 80
|
||||
out_channels: 1
|
||||
channels: 256
|
||||
kernel_size: 7
|
||||
upsample_scales: [10, 5, 2, 2]
|
||||
upsample_kernal_sizes: [20, 11, 4, 4]
|
||||
resblock_kernel_sizes: [3, 7, 11]
|
||||
resblock_dilations:
|
||||
- [1, 3, 5, 7]
|
||||
- [1, 3, 5, 7]
|
||||
- [1, 3, 5, 7]
|
||||
bias: true
|
||||
causal: true
|
||||
nonlinear_activation: "LeakyReLU"
|
||||
nonlinear_activation_params:
|
||||
negative_slope: 0.1
|
||||
use_weight_norm: true
|
||||
optimizer:
|
||||
type: Adam
|
||||
params:
|
||||
lr: 2.0e-4
|
||||
betas: [0.5, 0.9]
|
||||
weight_decay: 0.0
|
||||
scheduler:
|
||||
type: MultiStepLR
|
||||
params:
|
||||
gamma: 0.5
|
||||
milestones:
|
||||
- 200000
|
||||
- 400000
|
||||
- 600000
|
||||
- 800000
|
||||
|
||||
###########################################################
|
||||
# DISCRIMINATOR NETWORK ARCHITECTURE SETTING #
|
||||
###########################################################
|
||||
MultiScaleDiscriminator:
|
||||
params:
|
||||
scales: 3
|
||||
downsample_pooling: "DWT"
|
||||
downsample_pooling_params:
|
||||
kernel_size: 4
|
||||
stride: 2
|
||||
padding: 2
|
||||
discriminator_params:
|
||||
in_channels: 1
|
||||
out_channels: 1
|
||||
kernel_sizes: [15, 41, 5, 3]
|
||||
channels: 128
|
||||
max_downsample_channels: 1024
|
||||
max_groups: 16
|
||||
bias: true
|
||||
downsample_scales: [4, 4, 4, 4, 1]
|
||||
nonlinear_activation: "LeakyReLU"
|
||||
nonlinear_activation_params:
|
||||
negative_slope: 0.1
|
||||
follow_official_norm: true
|
||||
optimizer:
|
||||
type: Adam
|
||||
params:
|
||||
lr: 2.0e-4
|
||||
betas: [0.5, 0.9]
|
||||
weight_decay: 0.0
|
||||
scheduler:
|
||||
type: MultiStepLR
|
||||
params:
|
||||
gamma: 0.5
|
||||
milestones:
|
||||
- 200000
|
||||
- 400000
|
||||
- 600000
|
||||
- 800000
|
||||
|
||||
MultiPeriodDiscriminator:
|
||||
params:
|
||||
periods: [2, 3, 5, 7, 11]
|
||||
discriminator_params:
|
||||
in_channels: 1
|
||||
out_channels: 1
|
||||
kernel_sizes: [5, 3]
|
||||
channels: 32
|
||||
downsample_scales: [3, 3, 3, 3, 1]
|
||||
max_downsample_channels: 1024
|
||||
bias: true
|
||||
nonlinear_activation: "LeakyReLU"
|
||||
nonlinear_activation_params:
|
||||
negative_slope: 0.1
|
||||
use_spectral_norm: false
|
||||
optimizer:
|
||||
type: Adam
|
||||
params:
|
||||
lr: 2.0e-4
|
||||
betas: [0.5, 0.9]
|
||||
weight_decay: 0.0
|
||||
scheduler:
|
||||
type: MultiStepLR
|
||||
params:
|
||||
gamma: 0.5
|
||||
milestones:
|
||||
- 200000
|
||||
- 400000
|
||||
- 600000
|
||||
- 800000
|
||||
|
||||
####################################################
|
||||
# LOSS SETTING #
|
||||
####################################################
|
||||
Loss:
|
||||
generator_adv_loss:
|
||||
enable: True
|
||||
params:
|
||||
average_by_discriminators: False
|
||||
weights: 1.0
|
||||
|
||||
discriminator_adv_loss:
|
||||
enable: True
|
||||
params:
|
||||
average_by_discriminators: False
|
||||
weights: 1.0
|
||||
|
||||
stft_loss:
|
||||
enable: False # Whether to use multi-resolution STFT loss.
|
||||
|
||||
mel_loss:
|
||||
enable: True
|
||||
params:
|
||||
fs: 16000
|
||||
fft_size: 2048
|
||||
hop_size: 200
|
||||
win_length: 1000
|
||||
window: "hann"
|
||||
num_mels: 80
|
||||
fmin: 0
|
||||
fmax: 8000
|
||||
log_base: null
|
||||
weights: 45.0
|
||||
|
||||
subband_stft_loss:
|
||||
enable: False
|
||||
params:
|
||||
fft_sizes: [384, 683, 171] # List of FFT size for STFT-based loss.
|
||||
hop_sizes: [35, 75, 15] # List of hop size for STFT-based loss
|
||||
win_lengths: [150, 300, 60] # List of window length for STFT-based loss.
|
||||
window: "hann_window" # Window function for STFT-based loss
|
||||
|
||||
feat_match_loss:
|
||||
enable: True
|
||||
params:
|
||||
average_by_discriminators: false
|
||||
average_by_layers: false
|
||||
weights: 2.0
|
||||
|
||||
|
||||
###########################################################
|
||||
# DATA LOADER SETTING #
|
||||
###########################################################
|
||||
batch_size: 16
|
||||
batch_max_steps: 9600 # Length of each audio in batch. Make sure it is divisible by hop_size.
|
||||
pin_memory: True
|
||||
num_workers: 2 # FIXME: setting this > 0 may hang on macOS
|
||||
remove_short_samples: False
|
||||
allow_cache: True
|
||||
|
||||
generator_grad_norm: -1
|
||||
|
||||
discriminator_grad_norm: -1
|
||||
|
||||
###########################################################
|
||||
# INTERVAL SETTING #
|
||||
###########################################################
|
||||
generator_train_start_steps: 1 # Number of steps to start to train generator.
|
||||
discriminator_train_start_steps: 0 # Number of steps to start to train discriminator.
|
||||
train_max_steps: 2500000 # Number of training steps.
|
||||
save_interval_steps: 20000 # Interval steps to save checkpoint.
|
||||
eval_interval_steps: 10000 # Interval steps to evaluate the network.
|
||||
log_interval_steps: 1000 # Interval steps to record the training log.
|
||||
|
||||
###########################################################
|
||||
# OTHER SETTING #
|
||||
###########################################################
|
||||
num_save_intermediate_results: 4 # Number of results to be saved as intermediate results.
|
||||
BIN
voices/zhitian_emo/vocoder/pytorch_model.bin
(Stored with Git LFS)
Normal file
BIN
voices/zhitian_emo/vocoder/pytorch_model.bin
(Stored with Git LFS)
Normal file
Binary file not shown.
BIN
voices/zhiyan_emo/am/ckpt/checkpoint_0.pth
(Stored with Git LFS)
Normal file
BIN
voices/zhiyan_emo/am/ckpt/checkpoint_0.pth
(Stored with Git LFS)
Normal file
Binary file not shown.
105
voices/zhiyan_emo/am/config.yaml
Normal file
105
voices/zhiyan_emo/am/config.yaml
Normal file
@ -0,0 +1,105 @@
|
||||
model_type: sambert
|
||||
Model:
|
||||
#########################################################
|
||||
# SAMBERT NETWORK ARCHITECTURE SETTING #
|
||||
#########################################################
|
||||
KanTtsSAMBERT:
|
||||
params:
|
||||
max_len: 800
|
||||
|
||||
embedding_dim: 512
|
||||
encoder_num_layers: 8
|
||||
encoder_num_heads: 8
|
||||
encoder_num_units: 128
|
||||
encoder_ffn_inner_dim: 1024
|
||||
encoder_dropout: 0.1
|
||||
encoder_attention_dropout: 0.1
|
||||
encoder_relu_dropout: 0.1
|
||||
encoder_projection_units: 32
|
||||
|
||||
speaker_units: 32
|
||||
emotion_units: 32
|
||||
|
||||
predictor_filter_size: 41
|
||||
predictor_fsmn_num_layers: 3
|
||||
predictor_num_memory_units: 128
|
||||
predictor_ffn_inner_dim: 256
|
||||
predictor_dropout: 0.1
|
||||
predictor_shift: 0
|
||||
predictor_lstm_units: 128
|
||||
dur_pred_prenet_units: [128, 128]
|
||||
dur_pred_lstm_units: 128
|
||||
|
||||
decoder_prenet_units: [256, 256]
|
||||
decoder_num_layers: 12
|
||||
decoder_num_heads: 8
|
||||
decoder_num_units: 128
|
||||
decoder_ffn_inner_dim: 1024
|
||||
decoder_dropout: 0.1
|
||||
decoder_attention_dropout: 0.1
|
||||
decoder_relu_dropout: 0.1
|
||||
|
||||
outputs_per_step: 3
|
||||
num_mels: 80
|
||||
|
||||
postnet_filter_size: 41
|
||||
postnet_fsmn_num_layers: 4
|
||||
postnet_num_memory_units: 256
|
||||
postnet_ffn_inner_dim: 512
|
||||
postnet_dropout: 0.1
|
||||
postnet_shift: 17
|
||||
postnet_lstm_units: 128
|
||||
MAS: False
|
||||
|
||||
optimizer:
|
||||
type: Adam
|
||||
params:
|
||||
lr: 0.001
|
||||
betas: [0.9, 0.98]
|
||||
eps: 1.0e-9
|
||||
weight_decay: 0.0
|
||||
scheduler:
|
||||
type: NoamLR
|
||||
params:
|
||||
warmup_steps: 4000
|
||||
|
||||
linguistic_unit:
|
||||
cleaners: english_cleaners
|
||||
lfeat_type_list: sy,tone,syllable_flag,word_segment,emo_category,speaker_category
|
||||
speaker_list: F7,F74,FBYN,FRXL,M7,xiaoyu
|
||||
####################################################
|
||||
# LOSS SETTING #
|
||||
####################################################
|
||||
Loss:
|
||||
MelReconLoss:
|
||||
enable: True
|
||||
params:
|
||||
loss_type: mae
|
||||
|
||||
ProsodyReconLoss:
|
||||
enable: True
|
||||
params:
|
||||
loss_type: mae
|
||||
|
||||
###########################################################
|
||||
# DATA LOADER SETTING #
|
||||
###########################################################
|
||||
batch_size: 32
|
||||
pin_memory: False
|
||||
num_workers: 4 # FIXME: setting > 0 may get stuck on macOS
|
||||
remove_short_samples: False
|
||||
allow_cache: True
|
||||
grad_norm: 1.0
|
||||
|
||||
###########################################################
|
||||
# INTERVAL SETTING #
|
||||
###########################################################
|
||||
train_max_steps: 1000000 # Number of training steps.
|
||||
save_interval_steps: 20000 # Interval steps to save checkpoint.
|
||||
eval_interval_steps: 10000 # Interval steps to evaluate the network.
|
||||
log_interval_steps: 1000 # Interval steps to record the training log.
|
||||
|
||||
###########################################################
|
||||
# OTHER SETTING #
|
||||
###########################################################
|
||||
num_save_intermediate_results: 4 # Number of results to be saved as intermediate results.
|
||||
BIN
voices/zhiyan_emo/am/pytorch_model.bin
(Stored with Git LFS)
Normal file
BIN
voices/zhiyan_emo/am/pytorch_model.bin
(Stored with Git LFS)
Normal file
Binary file not shown.
27
voices/zhiyan_emo/audio_config.yaml
Normal file
27
voices/zhiyan_emo/audio_config.yaml
Normal file
@ -0,0 +1,27 @@
|
||||
# Audio processing configs
|
||||
|
||||
audio_config:
|
||||
# Preprocess
|
||||
wav_normalize: True
|
||||
trim_silence: True
|
||||
trim_silence_threshold_db: 60
|
||||
preemphasize: False
|
||||
|
||||
# Feature extraction
|
||||
sampling_rate: 16000
|
||||
hop_length: 200
|
||||
win_length: 1000
|
||||
n_fft: 2048
|
||||
n_mels: 80
|
||||
fmin: 0.0
|
||||
fmax: 8000.0
|
||||
phone_level_feature: True
|
||||
|
||||
# Normalization
|
||||
norm_type: "mean_std" # "mean_std" or "global"
|
||||
max_norm: 1.0
|
||||
symmetric: False
|
||||
min_level_db: -100.0
|
||||
ref_level_db: 20
|
||||
|
||||
num_workers: 16
|
||||
2
voices/zhiyan_emo/dict/PinYin/En2ChPhoneMap.txt
Normal file
2
voices/zhiyan_emo/dict/PinYin/En2ChPhoneMap.txt
Normal file
@ -0,0 +1,2 @@
|
||||
wu w
|
||||
yi y
|
||||
1263
voices/zhiyan_emo/dict/PinYin/PhoneSet.xml
Normal file
1263
voices/zhiyan_emo/dict/PinYin/PhoneSet.xml
Normal file
File diff suppressed because it is too large
Load Diff
147
voices/zhiyan_emo/dict/PinYin/PosSet.xml
Normal file
147
voices/zhiyan_emo/dict/PinYin/PosSet.xml
Normal file
@ -0,0 +1,147 @@
|
||||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<posSet xmlns="http://schemas.alibaba-inc.com/tts">
|
||||
<pos>
|
||||
<id>1</id>
|
||||
<name>a</name>
|
||||
<desc>todo</desc>
|
||||
</pos>
|
||||
<pos>
|
||||
<id>2</id>
|
||||
<name>b</name>
|
||||
<desc>todo</desc>
|
||||
</pos>
|
||||
<pos>
|
||||
<id>3</id>
|
||||
<name>c</name>
|
||||
<desc>todo</desc>
|
||||
</pos>
|
||||
<pos>
|
||||
<id>4</id>
|
||||
<name>d</name>
|
||||
<desc>todo</desc>
|
||||
</pos>
|
||||
<pos>
|
||||
<id>5</id>
|
||||
<name>e</name>
|
||||
<desc>todo</desc>
|
||||
</pos>
|
||||
<pos>
|
||||
<id>6</id>
|
||||
<name>f</name>
|
||||
<desc>todo</desc>
|
||||
</pos>
|
||||
<pos>
|
||||
<id>7</id>
|
||||
<name>g</name>
|
||||
<desc>todo</desc>
|
||||
<sub>
|
||||
<pos>
|
||||
<id>8</id>
|
||||
<name>gb</name>
|
||||
<desc>todo</desc>
|
||||
</pos>
|
||||
</sub>
|
||||
</pos>
|
||||
<pos>
|
||||
<id>9</id>
|
||||
<name>h</name>
|
||||
<desc>todo</desc>
|
||||
</pos>
|
||||
<pos>
|
||||
<id>10</id>
|
||||
<name>i</name>
|
||||
<desc>todo</desc>
|
||||
</pos>
|
||||
<pos>
|
||||
<id>11</id>
|
||||
<name>j</name>
|
||||
<desc>todo</desc>
|
||||
</pos>
|
||||
<pos>
|
||||
<id>12</id>
|
||||
<name>k</name>
|
||||
<desc>todo</desc>
|
||||
</pos>
|
||||
<pos>
|
||||
<id>13</id>
|
||||
<name>l</name>
|
||||
<desc>todo</desc>
|
||||
</pos>
|
||||
<pos>
|
||||
<id>14</id>
|
||||
<name>m</name>
|
||||
<desc>todo</desc>
|
||||
</pos>
|
||||
<pos>
|
||||
<id>15</id>
|
||||
<name>n</name>
|
||||
<desc>todo</desc>
|
||||
<sub>
|
||||
<pos>
|
||||
<id>16</id>
|
||||
<name>nz</name>
|
||||
<desc>todo</desc>
|
||||
</pos>
|
||||
</sub>
|
||||
</pos>
|
||||
<pos>
|
||||
<id>17</id>
|
||||
<name>o</name>
|
||||
<desc>todo</desc>
|
||||
</pos>
|
||||
<pos>
|
||||
<id>18</id>
|
||||
<name>p</name>
|
||||
<desc>todo</desc>
|
||||
</pos>
|
||||
<pos>
|
||||
<id>19</id>
|
||||
<name>q</name>
|
||||
<desc>todo</desc>
|
||||
</pos>
|
||||
<pos>
|
||||
<id>20</id>
|
||||
<name>r</name>
|
||||
<desc>todo</desc>
|
||||
</pos>
|
||||
<pos>
|
||||
<id>21</id>
|
||||
<name>s</name>
|
||||
<desc>todo</desc>
|
||||
</pos>
|
||||
<pos>
|
||||
<id>22</id>
|
||||
<name>t</name>
|
||||
<desc>todo</desc>
|
||||
</pos>
|
||||
<pos>
|
||||
<id>23</id>
|
||||
<name>u</name>
|
||||
<desc>todo</desc>
|
||||
</pos>
|
||||
<pos>
|
||||
<id>24</id>
|
||||
<name>v</name>
|
||||
<desc>todo</desc>
|
||||
</pos>
|
||||
<pos>
|
||||
<id>25</id>
|
||||
<name>w</name>
|
||||
<desc>todo</desc>
|
||||
</pos>
|
||||
<pos>
|
||||
<id>26</id>
|
||||
<name>x</name>
|
||||
<desc>todo</desc>
|
||||
</pos>
|
||||
<pos>
|
||||
<id>27</id>
|
||||
<name>y</name>
|
||||
<desc>todo</desc>
|
||||
</pos>
|
||||
<pos>
|
||||
<id>28</id>
|
||||
<name>z</name>
|
||||
<desc>todo</desc>
|
||||
</pos>
|
||||
</posSet>
|
||||
661
voices/zhiyan_emo/dict/PinYin/py2phoneMap.txt
Normal file
661
voices/zhiyan_emo/dict/PinYin/py2phoneMap.txt
Normal file
@ -0,0 +1,661 @@
|
||||
a ga a_c
|
||||
ai ga ai_c
|
||||
an ga an_c
|
||||
ang ga ang_c
|
||||
ao ga ao_c
|
||||
ba b_c a_c
|
||||
bai b_c ai_c
|
||||
ban b_c an_c
|
||||
bang b_c ang_c
|
||||
bao b_c ao_c
|
||||
bei b_c ei_c
|
||||
ben b_c en_c
|
||||
beng b_c eng_c
|
||||
bi b_c i_c
|
||||
bian b_c ian_c
|
||||
biao b_c iao_c
|
||||
bie b_c ie_c
|
||||
bin b_c in_c
|
||||
bing b_c ing_c
|
||||
bo b_c o_c
|
||||
bu b_c u_c
|
||||
ca c_c a_c
|
||||
cai c_c ai_c
|
||||
can c_c an_c
|
||||
cang c_c ang_c
|
||||
cao c_c ao_c
|
||||
ce c_c e_c
|
||||
cen c_c en_c
|
||||
ceng c_c eng_c
|
||||
cha ch_c a_c
|
||||
chai ch_c ai_c
|
||||
chan ch_c an_c
|
||||
chang ch_c ang_c
|
||||
chao ch_c ao_c
|
||||
che ch_c e_c
|
||||
chen ch_c en_c
|
||||
cheng ch_c eng_c
|
||||
chi ch_c ih_c
|
||||
chong ch_c ong_c
|
||||
chou ch_c ou_c
|
||||
chu ch_c u_c
|
||||
chua ch_c ua_c
|
||||
chuai ch_c uai_c
|
||||
chuan ch_c uan_c
|
||||
chuang ch_c uang_c
|
||||
chui ch_c uei_c
|
||||
chun ch_c uen_c
|
||||
chuo ch_c uo_c
|
||||
ci c_c ii_c
|
||||
cong c_c ong_c
|
||||
cou c_c ou_c
|
||||
cu c_c u_c
|
||||
cuan c_c uan_c
|
||||
cui c_c uei_c
|
||||
cun c_c uen_c
|
||||
cuo c_c uo_c
|
||||
da d_c a_c
|
||||
dai d_c ai_c
|
||||
dan d_c an_c
|
||||
dang d_c ang_c
|
||||
dao d_c ao_c
|
||||
de d_c e_c
|
||||
dei d_c ei_c
|
||||
den d_c en_c
|
||||
deng d_c eng_c
|
||||
di d_c i_c
|
||||
dia d_c ia_c
|
||||
dian d_c ian_c
|
||||
diao d_c iao_c
|
||||
die d_c ie_c
|
||||
ding d_c ing_c
|
||||
diu d_c iou_c
|
||||
dong d_c ong_c
|
||||
dou d_c ou_c
|
||||
du d_c u_c
|
||||
duan d_c uan_c
|
||||
dui d_c uei_c
|
||||
dun d_c uen_c
|
||||
duo d_c uo_c
|
||||
e ge e_c
|
||||
ei ge ei_c
|
||||
en ge en_c
|
||||
eng ge eng_c
|
||||
er ge er_c
|
||||
fa f_c a_c
|
||||
fan f_c an_c
|
||||
fang f_c ang_c
|
||||
fei f_c ei_c
|
||||
fen f_c en_c
|
||||
feng f_c eng_c
|
||||
fo f_c o_c
|
||||
fou f_c ou_c
|
||||
fu f_c u_c
|
||||
ga g_c a_c
|
||||
gai g_c ai_c
|
||||
gan g_c an_c
|
||||
gang g_c ang_c
|
||||
gao g_c ao_c
|
||||
ge g_c e_c
|
||||
gei g_c ei_c
|
||||
gen g_c en_c
|
||||
geng g_c eng_c
|
||||
gong g_c ong_c
|
||||
gou g_c ou_c
|
||||
gu g_c u_c
|
||||
gua g_c ua_c
|
||||
guai g_c uai_c
|
||||
guan g_c uan_c
|
||||
guang g_c uang_c
|
||||
gui g_c uei_c
|
||||
gun g_c uen_c
|
||||
guo g_c uo_c
|
||||
ha h_c a_c
|
||||
hai h_c ai_c
|
||||
han h_c an_c
|
||||
hang h_c ang_c
|
||||
hao h_c ao_c
|
||||
he h_c e_c
|
||||
hei h_c ei_c
|
||||
hen h_c en_c
|
||||
heng h_c eng_c
|
||||
hong h_c ong_c
|
||||
hou h_c ou_c
|
||||
hu h_c u_c
|
||||
hua h_c ua_c
|
||||
huai h_c uai_c
|
||||
huan h_c uan_c
|
||||
huang h_c uang_c
|
||||
hui h_c uei_c
|
||||
hun h_c uen_c
|
||||
huo h_c uo_c
|
||||
ji j_c i_c
|
||||
jia j_c ia_c
|
||||
jian j_c ian_c
|
||||
jiang j_c iang_c
|
||||
jiao j_c iao_c
|
||||
jie j_c ie_c
|
||||
jin j_c in_c
|
||||
jing j_c ing_c
|
||||
jiong j_c iong_c
|
||||
jiu j_c iou_c
|
||||
jv j_c v_c
|
||||
jvan j_c van_c
|
||||
jve j_c ve_c
|
||||
jvn j_c vn_c
|
||||
ka k_c a_c
|
||||
kai k_c ai_c
|
||||
kan k_c an_c
|
||||
kang k_c ang_c
|
||||
kao k_c ao_c
|
||||
ke k_c e_c
|
||||
kei k_c ei_c
|
||||
ken k_c en_c
|
||||
keng k_c eng_c
|
||||
kong k_c ong_c
|
||||
kou k_c ou_c
|
||||
ku k_c u_c
|
||||
kua k_c ua_c
|
||||
kuai k_c uai_c
|
||||
kuan k_c uan_c
|
||||
kuang k_c uang_c
|
||||
kui k_c uei_c
|
||||
kun k_c uen_c
|
||||
kuo k_c uo_c
|
||||
la l_c a_c
|
||||
lai l_c ai_c
|
||||
lan l_c an_c
|
||||
lang l_c ang_c
|
||||
lao l_c ao_c
|
||||
le l_c e_c
|
||||
lei l_c ei_c
|
||||
leng l_c eng_c
|
||||
li l_c i_c
|
||||
lia l_c ia_c
|
||||
lian l_c ian_c
|
||||
liang l_c iang_c
|
||||
liao l_c iao_c
|
||||
lie l_c ie_c
|
||||
lin l_c in_c
|
||||
ling l_c ing_c
|
||||
liu l_c iou_c
|
||||
lo l_c o_c
|
||||
long l_c ong_c
|
||||
lou l_c ou_c
|
||||
lu l_c u_c
|
||||
luan l_c uan_c
|
||||
lun l_c uen_c
|
||||
luo l_c uo_c
|
||||
lv l_c v_c
|
||||
lve l_c ve_c
|
||||
ma m_c a_c
|
||||
mai m_c ai_c
|
||||
man m_c an_c
|
||||
mang m_c ang_c
|
||||
mao m_c ao_c
|
||||
me m_c e_c
|
||||
mei m_c ei_c
|
||||
men m_c en_c
|
||||
meng m_c eng_c
|
||||
mi m_c i_c
|
||||
mian m_c ian_c
|
||||
miao m_c iao_c
|
||||
mie m_c ie_c
|
||||
min m_c in_c
|
||||
ming m_c ing_c
|
||||
miu m_c iou_c
|
||||
mo m_c o_c
|
||||
mou m_c ou_c
|
||||
mu m_c u_c
|
||||
na n_c a_c
|
||||
nai n_c ai_c
|
||||
nan n_c an_c
|
||||
nang n_c ang_c
|
||||
nao n_c ao_c
|
||||
ne n_c e_c
|
||||
nei n_c ei_c
|
||||
nen n_c en_c
|
||||
neng n_c eng_c
|
||||
ni n_c i_c
|
||||
nian n_c ian_c
|
||||
niang n_c iang_c
|
||||
niao n_c iao_c
|
||||
nie n_c ie_c
|
||||
nin n_c in_c
|
||||
ning n_c ing_c
|
||||
niu n_c iou_c
|
||||
nong n_c ong_c
|
||||
nou n_c ou_c
|
||||
nu n_c u_c
|
||||
nuan n_c uan_c
|
||||
nun n_c uen_c
|
||||
nuo n_c uo_c
|
||||
nv n_c v_c
|
||||
nve n_c ve_c
|
||||
o go o_c
|
||||
ou go ou_c
|
||||
pa p_c a_c
|
||||
pai p_c ai_c
|
||||
pan p_c an_c
|
||||
pang p_c ang_c
|
||||
pao p_c ao_c
|
||||
pei p_c ei_c
|
||||
pen p_c en_c
|
||||
peng p_c eng_c
|
||||
pi p_c i_c
|
||||
pian p_c ian_c
|
||||
piao p_c iao_c
|
||||
pie p_c ie_c
|
||||
pin p_c in_c
|
||||
ping p_c ing_c
|
||||
po p_c o_c
|
||||
pou p_c ou_c
|
||||
pu p_c u_c
|
||||
qi q_c i_c
|
||||
qia q_c ia_c
|
||||
qian q_c ian_c
|
||||
qiang q_c iang_c
|
||||
qiao q_c iao_c
|
||||
qie q_c ie_c
|
||||
qin q_c in_c
|
||||
qing q_c ing_c
|
||||
qiong q_c iong_c
|
||||
qiu q_c iou_c
|
||||
qv q_c v_c
|
||||
qvan q_c van_c
|
||||
qve q_c ve_c
|
||||
qvn q_c vn_c
|
||||
ran r_c an_c
|
||||
rang r_c ang_c
|
||||
rao r_c ao_c
|
||||
re r_c e_c
|
||||
ren r_c en_c
|
||||
reng r_c eng_c
|
||||
ri r_c ih_c
|
||||
rong r_c ong_c
|
||||
rou r_c ou_c
|
||||
ru r_c u_c
|
||||
ruan r_c uan_c
|
||||
rui r_c uei_c
|
||||
run r_c uen_c
|
||||
ruo r_c uo_c
|
||||
sa s_c a_c
|
||||
sai s_c ai_c
|
||||
san s_c an_c
|
||||
sang s_c ang_c
|
||||
sao s_c ao_c
|
||||
se s_c e_c
|
||||
sen s_c en_c
|
||||
seng s_c eng_c
|
||||
sha sh_c a_c
|
||||
shai sh_c ai_c
|
||||
shan sh_c an_c
|
||||
shang sh_c ang_c
|
||||
shao sh_c ao_c
|
||||
she sh_c e_c
|
||||
shei sh_c ei_c
|
||||
shen sh_c en_c
|
||||
sheng sh_c eng_c
|
||||
shi sh_c ih_c
|
||||
shou sh_c ou_c
|
||||
shu sh_c u_c
|
||||
shua sh_c ua_c
|
||||
shuai sh_c uai_c
|
||||
shuan sh_c uan_c
|
||||
shuang sh_c uang_c
|
||||
shui sh_c uei_c
|
||||
shun sh_c uen_c
|
||||
shuo sh_c uo_c
|
||||
si s_c ii_c
|
||||
song s_c ong_c
|
||||
sou s_c ou_c
|
||||
su s_c u_c
|
||||
suan s_c uan_c
|
||||
sui s_c uei_c
|
||||
sun s_c uen_c
|
||||
suo s_c uo_c
|
||||
ta t_c a_c
|
||||
tai t_c ai_c
|
||||
tan t_c an_c
|
||||
tang t_c ang_c
|
||||
tao t_c ao_c
|
||||
te t_c e_c
|
||||
tei t_c ei_c
|
||||
teng t_c eng_c
|
||||
ti t_c i_c
|
||||
tian t_c ian_c
|
||||
tiao t_c iao_c
|
||||
tie t_c ie_c
|
||||
ting t_c ing_c
|
||||
tong t_c ong_c
|
||||
tou t_c ou_c
|
||||
tu t_c u_c
|
||||
tuan t_c uan_c
|
||||
tui t_c uei_c
|
||||
tun t_c uen_c
|
||||
tuo t_c uo_c
|
||||
wa w_c a_c
|
||||
wai w_c ai_c
|
||||
wan w_c an_c
|
||||
wang w_c ang_c
|
||||
wei w_c ei_c
|
||||
wen w_c en_c
|
||||
weng w_c eng_c
|
||||
wo w_c o_c
|
||||
wu w_c u_c
|
||||
xi xx_c i_c
|
||||
xia xx_c ia_c
|
||||
xian xx_c ian_c
|
||||
xiang xx_c iang_c
|
||||
xiao xx_c iao_c
|
||||
xie xx_c ie_c
|
||||
xin xx_c in_c
|
||||
xing xx_c ing_c
|
||||
xiong xx_c iong_c
|
||||
xiu xx_c iou_c
|
||||
xv xx_c v_c
|
||||
xvan xx_c van_c
|
||||
xve xx_c ve_c
|
||||
xvn xx_c vn_c
|
||||
ya y_c a_c
|
||||
yan y_c an_c
|
||||
yang y_c ang_c
|
||||
yao y_c ao_c
|
||||
ye y_c e_c
|
||||
yi y_c i_c
|
||||
yin y_c in_c
|
||||
ying y_c ing_c
|
||||
yo y_c o_c
|
||||
yong y_c ong_c
|
||||
you y_c ou_c
|
||||
yv y_c v_c
|
||||
yvan y_c van_c
|
||||
yve y_c ve_c
|
||||
yvn y_c vn_c
|
||||
za z_c a_c
|
||||
zai z_c ai_c
|
||||
zan z_c an_c
|
||||
zang z_c ang_c
|
||||
zao z_c ao_c
|
||||
ze z_c e_c
|
||||
zei z_c ei_c
|
||||
zen z_c en_c
|
||||
zeng z_c eng_c
|
||||
zha zh_c a_c
|
||||
zhai zh_c ai_c
|
||||
zhan zh_c an_c
|
||||
zhang zh_c ang_c
|
||||
zhao zh_c ao_c
|
||||
zhe zh_c e_c
|
||||
zhei zh_c ei_c
|
||||
zhen zh_c en_c
|
||||
zheng zh_c eng_c
|
||||
zhi zh_c ih_c
|
||||
zhong zh_c ong_c
|
||||
zhou zh_c ou_c
|
||||
zhu zh_c u_c
|
||||
zhua zh_c ua_c
|
||||
zhuai zh_c uai_c
|
||||
zhuan zh_c uan_c
|
||||
zhuang zh_c uang_c
|
||||
zhui zh_c uei_c
|
||||
zhun zh_c uen_c
|
||||
zhuo zh_c uo_c
|
||||
zi z_c ii_c
|
||||
zong z_c ong_c
|
||||
zou z_c ou_c
|
||||
zu z_c u_c
|
||||
zuan z_c uan_c
|
||||
zui z_c uei_c
|
||||
zun z_c uen_c
|
||||
zuo z_c uo_c
|
||||
bangr b_c angr_c
|
||||
banr b_c anr_c
|
||||
baor b_c aor_c
|
||||
bar b_c ar_c
|
||||
beir b_c eir_c
|
||||
bengr b_c engr_c
|
||||
benr b_c enr_c
|
||||
bianr b_c ianr_c
|
||||
biaor b_c iaor_c
|
||||
bingr b_c ingr_c
|
||||
bir b_c ir_c
|
||||
bor b_c or_c
|
||||
bur b_c ur_c
|
||||
caor c_c aor_c
|
||||
car c_c ar_c
|
||||
changr ch_c angr_c
|
||||
chaor ch_c aor_c
|
||||
char ch_c ar_c
|
||||
chengr ch_c engr_c
|
||||
cher ch_c er_c
|
||||
chir ch_c ihr_c
|
||||
chongr ch_c ongr_c
|
||||
chour ch_c our_c
|
||||
chuangr ch_c uangr_c
|
||||
chuanr ch_c uanr_c
|
||||
chuir ch_c ueir_c
|
||||
chunr ch_c uenr_c
|
||||
chuor ch_c uor_c
|
||||
chur ch_c ur_c
|
||||
cir c_c iir_c
|
||||
congr c_c ongr_c
|
||||
cuir c_c ueir_c
|
||||
cunr c_c uenr_c
|
||||
cuor c_c uor_c
|
||||
dair d_c air_c
|
||||
danr d_c anr_c
|
||||
dangr d_c angr_c
|
||||
daor d_c aor_c
|
||||
dengr d_c engr_c
|
||||
dianr d_c ianr_c
|
||||
diaor d_c iaor_c
|
||||
dier d_c ier_c
|
||||
dingr d_c ingr_c
|
||||
dir d_c ir_c
|
||||
dongr d_c ongr_c
|
||||
dour d_c our_c
|
||||
duanr d_c uanr_c
|
||||
duir d_c ueir_c
|
||||
dunr d_c uenr_c
|
||||
duor d_c uor_c
|
||||
dur d_c ur_c
|
||||
fangr f_c angr_c
|
||||
fanr f_c anr_c
|
||||
far f_c ar_c
|
||||
fengr f_c engr_c
|
||||
fenr f_c enr_c
|
||||
fur f_c ur_c
|
||||
gair g_c air_c
|
||||
ganr g_c anr_c
|
||||
gaor g_c aor_c
|
||||
gengr g_c engr_c
|
||||
genr g_c enr_c
|
||||
ger g_c er_c
|
||||
gongr g_c ongr_c
|
||||
gour g_c our_c
|
||||
guair g_c uair_c
|
||||
guanr g_c uanr_c
|
||||
guar g_c uar_c
|
||||
guir g_c ueir_c
|
||||
gunr g_c uenr_c
|
||||
guor g_c uor_c
|
||||
gur g_c ur_c
|
||||
hair h_c air_c
|
||||
hanr h_c anr_c
|
||||
haor h_c aor_c
|
||||
heir h_c eir_c
|
||||
her h_c er_c
|
||||
hour h_c our_c
|
||||
huanr h_c uanr_c
|
||||
huangr h_c uangr_c
|
||||
huar h_c uar_c
|
||||
huir h_c ueir_c
|
||||
hunr h_c uenr_c
|
||||
huor h_c uor_c
|
||||
hur h_c ur_c
|
||||
jianr j_c ianr_c
|
||||
jiaor j_c iaor_c
|
||||
jiar j_c iar_c
|
||||
jier j_c ier_c
|
||||
jingr j_c ingr_c
|
||||
jinr j_c inr_c
|
||||
jir j_c ir_c
|
||||
jiur j_c iour_c
|
||||
jvanr j_c vanr_c
|
||||
jver j_c ver_c
|
||||
jvnr j_c vnr_c
|
||||
kair k_c air_c
|
||||
kanr k_c anr_c
|
||||
kaor k_c aor_c
|
||||
kengr k_c engr_c
|
||||
ker k_c er_c
|
||||
kongr k_c ongr_c
|
||||
kour k_c our_c
|
||||
kuair k_c uair_c
|
||||
kuangr k_c uangr_c
|
||||
kuanr k_c uanr_c
|
||||
kunr k_c uenr_c
|
||||
lanr l_c anr_c
|
||||
laor l_c aor_c
|
||||
lar l_c ar_c
|
||||
leir l_c eir_c
|
||||
lengr l_c engr_c
|
||||
ler l_c er_c
|
||||
liangr l_c iangr_c
|
||||
lianr l_c ianr_c
|
||||
liaor l_c iaor_c
|
||||
liar l_c iar_c
|
||||
lingr l_c ingr_c
|
||||
lir l_c ir_c
|
||||
liur l_c iour_c
|
||||
lour l_c our_c
|
||||
luor l_c uor_c
|
||||
lunr l_c uenr_c
|
||||
lur l_c ur_c
|
||||
lvr l_c vr_c
|
||||
mair m_c air_c
|
||||
manr m_c anr_c
|
||||
mangr m_c angr_c
|
||||
maor m_c aor_c
|
||||
mar m_c ar_c
|
||||
meir m_c eir_c
|
||||
menr m_c enr_c
|
||||
mianr m_c ianr_c
|
||||
miaor m_c iaor_c
|
||||
mingr m_c ingr_c
|
||||
mir m_c ir_c
|
||||
mor m_c or_c
|
||||
naor n_c aor_c
|
||||
nar n_c ar_c
|
||||
niangr n_c iangr_c
|
||||
nianr n_c ianr_c
|
||||
niaor n_c iaor_c
|
||||
ningr n_c ingr_c
|
||||
nir n_c ir_c
|
||||
niur n_c iour_c
|
||||
nvr n_c vr_c
|
||||
pair p_c air_c
|
||||
pangr p_c angr_c
|
||||
panr p_c anr_c
|
||||
paor p_c aor_c
|
||||
penr p_c enr_c
|
||||
pianr p_c ianr_c
|
||||
piaor p_c iaor_c
|
||||
pier p_c ier_c
|
||||
pingr p_c ingr_c
|
||||
pir p_c ir_c
|
||||
por p_c or_c
|
||||
pur p_c ur_c
|
||||
qianr q_c ianr_c
|
||||
qiaor q_c iaor_c
|
||||
qingr q_c ingr_c
|
||||
qir q_c ir_c
|
||||
qiur q_c iour_c
|
||||
qvanr q_c vanr_c
|
||||
qvnr q_c vnr_c
|
||||
qvr q_c vr_c
|
||||
sar s_c ar_c
|
||||
rangr r_c angr_c
|
||||
renr r_c enr_c
|
||||
sair s_c air_c
|
||||
sanr s_c anr_c
|
||||
shair sh_c air_c
|
||||
shaor sh_c aor_c
|
||||
shengr sh_c engr_c
|
||||
shenr sh_c enr_c
|
||||
shir sh_c ihr_c
|
||||
shuair sh_c uair_c
|
||||
shour sh_c our_c
|
||||
shuar sh_c uar_c
|
||||
shuir sh_c ueir_c
|
||||
shunr sh_c uenr_c
|
||||
shuor sh_c uor_c
|
||||
shur sh_c ur_c
|
||||
sir s_c iir_c
|
||||
suir s_c ueir_c
|
||||
sunr s_c uenr_c
|
||||
tair t_c air_c
|
||||
tangr t_c angr_c
|
||||
tanr t_c anr_c
|
||||
taor t_c aor_c
|
||||
ter t_c er_c
|
||||
tianr t_c ianr_c
|
||||
tiaor t_c iaor_c
|
||||
tir t_c ir_c
|
||||
tingr t_c ingr_c
|
||||
tongr t_c ongr_c
|
||||
tour t_c our_c
|
||||
tuanr t_c uanr_c
|
||||
tuir t_c ueir_c
|
||||
tuor t_c uor_c
|
||||
tur t_c ur_c
|
||||
wanr w_c anr_c
|
||||
war w_c ar_c
|
||||
weir w_c eir_c
|
||||
wenr w_c enr_c
|
||||
wengr w_c engr_c
|
||||
wor w_c or_c
|
||||
wur w_c ur_c
|
||||
xiangr xx_c iangr_c
|
||||
xianr xx_c ianr_c
|
||||
xiar xx_c iar_c
|
||||
xier xx_c ier_c
|
||||
xingr xx_c ingr_c
|
||||
xir xx_c ir_c
|
||||
xinr xx_c inr_c
|
||||
xiongr xx_c iongr_c
|
||||
xiur xx_c iour_c
|
||||
yangr y_c angr_c
|
||||
yanr y_c anr_c
|
||||
yaor y_c aor_c
|
||||
yar y_c ar_c
|
||||
yer y_c er_c
|
||||
yingr y_c ingr_c
|
||||
yinr y_c inr_c
|
||||
yir y_c ir_c
|
||||
your y_c our_c
|
||||
yvanr y_c vanr_c
|
||||
zair z_c air_c
|
||||
yvr y_c vr_c
|
||||
yver y_c ver_c
|
||||
zaor z_c aor_c
|
||||
zar z_c ar_c
|
||||
zhangr zh_c angr_c
|
||||
zhanr zh_c anr_c
|
||||
zhaor zh_c aor_c
|
||||
zhar zh_c ar_c
|
||||
zhenr zh_c enr_c
|
||||
zher zh_c er_c
|
||||
zhir zh_c ihr_c
|
||||
zhongr zh_c ongr_c
|
||||
zhour zh_c our_c
|
||||
zhuar zh_c uar_c
|
||||
zhuanr zh_c uanr_c
|
||||
zhunr zh_c uenr_c
|
||||
zhuor zh_c uor_c
|
||||
zhur zh_c ur_c
|
||||
zir z_c iir_c
|
||||
zuanr z_c uanr_c
|
||||
zuir z_c ueir_c
|
||||
zuor z_c uor_c
|
||||
7
voices/zhiyan_emo/dict/PinYin/tonelist.txt
Normal file
7
voices/zhiyan_emo/dict/PinYin/tonelist.txt
Normal file
@ -0,0 +1,7 @@
|
||||
1
|
||||
|
||||
4
|
||||
2
|
||||
3
|
||||
5
|
||||
0
|
||||
33
voices/zhiyan_emo/dict/emo_category_dict.txt
Executable file
33
voices/zhiyan_emo/dict/emo_category_dict.txt
Executable file
@ -0,0 +1,33 @@
|
||||
emotion_none
|
||||
emotion_neutral
|
||||
emotion_angry
|
||||
emotion_disgust
|
||||
emotion_fear
|
||||
emotion_happy
|
||||
emotion_sad
|
||||
emotion_surprise
|
||||
emotion_calm
|
||||
emotion_gentle
|
||||
emotion_relax
|
||||
emotion_lyrical
|
||||
emotion_serious
|
||||
emotion_disgruntled
|
||||
emotion_satisfied
|
||||
emotion_disappointed
|
||||
emotion_excited
|
||||
emotion_anxiety
|
||||
emotion_jealousy
|
||||
emotion_hate
|
||||
emotion_pity
|
||||
emotion_pleasure
|
||||
emotion_arousal
|
||||
emotion_dominance
|
||||
emotion_placeholder1
|
||||
emotion_placeholder2
|
||||
emotion_placeholder3
|
||||
emotion_placeholder4
|
||||
emotion_placeholder5
|
||||
emotion_placeholder6
|
||||
emotion_placeholder7
|
||||
emotion_placeholder8
|
||||
emotion_placeholder9
|
||||
6
voices/zhiyan_emo/dict/speaker_dict.txt
Executable file
6
voices/zhiyan_emo/dict/speaker_dict.txt
Executable file
@ -0,0 +1,6 @@
|
||||
F7
|
||||
F74
|
||||
FBYN
|
||||
FRXL
|
||||
M7
|
||||
xiaoyu
|
||||
144
voices/zhiyan_emo/dict/sy_dict.txt
Executable file
144
voices/zhiyan_emo/dict/sy_dict.txt
Executable file
@ -0,0 +1,144 @@
|
||||
a_c
|
||||
ai_c
|
||||
an_c
|
||||
ang_c
|
||||
ao_c
|
||||
b_c
|
||||
c_c
|
||||
ch_c
|
||||
d_c
|
||||
e_c
|
||||
ei_c
|
||||
en_c
|
||||
eng_c
|
||||
er_c
|
||||
f_c
|
||||
g_c
|
||||
h_c
|
||||
i_c
|
||||
ia_c
|
||||
ian_c
|
||||
iang_c
|
||||
iao_c
|
||||
ie_c
|
||||
ih_c
|
||||
ii_c
|
||||
in_c
|
||||
ing_c
|
||||
io_c
|
||||
iong_c
|
||||
iou_c
|
||||
j_c
|
||||
k_c
|
||||
l_c
|
||||
m_c
|
||||
n_c
|
||||
o_c
|
||||
ong_c
|
||||
ou_c
|
||||
p_c
|
||||
q_c
|
||||
r_c
|
||||
s_c
|
||||
sh_c
|
||||
t_c
|
||||
u_c
|
||||
ua_c
|
||||
uai_c
|
||||
uan_c
|
||||
uang_c
|
||||
uei_c
|
||||
uen_c
|
||||
ueng_c
|
||||
uo_c
|
||||
v_c
|
||||
van_c
|
||||
ve_c
|
||||
vn_c
|
||||
xx_c
|
||||
z_c
|
||||
zh_c
|
||||
w_c
|
||||
y_c
|
||||
ga
|
||||
ge
|
||||
go
|
||||
aa
|
||||
ae
|
||||
ah
|
||||
ao
|
||||
aw
|
||||
ay
|
||||
b
|
||||
ch
|
||||
d
|
||||
dh
|
||||
eh
|
||||
er
|
||||
ey
|
||||
f
|
||||
g
|
||||
hh
|
||||
ih
|
||||
iy
|
||||
jh
|
||||
k
|
||||
l
|
||||
m
|
||||
n
|
||||
ng
|
||||
ow
|
||||
oy
|
||||
p
|
||||
r
|
||||
s
|
||||
sh
|
||||
t
|
||||
th
|
||||
uh
|
||||
uw
|
||||
v
|
||||
w
|
||||
y
|
||||
z
|
||||
zh
|
||||
air_c
|
||||
angr_c
|
||||
anr_c
|
||||
aor_c
|
||||
ar_c
|
||||
eir_c
|
||||
engr_c
|
||||
enr_c
|
||||
iangr_c
|
||||
ianr_c
|
||||
iaor_c
|
||||
iar_c
|
||||
ier_c
|
||||
ihr_c
|
||||
iir_c
|
||||
ingr_c
|
||||
inr_c
|
||||
iongr_c
|
||||
iour_c
|
||||
ir_c
|
||||
ongr_c
|
||||
or_c
|
||||
our_c
|
||||
uair_c
|
||||
uangr_c
|
||||
uanr_c
|
||||
uar_c
|
||||
ueir_c
|
||||
uenr_c
|
||||
uor_c
|
||||
ur_c
|
||||
vanr_c
|
||||
ver_c
|
||||
vnr_c
|
||||
vr_c
|
||||
pau
|
||||
#1
|
||||
#2
|
||||
#3
|
||||
#4
|
||||
5
voices/zhiyan_emo/dict/syllable_flag_dict.txt
Executable file
5
voices/zhiyan_emo/dict/syllable_flag_dict.txt
Executable file
@ -0,0 +1,5 @@
|
||||
s_begin
|
||||
s_end
|
||||
s_none
|
||||
s_both
|
||||
s_middle
|
||||
7
voices/zhiyan_emo/dict/tone_dict.txt
Executable file
7
voices/zhiyan_emo/dict/tone_dict.txt
Executable file
@ -0,0 +1,7 @@
|
||||
tone1
|
||||
tone_none
|
||||
tone4
|
||||
tone2
|
||||
tone3
|
||||
tone5
|
||||
tone0
|
||||
5
voices/zhiyan_emo/dict/word_segment_dict.txt
Executable file
5
voices/zhiyan_emo/dict/word_segment_dict.txt
Executable file
@ -0,0 +1,5 @@
|
||||
word_begin
|
||||
word_end
|
||||
word_middle
|
||||
word_both
|
||||
word_none
|
||||
BIN
voices/zhiyan_emo/voc/ckpt/checkpoint_0.pth
(Stored with Git LFS)
Normal file
BIN
voices/zhiyan_emo/voc/ckpt/checkpoint_0.pth
(Stored with Git LFS)
Normal file
Binary file not shown.
BIN
voices/zhiyan_emo/voc/ckpt/checkpoint_1.pth
(Stored with Git LFS)
Normal file
BIN
voices/zhiyan_emo/voc/ckpt/checkpoint_1.pth
(Stored with Git LFS)
Normal file
Binary file not shown.
188
voices/zhiyan_emo/voc/config.yaml
Normal file
188
voices/zhiyan_emo/voc/config.yaml
Normal file
@ -0,0 +1,188 @@
|
||||
model_type: hifigan
|
||||
Model:
|
||||
###########################################################
|
||||
# GENERATOR NETWORK ARCHITECTURE SETTING #
|
||||
###########################################################
|
||||
Generator:
|
||||
params:
|
||||
in_channels: 80
|
||||
out_channels: 1
|
||||
channels: 256
|
||||
kernel_size: 7
|
||||
upsample_scales: [10, 5, 2, 2]
|
||||
upsample_kernal_sizes: [20, 11, 4, 4]
|
||||
resblock_kernel_sizes: [3, 7, 11]
|
||||
resblock_dilations:
|
||||
- [1, 3, 5, 7]
|
||||
- [1, 3, 5, 7]
|
||||
- [1, 3, 5, 7]
|
||||
bias: true
|
||||
causal: true
|
||||
nonlinear_activation: "LeakyReLU"
|
||||
nonlinear_activation_params:
|
||||
negative_slope: 0.1
|
||||
use_weight_norm: true
|
||||
optimizer:
|
||||
type: Adam
|
||||
params:
|
||||
lr: 2.0e-4
|
||||
betas: [0.5, 0.9]
|
||||
weight_decay: 0.0
|
||||
scheduler:
|
||||
type: MultiStepLR
|
||||
params:
|
||||
gamma: 0.5
|
||||
milestones:
|
||||
- 200000
|
||||
- 400000
|
||||
- 600000
|
||||
- 800000
|
||||
|
||||
###########################################################
|
||||
# DISCRIMINATOR NETWORK ARCHITECTURE SETTING #
|
||||
###########################################################
|
||||
MultiScaleDiscriminator:
|
||||
params:
|
||||
scales: 3
|
||||
downsample_pooling: "DWT"
|
||||
downsample_pooling_params:
|
||||
kernel_size: 4
|
||||
stride: 2
|
||||
padding: 2
|
||||
discriminator_params:
|
||||
in_channels: 1
|
||||
out_channels: 1
|
||||
kernel_sizes: [15, 41, 5, 3]
|
||||
channels: 128
|
||||
max_downsample_channels: 1024
|
||||
max_groups: 16
|
||||
bias: true
|
||||
downsample_scales: [4, 4, 4, 4, 1]
|
||||
nonlinear_activation: "LeakyReLU"
|
||||
nonlinear_activation_params:
|
||||
negative_slope: 0.1
|
||||
follow_official_norm: true
|
||||
optimizer:
|
||||
type: Adam
|
||||
params:
|
||||
lr: 2.0e-4
|
||||
betas: [0.5, 0.9]
|
||||
weight_decay: 0.0
|
||||
scheduler:
|
||||
type: MultiStepLR
|
||||
params:
|
||||
gamma: 0.5
|
||||
milestones:
|
||||
- 200000
|
||||
- 400000
|
||||
- 600000
|
||||
- 800000
|
||||
|
||||
MultiPeriodDiscriminator:
|
||||
params:
|
||||
periods: [2, 3, 5, 7, 11]
|
||||
discriminator_params:
|
||||
in_channels: 1
|
||||
out_channels: 1
|
||||
kernel_sizes: [5, 3]
|
||||
channels: 32
|
||||
downsample_scales: [3, 3, 3, 3, 1]
|
||||
max_downsample_channels: 1024
|
||||
bias: true
|
||||
nonlinear_activation: "LeakyReLU"
|
||||
nonlinear_activation_params:
|
||||
negative_slope: 0.1
|
||||
use_spectral_norm: false
|
||||
optimizer:
|
||||
type: Adam
|
||||
params:
|
||||
lr: 2.0e-4
|
||||
betas: [0.5, 0.9]
|
||||
weight_decay: 0.0
|
||||
scheduler:
|
||||
type: MultiStepLR
|
||||
params:
|
||||
gamma: 0.5
|
||||
milestones:
|
||||
- 200000
|
||||
- 400000
|
||||
- 600000
|
||||
- 800000
|
||||
|
||||
####################################################
|
||||
# LOSS SETTING #
|
||||
####################################################
|
||||
Loss:
|
||||
generator_adv_loss:
|
||||
enable: True
|
||||
params:
|
||||
average_by_discriminators: False
|
||||
weights: 1.0
|
||||
|
||||
discriminator_adv_loss:
|
||||
enable: True
|
||||
params:
|
||||
average_by_discriminators: False
|
||||
weights: 1.0
|
||||
|
||||
stft_loss:
|
||||
enable: False # Whether to use multi-resolution STFT loss.
|
||||
|
||||
mel_loss:
|
||||
enable: True
|
||||
params:
|
||||
fs: 16000
|
||||
fft_size: 2048
|
||||
hop_size: 200
|
||||
win_length: 1000
|
||||
window: "hann"
|
||||
num_mels: 80
|
||||
fmin: 0
|
||||
fmax: 8000
|
||||
log_base: null
|
||||
weights: 45.0
|
||||
|
||||
subband_stft_loss:
|
||||
enable: False
|
||||
params:
|
||||
fft_sizes: [384, 683, 171] # List of FFT size for STFT-based loss.
|
||||
hop_sizes: [35, 75, 15] # List of hop size for STFT-based loss
|
||||
win_lengths: [150, 300, 60] # List of window length for STFT-based loss.
|
||||
window: "hann_window" # Window function for STFT-based loss
|
||||
|
||||
feat_match_loss:
|
||||
enable: True
|
||||
params:
|
||||
average_by_discriminators: false
|
||||
average_by_layers: false
|
||||
weights: 2.0
|
||||
|
||||
|
||||
###########################################################
|
||||
# DATA LOADER SETTING #
|
||||
###########################################################
|
||||
batch_size: 16
|
||||
batch_max_steps: 9600 # Length of each audio in batch. Make sure it is divisible by hop_size.
|
||||
pin_memory: True
|
||||
num_workers: 2 # FIXME: setting > 0 may get stuck on macOS
|
||||
remove_short_samples: False
|
||||
allow_cache: True
|
||||
|
||||
generator_grad_norm: -1
|
||||
|
||||
discriminator_grad_norm: -1
|
||||
|
||||
###########################################################
|
||||
# INTERVAL SETTING #
|
||||
###########################################################
|
||||
generator_train_start_steps: 1 # Number of steps to start to train generator.
|
||||
discriminator_train_start_steps: 0 # Number of steps to start to train discriminator.
|
||||
train_max_steps: 2500000 # Number of training steps.
|
||||
save_interval_steps: 20000 # Interval steps to save checkpoint.
|
||||
eval_interval_steps: 10000 # Interval steps to evaluate the network.
|
||||
log_interval_steps: 1000 # Interval steps to record the training log.
|
||||
|
||||
###########################################################
|
||||
# OTHER SETTING #
|
||||
###########################################################
|
||||
num_save_intermediate_results: 4 # Number of results to be saved as intermediate results.
|
||||
BIN
voices/zhiyan_emo/vocoder/pytorch_model.bin
(Stored with Git LFS)
Normal file
BIN
voices/zhiyan_emo/vocoder/pytorch_model.bin
(Stored with Git LFS)
Normal file
Binary file not shown.
BIN
voices/zhizhe_emo/am/ckpt/checkpoint_0.pth
(Stored with Git LFS)
Normal file
BIN
voices/zhizhe_emo/am/ckpt/checkpoint_0.pth
(Stored with Git LFS)
Normal file
Binary file not shown.
105
voices/zhizhe_emo/am/config.yaml
Normal file
105
voices/zhizhe_emo/am/config.yaml
Normal file
@ -0,0 +1,105 @@
|
||||
model_type: sambert
|
||||
Model:
|
||||
#########################################################
|
||||
# SAMBERT NETWORK ARCHITECTURE SETTING #
|
||||
#########################################################
|
||||
KanTtsSAMBERT:
|
||||
params:
|
||||
max_len: 800
|
||||
|
||||
embedding_dim: 512
|
||||
encoder_num_layers: 8
|
||||
encoder_num_heads: 8
|
||||
encoder_num_units: 128
|
||||
encoder_ffn_inner_dim: 1024
|
||||
encoder_dropout: 0.1
|
||||
encoder_attention_dropout: 0.1
|
||||
encoder_relu_dropout: 0.1
|
||||
encoder_projection_units: 32
|
||||
|
||||
speaker_units: 32
|
||||
emotion_units: 32
|
||||
|
||||
predictor_filter_size: 41
|
||||
predictor_fsmn_num_layers: 3
|
||||
predictor_num_memory_units: 128
|
||||
predictor_ffn_inner_dim: 256
|
||||
predictor_dropout: 0.1
|
||||
predictor_shift: 0
|
||||
predictor_lstm_units: 128
|
||||
dur_pred_prenet_units: [128, 128]
|
||||
dur_pred_lstm_units: 128
|
||||
|
||||
decoder_prenet_units: [256, 256]
|
||||
decoder_num_layers: 12
|
||||
decoder_num_heads: 8
|
||||
decoder_num_units: 128
|
||||
decoder_ffn_inner_dim: 1024
|
||||
decoder_dropout: 0.1
|
||||
decoder_attention_dropout: 0.1
|
||||
decoder_relu_dropout: 0.1
|
||||
|
||||
outputs_per_step: 3
|
||||
num_mels: 80
|
||||
|
||||
postnet_filter_size: 41
|
||||
postnet_fsmn_num_layers: 4
|
||||
postnet_num_memory_units: 256
|
||||
postnet_ffn_inner_dim: 512
|
||||
postnet_dropout: 0.1
|
||||
postnet_shift: 17
|
||||
postnet_lstm_units: 128
|
||||
MAS: False
|
||||
|
||||
optimizer:
|
||||
type: Adam
|
||||
params:
|
||||
lr: 0.001
|
||||
betas: [0.9, 0.98]
|
||||
eps: 1.0e-9
|
||||
weight_decay: 0.0
|
||||
scheduler:
|
||||
type: NoamLR
|
||||
params:
|
||||
warmup_steps: 4000
|
||||
|
||||
linguistic_unit:
|
||||
cleaners: english_cleaners
|
||||
lfeat_type_list: sy,tone,syllable_flag,word_segment,emo_category,speaker_category
|
||||
speaker_list: F7,F74,FBYN,FRXL,M7,xiaoyu
|
||||
####################################################
|
||||
# LOSS SETTING #
|
||||
####################################################
|
||||
Loss:
|
||||
MelReconLoss:
|
||||
enable: True
|
||||
params:
|
||||
loss_type: mae
|
||||
|
||||
ProsodyReconLoss:
|
||||
enable: True
|
||||
params:
|
||||
loss_type: mae
|
||||
|
||||
###########################################################
|
||||
# DATA LOADER SETTING #
|
||||
###########################################################
|
||||
batch_size: 32
|
||||
pin_memory: False
|
||||
num_workers: 4 # FIXME: setting > 0 may hang on macOS
|
||||
remove_short_samples: False
|
||||
allow_cache: True
|
||||
grad_norm: 1.0
|
||||
|
||||
###########################################################
|
||||
# INTERVAL SETTING #
|
||||
###########################################################
|
||||
train_max_steps: 1000000 # Number of training steps.
|
||||
save_interval_steps: 20000 # Interval steps to save checkpoint.
|
||||
eval_interval_steps: 10000 # Interval steps to evaluate the network.
|
||||
log_interval_steps: 1000 # Interval steps to record the training log.
|
||||
|
||||
###########################################################
|
||||
# OTHER SETTING #
|
||||
###########################################################
|
||||
num_save_intermediate_results: 4 # Number of results to be saved as intermediate results.
|
||||
BIN
voices/zhizhe_emo/am/pytorch_model.bin
(Stored with Git LFS)
Normal file
BIN
voices/zhizhe_emo/am/pytorch_model.bin
(Stored with Git LFS)
Normal file
Binary file not shown.
27
voices/zhizhe_emo/audio_config.yaml
Normal file
27
voices/zhizhe_emo/audio_config.yaml
Normal file
@ -0,0 +1,27 @@
|
||||
# Audio processing configs
|
||||
|
||||
audio_config:
|
||||
# Preprocess
|
||||
wav_normalize: True
|
||||
trim_silence: True
|
||||
trim_silence_threshold_db: 60
|
||||
preemphasize: False
|
||||
|
||||
# Feature extraction
|
||||
sampling_rate: 16000
|
||||
hop_length: 200
|
||||
win_length: 1000
|
||||
n_fft: 2048
|
||||
n_mels: 80
|
||||
fmin: 0.0
|
||||
fmax: 8000.0
|
||||
phone_level_feature: True
|
||||
|
||||
# Normalization
|
||||
norm_type: "mean_std" # "mean_std" or "global"
|
||||
max_norm: 1.0
|
||||
symmetric: False
|
||||
min_level_db: -100.0
|
||||
ref_level_db: 20
|
||||
|
||||
num_workers: 16
|
||||
2
voices/zhizhe_emo/dict/PinYin/En2ChPhoneMap.txt
Normal file
2
voices/zhizhe_emo/dict/PinYin/En2ChPhoneMap.txt
Normal file
@ -0,0 +1,2 @@
|
||||
wu w
|
||||
yi y
|
||||
1263
voices/zhizhe_emo/dict/PinYin/PhoneSet.xml
Normal file
1263
voices/zhizhe_emo/dict/PinYin/PhoneSet.xml
Normal file
File diff suppressed because it is too large
Load Diff
147
voices/zhizhe_emo/dict/PinYin/PosSet.xml
Normal file
147
voices/zhizhe_emo/dict/PinYin/PosSet.xml
Normal file
@ -0,0 +1,147 @@
|
||||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<posSet xmlns="http://schemas.alibaba-inc.com/tts">
|
||||
<pos>
|
||||
<id>1</id>
|
||||
<name>a</name>
|
||||
<desc>todo</desc>
|
||||
</pos>
|
||||
<pos>
|
||||
<id>2</id>
|
||||
<name>b</name>
|
||||
<desc>todo</desc>
|
||||
</pos>
|
||||
<pos>
|
||||
<id>3</id>
|
||||
<name>c</name>
|
||||
<desc>todo</desc>
|
||||
</pos>
|
||||
<pos>
|
||||
<id>4</id>
|
||||
<name>d</name>
|
||||
<desc>todo</desc>
|
||||
</pos>
|
||||
<pos>
|
||||
<id>5</id>
|
||||
<name>e</name>
|
||||
<desc>todo</desc>
|
||||
</pos>
|
||||
<pos>
|
||||
<id>6</id>
|
||||
<name>f</name>
|
||||
<desc>todo</desc>
|
||||
</pos>
|
||||
<pos>
|
||||
<id>7</id>
|
||||
<name>g</name>
|
||||
<desc>todo</desc>
|
||||
<sub>
|
||||
<pos>
|
||||
<id>8</id>
|
||||
<name>gb</name>
|
||||
<desc>todo</desc>
|
||||
</pos>
|
||||
</sub>
|
||||
</pos>
|
||||
<pos>
|
||||
<id>9</id>
|
||||
<name>h</name>
|
||||
<desc>todo</desc>
|
||||
</pos>
|
||||
<pos>
|
||||
<id>10</id>
|
||||
<name>i</name>
|
||||
<desc>todo</desc>
|
||||
</pos>
|
||||
<pos>
|
||||
<id>11</id>
|
||||
<name>j</name>
|
||||
<desc>todo</desc>
|
||||
</pos>
|
||||
<pos>
|
||||
<id>12</id>
|
||||
<name>k</name>
|
||||
<desc>todo</desc>
|
||||
</pos>
|
||||
<pos>
|
||||
<id>13</id>
|
||||
<name>l</name>
|
||||
<desc>todo</desc>
|
||||
</pos>
|
||||
<pos>
|
||||
<id>14</id>
|
||||
<name>m</name>
|
||||
<desc>todo</desc>
|
||||
</pos>
|
||||
<pos>
|
||||
<id>15</id>
|
||||
<name>n</name>
|
||||
<desc>todo</desc>
|
||||
<sub>
|
||||
<pos>
|
||||
<id>16</id>
|
||||
<name>nz</name>
|
||||
<desc>todo</desc>
|
||||
</pos>
|
||||
</sub>
|
||||
</pos>
|
||||
<pos>
|
||||
<id>17</id>
|
||||
<name>o</name>
|
||||
<desc>todo</desc>
|
||||
</pos>
|
||||
<pos>
|
||||
<id>18</id>
|
||||
<name>p</name>
|
||||
<desc>todo</desc>
|
||||
</pos>
|
||||
<pos>
|
||||
<id>19</id>
|
||||
<name>q</name>
|
||||
<desc>todo</desc>
|
||||
</pos>
|
||||
<pos>
|
||||
<id>20</id>
|
||||
<name>r</name>
|
||||
<desc>todo</desc>
|
||||
</pos>
|
||||
<pos>
|
||||
<id>21</id>
|
||||
<name>s</name>
|
||||
<desc>todo</desc>
|
||||
</pos>
|
||||
<pos>
|
||||
<id>22</id>
|
||||
<name>t</name>
|
||||
<desc>todo</desc>
|
||||
</pos>
|
||||
<pos>
|
||||
<id>23</id>
|
||||
<name>u</name>
|
||||
<desc>todo</desc>
|
||||
</pos>
|
||||
<pos>
|
||||
<id>24</id>
|
||||
<name>v</name>
|
||||
<desc>todo</desc>
|
||||
</pos>
|
||||
<pos>
|
||||
<id>25</id>
|
||||
<name>w</name>
|
||||
<desc>todo</desc>
|
||||
</pos>
|
||||
<pos>
|
||||
<id>26</id>
|
||||
<name>x</name>
|
||||
<desc>todo</desc>
|
||||
</pos>
|
||||
<pos>
|
||||
<id>27</id>
|
||||
<name>y</name>
|
||||
<desc>todo</desc>
|
||||
</pos>
|
||||
<pos>
|
||||
<id>28</id>
|
||||
<name>z</name>
|
||||
<desc>todo</desc>
|
||||
</pos>
|
||||
</posSet>
|
||||
661
voices/zhizhe_emo/dict/PinYin/py2phoneMap.txt
Normal file
661
voices/zhizhe_emo/dict/PinYin/py2phoneMap.txt
Normal file
@ -0,0 +1,661 @@
|
||||
a ga a_c
|
||||
ai ga ai_c
|
||||
an ga an_c
|
||||
ang ga ang_c
|
||||
ao ga ao_c
|
||||
ba b_c a_c
|
||||
bai b_c ai_c
|
||||
ban b_c an_c
|
||||
bang b_c ang_c
|
||||
bao b_c ao_c
|
||||
bei b_c ei_c
|
||||
ben b_c en_c
|
||||
beng b_c eng_c
|
||||
bi b_c i_c
|
||||
bian b_c ian_c
|
||||
biao b_c iao_c
|
||||
bie b_c ie_c
|
||||
bin b_c in_c
|
||||
bing b_c ing_c
|
||||
bo b_c o_c
|
||||
bu b_c u_c
|
||||
ca c_c a_c
|
||||
cai c_c ai_c
|
||||
can c_c an_c
|
||||
cang c_c ang_c
|
||||
cao c_c ao_c
|
||||
ce c_c e_c
|
||||
cen c_c en_c
|
||||
ceng c_c eng_c
|
||||
cha ch_c a_c
|
||||
chai ch_c ai_c
|
||||
chan ch_c an_c
|
||||
chang ch_c ang_c
|
||||
chao ch_c ao_c
|
||||
che ch_c e_c
|
||||
chen ch_c en_c
|
||||
cheng ch_c eng_c
|
||||
chi ch_c ih_c
|
||||
chong ch_c ong_c
|
||||
chou ch_c ou_c
|
||||
chu ch_c u_c
|
||||
chua ch_c ua_c
|
||||
chuai ch_c uai_c
|
||||
chuan ch_c uan_c
|
||||
chuang ch_c uang_c
|
||||
chui ch_c uei_c
|
||||
chun ch_c uen_c
|
||||
chuo ch_c uo_c
|
||||
ci c_c ii_c
|
||||
cong c_c ong_c
|
||||
cou c_c ou_c
|
||||
cu c_c u_c
|
||||
cuan c_c uan_c
|
||||
cui c_c uei_c
|
||||
cun c_c uen_c
|
||||
cuo c_c uo_c
|
||||
da d_c a_c
|
||||
dai d_c ai_c
|
||||
dan d_c an_c
|
||||
dang d_c ang_c
|
||||
dao d_c ao_c
|
||||
de d_c e_c
|
||||
dei d_c ei_c
|
||||
den d_c en_c
|
||||
deng d_c eng_c
|
||||
di d_c i_c
|
||||
dia d_c ia_c
|
||||
dian d_c ian_c
|
||||
diao d_c iao_c
|
||||
die d_c ie_c
|
||||
ding d_c ing_c
|
||||
diu d_c iou_c
|
||||
dong d_c ong_c
|
||||
dou d_c ou_c
|
||||
du d_c u_c
|
||||
duan d_c uan_c
|
||||
dui d_c uei_c
|
||||
dun d_c uen_c
|
||||
duo d_c uo_c
|
||||
e ge e_c
|
||||
ei ge ei_c
|
||||
en ge en_c
|
||||
eng ge eng_c
|
||||
er ge er_c
|
||||
fa f_c a_c
|
||||
fan f_c an_c
|
||||
fang f_c ang_c
|
||||
fei f_c ei_c
|
||||
fen f_c en_c
|
||||
feng f_c eng_c
|
||||
fo f_c o_c
|
||||
fou f_c ou_c
|
||||
fu f_c u_c
|
||||
ga g_c a_c
|
||||
gai g_c ai_c
|
||||
gan g_c an_c
|
||||
gang g_c ang_c
|
||||
gao g_c ao_c
|
||||
ge g_c e_c
|
||||
gei g_c ei_c
|
||||
gen g_c en_c
|
||||
geng g_c eng_c
|
||||
gong g_c ong_c
|
||||
gou g_c ou_c
|
||||
gu g_c u_c
|
||||
gua g_c ua_c
|
||||
guai g_c uai_c
|
||||
guan g_c uan_c
|
||||
guang g_c uang_c
|
||||
gui g_c uei_c
|
||||
gun g_c uen_c
|
||||
guo g_c uo_c
|
||||
ha h_c a_c
|
||||
hai h_c ai_c
|
||||
han h_c an_c
|
||||
hang h_c ang_c
|
||||
hao h_c ao_c
|
||||
he h_c e_c
|
||||
hei h_c ei_c
|
||||
hen h_c en_c
|
||||
heng h_c eng_c
|
||||
hong h_c ong_c
|
||||
hou h_c ou_c
|
||||
hu h_c u_c
|
||||
hua h_c ua_c
|
||||
huai h_c uai_c
|
||||
huan h_c uan_c
|
||||
huang h_c uang_c
|
||||
hui h_c uei_c
|
||||
hun h_c uen_c
|
||||
huo h_c uo_c
|
||||
ji j_c i_c
|
||||
jia j_c ia_c
|
||||
jian j_c ian_c
|
||||
jiang j_c iang_c
|
||||
jiao j_c iao_c
|
||||
jie j_c ie_c
|
||||
jin j_c in_c
|
||||
jing j_c ing_c
|
||||
jiong j_c iong_c
|
||||
jiu j_c iou_c
|
||||
jv j_c v_c
|
||||
jvan j_c van_c
|
||||
jve j_c ve_c
|
||||
jvn j_c vn_c
|
||||
ka k_c a_c
|
||||
kai k_c ai_c
|
||||
kan k_c an_c
|
||||
kang k_c ang_c
|
||||
kao k_c ao_c
|
||||
ke k_c e_c
|
||||
kei k_c ei_c
|
||||
ken k_c en_c
|
||||
keng k_c eng_c
|
||||
kong k_c ong_c
|
||||
kou k_c ou_c
|
||||
ku k_c u_c
|
||||
kua k_c ua_c
|
||||
kuai k_c uai_c
|
||||
kuan k_c uan_c
|
||||
kuang k_c uang_c
|
||||
kui k_c uei_c
|
||||
kun k_c uen_c
|
||||
kuo k_c uo_c
|
||||
la l_c a_c
|
||||
lai l_c ai_c
|
||||
lan l_c an_c
|
||||
lang l_c ang_c
|
||||
lao l_c ao_c
|
||||
le l_c e_c
|
||||
lei l_c ei_c
|
||||
leng l_c eng_c
|
||||
li l_c i_c
|
||||
lia l_c ia_c
|
||||
lian l_c ian_c
|
||||
liang l_c iang_c
|
||||
liao l_c iao_c
|
||||
lie l_c ie_c
|
||||
lin l_c in_c
|
||||
ling l_c ing_c
|
||||
liu l_c iou_c
|
||||
lo l_c o_c
|
||||
long l_c ong_c
|
||||
lou l_c ou_c
|
||||
lu l_c u_c
|
||||
luan l_c uan_c
|
||||
lun l_c uen_c
|
||||
luo l_c uo_c
|
||||
lv l_c v_c
|
||||
lve l_c ve_c
|
||||
ma m_c a_c
|
||||
mai m_c ai_c
|
||||
man m_c an_c
|
||||
mang m_c ang_c
|
||||
mao m_c ao_c
|
||||
me m_c e_c
|
||||
mei m_c ei_c
|
||||
men m_c en_c
|
||||
meng m_c eng_c
|
||||
mi m_c i_c
|
||||
mian m_c ian_c
|
||||
miao m_c iao_c
|
||||
mie m_c ie_c
|
||||
min m_c in_c
|
||||
ming m_c ing_c
|
||||
miu m_c iou_c
|
||||
mo m_c o_c
|
||||
mou m_c ou_c
|
||||
mu m_c u_c
|
||||
na n_c a_c
|
||||
nai n_c ai_c
|
||||
nan n_c an_c
|
||||
nang n_c ang_c
|
||||
nao n_c ao_c
|
||||
ne n_c e_c
|
||||
nei n_c ei_c
|
||||
nen n_c en_c
|
||||
neng n_c eng_c
|
||||
ni n_c i_c
|
||||
nian n_c ian_c
|
||||
niang n_c iang_c
|
||||
niao n_c iao_c
|
||||
nie n_c ie_c
|
||||
nin n_c in_c
|
||||
ning n_c ing_c
|
||||
niu n_c iou_c
|
||||
nong n_c ong_c
|
||||
nou n_c ou_c
|
||||
nu n_c u_c
|
||||
nuan n_c uan_c
|
||||
nun n_c uen_c
|
||||
nuo n_c uo_c
|
||||
nv n_c v_c
|
||||
nve n_c ve_c
|
||||
o go o_c
|
||||
ou go ou_c
|
||||
pa p_c a_c
|
||||
pai p_c ai_c
|
||||
pan p_c an_c
|
||||
pang p_c ang_c
|
||||
pao p_c ao_c
|
||||
pei p_c ei_c
|
||||
pen p_c en_c
|
||||
peng p_c eng_c
|
||||
pi p_c i_c
|
||||
pian p_c ian_c
|
||||
piao p_c iao_c
|
||||
pie p_c ie_c
|
||||
pin p_c in_c
|
||||
ping p_c ing_c
|
||||
po p_c o_c
|
||||
pou p_c ou_c
|
||||
pu p_c u_c
|
||||
qi q_c i_c
|
||||
qia q_c ia_c
|
||||
qian q_c ian_c
|
||||
qiang q_c iang_c
|
||||
qiao q_c iao_c
|
||||
qie q_c ie_c
|
||||
qin q_c in_c
|
||||
qing q_c ing_c
|
||||
qiong q_c iong_c
|
||||
qiu q_c iou_c
|
||||
qv q_c v_c
|
||||
qvan q_c van_c
|
||||
qve q_c ve_c
|
||||
qvn q_c vn_c
|
||||
ran r_c an_c
|
||||
rang r_c ang_c
|
||||
rao r_c ao_c
|
||||
re r_c e_c
|
||||
ren r_c en_c
|
||||
reng r_c eng_c
|
||||
ri r_c ih_c
|
||||
rong r_c ong_c
|
||||
rou r_c ou_c
|
||||
ru r_c u_c
|
||||
ruan r_c uan_c
|
||||
rui r_c uei_c
|
||||
run r_c uen_c
|
||||
ruo r_c uo_c
|
||||
sa s_c a_c
|
||||
sai s_c ai_c
|
||||
san s_c an_c
|
||||
sang s_c ang_c
|
||||
sao s_c ao_c
|
||||
se s_c e_c
|
||||
sen s_c en_c
|
||||
seng s_c eng_c
|
||||
sha sh_c a_c
|
||||
shai sh_c ai_c
|
||||
shan sh_c an_c
|
||||
shang sh_c ang_c
|
||||
shao sh_c ao_c
|
||||
she sh_c e_c
|
||||
shei sh_c ei_c
|
||||
shen sh_c en_c
|
||||
sheng sh_c eng_c
|
||||
shi sh_c ih_c
|
||||
shou sh_c ou_c
|
||||
shu sh_c u_c
|
||||
shua sh_c ua_c
|
||||
shuai sh_c uai_c
|
||||
shuan sh_c uan_c
|
||||
shuang sh_c uang_c
|
||||
shui sh_c uei_c
|
||||
shun sh_c uen_c
|
||||
shuo sh_c uo_c
|
||||
si s_c ii_c
|
||||
song s_c ong_c
|
||||
sou s_c ou_c
|
||||
su s_c u_c
|
||||
suan s_c uan_c
|
||||
sui s_c uei_c
|
||||
sun s_c uen_c
|
||||
suo s_c uo_c
|
||||
ta t_c a_c
|
||||
tai t_c ai_c
|
||||
tan t_c an_c
|
||||
tang t_c ang_c
|
||||
tao t_c ao_c
|
||||
te t_c e_c
|
||||
tei t_c ei_c
|
||||
teng t_c eng_c
|
||||
ti t_c i_c
|
||||
tian t_c ian_c
|
||||
tiao t_c iao_c
|
||||
tie t_c ie_c
|
||||
ting t_c ing_c
|
||||
tong t_c ong_c
|
||||
tou t_c ou_c
|
||||
tu t_c u_c
|
||||
tuan t_c uan_c
|
||||
tui t_c uei_c
|
||||
tun t_c uen_c
|
||||
tuo t_c uo_c
|
||||
wa w_c a_c
|
||||
wai w_c ai_c
|
||||
wan w_c an_c
|
||||
wang w_c ang_c
|
||||
wei w_c ei_c
|
||||
wen w_c en_c
|
||||
weng w_c eng_c
|
||||
wo w_c o_c
|
||||
wu w_c u_c
|
||||
xi xx_c i_c
|
||||
xia xx_c ia_c
|
||||
xian xx_c ian_c
|
||||
xiang xx_c iang_c
|
||||
xiao xx_c iao_c
|
||||
xie xx_c ie_c
|
||||
xin xx_c in_c
|
||||
xing xx_c ing_c
|
||||
xiong xx_c iong_c
|
||||
xiu xx_c iou_c
|
||||
xv xx_c v_c
|
||||
xvan xx_c van_c
|
||||
xve xx_c ve_c
|
||||
xvn xx_c vn_c
|
||||
ya y_c a_c
|
||||
yan y_c an_c
|
||||
yang y_c ang_c
|
||||
yao y_c ao_c
|
||||
ye y_c e_c
|
||||
yi y_c i_c
|
||||
yin y_c in_c
|
||||
ying y_c ing_c
|
||||
yo y_c o_c
|
||||
yong y_c ong_c
|
||||
you y_c ou_c
|
||||
yv y_c v_c
|
||||
yvan y_c van_c
|
||||
yve y_c ve_c
|
||||
yvn y_c vn_c
|
||||
za z_c a_c
|
||||
zai z_c ai_c
|
||||
zan z_c an_c
|
||||
zang z_c ang_c
|
||||
zao z_c ao_c
|
||||
ze z_c e_c
|
||||
zei z_c ei_c
|
||||
zen z_c en_c
|
||||
zeng z_c eng_c
|
||||
zha zh_c a_c
|
||||
zhai zh_c ai_c
|
||||
zhan zh_c an_c
|
||||
zhang zh_c ang_c
|
||||
zhao zh_c ao_c
|
||||
zhe zh_c e_c
|
||||
zhei zh_c ei_c
|
||||
zhen zh_c en_c
|
||||
zheng zh_c eng_c
|
||||
zhi zh_c ih_c
|
||||
zhong zh_c ong_c
|
||||
zhou zh_c ou_c
|
||||
zhu zh_c u_c
|
||||
zhua zh_c ua_c
|
||||
zhuai zh_c uai_c
|
||||
zhuan zh_c uan_c
|
||||
zhuang zh_c uang_c
|
||||
zhui zh_c uei_c
|
||||
zhun zh_c uen_c
|
||||
zhuo zh_c uo_c
|
||||
zi z_c ii_c
|
||||
zong z_c ong_c
|
||||
zou z_c ou_c
|
||||
zu z_c u_c
|
||||
zuan z_c uan_c
|
||||
zui z_c uei_c
|
||||
zun z_c uen_c
|
||||
zuo z_c uo_c
|
||||
bangr b_c angr_c
|
||||
banr b_c anr_c
|
||||
baor b_c aor_c
|
||||
bar b_c ar_c
|
||||
beir b_c eir_c
|
||||
bengr b_c engr_c
|
||||
benr b_c enr_c
|
||||
bianr b_c ianr_c
|
||||
biaor b_c iaor_c
|
||||
bingr b_c ingr_c
|
||||
bir b_c ir_c
|
||||
bor b_c or_c
|
||||
bur b_c ur_c
|
||||
caor c_c aor_c
|
||||
car c_c ar_c
|
||||
changr ch_c angr_c
|
||||
chaor ch_c aor_c
|
||||
char ch_c ar_c
|
||||
chengr ch_c engr_c
|
||||
cher ch_c er_c
|
||||
chir ch_c ihr_c
|
||||
chongr ch_c ongr_c
|
||||
chour ch_c our_c
|
||||
chuangr ch_c uangr_c
|
||||
chuanr ch_c uanr_c
|
||||
chuir ch_c ueir_c
|
||||
chunr ch_c uenr_c
|
||||
chuor ch_c uor_c
|
||||
chur ch_c ur_c
|
||||
cir c_c iir_c
|
||||
congr c_c ongr_c
|
||||
cuir c_c ueir_c
|
||||
cunr c_c uenr_c
|
||||
cuor c_c uor_c
|
||||
dair d_c air_c
|
||||
danr d_c anr_c
|
||||
dangr d_c angr_c
|
||||
daor d_c aor_c
|
||||
dengr d_c engr_c
|
||||
dianr d_c ianr_c
|
||||
diaor d_c iaor_c
|
||||
dier d_c ier_c
|
||||
dingr d_c ingr_c
|
||||
dir d_c ir_c
|
||||
dongr d_c ongr_c
|
||||
dour d_c our_c
|
||||
duanr d_c uanr_c
|
||||
duir d_c ueir_c
|
||||
dunr d_c uenr_c
|
||||
duor d_c uor_c
|
||||
dur d_c ur_c
|
||||
fangr f_c angr_c
|
||||
fanr f_c anr_c
|
||||
far f_c ar_c
|
||||
fengr f_c engr_c
|
||||
fenr f_c enr_c
|
||||
fur f_c ur_c
|
||||
gair g_c air_c
|
||||
ganr g_c anr_c
|
||||
gaor g_c aor_c
|
||||
gengr g_c engr_c
|
||||
genr g_c enr_c
|
||||
ger g_c er_c
|
||||
gongr g_c ongr_c
|
||||
gour g_c our_c
|
||||
guair g_c uair_c
|
||||
guanr g_c uanr_c
|
||||
guar g_c uar_c
|
||||
guir g_c ueir_c
|
||||
gunr g_c uenr_c
|
||||
guor g_c uor_c
|
||||
gur g_c ur_c
|
||||
hair h_c air_c
|
||||
hanr h_c anr_c
|
||||
haor h_c aor_c
|
||||
heir h_c eir_c
|
||||
her h_c er_c
|
||||
hour h_c our_c
|
||||
huanr h_c uanr_c
|
||||
huangr h_c uangr_c
|
||||
huar h_c uar_c
|
||||
huir h_c ueir_c
|
||||
hunr h_c uenr_c
|
||||
huor h_c uor_c
|
||||
hur h_c ur_c
|
||||
jianr j_c ianr_c
|
||||
jiaor j_c iaor_c
|
||||
jiar j_c iar_c
|
||||
jier j_c ier_c
|
||||
jingr j_c ingr_c
|
||||
jinr j_c inr_c
|
||||
jir j_c ir_c
|
||||
jiur j_c iour_c
|
||||
jvanr j_c vanr_c
|
||||
jver j_c ver_c
|
||||
jvnr j_c vnr_c
|
||||
kair k_c air_c
|
||||
kanr k_c anr_c
|
||||
kaor k_c aor_c
|
||||
kengr k_c engr_c
|
||||
ker k_c er_c
|
||||
kongr k_c ongr_c
|
||||
kour k_c our_c
|
||||
kuair k_c uair_c
|
||||
kuangr k_c uangr_c
|
||||
kuanr k_c uanr_c
|
||||
kunr k_c uenr_c
|
||||
lanr l_c anr_c
|
||||
laor l_c aor_c
|
||||
lar l_c ar_c
|
||||
leir l_c eir_c
|
||||
lengr l_c engr_c
|
||||
ler l_c er_c
|
||||
liangr l_c iangr_c
|
||||
lianr l_c ianr_c
|
||||
liaor l_c iaor_c
|
||||
liar l_c iar_c
|
||||
lingr l_c ingr_c
|
||||
lir l_c ir_c
|
||||
liur l_c iour_c
|
||||
lour l_c our_c
|
||||
luor l_c uor_c
|
||||
lunr l_c uenr_c
|
||||
lur l_c ur_c
|
||||
lvr l_c vr_c
|
||||
mair m_c air_c
|
||||
manr m_c anr_c
|
||||
mangr m_c angr_c
|
||||
maor m_c aor_c
|
||||
mar m_c ar_c
|
||||
meir m_c eir_c
|
||||
menr m_c enr_c
|
||||
mianr m_c ianr_c
|
||||
miaor m_c iaor_c
|
||||
mingr m_c ingr_c
|
||||
mir m_c ir_c
|
||||
mor m_c or_c
|
||||
naor n_c aor_c
|
||||
nar n_c ar_c
|
||||
niangr n_c iangr_c
|
||||
nianr n_c ianr_c
|
||||
niaor n_c iaor_c
|
||||
ningr n_c ingr_c
|
||||
nir n_c ir_c
|
||||
niur n_c iour_c
|
||||
nvr n_c vr_c
|
||||
pair p_c air_c
|
||||
pangr p_c angr_c
|
||||
panr p_c anr_c
|
||||
paor p_c aor_c
|
||||
penr p_c enr_c
|
||||
pianr p_c ianr_c
|
||||
piaor p_c iaor_c
|
||||
pier p_c ier_c
|
||||
pingr p_c ingr_c
|
||||
pir p_c ir_c
|
||||
por p_c or_c
|
||||
pur p_c ur_c
|
||||
qianr q_c ianr_c
|
||||
qiaor q_c iaor_c
|
||||
qingr q_c ingr_c
|
||||
qir q_c ir_c
|
||||
qiur q_c iour_c
|
||||
qvanr q_c vanr_c
|
||||
qvnr q_c vnr_c
|
||||
qvr q_c vr_c
|
||||
sar s_c ar_c
|
||||
rangr r_c angr_c
|
||||
renr r_c enr_c
|
||||
sair s_c air_c
|
||||
sanr s_c anr_c
|
||||
shair sh_c air_c
|
||||
shaor sh_c aor_c
|
||||
shengr sh_c engr_c
|
||||
shenr sh_c enr_c
|
||||
shir sh_c ihr_c
|
||||
shuair sh_c uair_c
|
||||
shour sh_c our_c
|
||||
shuar sh_c uar_c
|
||||
shuir sh_c ueir_c
|
||||
shunr sh_c uenr_c
|
||||
shuor sh_c uor_c
|
||||
shur sh_c ur_c
|
||||
sir s_c iir_c
|
||||
suir s_c ueir_c
|
||||
sunr s_c uenr_c
|
||||
tair t_c air_c
|
||||
tangr t_c angr_c
|
||||
tanr t_c anr_c
|
||||
taor t_c aor_c
|
||||
ter t_c er_c
|
||||
tianr t_c ianr_c
|
||||
tiaor t_c iaor_c
|
||||
tir t_c ir_c
|
||||
tingr t_c ingr_c
|
||||
tongr t_c ongr_c
|
||||
tour t_c our_c
|
||||
tuanr t_c uanr_c
|
||||
tuir t_c ueir_c
|
||||
tuor t_c uor_c
|
||||
tur t_c ur_c
|
||||
wanr w_c anr_c
|
||||
war w_c ar_c
|
||||
weir w_c eir_c
|
||||
wenr w_c enr_c
|
||||
wengr w_c engr_c
|
||||
wor w_c or_c
|
||||
wur w_c ur_c
|
||||
xiangr xx_c iangr_c
|
||||
xianr xx_c ianr_c
|
||||
xiar xx_c iar_c
|
||||
xier xx_c ier_c
|
||||
xingr xx_c ingr_c
|
||||
xir xx_c ir_c
|
||||
xinr xx_c inr_c
|
||||
xiongr xx_c iongr_c
|
||||
xiur xx_c iour_c
|
||||
yangr y_c angr_c
|
||||
yanr y_c anr_c
|
||||
yaor y_c aor_c
|
||||
yar y_c ar_c
|
||||
yer y_c er_c
|
||||
yingr y_c ingr_c
|
||||
yinr y_c inr_c
|
||||
yir y_c ir_c
|
||||
your y_c our_c
|
||||
yvanr y_c vanr_c
|
||||
zair z_c air_c
|
||||
yvr y_c vr_c
|
||||
yver y_c ver_c
|
||||
zaor z_c aor_c
|
||||
zar z_c ar_c
|
||||
zhangr zh_c angr_c
|
||||
zhanr zh_c anr_c
|
||||
zhaor zh_c aor_c
|
||||
zhar zh_c ar_c
|
||||
zhenr zh_c enr_c
|
||||
zher zh_c er_c
|
||||
zhir zh_c ihr_c
|
||||
zhongr zh_c ongr_c
|
||||
zhour zh_c our_c
|
||||
zhuar zh_c uar_c
|
||||
zhuanr zh_c uanr_c
|
||||
zhunr zh_c uenr_c
|
||||
zhuor zh_c uor_c
|
||||
zhur zh_c ur_c
|
||||
zir z_c iir_c
|
||||
zuanr z_c uanr_c
|
||||
zuir z_c ueir_c
|
||||
zuor z_c uor_c
|
||||
7
voices/zhizhe_emo/dict/PinYin/tonelist.txt
Normal file
7
voices/zhizhe_emo/dict/PinYin/tonelist.txt
Normal file
@ -0,0 +1,7 @@
|
||||
1
|
||||
|
||||
4
|
||||
2
|
||||
3
|
||||
5
|
||||
0
|
||||
33
voices/zhizhe_emo/dict/emo_category_dict.txt
Executable file
33
voices/zhizhe_emo/dict/emo_category_dict.txt
Executable file
@ -0,0 +1,33 @@
|
||||
emotion_none
|
||||
emotion_neutral
|
||||
emotion_angry
|
||||
emotion_disgust
|
||||
emotion_fear
|
||||
emotion_happy
|
||||
emotion_sad
|
||||
emotion_surprise
|
||||
emotion_calm
|
||||
emotion_gentle
|
||||
emotion_relax
|
||||
emotion_lyrical
|
||||
emotion_serious
|
||||
emotion_disgruntled
|
||||
emotion_satisfied
|
||||
emotion_disappointed
|
||||
emotion_excited
|
||||
emotion_anxiety
|
||||
emotion_jealousy
|
||||
emotion_hate
|
||||
emotion_pity
|
||||
emotion_pleasure
|
||||
emotion_arousal
|
||||
emotion_dominance
|
||||
emotion_placeholder1
|
||||
emotion_placeholder2
|
||||
emotion_placeholder3
|
||||
emotion_placeholder4
|
||||
emotion_placeholder5
|
||||
emotion_placeholder6
|
||||
emotion_placeholder7
|
||||
emotion_placeholder8
|
||||
emotion_placeholder9
|
||||
6
voices/zhizhe_emo/dict/speaker_dict.txt
Executable file
6
voices/zhizhe_emo/dict/speaker_dict.txt
Executable file
@ -0,0 +1,6 @@
|
||||
F7
|
||||
F74
|
||||
FBYN
|
||||
FRXL
|
||||
M7
|
||||
xiaoyu
|
||||
144
voices/zhizhe_emo/dict/sy_dict.txt
Executable file
144
voices/zhizhe_emo/dict/sy_dict.txt
Executable file
@ -0,0 +1,144 @@
|
||||
a_c
|
||||
ai_c
|
||||
an_c
|
||||
ang_c
|
||||
ao_c
|
||||
b_c
|
||||
c_c
|
||||
ch_c
|
||||
d_c
|
||||
e_c
|
||||
ei_c
|
||||
en_c
|
||||
eng_c
|
||||
er_c
|
||||
f_c
|
||||
g_c
|
||||
h_c
|
||||
i_c
|
||||
ia_c
|
||||
ian_c
|
||||
iang_c
|
||||
iao_c
|
||||
ie_c
|
||||
ih_c
|
||||
ii_c
|
||||
in_c
|
||||
ing_c
|
||||
io_c
|
||||
iong_c
|
||||
iou_c
|
||||
j_c
|
||||
k_c
|
||||
l_c
|
||||
m_c
|
||||
n_c
|
||||
o_c
|
||||
ong_c
|
||||
ou_c
|
||||
p_c
|
||||
q_c
|
||||
r_c
|
||||
s_c
|
||||
sh_c
|
||||
t_c
|
||||
u_c
|
||||
ua_c
|
||||
uai_c
|
||||
uan_c
|
||||
uang_c
|
||||
uei_c
|
||||
uen_c
|
||||
ueng_c
|
||||
uo_c
|
||||
v_c
|
||||
van_c
|
||||
ve_c
|
||||
vn_c
|
||||
xx_c
|
||||
z_c
|
||||
zh_c
|
||||
w_c
|
||||
y_c
|
||||
ga
|
||||
ge
|
||||
go
|
||||
aa
|
||||
ae
|
||||
ah
|
||||
ao
|
||||
aw
|
||||
ay
|
||||
b
|
||||
ch
|
||||
d
|
||||
dh
|
||||
eh
|
||||
er
|
||||
ey
|
||||
f
|
||||
g
|
||||
hh
|
||||
ih
|
||||
iy
|
||||
jh
|
||||
k
|
||||
l
|
||||
m
|
||||
n
|
||||
ng
|
||||
ow
|
||||
oy
|
||||
p
|
||||
r
|
||||
s
|
||||
sh
|
||||
t
|
||||
th
|
||||
uh
|
||||
uw
|
||||
v
|
||||
w
|
||||
y
|
||||
z
|
||||
zh
|
||||
air_c
|
||||
angr_c
|
||||
anr_c
|
||||
aor_c
|
||||
ar_c
|
||||
eir_c
|
||||
engr_c
|
||||
enr_c
|
||||
iangr_c
|
||||
ianr_c
|
||||
iaor_c
|
||||
iar_c
|
||||
ier_c
|
||||
ihr_c
|
||||
iir_c
|
||||
ingr_c
|
||||
inr_c
|
||||
iongr_c
|
||||
iour_c
|
||||
ir_c
|
||||
ongr_c
|
||||
or_c
|
||||
our_c
|
||||
uair_c
|
||||
uangr_c
|
||||
uanr_c
|
||||
uar_c
|
||||
ueir_c
|
||||
uenr_c
|
||||
uor_c
|
||||
ur_c
|
||||
vanr_c
|
||||
ver_c
|
||||
vnr_c
|
||||
vr_c
|
||||
pau
|
||||
#1
|
||||
#2
|
||||
#3
|
||||
#4
|
||||
5
voices/zhizhe_emo/dict/syllable_flag_dict.txt
Executable file
5
voices/zhizhe_emo/dict/syllable_flag_dict.txt
Executable file
@ -0,0 +1,5 @@
|
||||
s_begin
|
||||
s_end
|
||||
s_none
|
||||
s_both
|
||||
s_middle
|
||||
7
voices/zhizhe_emo/dict/tone_dict.txt
Executable file
7
voices/zhizhe_emo/dict/tone_dict.txt
Executable file
@ -0,0 +1,7 @@
|
||||
tone1
|
||||
tone_none
|
||||
tone4
|
||||
tone2
|
||||
tone3
|
||||
tone5
|
||||
tone0
|
||||
5
voices/zhizhe_emo/dict/word_segment_dict.txt
Executable file
5
voices/zhizhe_emo/dict/word_segment_dict.txt
Executable file
@ -0,0 +1,5 @@
|
||||
word_begin
|
||||
word_end
|
||||
word_middle
|
||||
word_both
|
||||
word_none
|
||||
BIN
voices/zhizhe_emo/voc/ckpt/checkpoint_0.pth
(Stored with Git LFS)
Normal file
BIN
voices/zhizhe_emo/voc/ckpt/checkpoint_0.pth
(Stored with Git LFS)
Normal file
Binary file not shown.
BIN
voices/zhizhe_emo/voc/ckpt/checkpoint_1.pth
(Stored with Git LFS)
Normal file
BIN
voices/zhizhe_emo/voc/ckpt/checkpoint_1.pth
(Stored with Git LFS)
Normal file
Binary file not shown.
188
voices/zhizhe_emo/voc/config.yaml
Normal file
188
voices/zhizhe_emo/voc/config.yaml
Normal file
@ -0,0 +1,188 @@
|
||||
model_type: hifigan
|
||||
Model:
|
||||
###########################################################
|
||||
# GENERATOR NETWORK ARCHITECTURE SETTING #
|
||||
###########################################################
|
||||
Generator:
|
||||
params:
|
||||
in_channels: 80
|
||||
out_channels: 1
|
||||
channels: 256
|
||||
kernel_size: 7
|
||||
upsample_scales: [10, 5, 2, 2]
|
||||
upsample_kernal_sizes: [20, 11, 4, 4]
|
||||
resblock_kernel_sizes: [3, 7, 11]
|
||||
resblock_dilations:
|
||||
- [1, 3, 5, 7]
|
||||
- [1, 3, 5, 7]
|
||||
- [1, 3, 5, 7]
|
||||
bias: true
|
||||
causal: true
|
||||
nonlinear_activation: "LeakyReLU"
|
||||
nonlinear_activation_params:
|
||||
negative_slope: 0.1
|
||||
use_weight_norm: true
|
||||
optimizer:
|
||||
type: Adam
|
||||
params:
|
||||
lr: 2.0e-4
|
||||
betas: [0.5, 0.9]
|
||||
weight_decay: 0.0
|
||||
scheduler:
|
||||
type: MultiStepLR
|
||||
params:
|
||||
gamma: 0.5
|
||||
milestones:
|
||||
- 200000
|
||||
- 400000
|
||||
- 600000
|
||||
- 800000
|
||||
|
||||
###########################################################
|
||||
# DISCRIMINATOR NETWORK ARCHITECTURE SETTING #
|
||||
###########################################################
|
||||
MultiScaleDiscriminator:
|
||||
params:
|
||||
scales: 3
|
||||
downsample_pooling: "DWT"
|
||||
downsample_pooling_params:
|
||||
kernel_size: 4
|
||||
stride: 2
|
||||
padding: 2
|
||||
discriminator_params:
|
||||
in_channels: 1
|
||||
out_channels: 1
|
||||
kernel_sizes: [15, 41, 5, 3]
|
||||
channels: 128
|
||||
max_downsample_channels: 1024
|
||||
max_groups: 16
|
||||
bias: true
|
||||
downsample_scales: [4, 4, 4, 4, 1]
|
||||
nonlinear_activation: "LeakyReLU"
|
||||
nonlinear_activation_params:
|
||||
negative_slope: 0.1
|
||||
follow_official_norm: true
|
||||
optimizer:
|
||||
type: Adam
|
||||
params:
|
||||
lr: 2.0e-4
|
||||
betas: [0.5, 0.9]
|
||||
weight_decay: 0.0
|
||||
scheduler:
|
||||
type: MultiStepLR
|
||||
params:
|
||||
gamma: 0.5
|
||||
milestones:
|
||||
- 200000
|
||||
- 400000
|
||||
- 600000
|
||||
- 800000
|
||||
|
||||
MultiPeriodDiscriminator:
|
||||
params:
|
||||
periods: [2, 3, 5, 7, 11]
|
||||
discriminator_params:
|
||||
in_channels: 1
|
||||
out_channels: 1
|
||||
kernel_sizes: [5, 3]
|
||||
channels: 32
|
||||
downsample_scales: [3, 3, 3, 3, 1]
|
||||
max_downsample_channels: 1024
|
||||
bias: true
|
||||
nonlinear_activation: "LeakyReLU"
|
||||
nonlinear_activation_params:
|
||||
negative_slope: 0.1
|
||||
use_spectral_norm: false
|
||||
optimizer:
|
||||
type: Adam
|
||||
params:
|
||||
lr: 2.0e-4
|
||||
betas: [0.5, 0.9]
|
||||
weight_decay: 0.0
|
||||
scheduler:
|
||||
type: MultiStepLR
|
||||
params:
|
||||
gamma: 0.5
|
||||
milestones:
|
||||
- 200000
|
||||
- 400000
|
||||
- 600000
|
||||
- 800000
|
||||
|
||||
####################################################
|
||||
# LOSS SETTING #
|
||||
####################################################
|
||||
Loss:
|
||||
generator_adv_loss:
|
||||
enable: True
|
||||
params:
|
||||
average_by_discriminators: False
|
||||
weights: 1.0
|
||||
|
||||
discriminator_adv_loss:
|
||||
enable: True
|
||||
params:
|
||||
average_by_discriminators: False
|
||||
weights: 1.0
|
||||
|
||||
stft_loss:
|
||||
enable: False # Whether to use multi-resolution STFT loss.
|
||||
|
||||
mel_loss:
|
||||
enable: True
|
||||
params:
|
||||
fs: 16000
|
||||
fft_size: 2048
|
||||
hop_size: 200
|
||||
win_length: 1000
|
||||
window: "hann"
|
||||
num_mels: 80
|
||||
fmin: 0
|
||||
fmax: 8000
|
||||
log_base: null
|
||||
weights: 45.0
|
||||
|
||||
subband_stft_loss:
|
||||
enable: False
|
||||
params:
|
||||
fft_sizes: [384, 683, 171] # List of FFT size for STFT-based loss.
|
||||
hop_sizes: [35, 75, 15] # List of hop size for STFT-based loss
|
||||
win_lengths: [150, 300, 60] # List of window length for STFT-based loss.
|
||||
window: "hann_window" # Window function for STFT-based loss
|
||||
|
||||
feat_match_loss:
|
||||
enable: True
|
||||
params:
|
||||
average_by_discriminators: false
|
||||
average_by_layers: false
|
||||
weights: 2.0
|
||||
|
||||
|
||||
###########################################################
|
||||
# DATA LOADER SETTING #
|
||||
###########################################################
|
||||
batch_size: 16
|
||||
batch_max_steps: 9600 # Length of each audio in batch. Make sure it is divisible by hop_size.
|
||||
pin_memory: True
|
||||
num_workers: 2 # FIXME: setting > 0 may hang on macOS
|
||||
remove_short_samples: False
|
||||
allow_cache: True
|
||||
|
||||
generator_grad_norm: -1
|
||||
|
||||
discriminator_grad_norm: -1
|
||||
|
||||
###########################################################
|
||||
# INTERVAL SETTING #
|
||||
###########################################################
|
||||
generator_train_start_steps: 1 # Number of steps to start to train generator.
|
||||
discriminator_train_start_steps: 0 # Number of steps to start to train discriminator.
|
||||
train_max_steps: 2500000 # Number of training steps.
|
||||
save_interval_steps: 20000 # Interval steps to save checkpoint.
|
||||
eval_interval_steps: 10000 # Interval steps to evaluate the network.
|
||||
log_interval_steps: 1000 # Interval steps to record the training log.
|
||||
|
||||
###########################################################
|
||||
# OTHER SETTING #
|
||||
###########################################################
|
||||
num_save_intermediate_results: 4 # Number of results to be saved as intermediate results.
|
||||
BIN
voices/zhizhe_emo/vocoder/pytorch_model.bin
(Stored with Git LFS)
Normal file
BIN
voices/zhizhe_emo/vocoder/pytorch_model.bin
(Stored with Git LFS)
Normal file
Binary file not shown.
Reference in New Issue
Block a user