Merge branch 'finetune'

This commit is contained in:
jiaqi.sjq
2023-01-16 17:45:14 +08:00
77 changed files with 10468 additions and 0 deletions

8
voices/voices.json Normal file
View File

@ -0,0 +1,8 @@
{
"voices": [
"zhitian_emo",
"zhibei_emo",
"zhizhe_emo",
"zhiyan_emo"
]
}

BIN
voices/zhibei_emo/am/ckpt/checkpoint_0.pth (Stored with Git LFS) Normal file

Binary file not shown.

View File

@ -0,0 +1,105 @@
model_type: sambert
Model:
#########################################################
# SAMBERT NETWORK ARCHITECTURE SETTING #
#########################################################
KanTtsSAMBERT:
params:
max_len: 800
embedding_dim: 512
encoder_num_layers: 8
encoder_num_heads: 8
encoder_num_units: 128
encoder_ffn_inner_dim: 1024
encoder_dropout: 0.1
encoder_attention_dropout: 0.1
encoder_relu_dropout: 0.1
encoder_projection_units: 32
speaker_units: 32
emotion_units: 32
predictor_filter_size: 41
predictor_fsmn_num_layers: 3
predictor_num_memory_units: 128
predictor_ffn_inner_dim: 256
predictor_dropout: 0.1
predictor_shift: 0
predictor_lstm_units: 128
dur_pred_prenet_units: [128, 128]
dur_pred_lstm_units: 128
decoder_prenet_units: [256, 256]
decoder_num_layers: 12
decoder_num_heads: 8
decoder_num_units: 128
decoder_ffn_inner_dim: 1024
decoder_dropout: 0.1
decoder_attention_dropout: 0.1
decoder_relu_dropout: 0.1
outputs_per_step: 3
num_mels: 80
postnet_filter_size: 41
postnet_fsmn_num_layers: 4
postnet_num_memory_units: 256
postnet_ffn_inner_dim: 512
postnet_dropout: 0.1
postnet_shift: 17
postnet_lstm_units: 128
MAS: False
optimizer:
type: Adam
params:
lr: 0.001
betas: [0.9, 0.98]
eps: 1.0e-9
weight_decay: 0.0
scheduler:
type: NoamLR
params:
warmup_steps: 4000
linguistic_unit:
cleaners: english_cleaners
lfeat_type_list: sy,tone,syllable_flag,word_segment,emo_category,speaker_category
speaker_list: F7,F74,FBYN,FRXL,M7,xiaoyu
####################################################
# LOSS SETTING #
####################################################
Loss:
MelReconLoss:
enable: True
params:
loss_type: mae
ProsodyReconLoss:
enable: True
params:
loss_type: mae
###########################################################
# DATA LOADER SETTING #
###########################################################
batch_size: 32
pin_memory: False
num_workers: 4 # FIXME: setting this > 0 may get stuck on macOS
remove_short_samples: False
allow_cache: True
grad_norm: 1.0
###########################################################
# INTERVAL SETTING #
###########################################################
train_max_steps: 1000000 # Number of training steps.
save_interval_steps: 20000 # Interval steps to save checkpoint.
eval_interval_steps: 10000 # Interval steps to evaluate the network.
log_interval_steps: 1000 # Interval steps to record the training log.
###########################################################
# OTHER SETTING #
###########################################################
num_save_intermediate_results: 4 # Number of results to be saved as intermediate results.

BIN
voices/zhibei_emo/am/pytorch_model.bin (Stored with Git LFS) Normal file

Binary file not shown.

View File

@ -0,0 +1,27 @@
# Audio processing configs
audio_config:
# Preprocess
wav_normalize: True
trim_silence: True
trim_silence_threshold_db: 60
preemphasize: False
# Feature extraction
sampling_rate: 16000
hop_length: 200
win_length: 1000
n_fft: 2048
n_mels: 80
fmin: 0.0
fmax: 8000.0
phone_level_feature: True
# Normalization
norm_type: "mean_std" # "mean_std" or "global"
max_norm: 1.0
symmetric: False
min_level_db: -100.0
ref_level_db: 20
num_workers: 16

View File

@ -0,0 +1,2 @@
wu w
yi y

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,147 @@
<?xml version="1.0" encoding="utf-8"?>
<posSet xmlns="http://schemas.alibaba-inc.com/tts">
<pos>
<id>1</id>
<name>a</name>
<desc>todo</desc>
</pos>
<pos>
<id>2</id>
<name>b</name>
<desc>todo</desc>
</pos>
<pos>
<id>3</id>
<name>c</name>
<desc>todo</desc>
</pos>
<pos>
<id>4</id>
<name>d</name>
<desc>todo</desc>
</pos>
<pos>
<id>5</id>
<name>e</name>
<desc>todo</desc>
</pos>
<pos>
<id>6</id>
<name>f</name>
<desc>todo</desc>
</pos>
<pos>
<id>7</id>
<name>g</name>
<desc>todo</desc>
<sub>
<pos>
<id>8</id>
<name>gb</name>
<desc>todo</desc>
</pos>
</sub>
</pos>
<pos>
<id>9</id>
<name>h</name>
<desc>todo</desc>
</pos>
<pos>
<id>10</id>
<name>i</name>
<desc>todo</desc>
</pos>
<pos>
<id>11</id>
<name>j</name>
<desc>todo</desc>
</pos>
<pos>
<id>12</id>
<name>k</name>
<desc>todo</desc>
</pos>
<pos>
<id>13</id>
<name>l</name>
<desc>todo</desc>
</pos>
<pos>
<id>14</id>
<name>m</name>
<desc>todo</desc>
</pos>
<pos>
<id>15</id>
<name>n</name>
<desc>todo</desc>
<sub>
<pos>
<id>16</id>
<name>nz</name>
<desc>todo</desc>
</pos>
</sub>
</pos>
<pos>
<id>17</id>
<name>o</name>
<desc>todo</desc>
</pos>
<pos>
<id>18</id>
<name>p</name>
<desc>todo</desc>
</pos>
<pos>
<id>19</id>
<name>q</name>
<desc>todo</desc>
</pos>
<pos>
<id>20</id>
<name>r</name>
<desc>todo</desc>
</pos>
<pos>
<id>21</id>
<name>s</name>
<desc>todo</desc>
</pos>
<pos>
<id>22</id>
<name>t</name>
<desc>todo</desc>
</pos>
<pos>
<id>23</id>
<name>u</name>
<desc>todo</desc>
</pos>
<pos>
<id>24</id>
<name>v</name>
<desc>todo</desc>
</pos>
<pos>
<id>25</id>
<name>w</name>
<desc>todo</desc>
</pos>
<pos>
<id>26</id>
<name>x</name>
<desc>todo</desc>
</pos>
<pos>
<id>27</id>
<name>y</name>
<desc>todo</desc>
</pos>
<pos>
<id>28</id>
<name>z</name>
<desc>todo</desc>
</pos>
</posSet>

View File

@ -0,0 +1,661 @@
a ga a_c
ai ga ai_c
an ga an_c
ang ga ang_c
ao ga ao_c
ba b_c a_c
bai b_c ai_c
ban b_c an_c
bang b_c ang_c
bao b_c ao_c
bei b_c ei_c
ben b_c en_c
beng b_c eng_c
bi b_c i_c
bian b_c ian_c
biao b_c iao_c
bie b_c ie_c
bin b_c in_c
bing b_c ing_c
bo b_c o_c
bu b_c u_c
ca c_c a_c
cai c_c ai_c
can c_c an_c
cang c_c ang_c
cao c_c ao_c
ce c_c e_c
cen c_c en_c
ceng c_c eng_c
cha ch_c a_c
chai ch_c ai_c
chan ch_c an_c
chang ch_c ang_c
chao ch_c ao_c
che ch_c e_c
chen ch_c en_c
cheng ch_c eng_c
chi ch_c ih_c
chong ch_c ong_c
chou ch_c ou_c
chu ch_c u_c
chua ch_c ua_c
chuai ch_c uai_c
chuan ch_c uan_c
chuang ch_c uang_c
chui ch_c uei_c
chun ch_c uen_c
chuo ch_c uo_c
ci c_c ii_c
cong c_c ong_c
cou c_c ou_c
cu c_c u_c
cuan c_c uan_c
cui c_c uei_c
cun c_c uen_c
cuo c_c uo_c
da d_c a_c
dai d_c ai_c
dan d_c an_c
dang d_c ang_c
dao d_c ao_c
de d_c e_c
dei d_c ei_c
den d_c en_c
deng d_c eng_c
di d_c i_c
dia d_c ia_c
dian d_c ian_c
diao d_c iao_c
die d_c ie_c
ding d_c ing_c
diu d_c iou_c
dong d_c ong_c
dou d_c ou_c
du d_c u_c
duan d_c uan_c
dui d_c uei_c
dun d_c uen_c
duo d_c uo_c
e ge e_c
ei ge ei_c
en ge en_c
eng ge eng_c
er ge er_c
fa f_c a_c
fan f_c an_c
fang f_c ang_c
fei f_c ei_c
fen f_c en_c
feng f_c eng_c
fo f_c o_c
fou f_c ou_c
fu f_c u_c
ga g_c a_c
gai g_c ai_c
gan g_c an_c
gang g_c ang_c
gao g_c ao_c
ge g_c e_c
gei g_c ei_c
gen g_c en_c
geng g_c eng_c
gong g_c ong_c
gou g_c ou_c
gu g_c u_c
gua g_c ua_c
guai g_c uai_c
guan g_c uan_c
guang g_c uang_c
gui g_c uei_c
gun g_c uen_c
guo g_c uo_c
ha h_c a_c
hai h_c ai_c
han h_c an_c
hang h_c ang_c
hao h_c ao_c
he h_c e_c
hei h_c ei_c
hen h_c en_c
heng h_c eng_c
hong h_c ong_c
hou h_c ou_c
hu h_c u_c
hua h_c ua_c
huai h_c uai_c
huan h_c uan_c
huang h_c uang_c
hui h_c uei_c
hun h_c uen_c
huo h_c uo_c
ji j_c i_c
jia j_c ia_c
jian j_c ian_c
jiang j_c iang_c
jiao j_c iao_c
jie j_c ie_c
jin j_c in_c
jing j_c ing_c
jiong j_c iong_c
jiu j_c iou_c
jv j_c v_c
jvan j_c van_c
jve j_c ve_c
jvn j_c vn_c
ka k_c a_c
kai k_c ai_c
kan k_c an_c
kang k_c ang_c
kao k_c ao_c
ke k_c e_c
kei k_c ei_c
ken k_c en_c
keng k_c eng_c
kong k_c ong_c
kou k_c ou_c
ku k_c u_c
kua k_c ua_c
kuai k_c uai_c
kuan k_c uan_c
kuang k_c uang_c
kui k_c uei_c
kun k_c uen_c
kuo k_c uo_c
la l_c a_c
lai l_c ai_c
lan l_c an_c
lang l_c ang_c
lao l_c ao_c
le l_c e_c
lei l_c ei_c
leng l_c eng_c
li l_c i_c
lia l_c ia_c
lian l_c ian_c
liang l_c iang_c
liao l_c iao_c
lie l_c ie_c
lin l_c in_c
ling l_c ing_c
liu l_c iou_c
lo l_c o_c
long l_c ong_c
lou l_c ou_c
lu l_c u_c
luan l_c uan_c
lun l_c uen_c
luo l_c uo_c
lv l_c v_c
lve l_c ve_c
ma m_c a_c
mai m_c ai_c
man m_c an_c
mang m_c ang_c
mao m_c ao_c
me m_c e_c
mei m_c ei_c
men m_c en_c
meng m_c eng_c
mi m_c i_c
mian m_c ian_c
miao m_c iao_c
mie m_c ie_c
min m_c in_c
ming m_c ing_c
miu m_c iou_c
mo m_c o_c
mou m_c ou_c
mu m_c u_c
na n_c a_c
nai n_c ai_c
nan n_c an_c
nang n_c ang_c
nao n_c ao_c
ne n_c e_c
nei n_c ei_c
nen n_c en_c
neng n_c eng_c
ni n_c i_c
nian n_c ian_c
niang n_c iang_c
niao n_c iao_c
nie n_c ie_c
nin n_c in_c
ning n_c ing_c
niu n_c iou_c
nong n_c ong_c
nou n_c ou_c
nu n_c u_c
nuan n_c uan_c
nun n_c uen_c
nuo n_c uo_c
nv n_c v_c
nve n_c ve_c
o go o_c
ou go ou_c
pa p_c a_c
pai p_c ai_c
pan p_c an_c
pang p_c ang_c
pao p_c ao_c
pei p_c ei_c
pen p_c en_c
peng p_c eng_c
pi p_c i_c
pian p_c ian_c
piao p_c iao_c
pie p_c ie_c
pin p_c in_c
ping p_c ing_c
po p_c o_c
pou p_c ou_c
pu p_c u_c
qi q_c i_c
qia q_c ia_c
qian q_c ian_c
qiang q_c iang_c
qiao q_c iao_c
qie q_c ie_c
qin q_c in_c
qing q_c ing_c
qiong q_c iong_c
qiu q_c iou_c
qv q_c v_c
qvan q_c van_c
qve q_c ve_c
qvn q_c vn_c
ran r_c an_c
rang r_c ang_c
rao r_c ao_c
re r_c e_c
ren r_c en_c
reng r_c eng_c
ri r_c ih_c
rong r_c ong_c
rou r_c ou_c
ru r_c u_c
ruan r_c uan_c
rui r_c uei_c
run r_c uen_c
ruo r_c uo_c
sa s_c a_c
sai s_c ai_c
san s_c an_c
sang s_c ang_c
sao s_c ao_c
se s_c e_c
sen s_c en_c
seng s_c eng_c
sha sh_c a_c
shai sh_c ai_c
shan sh_c an_c
shang sh_c ang_c
shao sh_c ao_c
she sh_c e_c
shei sh_c ei_c
shen sh_c en_c
sheng sh_c eng_c
shi sh_c ih_c
shou sh_c ou_c
shu sh_c u_c
shua sh_c ua_c
shuai sh_c uai_c
shuan sh_c uan_c
shuang sh_c uang_c
shui sh_c uei_c
shun sh_c uen_c
shuo sh_c uo_c
si s_c ii_c
song s_c ong_c
sou s_c ou_c
su s_c u_c
suan s_c uan_c
sui s_c uei_c
sun s_c uen_c
suo s_c uo_c
ta t_c a_c
tai t_c ai_c
tan t_c an_c
tang t_c ang_c
tao t_c ao_c
te t_c e_c
tei t_c ei_c
teng t_c eng_c
ti t_c i_c
tian t_c ian_c
tiao t_c iao_c
tie t_c ie_c
ting t_c ing_c
tong t_c ong_c
tou t_c ou_c
tu t_c u_c
tuan t_c uan_c
tui t_c uei_c
tun t_c uen_c
tuo t_c uo_c
wa w_c a_c
wai w_c ai_c
wan w_c an_c
wang w_c ang_c
wei w_c ei_c
wen w_c en_c
weng w_c eng_c
wo w_c o_c
wu w_c u_c
xi xx_c i_c
xia xx_c ia_c
xian xx_c ian_c
xiang xx_c iang_c
xiao xx_c iao_c
xie xx_c ie_c
xin xx_c in_c
xing xx_c ing_c
xiong xx_c iong_c
xiu xx_c iou_c
xv xx_c v_c
xvan xx_c van_c
xve xx_c ve_c
xvn xx_c vn_c
ya y_c a_c
yan y_c an_c
yang y_c ang_c
yao y_c ao_c
ye y_c e_c
yi y_c i_c
yin y_c in_c
ying y_c ing_c
yo y_c o_c
yong y_c ong_c
you y_c ou_c
yv y_c v_c
yvan y_c van_c
yve y_c ve_c
yvn y_c vn_c
za z_c a_c
zai z_c ai_c
zan z_c an_c
zang z_c ang_c
zao z_c ao_c
ze z_c e_c
zei z_c ei_c
zen z_c en_c
zeng z_c eng_c
zha zh_c a_c
zhai zh_c ai_c
zhan zh_c an_c
zhang zh_c ang_c
zhao zh_c ao_c
zhe zh_c e_c
zhei zh_c ei_c
zhen zh_c en_c
zheng zh_c eng_c
zhi zh_c ih_c
zhong zh_c ong_c
zhou zh_c ou_c
zhu zh_c u_c
zhua zh_c ua_c
zhuai zh_c uai_c
zhuan zh_c uan_c
zhuang zh_c uang_c
zhui zh_c uei_c
zhun zh_c uen_c
zhuo zh_c uo_c
zi z_c ii_c
zong z_c ong_c
zou z_c ou_c
zu z_c u_c
zuan z_c uan_c
zui z_c uei_c
zun z_c uen_c
zuo z_c uo_c
bangr b_c angr_c
banr b_c anr_c
baor b_c aor_c
bar b_c ar_c
beir b_c eir_c
bengr b_c engr_c
benr b_c enr_c
bianr b_c ianr_c
biaor b_c iaor_c
bingr b_c ingr_c
bir b_c ir_c
bor b_c or_c
bur b_c ur_c
caor c_c aor_c
car c_c ar_c
changr ch_c angr_c
chaor ch_c aor_c
char ch_c ar_c
chengr ch_c engr_c
cher ch_c er_c
chir ch_c ihr_c
chongr ch_c ongr_c
chour ch_c our_c
chuangr ch_c uangr_c
chuanr ch_c uanr_c
chuir ch_c ueir_c
chunr ch_c uenr_c
chuor ch_c uor_c
chur ch_c ur_c
cir c_c iir_c
congr c_c ongr_c
cuir c_c ueir_c
cunr c_c uenr_c
cuor c_c uor_c
dair d_c air_c
danr d_c anr_c
dangr d_c angr_c
daor d_c aor_c
dengr d_c engr_c
dianr d_c ianr_c
diaor d_c iaor_c
dier d_c ier_c
dingr d_c ingr_c
dir d_c ir_c
dongr d_c ongr_c
dour d_c our_c
duanr d_c uanr_c
duir d_c ueir_c
dunr d_c uenr_c
duor d_c uor_c
dur d_c ur_c
fangr f_c angr_c
fanr f_c anr_c
far f_c ar_c
fengr f_c engr_c
fenr f_c enr_c
fur f_c ur_c
gair g_c air_c
ganr g_c anr_c
gaor g_c aor_c
gengr g_c engr_c
genr g_c enr_c
ger g_c er_c
gongr g_c ongr_c
gour g_c our_c
guair g_c uair_c
guanr g_c uanr_c
guar g_c uar_c
guir g_c ueir_c
gunr g_c uenr_c
guor g_c uor_c
gur g_c ur_c
hair h_c air_c
hanr h_c anr_c
haor h_c aor_c
heir h_c eir_c
her h_c er_c
hour h_c our_c
huanr h_c uanr_c
huangr h_c uangr_c
huar h_c uar_c
huir h_c ueir_c
hunr h_c uenr_c
huor h_c uor_c
hur h_c ur_c
jianr j_c ianr_c
jiaor j_c iaor_c
jiar j_c iar_c
jier j_c ier_c
jingr j_c ingr_c
jinr j_c inr_c
jir j_c ir_c
jiur j_c iour_c
jvanr j_c vanr_c
jver j_c ver_c
jvnr j_c vnr_c
kair k_c air_c
kanr k_c anr_c
kaor k_c aor_c
kengr k_c engr_c
ker k_c er_c
kongr k_c ongr_c
kour k_c our_c
kuair k_c uair_c
kuangr k_c uangr_c
kuanr k_c uanr_c
kunr k_c uenr_c
lanr l_c anr_c
laor l_c aor_c
lar l_c ar_c
leir l_c eir_c
lengr l_c engr_c
ler l_c er_c
liangr l_c iangr_c
lianr l_c ianr_c
liaor l_c iaor_c
liar l_c iar_c
lingr l_c ingr_c
lir l_c ir_c
liur l_c iour_c
lour l_c our_c
luor l_c uor_c
lunr l_c uenr_c
lur l_c ur_c
lvr l_c vr_c
mair m_c air_c
manr m_c anr_c
mangr m_c angr_c
maor m_c aor_c
mar m_c ar_c
meir m_c eir_c
menr m_c enr_c
mianr m_c ianr_c
miaor m_c iaor_c
mingr m_c ingr_c
mir m_c ir_c
mor m_c or_c
naor n_c aor_c
nar n_c ar_c
niangr n_c iangr_c
nianr n_c ianr_c
niaor n_c iaor_c
ningr n_c ingr_c
nir n_c ir_c
niur n_c iour_c
nvr n_c vr_c
pair p_c air_c
pangr p_c angr_c
panr p_c anr_c
paor p_c aor_c
penr p_c enr_c
pianr p_c ianr_c
piaor p_c iaor_c
pier p_c ier_c
pingr p_c ingr_c
pir p_c ir_c
por p_c or_c
pur p_c ur_c
qianr q_c ianr_c
qiaor q_c iaor_c
qingr q_c ingr_c
qir q_c ir_c
qiur q_c iour_c
qvanr q_c vanr_c
qvnr q_c vnr_c
qvr q_c vr_c
sar s_c ar_c
rangr r_c angr_c
renr r_c enr_c
sair s_c air_c
sanr s_c anr_c
shair sh_c air_c
shaor sh_c aor_c
shengr sh_c engr_c
shenr sh_c enr_c
shir sh_c ihr_c
shuair sh_c uair_c
shour sh_c our_c
shuar sh_c uar_c
shuir sh_c ueir_c
shunr sh_c uenr_c
shuor sh_c uor_c
shur sh_c ur_c
sir s_c iir_c
suir s_c ueir_c
sunr s_c uenr_c
tair t_c air_c
tangr t_c angr_c
tanr t_c anr_c
taor t_c aor_c
ter t_c er_c
tianr t_c ianr_c
tiaor t_c iaor_c
tir t_c ir_c
tingr t_c ingr_c
tongr t_c ongr_c
tour t_c our_c
tuanr t_c uanr_c
tuir t_c ueir_c
tuor t_c uor_c
tur t_c ur_c
wanr w_c anr_c
war w_c ar_c
weir w_c eir_c
wenr w_c enr_c
wengr w_c engr_c
wor w_c or_c
wur w_c ur_c
xiangr xx_c iangr_c
xianr xx_c ianr_c
xiar xx_c iar_c
xier xx_c ier_c
xingr xx_c ingr_c
xir xx_c ir_c
xinr xx_c inr_c
xiongr xx_c iongr_c
xiur xx_c iour_c
yangr y_c angr_c
yanr y_c anr_c
yaor y_c aor_c
yar y_c ar_c
yer y_c er_c
yingr y_c ingr_c
yinr y_c inr_c
yir y_c ir_c
your y_c our_c
yvanr y_c vanr_c
zair z_c air_c
yvr y_c vr_c
yver y_c ver_c
zaor z_c aor_c
zar z_c ar_c
zhangr zh_c angr_c
zhanr zh_c anr_c
zhaor zh_c aor_c
zhar zh_c ar_c
zhenr zh_c enr_c
zher zh_c er_c
zhir zh_c ihr_c
zhongr zh_c ongr_c
zhour zh_c our_c
zhuar zh_c uar_c
zhuanr zh_c uanr_c
zhunr zh_c uenr_c
zhuor zh_c uor_c
zhur zh_c ur_c
zir z_c iir_c
zuanr z_c uanr_c
zuir z_c ueir_c
zuor z_c uor_c

View File

@ -0,0 +1,7 @@
1
4
2
3
5
0

View File

@ -0,0 +1,33 @@
emotion_none
emotion_neutral
emotion_angry
emotion_disgust
emotion_fear
emotion_happy
emotion_sad
emotion_surprise
emotion_calm
emotion_gentle
emotion_relax
emotion_lyrical
emotion_serious
emotion_disgruntled
emotion_satisfied
emotion_disappointed
emotion_excited
emotion_anxiety
emotion_jealousy
emotion_hate
emotion_pity
emotion_pleasure
emotion_arousal
emotion_dominance
emotion_placeholder1
emotion_placeholder2
emotion_placeholder3
emotion_placeholder4
emotion_placeholder5
emotion_placeholder6
emotion_placeholder7
emotion_placeholder8
emotion_placeholder9

View File

@ -0,0 +1,6 @@
F7
F74
FBYN
FRXL
M7
xiaoyu

View File

@ -0,0 +1,144 @@
a_c
ai_c
an_c
ang_c
ao_c
b_c
c_c
ch_c
d_c
e_c
ei_c
en_c
eng_c
er_c
f_c
g_c
h_c
i_c
ia_c
ian_c
iang_c
iao_c
ie_c
ih_c
ii_c
in_c
ing_c
io_c
iong_c
iou_c
j_c
k_c
l_c
m_c
n_c
o_c
ong_c
ou_c
p_c
q_c
r_c
s_c
sh_c
t_c
u_c
ua_c
uai_c
uan_c
uang_c
uei_c
uen_c
ueng_c
uo_c
v_c
van_c
ve_c
vn_c
xx_c
z_c
zh_c
w_c
y_c
ga
ge
go
aa
ae
ah
ao
aw
ay
b
ch
d
dh
eh
er
ey
f
g
hh
ih
iy
jh
k
l
m
n
ng
ow
oy
p
r
s
sh
t
th
uh
uw
v
w
y
z
zh
air_c
angr_c
anr_c
aor_c
ar_c
eir_c
engr_c
enr_c
iangr_c
ianr_c
iaor_c
iar_c
ier_c
ihr_c
iir_c
ingr_c
inr_c
iongr_c
iour_c
ir_c
ongr_c
or_c
our_c
uair_c
uangr_c
uanr_c
uar_c
ueir_c
uenr_c
uor_c
ur_c
vanr_c
ver_c
vnr_c
vr_c
pau
#1
#2
#3
#4

View File

@ -0,0 +1,5 @@
s_begin
s_end
s_none
s_both
s_middle

View File

@ -0,0 +1,7 @@
tone1
tone_none
tone4
tone2
tone3
tone5
tone0

View File

@ -0,0 +1,5 @@
word_begin
word_end
word_middle
word_both
word_none

BIN
voices/zhibei_emo/voc/ckpt/checkpoint_0.pth (Stored with Git LFS) Normal file

Binary file not shown.

BIN
voices/zhibei_emo/voc/ckpt/checkpoint_1.pth (Stored with Git LFS) Normal file

Binary file not shown.

View File

@ -0,0 +1,188 @@
model_type: hifigan
Model:
###########################################################
# GENERATOR NETWORK ARCHITECTURE SETTING #
###########################################################
Generator:
params:
in_channels: 80
out_channels: 1
channels: 256
kernel_size: 7
upsample_scales: [10, 5, 2, 2]
upsample_kernal_sizes: [20, 11, 4, 4] # NOTE(review): "kernal" misspelling is likely the key the consumer expects — confirm before renaming
resblock_kernel_sizes: [3, 7, 11]
resblock_dilations:
- [1, 3, 5, 7]
- [1, 3, 5, 7]
- [1, 3, 5, 7]
bias: true
causal: true
nonlinear_activation: "LeakyReLU"
nonlinear_activation_params:
negative_slope: 0.1
use_weight_norm: true
optimizer:
type: Adam
params:
lr: 2.0e-4
betas: [0.5, 0.9]
weight_decay: 0.0
scheduler:
type: MultiStepLR
params:
gamma: 0.5
milestones:
- 200000
- 400000
- 600000
- 800000
###########################################################
# DISCRIMINATOR NETWORK ARCHITECTURE SETTING #
###########################################################
MultiScaleDiscriminator:
params:
scales: 3
downsample_pooling: "DWT"
downsample_pooling_params:
kernel_size: 4
stride: 2
padding: 2
discriminator_params:
in_channels: 1
out_channels: 1
kernel_sizes: [15, 41, 5, 3]
channels: 128
max_downsample_channels: 1024
max_groups: 16
bias: true
downsample_scales: [4, 4, 4, 4, 1]
nonlinear_activation: "LeakyReLU"
nonlinear_activation_params:
negative_slope: 0.1
follow_official_norm: true
optimizer:
type: Adam
params:
lr: 2.0e-4
betas: [0.5, 0.9]
weight_decay: 0.0
scheduler:
type: MultiStepLR
params:
gamma: 0.5
milestones:
- 200000
- 400000
- 600000
- 800000
MultiPeriodDiscriminator:
params:
periods: [2, 3, 5, 7, 11]
discriminator_params:
in_channels: 1
out_channels: 1
kernel_sizes: [5, 3]
channels: 32
downsample_scales: [3, 3, 3, 3, 1]
max_downsample_channels: 1024
bias: true
nonlinear_activation: "LeakyReLU"
nonlinear_activation_params:
negative_slope: 0.1
use_spectral_norm: false
optimizer:
type: Adam
params:
lr: 2.0e-4
betas: [0.5, 0.9]
weight_decay: 0.0
scheduler:
type: MultiStepLR
params:
gamma: 0.5
milestones:
- 200000
- 400000
- 600000
- 800000
####################################################
# LOSS SETTING #
####################################################
Loss:
generator_adv_loss:
enable: True
params:
average_by_discriminators: False
weights: 1.0
discriminator_adv_loss:
enable: True
params:
average_by_discriminators: False
weights: 1.0
stft_loss:
enable: False # Whether to use multi-resolution STFT loss.
mel_loss:
enable: True
params:
fs: 16000
fft_size: 2048
hop_size: 200
win_length: 1000
window: "hann"
num_mels: 80
fmin: 0
fmax: 8000
log_base: null
weights: 45.0
subband_stft_loss:
enable: False
params:
fft_sizes: [384, 683, 171] # List of FFT size for STFT-based loss.
hop_sizes: [35, 75, 15] # List of hop size for STFT-based loss
win_lengths: [150, 300, 60] # List of window length for STFT-based loss.
window: "hann_window" # Window function for STFT-based loss
feat_match_loss:
enable: True
params:
average_by_discriminators: false
average_by_layers: false
weights: 2.0
###########################################################
# DATA LOADER SETTING #
###########################################################
batch_size: 16
batch_max_steps: 9600 # Length of each audio in batch. Make sure dividable by hop_size.
pin_memory: True
num_workers: 2 # FIXME: set > 0 may stuck on macos
remove_short_samples: False
allow_cache: True
generator_grad_norm: -1
discriminator_grad_norm: -1
###########################################################
# INTERVAL SETTING #
###########################################################
generator_train_start_steps: 1 # Number of steps to start to train generator.
discriminator_train_start_steps: 0 # Number of steps to start to train discriminator.
train_max_steps: 2500000 # Number of training steps.
save_interval_steps: 20000 # Interval steps to save checkpoint.
eval_interval_steps: 10000 # Interval steps to evaluate the network.
log_interval_steps: 1000 # Interval steps to record the training log.
###########################################################
# OTHER SETTING #
###########################################################
num_save_intermediate_results: 4 # Number of results to be saved as intermediate results.

BIN
voices/zhibei_emo/vocoder/pytorch_model.bin (Stored with Git LFS) Normal file

Binary file not shown.

BIN
voices/zhitian_emo/am/ckpt/checkpoint_0.pth (Stored with Git LFS) Normal file

Binary file not shown.

View File

@ -0,0 +1,105 @@
model_type: sambert
Model:
#########################################################
# SAMBERT NETWORK ARCHITECTURE SETTING #
#########################################################
KanTtsSAMBERT:
params:
max_len: 800
embedding_dim: 512
encoder_num_layers: 8
encoder_num_heads: 8
encoder_num_units: 128
encoder_ffn_inner_dim: 1024
encoder_dropout: 0.1
encoder_attention_dropout: 0.1
encoder_relu_dropout: 0.1
encoder_projection_units: 32
speaker_units: 32
emotion_units: 32
predictor_filter_size: 41
predictor_fsmn_num_layers: 3
predictor_num_memory_units: 128
predictor_ffn_inner_dim: 256
predictor_dropout: 0.1
predictor_shift: 0
predictor_lstm_units: 128
dur_pred_prenet_units: [128, 128]
dur_pred_lstm_units: 128
decoder_prenet_units: [256, 256]
decoder_num_layers: 12
decoder_num_heads: 8
decoder_num_units: 128
decoder_ffn_inner_dim: 1024
decoder_dropout: 0.1
decoder_attention_dropout: 0.1
decoder_relu_dropout: 0.1
outputs_per_step: 3
num_mels: 80
postnet_filter_size: 41
postnet_fsmn_num_layers: 4
postnet_num_memory_units: 256
postnet_ffn_inner_dim: 512
postnet_dropout: 0.1
postnet_shift: 17
postnet_lstm_units: 128
MAS: False
optimizer:
type: Adam
params:
lr: 0.001
betas: [0.9, 0.98]
eps: 1.0e-9
weight_decay: 0.0
scheduler:
type: NoamLR
params:
warmup_steps: 4000
linguistic_unit:
cleaners: english_cleaners
lfeat_type_list: sy,tone,syllable_flag,word_segment,emo_category,speaker_category
speaker_list: F7,F74,FBYN,FRXL,M7,xiaoyu
####################################################
# LOSS SETTING #
####################################################
Loss:
MelReconLoss:
enable: True
params:
loss_type: mae
ProsodyReconLoss:
enable: True
params:
loss_type: mae
###########################################################
# DATA LOADER SETTING #
###########################################################
batch_size: 32
pin_memory: False
num_workers: 4 # FIXME: setting this > 0 may get stuck on macOS
remove_short_samples: False
allow_cache: True
grad_norm: 1.0
###########################################################
# INTERVAL SETTING #
###########################################################
train_max_steps: 1000000 # Number of training steps.
save_interval_steps: 20000 # Interval steps to save checkpoint.
eval_interval_steps: 10000 # Interval steps to evaluate the network.
log_interval_steps: 1000 # Interval steps to record the training log.
###########################################################
# OTHER SETTING #
###########################################################
num_save_intermediate_results: 4 # Number of results to be saved as intermediate results.

BIN
voices/zhitian_emo/am/pytorch_model.bin (Stored with Git LFS) Normal file

Binary file not shown.

View File

@ -0,0 +1,27 @@
# Audio processing configs
audio_config:
# Preprocess
wav_normalize: True
trim_silence: True
trim_silence_threshold_db: 60
preemphasize: False
# Feature extraction
sampling_rate: 16000
hop_length: 200
win_length: 1000
n_fft: 2048
n_mels: 80
fmin: 0.0
fmax: 8000.0
phone_level_feature: True
# Normalization
norm_type: "mean_std" # "mean_std" or "global"
max_norm: 1.0
symmetric: False
min_level_db: -100.0
ref_level_db: 20
num_workers: 16

View File

@ -0,0 +1,2 @@
wu w
yi y

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,147 @@
<?xml version="1.0" encoding="utf-8"?>
<posSet xmlns="http://schemas.alibaba-inc.com/tts">
<pos>
<id>1</id>
<name>a</name>
<desc>todo</desc>
</pos>
<pos>
<id>2</id>
<name>b</name>
<desc>todo</desc>
</pos>
<pos>
<id>3</id>
<name>c</name>
<desc>todo</desc>
</pos>
<pos>
<id>4</id>
<name>d</name>
<desc>todo</desc>
</pos>
<pos>
<id>5</id>
<name>e</name>
<desc>todo</desc>
</pos>
<pos>
<id>6</id>
<name>f</name>
<desc>todo</desc>
</pos>
<pos>
<id>7</id>
<name>g</name>
<desc>todo</desc>
<sub>
<pos>
<id>8</id>
<name>gb</name>
<desc>todo</desc>
</pos>
</sub>
</pos>
<pos>
<id>9</id>
<name>h</name>
<desc>todo</desc>
</pos>
<pos>
<id>10</id>
<name>i</name>
<desc>todo</desc>
</pos>
<pos>
<id>11</id>
<name>j</name>
<desc>todo</desc>
</pos>
<pos>
<id>12</id>
<name>k</name>
<desc>todo</desc>
</pos>
<pos>
<id>13</id>
<name>l</name>
<desc>todo</desc>
</pos>
<pos>
<id>14</id>
<name>m</name>
<desc>todo</desc>
</pos>
<pos>
<id>15</id>
<name>n</name>
<desc>todo</desc>
<sub>
<pos>
<id>16</id>
<name>nz</name>
<desc>todo</desc>
</pos>
</sub>
</pos>
<pos>
<id>17</id>
<name>o</name>
<desc>todo</desc>
</pos>
<pos>
<id>18</id>
<name>p</name>
<desc>todo</desc>
</pos>
<pos>
<id>19</id>
<name>q</name>
<desc>todo</desc>
</pos>
<pos>
<id>20</id>
<name>r</name>
<desc>todo</desc>
</pos>
<pos>
<id>21</id>
<name>s</name>
<desc>todo</desc>
</pos>
<pos>
<id>22</id>
<name>t</name>
<desc>todo</desc>
</pos>
<pos>
<id>23</id>
<name>u</name>
<desc>todo</desc>
</pos>
<pos>
<id>24</id>
<name>v</name>
<desc>todo</desc>
</pos>
<pos>
<id>25</id>
<name>w</name>
<desc>todo</desc>
</pos>
<pos>
<id>26</id>
<name>x</name>
<desc>todo</desc>
</pos>
<pos>
<id>27</id>
<name>y</name>
<desc>todo</desc>
</pos>
<pos>
<id>28</id>
<name>z</name>
<desc>todo</desc>
</pos>
</posSet>

View File

@ -0,0 +1,661 @@
a ga a_c
ai ga ai_c
an ga an_c
ang ga ang_c
ao ga ao_c
ba b_c a_c
bai b_c ai_c
ban b_c an_c
bang b_c ang_c
bao b_c ao_c
bei b_c ei_c
ben b_c en_c
beng b_c eng_c
bi b_c i_c
bian b_c ian_c
biao b_c iao_c
bie b_c ie_c
bin b_c in_c
bing b_c ing_c
bo b_c o_c
bu b_c u_c
ca c_c a_c
cai c_c ai_c
can c_c an_c
cang c_c ang_c
cao c_c ao_c
ce c_c e_c
cen c_c en_c
ceng c_c eng_c
cha ch_c a_c
chai ch_c ai_c
chan ch_c an_c
chang ch_c ang_c
chao ch_c ao_c
che ch_c e_c
chen ch_c en_c
cheng ch_c eng_c
chi ch_c ih_c
chong ch_c ong_c
chou ch_c ou_c
chu ch_c u_c
chua ch_c ua_c
chuai ch_c uai_c
chuan ch_c uan_c
chuang ch_c uang_c
chui ch_c uei_c
chun ch_c uen_c
chuo ch_c uo_c
ci c_c ii_c
cong c_c ong_c
cou c_c ou_c
cu c_c u_c
cuan c_c uan_c
cui c_c uei_c
cun c_c uen_c
cuo c_c uo_c
da d_c a_c
dai d_c ai_c
dan d_c an_c
dang d_c ang_c
dao d_c ao_c
de d_c e_c
dei d_c ei_c
den d_c en_c
deng d_c eng_c
di d_c i_c
dia d_c ia_c
dian d_c ian_c
diao d_c iao_c
die d_c ie_c
ding d_c ing_c
diu d_c iou_c
dong d_c ong_c
dou d_c ou_c
du d_c u_c
duan d_c uan_c
dui d_c uei_c
dun d_c uen_c
duo d_c uo_c
e ge e_c
ei ge ei_c
en ge en_c
eng ge eng_c
er ge er_c
fa f_c a_c
fan f_c an_c
fang f_c ang_c
fei f_c ei_c
fen f_c en_c
feng f_c eng_c
fo f_c o_c
fou f_c ou_c
fu f_c u_c
ga g_c a_c
gai g_c ai_c
gan g_c an_c
gang g_c ang_c
gao g_c ao_c
ge g_c e_c
gei g_c ei_c
gen g_c en_c
geng g_c eng_c
gong g_c ong_c
gou g_c ou_c
gu g_c u_c
gua g_c ua_c
guai g_c uai_c
guan g_c uan_c
guang g_c uang_c
gui g_c uei_c
gun g_c uen_c
guo g_c uo_c
ha h_c a_c
hai h_c ai_c
han h_c an_c
hang h_c ang_c
hao h_c ao_c
he h_c e_c
hei h_c ei_c
hen h_c en_c
heng h_c eng_c
hong h_c ong_c
hou h_c ou_c
hu h_c u_c
hua h_c ua_c
huai h_c uai_c
huan h_c uan_c
huang h_c uang_c
hui h_c uei_c
hun h_c uen_c
huo h_c uo_c
ji j_c i_c
jia j_c ia_c
jian j_c ian_c
jiang j_c iang_c
jiao j_c iao_c
jie j_c ie_c
jin j_c in_c
jing j_c ing_c
jiong j_c iong_c
jiu j_c iou_c
jv j_c v_c
jvan j_c van_c
jve j_c ve_c
jvn j_c vn_c
ka k_c a_c
kai k_c ai_c
kan k_c an_c
kang k_c ang_c
kao k_c ao_c
ke k_c e_c
kei k_c ei_c
ken k_c en_c
keng k_c eng_c
kong k_c ong_c
kou k_c ou_c
ku k_c u_c
kua k_c ua_c
kuai k_c uai_c
kuan k_c uan_c
kuang k_c uang_c
kui k_c uei_c
kun k_c uen_c
kuo k_c uo_c
la l_c a_c
lai l_c ai_c
lan l_c an_c
lang l_c ang_c
lao l_c ao_c
le l_c e_c
lei l_c ei_c
leng l_c eng_c
li l_c i_c
lia l_c ia_c
lian l_c ian_c
liang l_c iang_c
liao l_c iao_c
lie l_c ie_c
lin l_c in_c
ling l_c ing_c
liu l_c iou_c
lo l_c o_c
long l_c ong_c
lou l_c ou_c
lu l_c u_c
luan l_c uan_c
lun l_c uen_c
luo l_c uo_c
lv l_c v_c
lve l_c ve_c
ma m_c a_c
mai m_c ai_c
man m_c an_c
mang m_c ang_c
mao m_c ao_c
me m_c e_c
mei m_c ei_c
men m_c en_c
meng m_c eng_c
mi m_c i_c
mian m_c ian_c
miao m_c iao_c
mie m_c ie_c
min m_c in_c
ming m_c ing_c
miu m_c iou_c
mo m_c o_c
mou m_c ou_c
mu m_c u_c
na n_c a_c
nai n_c ai_c
nan n_c an_c
nang n_c ang_c
nao n_c ao_c
ne n_c e_c
nei n_c ei_c
nen n_c en_c
neng n_c eng_c
ni n_c i_c
nian n_c ian_c
niang n_c iang_c
niao n_c iao_c
nie n_c ie_c
nin n_c in_c
ning n_c ing_c
niu n_c iou_c
nong n_c ong_c
nou n_c ou_c
nu n_c u_c
nuan n_c uan_c
nun n_c uen_c
nuo n_c uo_c
nv n_c v_c
nve n_c ve_c
o go o_c
ou go ou_c
pa p_c a_c
pai p_c ai_c
pan p_c an_c
pang p_c ang_c
pao p_c ao_c
pei p_c ei_c
pen p_c en_c
peng p_c eng_c
pi p_c i_c
pian p_c ian_c
piao p_c iao_c
pie p_c ie_c
pin p_c in_c
ping p_c ing_c
po p_c o_c
pou p_c ou_c
pu p_c u_c
qi q_c i_c
qia q_c ia_c
qian q_c ian_c
qiang q_c iang_c
qiao q_c iao_c
qie q_c ie_c
qin q_c in_c
qing q_c ing_c
qiong q_c iong_c
qiu q_c iou_c
qv q_c v_c
qvan q_c van_c
qve q_c ve_c
qvn q_c vn_c
ran r_c an_c
rang r_c ang_c
rao r_c ao_c
re r_c e_c
ren r_c en_c
reng r_c eng_c
ri r_c ih_c
rong r_c ong_c
rou r_c ou_c
ru r_c u_c
ruan r_c uan_c
rui r_c uei_c
run r_c uen_c
ruo r_c uo_c
sa s_c a_c
sai s_c ai_c
san s_c an_c
sang s_c ang_c
sao s_c ao_c
se s_c e_c
sen s_c en_c
seng s_c eng_c
sha sh_c a_c
shai sh_c ai_c
shan sh_c an_c
shang sh_c ang_c
shao sh_c ao_c
she sh_c e_c
shei sh_c ei_c
shen sh_c en_c
sheng sh_c eng_c
shi sh_c ih_c
shou sh_c ou_c
shu sh_c u_c
shua sh_c ua_c
shuai sh_c uai_c
shuan sh_c uan_c
shuang sh_c uang_c
shui sh_c uei_c
shun sh_c uen_c
shuo sh_c uo_c
si s_c ii_c
song s_c ong_c
sou s_c ou_c
su s_c u_c
suan s_c uan_c
sui s_c uei_c
sun s_c uen_c
suo s_c uo_c
ta t_c a_c
tai t_c ai_c
tan t_c an_c
tang t_c ang_c
tao t_c ao_c
te t_c e_c
tei t_c ei_c
teng t_c eng_c
ti t_c i_c
tian t_c ian_c
tiao t_c iao_c
tie t_c ie_c
ting t_c ing_c
tong t_c ong_c
tou t_c ou_c
tu t_c u_c
tuan t_c uan_c
tui t_c uei_c
tun t_c uen_c
tuo t_c uo_c
wa w_c a_c
wai w_c ai_c
wan w_c an_c
wang w_c ang_c
wei w_c ei_c
wen w_c en_c
weng w_c eng_c
wo w_c o_c
wu w_c u_c
xi xx_c i_c
xia xx_c ia_c
xian xx_c ian_c
xiang xx_c iang_c
xiao xx_c iao_c
xie xx_c ie_c
xin xx_c in_c
xing xx_c ing_c
xiong xx_c iong_c
xiu xx_c iou_c
xv xx_c v_c
xvan xx_c van_c
xve xx_c ve_c
xvn xx_c vn_c
ya y_c a_c
yan y_c an_c
yang y_c ang_c
yao y_c ao_c
ye y_c e_c
yi y_c i_c
yin y_c in_c
ying y_c ing_c
yo y_c o_c
yong y_c ong_c
you y_c ou_c
yv y_c v_c
yvan y_c van_c
yve y_c ve_c
yvn y_c vn_c
za z_c a_c
zai z_c ai_c
zan z_c an_c
zang z_c ang_c
zao z_c ao_c
ze z_c e_c
zei z_c ei_c
zen z_c en_c
zeng z_c eng_c
zha zh_c a_c
zhai zh_c ai_c
zhan zh_c an_c
zhang zh_c ang_c
zhao zh_c ao_c
zhe zh_c e_c
zhei zh_c ei_c
zhen zh_c en_c
zheng zh_c eng_c
zhi zh_c ih_c
zhong zh_c ong_c
zhou zh_c ou_c
zhu zh_c u_c
zhua zh_c ua_c
zhuai zh_c uai_c
zhuan zh_c uan_c
zhuang zh_c uang_c
zhui zh_c uei_c
zhun zh_c uen_c
zhuo zh_c uo_c
zi z_c ii_c
zong z_c ong_c
zou z_c ou_c
zu z_c u_c
zuan z_c uan_c
zui z_c uei_c
zun z_c uen_c
zuo z_c uo_c
bangr b_c angr_c
banr b_c anr_c
baor b_c aor_c
bar b_c ar_c
beir b_c eir_c
bengr b_c engr_c
benr b_c enr_c
bianr b_c ianr_c
biaor b_c iaor_c
bingr b_c ingr_c
bir b_c ir_c
bor b_c or_c
bur b_c ur_c
caor c_c aor_c
car c_c ar_c
changr ch_c angr_c
chaor ch_c aor_c
char ch_c ar_c
chengr ch_c engr_c
cher ch_c er_c
chir ch_c ihr_c
chongr ch_c ongr_c
chour ch_c our_c
chuangr ch_c uangr_c
chuanr ch_c uanr_c
chuir ch_c ueir_c
chunr ch_c uenr_c
chuor ch_c uor_c
chur ch_c ur_c
cir c_c iir_c
congr c_c ongr_c
cuir c_c ueir_c
cunr c_c uenr_c
cuor c_c uor_c
dair d_c air_c
danr d_c anr_c
dangr d_c angr_c
daor d_c aor_c
dengr d_c engr_c
dianr d_c ianr_c
diaor d_c iaor_c
dier d_c ier_c
dingr d_c ingr_c
dir d_c ir_c
dongr d_c ongr_c
dour d_c our_c
duanr d_c uanr_c
duir d_c ueir_c
dunr d_c uenr_c
duor d_c uor_c
dur d_c ur_c
fangr f_c angr_c
fanr f_c anr_c
far f_c ar_c
fengr f_c engr_c
fenr f_c enr_c
fur f_c ur_c
gair g_c air_c
ganr g_c anr_c
gaor g_c aor_c
gengr g_c engr_c
genr g_c enr_c
ger g_c er_c
gongr g_c ongr_c
gour g_c our_c
guair g_c uair_c
guanr g_c uanr_c
guar g_c uar_c
guir g_c ueir_c
gunr g_c uenr_c
guor g_c uor_c
gur g_c ur_c
hair h_c air_c
hanr h_c anr_c
haor h_c aor_c
heir h_c eir_c
her h_c er_c
hour h_c our_c
huanr h_c uanr_c
huangr h_c uangr_c
huar h_c uar_c
huir h_c ueir_c
hunr h_c uenr_c
huor h_c uor_c
hur h_c ur_c
jianr j_c ianr_c
jiaor j_c iaor_c
jiar j_c iar_c
jier j_c ier_c
jingr j_c ingr_c
jinr j_c inr_c
jir j_c ir_c
jiur j_c iour_c
jvanr j_c vanr_c
jver j_c ver_c
jvnr j_c vnr_c
kair k_c air_c
kanr k_c anr_c
kaor k_c aor_c
kengr k_c engr_c
ker k_c er_c
kongr k_c ongr_c
kour k_c our_c
kuair k_c uair_c
kuangr k_c uangr_c
kuanr k_c uanr_c
kunr k_c uenr_c
lanr l_c anr_c
laor l_c aor_c
lar l_c ar_c
leir l_c eir_c
lengr l_c engr_c
ler l_c er_c
liangr l_c iangr_c
lianr l_c ianr_c
liaor l_c iaor_c
liar l_c iar_c
lingr l_c ingr_c
lir l_c ir_c
liur l_c iour_c
lour l_c our_c
luor l_c uor_c
lunr l_c uenr_c
lur l_c ur_c
lvr l_c vr_c
mair m_c air_c
manr m_c anr_c
mangr m_c angr_c
maor m_c aor_c
mar m_c ar_c
meir m_c eir_c
menr m_c enr_c
mianr m_c ianr_c
miaor m_c iaor_c
mingr m_c ingr_c
mir m_c ir_c
mor m_c or_c
naor n_c aor_c
nar n_c ar_c
niangr n_c iangr_c
nianr n_c ianr_c
niaor n_c iaor_c
ningr n_c ingr_c
nir n_c ir_c
niur n_c iour_c
nvr n_c vr_c
pair p_c air_c
pangr p_c angr_c
panr p_c anr_c
paor p_c aor_c
penr p_c enr_c
pianr p_c ianr_c
piaor p_c iaor_c
pier p_c ier_c
pingr p_c ingr_c
pir p_c ir_c
por p_c or_c
pur p_c ur_c
qianr q_c ianr_c
qiaor q_c iaor_c
qingr q_c ingr_c
qir q_c ir_c
qiur q_c iour_c
qvanr q_c vanr_c
qvnr q_c vnr_c
qvr q_c vr_c
sar s_c ar_c
rangr r_c angr_c
renr r_c enr_c
sair s_c air_c
sanr s_c anr_c
shair sh_c air_c
shaor sh_c aor_c
shengr sh_c engr_c
shenr sh_c enr_c
shir sh_c ihr_c
shuair sh_c uair_c
shour sh_c our_c
shuar sh_c uar_c
shuir sh_c ueir_c
shunr sh_c uenr_c
shuor sh_c uor_c
shur sh_c ur_c
sir s_c iir_c
suir s_c ueir_c
sunr s_c uenr_c
tair t_c air_c
tangr t_c angr_c
tanr t_c anr_c
taor t_c aor_c
ter t_c er_c
tianr t_c ianr_c
tiaor t_c iaor_c
tir t_c ir_c
tingr t_c ingr_c
tongr t_c ongr_c
tour t_c our_c
tuanr t_c uanr_c
tuir t_c ueir_c
tuor t_c uor_c
tur t_c ur_c
wanr w_c anr_c
war w_c ar_c
weir w_c eir_c
wenr w_c enr_c
wengr w_c engr_c
wor w_c or_c
wur w_c ur_c
xiangr xx_c iangr_c
xianr xx_c ianr_c
xiar xx_c iar_c
xier xx_c ier_c
xingr xx_c ingr_c
xir xx_c ir_c
xinr xx_c inr_c
xiongr xx_c iongr_c
xiur xx_c iour_c
yangr y_c angr_c
yanr y_c anr_c
yaor y_c aor_c
yar y_c ar_c
yer y_c er_c
yingr y_c ingr_c
yinr y_c inr_c
yir y_c ir_c
your y_c our_c
yvanr y_c vanr_c
zair z_c air_c
yvr y_c vr_c
yver y_c ver_c
zaor z_c aor_c
zar z_c ar_c
zhangr zh_c angr_c
zhanr zh_c anr_c
zhaor zh_c aor_c
zhar zh_c ar_c
zhenr zh_c enr_c
zher zh_c er_c
zhir zh_c ihr_c
zhongr zh_c ongr_c
zhour zh_c our_c
zhuar zh_c uar_c
zhuanr zh_c uanr_c
zhunr zh_c uenr_c
zhuor zh_c uor_c
zhur zh_c ur_c
zir z_c iir_c
zuanr z_c uanr_c
zuir z_c ueir_c
zuor z_c uor_c

View File

@ -0,0 +1,7 @@
1
4
2
3
5
0

View File

@ -0,0 +1,33 @@
emotion_none
emotion_neutral
emotion_angry
emotion_disgust
emotion_fear
emotion_happy
emotion_sad
emotion_surprise
emotion_calm
emotion_gentle
emotion_relax
emotion_lyrical
emotion_serious
emotion_disgruntled
emotion_satisfied
emotion_disappointed
emotion_excited
emotion_anxiety
emotion_jealousy
emotion_hate
emotion_pity
emotion_pleasure
emotion_arousal
emotion_dominance
emotion_placeholder1
emotion_placeholder2
emotion_placeholder3
emotion_placeholder4
emotion_placeholder5
emotion_placeholder6
emotion_placeholder7
emotion_placeholder8
emotion_placeholder9

View File

@ -0,0 +1,6 @@
F7
F74
FBYN
FRXL
M7
xiaoyu

View File

@ -0,0 +1,144 @@
a_c
ai_c
an_c
ang_c
ao_c
b_c
c_c
ch_c
d_c
e_c
ei_c
en_c
eng_c
er_c
f_c
g_c
h_c
i_c
ia_c
ian_c
iang_c
iao_c
ie_c
ih_c
ii_c
in_c
ing_c
io_c
iong_c
iou_c
j_c
k_c
l_c
m_c
n_c
o_c
ong_c
ou_c
p_c
q_c
r_c
s_c
sh_c
t_c
u_c
ua_c
uai_c
uan_c
uang_c
uei_c
uen_c
ueng_c
uo_c
v_c
van_c
ve_c
vn_c
xx_c
z_c
zh_c
w_c
y_c
ga
ge
go
aa
ae
ah
ao
aw
ay
b
ch
d
dh
eh
er
ey
f
g
hh
ih
iy
jh
k
l
m
n
ng
ow
oy
p
r
s
sh
t
th
uh
uw
v
w
y
z
zh
air_c
angr_c
anr_c
aor_c
ar_c
eir_c
engr_c
enr_c
iangr_c
ianr_c
iaor_c
iar_c
ier_c
ihr_c
iir_c
ingr_c
inr_c
iongr_c
iour_c
ir_c
ongr_c
or_c
our_c
uair_c
uangr_c
uanr_c
uar_c
ueir_c
uenr_c
uor_c
ur_c
vanr_c
ver_c
vnr_c
vr_c
pau
#1
#2
#3
#4

View File

@ -0,0 +1,5 @@
s_begin
s_end
s_none
s_both
s_middle

View File

@ -0,0 +1,7 @@
tone1
tone_none
tone4
tone2
tone3
tone5
tone0

View File

@ -0,0 +1,5 @@
word_begin
word_end
word_middle
word_both
word_none

BIN
voices/zhitian_emo/voc/ckpt/checkpoint_0.pth (Stored with Git LFS) Normal file

Binary file not shown.

BIN
voices/zhitian_emo/voc/ckpt/checkpoint_1.pth (Stored with Git LFS) Normal file

Binary file not shown.

View File

@ -0,0 +1,188 @@
model_type: hifigan
Model:
###########################################################
# GENERATOR NETWORK ARCHITECTURE SETTING #
###########################################################
Generator:
params:
in_channels: 80
out_channels: 1
channels: 256
kernel_size: 7
upsample_scales: [10, 5, 2, 2]
upsample_kernal_sizes: [20, 11, 4, 4] # NOTE(review): 'kernal' (sic) — key name as expected by the consumer; do not rename without updating the loader
resblock_kernel_sizes: [3, 7, 11]
resblock_dilations:
- [1, 3, 5, 7]
- [1, 3, 5, 7]
- [1, 3, 5, 7]
bias: true
causal: true
nonlinear_activation: "LeakyReLU"
nonlinear_activation_params:
negative_slope: 0.1
use_weight_norm: true
optimizer:
type: Adam
params:
lr: 2.0e-4
betas: [0.5, 0.9]
weight_decay: 0.0
scheduler:
type: MultiStepLR
params:
gamma: 0.5
milestones:
- 200000
- 400000
- 600000
- 800000
###########################################################
# DISCRIMINATOR NETWORK ARCHITECTURE SETTING #
###########################################################
MultiScaleDiscriminator:
params:
scales: 3
downsample_pooling: "DWT"
downsample_pooling_params:
kernel_size: 4
stride: 2
padding: 2
discriminator_params:
in_channels: 1
out_channels: 1
kernel_sizes: [15, 41, 5, 3]
channels: 128
max_downsample_channels: 1024
max_groups: 16
bias: true
downsample_scales: [4, 4, 4, 4, 1]
nonlinear_activation: "LeakyReLU"
nonlinear_activation_params:
negative_slope: 0.1
follow_official_norm: true
optimizer:
type: Adam
params:
lr: 2.0e-4
betas: [0.5, 0.9]
weight_decay: 0.0
scheduler:
type: MultiStepLR
params:
gamma: 0.5
milestones:
- 200000
- 400000
- 600000
- 800000
MultiPeriodDiscriminator:
params:
periods: [2, 3, 5, 7, 11]
discriminator_params:
in_channels: 1
out_channels: 1
kernel_sizes: [5, 3]
channels: 32
downsample_scales: [3, 3, 3, 3, 1]
max_downsample_channels: 1024
bias: true
nonlinear_activation: "LeakyReLU"
nonlinear_activation_params:
negative_slope: 0.1
use_spectral_norm: false
optimizer:
type: Adam
params:
lr: 2.0e-4
betas: [0.5, 0.9]
weight_decay: 0.0
scheduler:
type: MultiStepLR
params:
gamma: 0.5
milestones:
- 200000
- 400000
- 600000
- 800000
####################################################
# LOSS SETTING #
####################################################
Loss:
generator_adv_loss:
enable: True
params:
average_by_discriminators: False
weights: 1.0
discriminator_adv_loss:
enable: True
params:
average_by_discriminators: False
weights: 1.0
stft_loss:
enable: False # Whether to use multi-resolution STFT loss.
mel_loss:
enable: True
params:
fs: 16000
fft_size: 2048
hop_size: 200
win_length: 1000
window: "hann"
num_mels: 80
fmin: 0
fmax: 8000
log_base: null
weights: 45.0
subband_stft_loss:
enable: False
params:
fft_sizes: [384, 683, 171] # List of FFT size for STFT-based loss.
hop_sizes: [35, 75, 15] # List of hop sizes for STFT-based loss.
win_lengths: [150, 300, 60] # List of window lengths for STFT-based loss.
window: "hann_window" # Window function for STFT-based loss.
feat_match_loss:
enable: True
params:
average_by_discriminators: false
average_by_layers: false
weights: 2.0
###########################################################
# DATA LOADER SETTING #
###########################################################
batch_size: 16
batch_max_steps: 9600 # Length of each audio in batch. Make sure dividable by hop_size.
pin_memory: True
num_workers: 2 # FIXME: set > 0 may stuck on macos
remove_short_samples: False
allow_cache: True
generator_grad_norm: -1
discriminator_grad_norm: -1
###########################################################
# INTERVAL SETTING #
###########################################################
generator_train_start_steps: 1 # Number of steps to start to train generator.
discriminator_train_start_steps: 0 # Number of steps to start to train discriminator.
train_max_steps: 2500000 # Number of training steps.
save_interval_steps: 20000 # Interval steps to save checkpoint.
eval_interval_steps: 10000 # Interval steps to evaluate the network.
log_interval_steps: 1000 # Interval steps to record the training log.
###########################################################
# OTHER SETTING #
###########################################################
num_save_intermediate_results: 4 # Number of results to be saved as intermediate results.

BIN
voices/zhitian_emo/vocoder/pytorch_model.bin (Stored with Git LFS) Normal file

Binary file not shown.

BIN
voices/zhiyan_emo/am/ckpt/checkpoint_0.pth (Stored with Git LFS) Normal file

Binary file not shown.

View File

@ -0,0 +1,105 @@
model_type: sambert
Model:
#########################################################
# SAMBERT NETWORK ARCHITECTURE SETTING #
#########################################################
KanTtsSAMBERT:
params:
max_len: 800
embedding_dim: 512
encoder_num_layers: 8
encoder_num_heads: 8
encoder_num_units: 128
encoder_ffn_inner_dim: 1024
encoder_dropout: 0.1
encoder_attention_dropout: 0.1
encoder_relu_dropout: 0.1
encoder_projection_units: 32
speaker_units: 32
emotion_units: 32
predictor_filter_size: 41
predictor_fsmn_num_layers: 3
predictor_num_memory_units: 128
predictor_ffn_inner_dim: 256
predictor_dropout: 0.1
predictor_shift: 0
predictor_lstm_units: 128
dur_pred_prenet_units: [128, 128]
dur_pred_lstm_units: 128
decoder_prenet_units: [256, 256]
decoder_num_layers: 12
decoder_num_heads: 8
decoder_num_units: 128
decoder_ffn_inner_dim: 1024
decoder_dropout: 0.1
decoder_attention_dropout: 0.1
decoder_relu_dropout: 0.1
outputs_per_step: 3
num_mels: 80
postnet_filter_size: 41
postnet_fsmn_num_layers: 4
postnet_num_memory_units: 256
postnet_ffn_inner_dim: 512
postnet_dropout: 0.1
postnet_shift: 17
postnet_lstm_units: 128
MAS: False
optimizer:
type: Adam
params:
lr: 0.001
betas: [0.9, 0.98]
eps: 1.0e-9
weight_decay: 0.0
scheduler:
type: NoamLR
params:
warmup_steps: 4000
linguistic_unit:
cleaners: english_cleaners
lfeat_type_list: sy,tone,syllable_flag,word_segment,emo_category,speaker_category
speaker_list: F7,F74,FBYN,FRXL,M7,xiaoyu
####################################################
# LOSS SETTING #
####################################################
Loss:
MelReconLoss:
enable: True
params:
loss_type: mae
ProsodyReconLoss:
enable: True
params:
loss_type: mae
###########################################################
# DATA LOADER SETTING #
###########################################################
batch_size: 32
pin_memory: False
num_workers: 4 # FIXME: set > 0 may stuck on macos
remove_short_samples: False
allow_cache: True
grad_norm: 1.0
###########################################################
# INTERVAL SETTING #
###########################################################
train_max_steps: 1000000 # Number of training steps.
save_interval_steps: 20000 # Interval steps to save checkpoint.
eval_interval_steps: 10000 # Interval steps to evaluate the network.
log_interval_steps: 1000 # Interval steps to record the training log.
###########################################################
# OTHER SETTING #
###########################################################
num_save_intermediate_results: 4 # Number of results to be saved as intermediate results.

BIN
voices/zhiyan_emo/am/pytorch_model.bin (Stored with Git LFS) Normal file

Binary file not shown.

View File

@ -0,0 +1,27 @@
# Audio processing configs
audio_config:
# Preprocess
wav_normalize: True
trim_silence: True
trim_silence_threshold_db: 60
preemphasize: False
# Feature extraction
sampling_rate: 16000
hop_length: 200
win_length: 1000
n_fft: 2048
n_mels: 80
fmin: 0.0
fmax: 8000.0
phone_level_feature: True
# Normalization
norm_type: "mean_std" # "mean_std" or "global"
max_norm: 1.0
symmetric: False
min_level_db: -100.0
ref_level_db: 20
num_workers: 16

View File

@ -0,0 +1,2 @@
wu w
yi y

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,147 @@
<?xml version="1.0" encoding="utf-8"?>
<posSet xmlns="http://schemas.alibaba-inc.com/tts">
<pos>
<id>1</id>
<name>a</name>
<desc>todo</desc>
</pos>
<pos>
<id>2</id>
<name>b</name>
<desc>todo</desc>
</pos>
<pos>
<id>3</id>
<name>c</name>
<desc>todo</desc>
</pos>
<pos>
<id>4</id>
<name>d</name>
<desc>todo</desc>
</pos>
<pos>
<id>5</id>
<name>e</name>
<desc>todo</desc>
</pos>
<pos>
<id>6</id>
<name>f</name>
<desc>todo</desc>
</pos>
<pos>
<id>7</id>
<name>g</name>
<desc>todo</desc>
<sub>
<pos>
<id>8</id>
<name>gb</name>
<desc>todo</desc>
</pos>
</sub>
</pos>
<pos>
<id>9</id>
<name>h</name>
<desc>todo</desc>
</pos>
<pos>
<id>10</id>
<name>i</name>
<desc>todo</desc>
</pos>
<pos>
<id>11</id>
<name>j</name>
<desc>todo</desc>
</pos>
<pos>
<id>12</id>
<name>k</name>
<desc>todo</desc>
</pos>
<pos>
<id>13</id>
<name>l</name>
<desc>todo</desc>
</pos>
<pos>
<id>14</id>
<name>m</name>
<desc>todo</desc>
</pos>
<pos>
<id>15</id>
<name>n</name>
<desc>todo</desc>
<sub>
<pos>
<id>16</id>
<name>nz</name>
<desc>todo</desc>
</pos>
</sub>
</pos>
<pos>
<id>17</id>
<name>o</name>
<desc>todo</desc>
</pos>
<pos>
<id>18</id>
<name>p</name>
<desc>todo</desc>
</pos>
<pos>
<id>19</id>
<name>q</name>
<desc>todo</desc>
</pos>
<pos>
<id>20</id>
<name>r</name>
<desc>todo</desc>
</pos>
<pos>
<id>21</id>
<name>s</name>
<desc>todo</desc>
</pos>
<pos>
<id>22</id>
<name>t</name>
<desc>todo</desc>
</pos>
<pos>
<id>23</id>
<name>u</name>
<desc>todo</desc>
</pos>
<pos>
<id>24</id>
<name>v</name>
<desc>todo</desc>
</pos>
<pos>
<id>25</id>
<name>w</name>
<desc>todo</desc>
</pos>
<pos>
<id>26</id>
<name>x</name>
<desc>todo</desc>
</pos>
<pos>
<id>27</id>
<name>y</name>
<desc>todo</desc>
</pos>
<pos>
<id>28</id>
<name>z</name>
<desc>todo</desc>
</pos>
</posSet>

View File

@ -0,0 +1,661 @@
a ga a_c
ai ga ai_c
an ga an_c
ang ga ang_c
ao ga ao_c
ba b_c a_c
bai b_c ai_c
ban b_c an_c
bang b_c ang_c
bao b_c ao_c
bei b_c ei_c
ben b_c en_c
beng b_c eng_c
bi b_c i_c
bian b_c ian_c
biao b_c iao_c
bie b_c ie_c
bin b_c in_c
bing b_c ing_c
bo b_c o_c
bu b_c u_c
ca c_c a_c
cai c_c ai_c
can c_c an_c
cang c_c ang_c
cao c_c ao_c
ce c_c e_c
cen c_c en_c
ceng c_c eng_c
cha ch_c a_c
chai ch_c ai_c
chan ch_c an_c
chang ch_c ang_c
chao ch_c ao_c
che ch_c e_c
chen ch_c en_c
cheng ch_c eng_c
chi ch_c ih_c
chong ch_c ong_c
chou ch_c ou_c
chu ch_c u_c
chua ch_c ua_c
chuai ch_c uai_c
chuan ch_c uan_c
chuang ch_c uang_c
chui ch_c uei_c
chun ch_c uen_c
chuo ch_c uo_c
ci c_c ii_c
cong c_c ong_c
cou c_c ou_c
cu c_c u_c
cuan c_c uan_c
cui c_c uei_c
cun c_c uen_c
cuo c_c uo_c
da d_c a_c
dai d_c ai_c
dan d_c an_c
dang d_c ang_c
dao d_c ao_c
de d_c e_c
dei d_c ei_c
den d_c en_c
deng d_c eng_c
di d_c i_c
dia d_c ia_c
dian d_c ian_c
diao d_c iao_c
die d_c ie_c
ding d_c ing_c
diu d_c iou_c
dong d_c ong_c
dou d_c ou_c
du d_c u_c
duan d_c uan_c
dui d_c uei_c
dun d_c uen_c
duo d_c uo_c
e ge e_c
ei ge ei_c
en ge en_c
eng ge eng_c
er ge er_c
fa f_c a_c
fan f_c an_c
fang f_c ang_c
fei f_c ei_c
fen f_c en_c
feng f_c eng_c
fo f_c o_c
fou f_c ou_c
fu f_c u_c
ga g_c a_c
gai g_c ai_c
gan g_c an_c
gang g_c ang_c
gao g_c ao_c
ge g_c e_c
gei g_c ei_c
gen g_c en_c
geng g_c eng_c
gong g_c ong_c
gou g_c ou_c
gu g_c u_c
gua g_c ua_c
guai g_c uai_c
guan g_c uan_c
guang g_c uang_c
gui g_c uei_c
gun g_c uen_c
guo g_c uo_c
ha h_c a_c
hai h_c ai_c
han h_c an_c
hang h_c ang_c
hao h_c ao_c
he h_c e_c
hei h_c ei_c
hen h_c en_c
heng h_c eng_c
hong h_c ong_c
hou h_c ou_c
hu h_c u_c
hua h_c ua_c
huai h_c uai_c
huan h_c uan_c
huang h_c uang_c
hui h_c uei_c
hun h_c uen_c
huo h_c uo_c
ji j_c i_c
jia j_c ia_c
jian j_c ian_c
jiang j_c iang_c
jiao j_c iao_c
jie j_c ie_c
jin j_c in_c
jing j_c ing_c
jiong j_c iong_c
jiu j_c iou_c
jv j_c v_c
jvan j_c van_c
jve j_c ve_c
jvn j_c vn_c
ka k_c a_c
kai k_c ai_c
kan k_c an_c
kang k_c ang_c
kao k_c ao_c
ke k_c e_c
kei k_c ei_c
ken k_c en_c
keng k_c eng_c
kong k_c ong_c
kou k_c ou_c
ku k_c u_c
kua k_c ua_c
kuai k_c uai_c
kuan k_c uan_c
kuang k_c uang_c
kui k_c uei_c
kun k_c uen_c
kuo k_c uo_c
la l_c a_c
lai l_c ai_c
lan l_c an_c
lang l_c ang_c
lao l_c ao_c
le l_c e_c
lei l_c ei_c
leng l_c eng_c
li l_c i_c
lia l_c ia_c
lian l_c ian_c
liang l_c iang_c
liao l_c iao_c
lie l_c ie_c
lin l_c in_c
ling l_c ing_c
liu l_c iou_c
lo l_c o_c
long l_c ong_c
lou l_c ou_c
lu l_c u_c
luan l_c uan_c
lun l_c uen_c
luo l_c uo_c
lv l_c v_c
lve l_c ve_c
ma m_c a_c
mai m_c ai_c
man m_c an_c
mang m_c ang_c
mao m_c ao_c
me m_c e_c
mei m_c ei_c
men m_c en_c
meng m_c eng_c
mi m_c i_c
mian m_c ian_c
miao m_c iao_c
mie m_c ie_c
min m_c in_c
ming m_c ing_c
miu m_c iou_c
mo m_c o_c
mou m_c ou_c
mu m_c u_c
na n_c a_c
nai n_c ai_c
nan n_c an_c
nang n_c ang_c
nao n_c ao_c
ne n_c e_c
nei n_c ei_c
nen n_c en_c
neng n_c eng_c
ni n_c i_c
nian n_c ian_c
niang n_c iang_c
niao n_c iao_c
nie n_c ie_c
nin n_c in_c
ning n_c ing_c
niu n_c iou_c
nong n_c ong_c
nou n_c ou_c
nu n_c u_c
nuan n_c uan_c
nun n_c uen_c
nuo n_c uo_c
nv n_c v_c
nve n_c ve_c
o go o_c
ou go ou_c
pa p_c a_c
pai p_c ai_c
pan p_c an_c
pang p_c ang_c
pao p_c ao_c
pei p_c ei_c
pen p_c en_c
peng p_c eng_c
pi p_c i_c
pian p_c ian_c
piao p_c iao_c
pie p_c ie_c
pin p_c in_c
ping p_c ing_c
po p_c o_c
pou p_c ou_c
pu p_c u_c
qi q_c i_c
qia q_c ia_c
qian q_c ian_c
qiang q_c iang_c
qiao q_c iao_c
qie q_c ie_c
qin q_c in_c
qing q_c ing_c
qiong q_c iong_c
qiu q_c iou_c
qv q_c v_c
qvan q_c van_c
qve q_c ve_c
qvn q_c vn_c
ran r_c an_c
rang r_c ang_c
rao r_c ao_c
re r_c e_c
ren r_c en_c
reng r_c eng_c
ri r_c ih_c
rong r_c ong_c
rou r_c ou_c
ru r_c u_c
ruan r_c uan_c
rui r_c uei_c
run r_c uen_c
ruo r_c uo_c
sa s_c a_c
sai s_c ai_c
san s_c an_c
sang s_c ang_c
sao s_c ao_c
se s_c e_c
sen s_c en_c
seng s_c eng_c
sha sh_c a_c
shai sh_c ai_c
shan sh_c an_c
shang sh_c ang_c
shao sh_c ao_c
she sh_c e_c
shei sh_c ei_c
shen sh_c en_c
sheng sh_c eng_c
shi sh_c ih_c
shou sh_c ou_c
shu sh_c u_c
shua sh_c ua_c
shuai sh_c uai_c
shuan sh_c uan_c
shuang sh_c uang_c
shui sh_c uei_c
shun sh_c uen_c
shuo sh_c uo_c
si s_c ii_c
song s_c ong_c
sou s_c ou_c
su s_c u_c
suan s_c uan_c
sui s_c uei_c
sun s_c uen_c
suo s_c uo_c
ta t_c a_c
tai t_c ai_c
tan t_c an_c
tang t_c ang_c
tao t_c ao_c
te t_c e_c
tei t_c ei_c
teng t_c eng_c
ti t_c i_c
tian t_c ian_c
tiao t_c iao_c
tie t_c ie_c
ting t_c ing_c
tong t_c ong_c
tou t_c ou_c
tu t_c u_c
tuan t_c uan_c
tui t_c uei_c
tun t_c uen_c
tuo t_c uo_c
wa w_c a_c
wai w_c ai_c
wan w_c an_c
wang w_c ang_c
wei w_c ei_c
wen w_c en_c
weng w_c eng_c
wo w_c o_c
wu w_c u_c
xi xx_c i_c
xia xx_c ia_c
xian xx_c ian_c
xiang xx_c iang_c
xiao xx_c iao_c
xie xx_c ie_c
xin xx_c in_c
xing xx_c ing_c
xiong xx_c iong_c
xiu xx_c iou_c
xv xx_c v_c
xvan xx_c van_c
xve xx_c ve_c
xvn xx_c vn_c
ya y_c a_c
yan y_c an_c
yang y_c ang_c
yao y_c ao_c
ye y_c e_c
yi y_c i_c
yin y_c in_c
ying y_c ing_c
yo y_c o_c
yong y_c ong_c
you y_c ou_c
yv y_c v_c
yvan y_c van_c
yve y_c ve_c
yvn y_c vn_c
za z_c a_c
zai z_c ai_c
zan z_c an_c
zang z_c ang_c
zao z_c ao_c
ze z_c e_c
zei z_c ei_c
zen z_c en_c
zeng z_c eng_c
zha zh_c a_c
zhai zh_c ai_c
zhan zh_c an_c
zhang zh_c ang_c
zhao zh_c ao_c
zhe zh_c e_c
zhei zh_c ei_c
zhen zh_c en_c
zheng zh_c eng_c
zhi zh_c ih_c
zhong zh_c ong_c
zhou zh_c ou_c
zhu zh_c u_c
zhua zh_c ua_c
zhuai zh_c uai_c
zhuan zh_c uan_c
zhuang zh_c uang_c
zhui zh_c uei_c
zhun zh_c uen_c
zhuo zh_c uo_c
zi z_c ii_c
zong z_c ong_c
zou z_c ou_c
zu z_c u_c
zuan z_c uan_c
zui z_c uei_c
zun z_c uen_c
zuo z_c uo_c
bangr b_c angr_c
banr b_c anr_c
baor b_c aor_c
bar b_c ar_c
beir b_c eir_c
bengr b_c engr_c
benr b_c enr_c
bianr b_c ianr_c
biaor b_c iaor_c
bingr b_c ingr_c
bir b_c ir_c
bor b_c or_c
bur b_c ur_c
caor c_c aor_c
car c_c ar_c
changr ch_c angr_c
chaor ch_c aor_c
char ch_c ar_c
chengr ch_c engr_c
cher ch_c er_c
chir ch_c ihr_c
chongr ch_c ongr_c
chour ch_c our_c
chuangr ch_c uangr_c
chuanr ch_c uanr_c
chuir ch_c ueir_c
chunr ch_c uenr_c
chuor ch_c uor_c
chur ch_c ur_c
cir c_c iir_c
congr c_c ongr_c
cuir c_c ueir_c
cunr c_c uenr_c
cuor c_c uor_c
dair d_c air_c
danr d_c anr_c
dangr d_c angr_c
daor d_c aor_c
dengr d_c engr_c
dianr d_c ianr_c
diaor d_c iaor_c
dier d_c ier_c
dingr d_c ingr_c
dir d_c ir_c
dongr d_c ongr_c
dour d_c our_c
duanr d_c uanr_c
duir d_c ueir_c
dunr d_c uenr_c
duor d_c uor_c
dur d_c ur_c
fangr f_c angr_c
fanr f_c anr_c
far f_c ar_c
fengr f_c engr_c
fenr f_c enr_c
fur f_c ur_c
gair g_c air_c
ganr g_c anr_c
gaor g_c aor_c
gengr g_c engr_c
genr g_c enr_c
ger g_c er_c
gongr g_c ongr_c
gour g_c our_c
guair g_c uair_c
guanr g_c uanr_c
guar g_c uar_c
guir g_c ueir_c
gunr g_c uenr_c
guor g_c uor_c
gur g_c ur_c
hair h_c air_c
hanr h_c anr_c
haor h_c aor_c
heir h_c eir_c
her h_c er_c
hour h_c our_c
huanr h_c uanr_c
huangr h_c uangr_c
huar h_c uar_c
huir h_c ueir_c
hunr h_c uenr_c
huor h_c uor_c
hur h_c ur_c
jianr j_c ianr_c
jiaor j_c iaor_c
jiar j_c iar_c
jier j_c ier_c
jingr j_c ingr_c
jinr j_c inr_c
jir j_c ir_c
jiur j_c iour_c
jvanr j_c vanr_c
jver j_c ver_c
jvnr j_c vnr_c
kair k_c air_c
kanr k_c anr_c
kaor k_c aor_c
kengr k_c engr_c
ker k_c er_c
kongr k_c ongr_c
kour k_c our_c
kuair k_c uair_c
kuangr k_c uangr_c
kuanr k_c uanr_c
kunr k_c uenr_c
lanr l_c anr_c
laor l_c aor_c
lar l_c ar_c
leir l_c eir_c
lengr l_c engr_c
ler l_c er_c
liangr l_c iangr_c
lianr l_c ianr_c
liaor l_c iaor_c
liar l_c iar_c
lingr l_c ingr_c
lir l_c ir_c
liur l_c iour_c
lour l_c our_c
luor l_c uor_c
lunr l_c uenr_c
lur l_c ur_c
lvr l_c vr_c
mair m_c air_c
manr m_c anr_c
mangr m_c angr_c
maor m_c aor_c
mar m_c ar_c
meir m_c eir_c
menr m_c enr_c
mianr m_c ianr_c
miaor m_c iaor_c
mingr m_c ingr_c
mir m_c ir_c
mor m_c or_c
naor n_c aor_c
nar n_c ar_c
niangr n_c iangr_c
nianr n_c ianr_c
niaor n_c iaor_c
ningr n_c ingr_c
nir n_c ir_c
niur n_c iour_c
nvr n_c vr_c
pair p_c air_c
pangr p_c angr_c
panr p_c anr_c
paor p_c aor_c
penr p_c enr_c
pianr p_c ianr_c
piaor p_c iaor_c
pier p_c ier_c
pingr p_c ingr_c
pir p_c ir_c
por p_c or_c
pur p_c ur_c
qianr q_c ianr_c
qiaor q_c iaor_c
qingr q_c ingr_c
qir q_c ir_c
qiur q_c iour_c
qvanr q_c vanr_c
qvnr q_c vnr_c
qvr q_c vr_c
sar s_c ar_c
rangr r_c angr_c
renr r_c enr_c
sair s_c air_c
sanr s_c anr_c
shair sh_c air_c
shaor sh_c aor_c
shengr sh_c engr_c
shenr sh_c enr_c
shir sh_c ihr_c
shuair sh_c uair_c
shour sh_c our_c
shuar sh_c uar_c
shuir sh_c ueir_c
shunr sh_c uenr_c
shuor sh_c uor_c
shur sh_c ur_c
sir s_c iir_c
suir s_c ueir_c
sunr s_c uenr_c
tair t_c air_c
tangr t_c angr_c
tanr t_c anr_c
taor t_c aor_c
ter t_c er_c
tianr t_c ianr_c
tiaor t_c iaor_c
tir t_c ir_c
tingr t_c ingr_c
tongr t_c ongr_c
tour t_c our_c
tuanr t_c uanr_c
tuir t_c ueir_c
tuor t_c uor_c
tur t_c ur_c
wanr w_c anr_c
war w_c ar_c
weir w_c eir_c
wenr w_c enr_c
wengr w_c engr_c
wor w_c or_c
wur w_c ur_c
xiangr xx_c iangr_c
xianr xx_c ianr_c
xiar xx_c iar_c
xier xx_c ier_c
xingr xx_c ingr_c
xir xx_c ir_c
xinr xx_c inr_c
xiongr xx_c iongr_c
xiur xx_c iour_c
yangr y_c angr_c
yanr y_c anr_c
yaor y_c aor_c
yar y_c ar_c
yer y_c er_c
yingr y_c ingr_c
yinr y_c inr_c
yir y_c ir_c
your y_c our_c
yvanr y_c vanr_c
zair z_c air_c
yvr y_c vr_c
yver y_c ver_c
zaor z_c aor_c
zar z_c ar_c
zhangr zh_c angr_c
zhanr zh_c anr_c
zhaor zh_c aor_c
zhar zh_c ar_c
zhenr zh_c enr_c
zher zh_c er_c
zhir zh_c ihr_c
zhongr zh_c ongr_c
zhour zh_c our_c
zhuar zh_c uar_c
zhuanr zh_c uanr_c
zhunr zh_c uenr_c
zhuor zh_c uor_c
zhur zh_c ur_c
zir z_c iir_c
zuanr z_c uanr_c
zuir z_c ueir_c
zuor z_c uor_c

View File

@ -0,0 +1,7 @@
1
4
2
3
5
0

View File

@ -0,0 +1,33 @@
emotion_none
emotion_neutral
emotion_angry
emotion_disgust
emotion_fear
emotion_happy
emotion_sad
emotion_surprise
emotion_calm
emotion_gentle
emotion_relax
emotion_lyrical
emotion_serious
emotion_disgruntled
emotion_satisfied
emotion_disappointed
emotion_excited
emotion_anxiety
emotion_jealousy
emotion_hate
emotion_pity
emotion_pleasure
emotion_arousal
emotion_dominance
emotion_placeholder1
emotion_placeholder2
emotion_placeholder3
emotion_placeholder4
emotion_placeholder5
emotion_placeholder6
emotion_placeholder7
emotion_placeholder8
emotion_placeholder9

View File

@ -0,0 +1,6 @@
F7
F74
FBYN
FRXL
M7
xiaoyu

View File

@ -0,0 +1,144 @@
a_c
ai_c
an_c
ang_c
ao_c
b_c
c_c
ch_c
d_c
e_c
ei_c
en_c
eng_c
er_c
f_c
g_c
h_c
i_c
ia_c
ian_c
iang_c
iao_c
ie_c
ih_c
ii_c
in_c
ing_c
io_c
iong_c
iou_c
j_c
k_c
l_c
m_c
n_c
o_c
ong_c
ou_c
p_c
q_c
r_c
s_c
sh_c
t_c
u_c
ua_c
uai_c
uan_c
uang_c
uei_c
uen_c
ueng_c
uo_c
v_c
van_c
ve_c
vn_c
xx_c
z_c
zh_c
w_c
y_c
ga
ge
go
aa
ae
ah
ao
aw
ay
b
ch
d
dh
eh
er
ey
f
g
hh
ih
iy
jh
k
l
m
n
ng
ow
oy
p
r
s
sh
t
th
uh
uw
v
w
y
z
zh
air_c
angr_c
anr_c
aor_c
ar_c
eir_c
engr_c
enr_c
iangr_c
ianr_c
iaor_c
iar_c
ier_c
ihr_c
iir_c
ingr_c
inr_c
iongr_c
iour_c
ir_c
ongr_c
or_c
our_c
uair_c
uangr_c
uanr_c
uar_c
ueir_c
uenr_c
uor_c
ur_c
vanr_c
ver_c
vnr_c
vr_c
pau
#1
#2
#3
#4

View File

@ -0,0 +1,5 @@
s_begin
s_end
s_none
s_both
s_middle

View File

@ -0,0 +1,7 @@
tone1
tone_none
tone4
tone2
tone3
tone5
tone0

View File

@ -0,0 +1,5 @@
word_begin
word_end
word_middle
word_both
word_none

BIN
voices/zhiyan_emo/voc/ckpt/checkpoint_0.pth (Stored with Git LFS) Normal file

Binary file not shown.

BIN
voices/zhiyan_emo/voc/ckpt/checkpoint_1.pth (Stored with Git LFS) Normal file

Binary file not shown.

View File

@ -0,0 +1,188 @@
model_type: hifigan
Model:
###########################################################
# GENERATOR NETWORK ARCHITECTURE SETTING #
###########################################################
Generator:
params:
in_channels: 80
out_channels: 1
channels: 256
kernel_size: 7
upsample_scales: [10, 5, 2, 2]
upsample_kernal_sizes: [20, 11, 4, 4] # NOTE(review): 'kernal' (sic) — key name as expected by the consumer; do not rename without updating the loader
resblock_kernel_sizes: [3, 7, 11]
resblock_dilations:
- [1, 3, 5, 7]
- [1, 3, 5, 7]
- [1, 3, 5, 7]
bias: true
causal: true
nonlinear_activation: "LeakyReLU"
nonlinear_activation_params:
negative_slope: 0.1
use_weight_norm: true
optimizer:
type: Adam
params:
lr: 2.0e-4
betas: [0.5, 0.9]
weight_decay: 0.0
scheduler:
type: MultiStepLR
params:
gamma: 0.5
milestones:
- 200000
- 400000
- 600000
- 800000
###########################################################
# DISCRIMINATOR NETWORK ARCHITECTURE SETTING #
###########################################################
MultiScaleDiscriminator:
params:
scales: 3
downsample_pooling: "DWT"
downsample_pooling_params:
kernel_size: 4
stride: 2
padding: 2
discriminator_params:
in_channels: 1
out_channels: 1
kernel_sizes: [15, 41, 5, 3]
channels: 128
max_downsample_channels: 1024
max_groups: 16
bias: true
downsample_scales: [4, 4, 4, 4, 1]
nonlinear_activation: "LeakyReLU"
nonlinear_activation_params:
negative_slope: 0.1
follow_official_norm: true
optimizer:
type: Adam
params:
lr: 2.0e-4
betas: [0.5, 0.9]
weight_decay: 0.0
scheduler:
type: MultiStepLR
params:
gamma: 0.5
milestones:
- 200000
- 400000
- 600000
- 800000
MultiPeriodDiscriminator:
params:
periods: [2, 3, 5, 7, 11]
discriminator_params:
in_channels: 1
out_channels: 1
kernel_sizes: [5, 3]
channels: 32
downsample_scales: [3, 3, 3, 3, 1]
max_downsample_channels: 1024
bias: true
nonlinear_activation: "LeakyReLU"
nonlinear_activation_params:
negative_slope: 0.1
use_spectral_norm: false
optimizer:
type: Adam
params:
lr: 2.0e-4
betas: [0.5, 0.9]
weight_decay: 0.0
scheduler:
type: MultiStepLR
params:
gamma: 0.5
milestones:
- 200000
- 400000
- 600000
- 800000
####################################################
# LOSS SETTING #
####################################################
Loss:
generator_adv_loss:
enable: True
params:
average_by_discriminators: False
weights: 1.0
discriminator_adv_loss:
enable: True
params:
average_by_discriminators: False
weights: 1.0
stft_loss:
enable: False # Whether to use multi-resolution STFT loss.
mel_loss:
enable: True
params:
fs: 16000
fft_size: 2048
hop_size: 200
win_length: 1000
window: "hann"
num_mels: 80
fmin: 0
fmax: 8000
log_base: null
weights: 45.0
subband_stft_loss:
enable: False
params:
fft_sizes: [384, 683, 171] # List of FFT size for STFT-based loss.
hop_sizes: [35, 75, 15] # List of hop sizes for STFT-based loss.
win_lengths: [150, 300, 60] # List of window lengths for STFT-based loss.
window: "hann_window" # Window function for STFT-based loss.
feat_match_loss:
enable: True
params:
average_by_discriminators: false
average_by_layers: false
weights: 2.0
###########################################################
# DATA LOADER SETTING #
###########################################################
batch_size: 16
batch_max_steps: 9600 # Length of each audio in batch. Make sure dividable by hop_size.
pin_memory: True
num_workers: 2 # FIXME: set > 0 may stuck on macos
remove_short_samples: False
allow_cache: True
generator_grad_norm: -1
discriminator_grad_norm: -1
###########################################################
# INTERVAL SETTING #
###########################################################
generator_train_start_steps: 1 # Number of steps to start to train generator.
discriminator_train_start_steps: 0 # Number of steps to start to train discriminator.
train_max_steps: 2500000 # Number of training steps.
save_interval_steps: 20000 # Interval steps to save checkpoint.
eval_interval_steps: 10000 # Interval steps to evaluate the network.
log_interval_steps: 1000 # Interval steps to record the training log.
###########################################################
# OTHER SETTING #
###########################################################
num_save_intermediate_results: 4 # Number of results to be saved as intermediate results.

BIN
voices/zhiyan_emo/vocoder/pytorch_model.bin (Stored with Git LFS) Normal file

Binary file not shown.

BIN
voices/zhizhe_emo/am/ckpt/checkpoint_0.pth (Stored with Git LFS) Normal file

Binary file not shown.

View File

@ -0,0 +1,105 @@
model_type: sambert
Model:
#########################################################
# SAMBERT NETWORK ARCHITECTURE SETTING #
#########################################################
KanTtsSAMBERT:
params:
max_len: 800
embedding_dim: 512
encoder_num_layers: 8
encoder_num_heads: 8
encoder_num_units: 128
encoder_ffn_inner_dim: 1024
encoder_dropout: 0.1
encoder_attention_dropout: 0.1
encoder_relu_dropout: 0.1
encoder_projection_units: 32
speaker_units: 32
emotion_units: 32
predictor_filter_size: 41
predictor_fsmn_num_layers: 3
predictor_num_memory_units: 128
predictor_ffn_inner_dim: 256
predictor_dropout: 0.1
predictor_shift: 0
predictor_lstm_units: 128
dur_pred_prenet_units: [128, 128]
dur_pred_lstm_units: 128
decoder_prenet_units: [256, 256]
decoder_num_layers: 12
decoder_num_heads: 8
decoder_num_units: 128
decoder_ffn_inner_dim: 1024
decoder_dropout: 0.1
decoder_attention_dropout: 0.1
decoder_relu_dropout: 0.1
outputs_per_step: 3
num_mels: 80
postnet_filter_size: 41
postnet_fsmn_num_layers: 4
postnet_num_memory_units: 256
postnet_ffn_inner_dim: 512
postnet_dropout: 0.1
postnet_shift: 17
postnet_lstm_units: 128
MAS: False
optimizer:
type: Adam
params:
lr: 0.001
betas: [0.9, 0.98]
eps: 1.0e-9
weight_decay: 0.0
scheduler:
type: NoamLR
params:
warmup_steps: 4000
linguistic_unit:
cleaners: english_cleaners
lfeat_type_list: sy,tone,syllable_flag,word_segment,emo_category,speaker_category
speaker_list: F7,F74,FBYN,FRXL,M7,xiaoyu
####################################################
# LOSS SETTING #
####################################################
Loss:
MelReconLoss:
enable: True
params:
loss_type: mae
ProsodyReconLoss:
enable: True
params:
loss_type: mae
###########################################################
# DATA LOADER SETTING #
###########################################################
batch_size: 32
pin_memory: False
num_workers: 4 # FIXME: set > 0 may stuck on macos
remove_short_samples: False
allow_cache: True
grad_norm: 1.0
###########################################################
# INTERVAL SETTING #
###########################################################
train_max_steps: 1000000 # Number of training steps.
save_interval_steps: 20000 # Interval steps to save checkpoint.
eval_interval_steps: 10000 # Interval steps to evaluate the network.
log_interval_steps: 1000 # Interval steps to record the training log.
###########################################################
# OTHER SETTING #
###########################################################
num_save_intermediate_results: 4 # Number of results to be saved as intermediate results.

BIN
voices/zhizhe_emo/am/pytorch_model.bin (Stored with Git LFS) Normal file

Binary file not shown.

View File

@ -0,0 +1,27 @@
# Audio processing configs
audio_config:
# Preprocess
wav_normalize: True
trim_silence: True
trim_silence_threshold_db: 60
preemphasize: False
# Feature extraction
sampling_rate: 16000
hop_length: 200
win_length: 1000
n_fft: 2048
n_mels: 80
fmin: 0.0
fmax: 8000.0
phone_level_feature: True
# Normalization
norm_type: "mean_std" # "mean_std" or "global"
max_norm: 1.0
symmetric: False
min_level_db: -100.0
ref_level_db: 20
num_workers: 16

View File

@ -0,0 +1,2 @@
wu w
yi y

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,147 @@
<?xml version="1.0" encoding="utf-8"?>
<posSet xmlns="http://schemas.alibaba-inc.com/tts">
<pos>
<id>1</id>
<name>a</name>
<desc>todo</desc>
</pos>
<pos>
<id>2</id>
<name>b</name>
<desc>todo</desc>
</pos>
<pos>
<id>3</id>
<name>c</name>
<desc>todo</desc>
</pos>
<pos>
<id>4</id>
<name>d</name>
<desc>todo</desc>
</pos>
<pos>
<id>5</id>
<name>e</name>
<desc>todo</desc>
</pos>
<pos>
<id>6</id>
<name>f</name>
<desc>todo</desc>
</pos>
<pos>
<id>7</id>
<name>g</name>
<desc>todo</desc>
<sub>
<pos>
<id>8</id>
<name>gb</name>
<desc>todo</desc>
</pos>
</sub>
</pos>
<pos>
<id>9</id>
<name>h</name>
<desc>todo</desc>
</pos>
<pos>
<id>10</id>
<name>i</name>
<desc>todo</desc>
</pos>
<pos>
<id>11</id>
<name>j</name>
<desc>todo</desc>
</pos>
<pos>
<id>12</id>
<name>k</name>
<desc>todo</desc>
</pos>
<pos>
<id>13</id>
<name>l</name>
<desc>todo</desc>
</pos>
<pos>
<id>14</id>
<name>m</name>
<desc>todo</desc>
</pos>
<pos>
<id>15</id>
<name>n</name>
<desc>todo</desc>
<sub>
<pos>
<id>16</id>
<name>nz</name>
<desc>todo</desc>
</pos>
</sub>
</pos>
<pos>
<id>17</id>
<name>o</name>
<desc>todo</desc>
</pos>
<pos>
<id>18</id>
<name>p</name>
<desc>todo</desc>
</pos>
<pos>
<id>19</id>
<name>q</name>
<desc>todo</desc>
</pos>
<pos>
<id>20</id>
<name>r</name>
<desc>todo</desc>
</pos>
<pos>
<id>21</id>
<name>s</name>
<desc>todo</desc>
</pos>
<pos>
<id>22</id>
<name>t</name>
<desc>todo</desc>
</pos>
<pos>
<id>23</id>
<name>u</name>
<desc>todo</desc>
</pos>
<pos>
<id>24</id>
<name>v</name>
<desc>todo</desc>
</pos>
<pos>
<id>25</id>
<name>w</name>
<desc>todo</desc>
</pos>
<pos>
<id>26</id>
<name>x</name>
<desc>todo</desc>
</pos>
<pos>
<id>27</id>
<name>y</name>
<desc>todo</desc>
</pos>
<pos>
<id>28</id>
<name>z</name>
<desc>todo</desc>
</pos>
</posSet>

View File

@ -0,0 +1,661 @@
a ga a_c
ai ga ai_c
an ga an_c
ang ga ang_c
ao ga ao_c
ba b_c a_c
bai b_c ai_c
ban b_c an_c
bang b_c ang_c
bao b_c ao_c
bei b_c ei_c
ben b_c en_c
beng b_c eng_c
bi b_c i_c
bian b_c ian_c
biao b_c iao_c
bie b_c ie_c
bin b_c in_c
bing b_c ing_c
bo b_c o_c
bu b_c u_c
ca c_c a_c
cai c_c ai_c
can c_c an_c
cang c_c ang_c
cao c_c ao_c
ce c_c e_c
cen c_c en_c
ceng c_c eng_c
cha ch_c a_c
chai ch_c ai_c
chan ch_c an_c
chang ch_c ang_c
chao ch_c ao_c
che ch_c e_c
chen ch_c en_c
cheng ch_c eng_c
chi ch_c ih_c
chong ch_c ong_c
chou ch_c ou_c
chu ch_c u_c
chua ch_c ua_c
chuai ch_c uai_c
chuan ch_c uan_c
chuang ch_c uang_c
chui ch_c uei_c
chun ch_c uen_c
chuo ch_c uo_c
ci c_c ii_c
cong c_c ong_c
cou c_c ou_c
cu c_c u_c
cuan c_c uan_c
cui c_c uei_c
cun c_c uen_c
cuo c_c uo_c
da d_c a_c
dai d_c ai_c
dan d_c an_c
dang d_c ang_c
dao d_c ao_c
de d_c e_c
dei d_c ei_c
den d_c en_c
deng d_c eng_c
di d_c i_c
dia d_c ia_c
dian d_c ian_c
diao d_c iao_c
die d_c ie_c
ding d_c ing_c
diu d_c iou_c
dong d_c ong_c
dou d_c ou_c
du d_c u_c
duan d_c uan_c
dui d_c uei_c
dun d_c uen_c
duo d_c uo_c
e ge e_c
ei ge ei_c
en ge en_c
eng ge eng_c
er ge er_c
fa f_c a_c
fan f_c an_c
fang f_c ang_c
fei f_c ei_c
fen f_c en_c
feng f_c eng_c
fo f_c o_c
fou f_c ou_c
fu f_c u_c
ga g_c a_c
gai g_c ai_c
gan g_c an_c
gang g_c ang_c
gao g_c ao_c
ge g_c e_c
gei g_c ei_c
gen g_c en_c
geng g_c eng_c
gong g_c ong_c
gou g_c ou_c
gu g_c u_c
gua g_c ua_c
guai g_c uai_c
guan g_c uan_c
guang g_c uang_c
gui g_c uei_c
gun g_c uen_c
guo g_c uo_c
ha h_c a_c
hai h_c ai_c
han h_c an_c
hang h_c ang_c
hao h_c ao_c
he h_c e_c
hei h_c ei_c
hen h_c en_c
heng h_c eng_c
hong h_c ong_c
hou h_c ou_c
hu h_c u_c
hua h_c ua_c
huai h_c uai_c
huan h_c uan_c
huang h_c uang_c
hui h_c uei_c
hun h_c uen_c
huo h_c uo_c
ji j_c i_c
jia j_c ia_c
jian j_c ian_c
jiang j_c iang_c
jiao j_c iao_c
jie j_c ie_c
jin j_c in_c
jing j_c ing_c
jiong j_c iong_c
jiu j_c iou_c
jv j_c v_c
jvan j_c van_c
jve j_c ve_c
jvn j_c vn_c
ka k_c a_c
kai k_c ai_c
kan k_c an_c
kang k_c ang_c
kao k_c ao_c
ke k_c e_c
kei k_c ei_c
ken k_c en_c
keng k_c eng_c
kong k_c ong_c
kou k_c ou_c
ku k_c u_c
kua k_c ua_c
kuai k_c uai_c
kuan k_c uan_c
kuang k_c uang_c
kui k_c uei_c
kun k_c uen_c
kuo k_c uo_c
la l_c a_c
lai l_c ai_c
lan l_c an_c
lang l_c ang_c
lao l_c ao_c
le l_c e_c
lei l_c ei_c
leng l_c eng_c
li l_c i_c
lia l_c ia_c
lian l_c ian_c
liang l_c iang_c
liao l_c iao_c
lie l_c ie_c
lin l_c in_c
ling l_c ing_c
liu l_c iou_c
lo l_c o_c
long l_c ong_c
lou l_c ou_c
lu l_c u_c
luan l_c uan_c
lun l_c uen_c
luo l_c uo_c
lv l_c v_c
lve l_c ve_c
ma m_c a_c
mai m_c ai_c
man m_c an_c
mang m_c ang_c
mao m_c ao_c
me m_c e_c
mei m_c ei_c
men m_c en_c
meng m_c eng_c
mi m_c i_c
mian m_c ian_c
miao m_c iao_c
mie m_c ie_c
min m_c in_c
ming m_c ing_c
miu m_c iou_c
mo m_c o_c
mou m_c ou_c
mu m_c u_c
na n_c a_c
nai n_c ai_c
nan n_c an_c
nang n_c ang_c
nao n_c ao_c
ne n_c e_c
nei n_c ei_c
nen n_c en_c
neng n_c eng_c
ni n_c i_c
nian n_c ian_c
niang n_c iang_c
niao n_c iao_c
nie n_c ie_c
nin n_c in_c
ning n_c ing_c
niu n_c iou_c
nong n_c ong_c
nou n_c ou_c
nu n_c u_c
nuan n_c uan_c
nun n_c uen_c
nuo n_c uo_c
nv n_c v_c
nve n_c ve_c
o go o_c
ou go ou_c
pa p_c a_c
pai p_c ai_c
pan p_c an_c
pang p_c ang_c
pao p_c ao_c
pei p_c ei_c
pen p_c en_c
peng p_c eng_c
pi p_c i_c
pian p_c ian_c
piao p_c iao_c
pie p_c ie_c
pin p_c in_c
ping p_c ing_c
po p_c o_c
pou p_c ou_c
pu p_c u_c
qi q_c i_c
qia q_c ia_c
qian q_c ian_c
qiang q_c iang_c
qiao q_c iao_c
qie q_c ie_c
qin q_c in_c
qing q_c ing_c
qiong q_c iong_c
qiu q_c iou_c
qv q_c v_c
qvan q_c van_c
qve q_c ve_c
qvn q_c vn_c
ran r_c an_c
rang r_c ang_c
rao r_c ao_c
re r_c e_c
ren r_c en_c
reng r_c eng_c
ri r_c ih_c
rong r_c ong_c
rou r_c ou_c
ru r_c u_c
ruan r_c uan_c
rui r_c uei_c
run r_c uen_c
ruo r_c uo_c
sa s_c a_c
sai s_c ai_c
san s_c an_c
sang s_c ang_c
sao s_c ao_c
se s_c e_c
sen s_c en_c
seng s_c eng_c
sha sh_c a_c
shai sh_c ai_c
shan sh_c an_c
shang sh_c ang_c
shao sh_c ao_c
she sh_c e_c
shei sh_c ei_c
shen sh_c en_c
sheng sh_c eng_c
shi sh_c ih_c
shou sh_c ou_c
shu sh_c u_c
shua sh_c ua_c
shuai sh_c uai_c
shuan sh_c uan_c
shuang sh_c uang_c
shui sh_c uei_c
shun sh_c uen_c
shuo sh_c uo_c
si s_c ii_c
song s_c ong_c
sou s_c ou_c
su s_c u_c
suan s_c uan_c
sui s_c uei_c
sun s_c uen_c
suo s_c uo_c
ta t_c a_c
tai t_c ai_c
tan t_c an_c
tang t_c ang_c
tao t_c ao_c
te t_c e_c
tei t_c ei_c
teng t_c eng_c
ti t_c i_c
tian t_c ian_c
tiao t_c iao_c
tie t_c ie_c
ting t_c ing_c
tong t_c ong_c
tou t_c ou_c
tu t_c u_c
tuan t_c uan_c
tui t_c uei_c
tun t_c uen_c
tuo t_c uo_c
wa w_c a_c
wai w_c ai_c
wan w_c an_c
wang w_c ang_c
wei w_c ei_c
wen w_c en_c
weng w_c eng_c
wo w_c o_c
wu w_c u_c
xi xx_c i_c
xia xx_c ia_c
xian xx_c ian_c
xiang xx_c iang_c
xiao xx_c iao_c
xie xx_c ie_c
xin xx_c in_c
xing xx_c ing_c
xiong xx_c iong_c
xiu xx_c iou_c
xv xx_c v_c
xvan xx_c van_c
xve xx_c ve_c
xvn xx_c vn_c
ya y_c a_c
yan y_c an_c
yang y_c ang_c
yao y_c ao_c
ye y_c e_c
yi y_c i_c
yin y_c in_c
ying y_c ing_c
yo y_c o_c
yong y_c ong_c
you y_c ou_c
yv y_c v_c
yvan y_c van_c
yve y_c ve_c
yvn y_c vn_c
za z_c a_c
zai z_c ai_c
zan z_c an_c
zang z_c ang_c
zao z_c ao_c
ze z_c e_c
zei z_c ei_c
zen z_c en_c
zeng z_c eng_c
zha zh_c a_c
zhai zh_c ai_c
zhan zh_c an_c
zhang zh_c ang_c
zhao zh_c ao_c
zhe zh_c e_c
zhei zh_c ei_c
zhen zh_c en_c
zheng zh_c eng_c
zhi zh_c ih_c
zhong zh_c ong_c
zhou zh_c ou_c
zhu zh_c u_c
zhua zh_c ua_c
zhuai zh_c uai_c
zhuan zh_c uan_c
zhuang zh_c uang_c
zhui zh_c uei_c
zhun zh_c uen_c
zhuo zh_c uo_c
zi z_c ii_c
zong z_c ong_c
zou z_c ou_c
zu z_c u_c
zuan z_c uan_c
zui z_c uei_c
zun z_c uen_c
zuo z_c uo_c
bangr b_c angr_c
banr b_c anr_c
baor b_c aor_c
bar b_c ar_c
beir b_c eir_c
bengr b_c engr_c
benr b_c enr_c
bianr b_c ianr_c
biaor b_c iaor_c
bingr b_c ingr_c
bir b_c ir_c
bor b_c or_c
bur b_c ur_c
caor c_c aor_c
car c_c ar_c
changr ch_c angr_c
chaor ch_c aor_c
char ch_c ar_c
chengr ch_c engr_c
cher ch_c er_c
chir ch_c ihr_c
chongr ch_c ongr_c
chour ch_c our_c
chuangr ch_c uangr_c
chuanr ch_c uanr_c
chuir ch_c ueir_c
chunr ch_c uenr_c
chuor ch_c uor_c
chur ch_c ur_c
cir c_c iir_c
congr c_c ongr_c
cuir c_c ueir_c
cunr c_c uenr_c
cuor c_c uor_c
dair d_c air_c
danr d_c anr_c
dangr d_c angr_c
daor d_c aor_c
dengr d_c engr_c
dianr d_c ianr_c
diaor d_c iaor_c
dier d_c ier_c
dingr d_c ingr_c
dir d_c ir_c
dongr d_c ongr_c
dour d_c our_c
duanr d_c uanr_c
duir d_c ueir_c
dunr d_c uenr_c
duor d_c uor_c
dur d_c ur_c
fangr f_c angr_c
fanr f_c anr_c
far f_c ar_c
fengr f_c engr_c
fenr f_c enr_c
fur f_c ur_c
gair g_c air_c
ganr g_c anr_c
gaor g_c aor_c
gengr g_c engr_c
genr g_c enr_c
ger g_c er_c
gongr g_c ongr_c
gour g_c our_c
guair g_c uair_c
guanr g_c uanr_c
guar g_c uar_c
guir g_c ueir_c
gunr g_c uenr_c
guor g_c uor_c
gur g_c ur_c
hair h_c air_c
hanr h_c anr_c
haor h_c aor_c
heir h_c eir_c
her h_c er_c
hour h_c our_c
huanr h_c uanr_c
huangr h_c uangr_c
huar h_c uar_c
huir h_c ueir_c
hunr h_c uenr_c
huor h_c uor_c
hur h_c ur_c
jianr j_c ianr_c
jiaor j_c iaor_c
jiar j_c iar_c
jier j_c ier_c
jingr j_c ingr_c
jinr j_c inr_c
jir j_c ir_c
jiur j_c iour_c
jvanr j_c vanr_c
jver j_c ver_c
jvnr j_c vnr_c
kair k_c air_c
kanr k_c anr_c
kaor k_c aor_c
kengr k_c engr_c
ker k_c er_c
kongr k_c ongr_c
kour k_c our_c
kuair k_c uair_c
kuangr k_c uangr_c
kuanr k_c uanr_c
kunr k_c uenr_c
lanr l_c anr_c
laor l_c aor_c
lar l_c ar_c
leir l_c eir_c
lengr l_c engr_c
ler l_c er_c
liangr l_c iangr_c
lianr l_c ianr_c
liaor l_c iaor_c
liar l_c iar_c
lingr l_c ingr_c
lir l_c ir_c
liur l_c iour_c
lour l_c our_c
luor l_c uor_c
lunr l_c uenr_c
lur l_c ur_c
lvr l_c vr_c
mair m_c air_c
manr m_c anr_c
mangr m_c angr_c
maor m_c aor_c
mar m_c ar_c
meir m_c eir_c
menr m_c enr_c
mianr m_c ianr_c
miaor m_c iaor_c
mingr m_c ingr_c
mir m_c ir_c
mor m_c or_c
naor n_c aor_c
nar n_c ar_c
niangr n_c iangr_c
nianr n_c ianr_c
niaor n_c iaor_c
ningr n_c ingr_c
nir n_c ir_c
niur n_c iour_c
nvr n_c vr_c
pair p_c air_c
pangr p_c angr_c
panr p_c anr_c
paor p_c aor_c
penr p_c enr_c
pianr p_c ianr_c
piaor p_c iaor_c
pier p_c ier_c
pingr p_c ingr_c
pir p_c ir_c
por p_c or_c
pur p_c ur_c
qianr q_c ianr_c
qiaor q_c iaor_c
qingr q_c ingr_c
qir q_c ir_c
qiur q_c iour_c
qvanr q_c vanr_c
qvnr q_c vnr_c
qvr q_c vr_c
sar s_c ar_c
rangr r_c angr_c
renr r_c enr_c
sair s_c air_c
sanr s_c anr_c
shair sh_c air_c
shaor sh_c aor_c
shengr sh_c engr_c
shenr sh_c enr_c
shir sh_c ihr_c
shuair sh_c uair_c
shour sh_c our_c
shuar sh_c uar_c
shuir sh_c ueir_c
shunr sh_c uenr_c
shuor sh_c uor_c
shur sh_c ur_c
sir s_c iir_c
suir s_c ueir_c
sunr s_c uenr_c
tair t_c air_c
tangr t_c angr_c
tanr t_c anr_c
taor t_c aor_c
ter t_c er_c
tianr t_c ianr_c
tiaor t_c iaor_c
tir t_c ir_c
tingr t_c ingr_c
tongr t_c ongr_c
tour t_c our_c
tuanr t_c uanr_c
tuir t_c ueir_c
tuor t_c uor_c
tur t_c ur_c
wanr w_c anr_c
war w_c ar_c
weir w_c eir_c
wenr w_c enr_c
wengr w_c engr_c
wor w_c or_c
wur w_c ur_c
xiangr xx_c iangr_c
xianr xx_c ianr_c
xiar xx_c iar_c
xier xx_c ier_c
xingr xx_c ingr_c
xir xx_c ir_c
xinr xx_c inr_c
xiongr xx_c iongr_c
xiur xx_c iour_c
yangr y_c angr_c
yanr y_c anr_c
yaor y_c aor_c
yar y_c ar_c
yer y_c er_c
yingr y_c ingr_c
yinr y_c inr_c
yir y_c ir_c
your y_c our_c
yvanr y_c vanr_c
zair z_c air_c
yvr y_c vr_c
yver y_c ver_c
zaor z_c aor_c
zar z_c ar_c
zhangr zh_c angr_c
zhanr zh_c anr_c
zhaor zh_c aor_c
zhar zh_c ar_c
zhenr zh_c enr_c
zher zh_c er_c
zhir zh_c ihr_c
zhongr zh_c ongr_c
zhour zh_c our_c
zhuar zh_c uar_c
zhuanr zh_c uanr_c
zhunr zh_c uenr_c
zhuor zh_c uor_c
zhur zh_c ur_c
zir z_c iir_c
zuanr z_c uanr_c
zuir z_c ueir_c
zuor z_c uor_c

View File

@ -0,0 +1,7 @@
1
4
2
3
5
0

View File

@ -0,0 +1,33 @@
emotion_none
emotion_neutral
emotion_angry
emotion_disgust
emotion_fear
emotion_happy
emotion_sad
emotion_surprise
emotion_calm
emotion_gentle
emotion_relax
emotion_lyrical
emotion_serious
emotion_disgruntled
emotion_satisfied
emotion_disappointed
emotion_excited
emotion_anxiety
emotion_jealousy
emotion_hate
emotion_pity
emotion_pleasure
emotion_arousal
emotion_dominance
emotion_placeholder1
emotion_placeholder2
emotion_placeholder3
emotion_placeholder4
emotion_placeholder5
emotion_placeholder6
emotion_placeholder7
emotion_placeholder8
emotion_placeholder9

View File

@ -0,0 +1,6 @@
F7
F74
FBYN
FRXL
M7
xiaoyu

View File

@ -0,0 +1,144 @@
a_c
ai_c
an_c
ang_c
ao_c
b_c
c_c
ch_c
d_c
e_c
ei_c
en_c
eng_c
er_c
f_c
g_c
h_c
i_c
ia_c
ian_c
iang_c
iao_c
ie_c
ih_c
ii_c
in_c
ing_c
io_c
iong_c
iou_c
j_c
k_c
l_c
m_c
n_c
o_c
ong_c
ou_c
p_c
q_c
r_c
s_c
sh_c
t_c
u_c
ua_c
uai_c
uan_c
uang_c
uei_c
uen_c
ueng_c
uo_c
v_c
van_c
ve_c
vn_c
xx_c
z_c
zh_c
w_c
y_c
ga
ge
go
aa
ae
ah
ao
aw
ay
b
ch
d
dh
eh
er
ey
f
g
hh
ih
iy
jh
k
l
m
n
ng
ow
oy
p
r
s
sh
t
th
uh
uw
v
w
y
z
zh
air_c
angr_c
anr_c
aor_c
ar_c
eir_c
engr_c
enr_c
iangr_c
ianr_c
iaor_c
iar_c
ier_c
ihr_c
iir_c
ingr_c
inr_c
iongr_c
iour_c
ir_c
ongr_c
or_c
our_c
uair_c
uangr_c
uanr_c
uar_c
ueir_c
uenr_c
uor_c
ur_c
vanr_c
ver_c
vnr_c
vr_c
pau
#1
#2
#3
#4

View File

@ -0,0 +1,5 @@
s_begin
s_end
s_none
s_both
s_middle

View File

@ -0,0 +1,7 @@
tone1
tone_none
tone4
tone2
tone3
tone5
tone0

View File

@ -0,0 +1,5 @@
word_begin
word_end
word_middle
word_both
word_none

BIN
voices/zhizhe_emo/voc/ckpt/checkpoint_0.pth (Stored with Git LFS) Normal file

Binary file not shown.

BIN
voices/zhizhe_emo/voc/ckpt/checkpoint_1.pth (Stored with Git LFS) Normal file

Binary file not shown.

View File

@ -0,0 +1,188 @@
model_type: hifigan
Model:
###########################################################
# GENERATOR NETWORK ARCHITECTURE SETTING #
###########################################################
Generator:
params:
in_channels: 80
out_channels: 1
channels: 256
kernel_size: 7
upsample_scales: [10, 5, 2, 2]
upsample_kernal_sizes: [20, 11, 4, 4]
resblock_kernel_sizes: [3, 7, 11]
resblock_dilations:
- [1, 3, 5, 7]
- [1, 3, 5, 7]
- [1, 3, 5, 7]
bias: true
causal: true
nonlinear_activation: "LeakyReLU"
nonlinear_activation_params:
negative_slope: 0.1
use_weight_norm: true
optimizer:
type: Adam
params:
lr: 2.0e-4
betas: [0.5, 0.9]
weight_decay: 0.0
scheduler:
type: MultiStepLR
params:
gamma: 0.5
milestones:
- 200000
- 400000
- 600000
- 800000
###########################################################
# DISCRIMINATOR NETWORK ARCHITECTURE SETTING #
###########################################################
MultiScaleDiscriminator:
params:
scales: 3
downsample_pooling: "DWT"
downsample_pooling_params:
kernel_size: 4
stride: 2
padding: 2
discriminator_params:
in_channels: 1
out_channels: 1
kernel_sizes: [15, 41, 5, 3]
channels: 128
max_downsample_channels: 1024
max_groups: 16
bias: true
downsample_scales: [4, 4, 4, 4, 1]
nonlinear_activation: "LeakyReLU"
nonlinear_activation_params:
negative_slope: 0.1
follow_official_norm: true
optimizer:
type: Adam
params:
lr: 2.0e-4
betas: [0.5, 0.9]
weight_decay: 0.0
scheduler:
type: MultiStepLR
params:
gamma: 0.5
milestones:
- 200000
- 400000
- 600000
- 800000
MultiPeriodDiscriminator:
params:
periods: [2, 3, 5, 7, 11]
discriminator_params:
in_channels: 1
out_channels: 1
kernel_sizes: [5, 3]
channels: 32
downsample_scales: [3, 3, 3, 3, 1]
max_downsample_channels: 1024
bias: true
nonlinear_activation: "LeakyReLU"
nonlinear_activation_params:
negative_slope: 0.1
use_spectral_norm: false
optimizer:
type: Adam
params:
lr: 2.0e-4
betas: [0.5, 0.9]
weight_decay: 0.0
scheduler:
type: MultiStepLR
params:
gamma: 0.5
milestones:
- 200000
- 400000
- 600000
- 800000
####################################################
# LOSS SETTING #
####################################################
Loss:
generator_adv_loss:
enable: True
params:
average_by_discriminators: False
weights: 1.0
discriminator_adv_loss:
enable: True
params:
average_by_discriminators: False
weights: 1.0
stft_loss:
enable: False # Whether to use multi-resolution STFT loss.
mel_loss:
enable: True
params:
fs: 16000
fft_size: 2048
hop_size: 200
win_length: 1000
window: "hann"
num_mels: 80
fmin: 0
fmax: 8000
log_base: null
weights: 45.0
subband_stft_loss:
enable: False
params:
fft_sizes: [384, 683, 171] # List of FFT size for STFT-based loss.
hop_sizes: [35, 75, 15] # List of hop size for STFT-based loss
win_lengths: [150, 300, 60] # List of window length for STFT-based loss.
window: "hann_window" # Window function for STFT-based loss
feat_match_loss:
enable: True
params:
average_by_discriminators: false
average_by_layers: false
weights: 2.0
###########################################################
# DATA LOADER SETTING #
###########################################################
batch_size: 16
batch_max_steps: 9600 # Length of each audio in batch. Make sure dividable by hop_size.
pin_memory: True
num_workers: 2 # FIXME: setting this > 0 may get stuck on macOS
remove_short_samples: False
allow_cache: True
generator_grad_norm: -1
discriminator_grad_norm: -1
###########################################################
# INTERVAL SETTING #
###########################################################
generator_train_start_steps: 1 # Number of steps to start to train generator.
discriminator_train_start_steps: 0 # Number of steps to start to train discriminator.
train_max_steps: 2500000 # Number of training steps.
save_interval_steps: 20000 # Interval steps to save checkpoint.
eval_interval_steps: 10000 # Interval steps to evaluate the network.
log_interval_steps: 1000 # Interval steps to record the training log.
###########################################################
# OTHER SETTING #
###########################################################
num_save_intermediate_results: 4 # Number of results to be saved as intermediate results.

BIN
voices/zhizhe_emo/vocoder/pytorch_model.bin (Stored with Git LFS) Normal file

Binary file not shown.