# Mirror of:
#   https://www.modelscope.cn/iic/speech_sambert-hifigan_tts_zh-cn_16k.git
# Synced 2026-04-02 18:32:53 +08:00
# Original file: 28 lines, 487 B, YAML
# Audio processing configs
#
# Settings are grouped under the `audio_config` mapping in three sections:
# preprocessing, feature extraction, and normalization.

audio_config:
  # Preprocess
  wav_normalize: true
  trim_silence: true
  trim_silence_threshold_db: 60
  preemphasize: false

  # Feature extraction
  sampling_rate: 16000
  # hop_length and win_length are both smaller than n_fft. Presumably all
  # three are sample counts (200 / 16000 = 12.5 ms hop) -- TODO confirm
  # against the code that reads this file.
  hop_length: 200
  win_length: 1000
  n_fft: 2048
  n_mels: 80
  fmin: 0.0
  # Equals sampling_rate / 2.
  fmax: 8000.0
  phone_level_feature: true

  # Normalization
  norm_type: "mean_std"  # "mean_std" or "global"
  max_norm: 1.0
  symmetric: false
  min_level_db: -100.0
  ref_level_db: 20

  # NOTE(review): nesting reconstructed from a copy whose indentation was
  # lost; this key is assumed to belong to audio_config like the ones above.
  # Confirm against the consumer before relying on it.
  num_workers: 16