# Files
# speech_sambert-hifigan_tts_…/voices/zhizhe_emo/audio_config.yaml
# jiaqi.sjq d551729afa update
# 2022-12-14 16:34:44 +08:00
#
# 28 lines
# 487 B
# YAML

# Audio processing configs.
# Every setting is a child of the top-level `audio_config` mapping, so the
# keys below must stay indented by two spaces; at column 0 they would become
# unrelated top-level keys and `audio_config` itself would load as null.
audio_config:
  # Preprocess
  wav_normalize: true
  trim_silence: true
  trim_silence_threshold_db: 60
  preemphasize: false

  # Feature extraction
  sampling_rate: 16000
  # NOTE(review): hop_length / win_length / n_fft are presumably sample
  # counts; confirm against the code that consumes this file.
  hop_length: 200
  win_length: 1000
  n_fft: 2048
  n_mels: 80
  fmin: 0.0
  # Equal to half of sampling_rate.
  fmax: 8000.0
  phone_level_feature: true

  # Normalization
  norm_type: "mean_std"  # "mean_std" or "global"
  max_norm: 1.0
  symmetric: false
  min_level_db: -100.0
  ref_level_db: 20

  # NOTE(review): assumed to belong to audio_config like the keys above;
  # verify that the consumer does not read it as a top-level key.
  num_workers: 16