# speech_sambert-hifigan_tts_…/voices/zhizhe_emo/audio_config.yaml

# Audio processing configs

# Audio front-end settings, grouped as: waveform preprocessing, spectral
# feature extraction, and feature normalization.
audio_config:
  # Preprocess
  wav_normalize: true
  trim_silence: true
  # NOTE(review): presumably the level (in dB) used to decide what counts as
  # silence when trim_silence is enabled; confirm sign convention with the
  # consumer.
  trim_silence_threshold_db: 60
  preemphasize: false

  # Feature extraction
  sampling_rate: 16000
  # Assuming hop_length / win_length are expressed in samples (TODO confirm),
  # at 16000 Hz they correspond to a 12.5 ms hop and a 62.5 ms window.
  hop_length: 200
  win_length: 1000
  n_fft: 2048
  n_mels: 80
  fmin: 0.0
  # Equal to sampling_rate / 2.
  fmax: 8000.0
  phone_level_feature: true

  # Normalization
  norm_type: "mean_std"  # "mean_std" or "global"
  max_norm: 1.0
  symmetric: false
  min_level_db: -100.0
  ref_level_db: 20

  # NOTE(review): presumably the number of parallel workers used during
  # audio processing; verify against the consumer.
  num_workers: 16