# speech_sambert-hifigan_tts_…/voices/F7/voc/config.yaml

# Loss terms for vocoder training. Every entry has an `enable` switch; each
# enabled entry also carries `params` and a scalar `weights`.
Loss:
  # Adversarial term on the discriminator side.
  discriminator_adv_loss:
    enable: true
    weights: 1.0
    params:
      average_by_discriminators: false

  # Feature-matching term.
  feat_match_loss:
    enable: true
    weights: 2.0
    params:
      average_by_discriminators: false
      average_by_layers: false

  # Adversarial term on the generator side.
  generator_adv_loss:
    enable: true
    weights: 1.0
    params:
      average_by_discriminators: false

  # Mel-spectrogram term; its weight (45.0) is the largest in this section.
  # fs / fft_size / win_length / hop_size / num_mels / fmin / fmax hold the
  # same numbers as sampling_rate / n_fft / win_length / hop_length / n_mels /
  # fmin / fmax under `audio_config` in this file.
  mel_loss:
    enable: true
    weights: 45.0
    params:
      fs: 16000
      fft_size: 2048
      win_length: 1000
      hop_size: 200
      window: hann
      num_mels: 80
      fmin: 0
      fmax: 8000
      log_base: null

  # Disabled; no params or weight are given.
  stft_loss:
    enable: false

  # Disabled; params are kept but there is no `weights` entry.
  subband_stft_loss:
    enable: false
    params:
      window: hann_window
      fft_sizes: [384, 683, 171]
      win_lengths: [150, 300, 60]
      hop_sizes: [35, 75, 15]
Model:
  # Generator: network hyper-parameters, then optimizer and LR schedule.
  Generator:
    params:
      # 80 input channels is the same number as num_mels / n_mels elsewhere
      # in this file.
      in_channels: 80
      out_channels: 1
      channels: 256
      kernel_size: 7
      bias: true
      causal: false
      use_weight_norm: true
      nonlinear_activation: LeakyReLU
      nonlinear_activation_params:
        negative_slope: 0.1
      # 10 * 5 * 2 * 2 = 200, the same number as hop_size / hop_length in
      # this file -- presumably these must stay equal; confirm before
      # changing either.
      upsample_scales: [10, 5, 2, 2]
      # NOTE(review): the key is spelled "kernal". It is kept exactly as-is
      # because the consumer presumably looks it up under this name --
      # confirm against the loading code before renaming.
      upsample_kernal_sizes: [20, 11, 4, 4]
      resblock_kernel_sizes: [3, 7, 11]
      # One dilation list per entry of resblock_kernel_sizes (three of each).
      resblock_dilations:
        - [1, 3, 5, 7]
        - [1, 3, 5, 7]
        - [1, 3, 5, 7]
    # Same optimizer settings as the two discriminators in this file.
    optimizer:
      type: Adam
      params:
        lr: 0.0002
        betas: [0.5, 0.9]
        weight_decay: 0.0
    # Same schedule as the two discriminators in this file.
    # Presumably the LR is multiplied by gamma at each milestone step --
    # confirm against the scheduler implementation.
    scheduler:
      type: MultiStepLR
      params:
        gamma: 0.5
        milestones: [200000, 400000, 600000, 800000]
  # Multi-period discriminator: one `discriminator_params` set shared across
  # the five entries listed in `periods`.
  MultiPeriodDiscriminator:
    params:
      periods: [2, 3, 5, 7, 11]
      discriminator_params:
        in_channels: 1
        out_channels: 1
        channels: 32
        max_downsample_channels: 1024
        kernel_sizes: [5, 3]
        downsample_scales: [3, 3, 3, 3, 1]
        bias: true
        use_spectral_norm: false
        nonlinear_activation: LeakyReLU
        nonlinear_activation_params:
          negative_slope: 0.1
    # Same optimizer settings as the Generator in this file.
    optimizer:
      type: Adam
      params:
        lr: 0.0002
        betas: [0.5, 0.9]
        weight_decay: 0.0
    # Same schedule as the Generator in this file.
    scheduler:
      type: MultiStepLR
      params:
        gamma: 0.5
        milestones: [200000, 400000, 600000, 800000]
  # Multi-scale discriminator: `scales` sub-discriminators configured by
  # `discriminator_params`, with a pooling stage named by
  # `downsample_pooling`.
  MultiScaleDiscriminator:
    params:
      scales: 3
      follow_official_norm: true
      downsample_pooling: DWT
      # NOTE(review): whether these pooling params are used when the pooling
      # type is DWT cannot be told from this file -- confirm.
      downsample_pooling_params:
        kernel_size: 4
        stride: 2
        padding: 2
      discriminator_params:
        in_channels: 1
        out_channels: 1
        channels: 128
        max_downsample_channels: 1024
        max_groups: 16
        kernel_sizes: [15, 41, 5, 3]
        downsample_scales: [4, 4, 4, 4, 1]
        bias: true
        nonlinear_activation: LeakyReLU
        nonlinear_activation_params:
          negative_slope: 0.1
    # Same optimizer settings as the Generator in this file.
    optimizer:
      type: Adam
      params:
        lr: 0.0002
        betas: [0.5, 0.9]
        weight_decay: 0.0
    # Same schedule as the Generator in this file.
    scheduler:
      type: MultiStepLR
      params:
        gamma: 0.5
        milestones: [200000, 400000, 600000, 800000]
# --- Run identity / provenance ---
model_type: hifigan
create_time: '2022-12-26 11:11:35'
# Quoted so it can never be read as anything but a string.
git_revision_hash: '388243c0c173756d1eb34783c02cec4c302cdc25'

# --- Audio / feature-extraction settings ---
# sampling_rate, n_fft, win_length, hop_length, n_mels, fmin and fmax hold the
# same numbers as the `mel_loss` params in this file.
audio_config:
  sampling_rate: 16000
  n_fft: 2048
  win_length: 1000
  hop_length: 200
  n_mels: 80
  fmin: 0.0
  fmax: 8000.0
  preemphasize: false
  wav_normalize: true
  trim_silence: true
  trim_silence_threshold_db: 60
  norm_type: mean_std
  symmetric: false
  max_norm: 1.0
  min_level_db: -100.0
  ref_level_db: 20
  phone_level_feature: true
  # Separate key from the top-level `num_workers` below (16 here, 2 there).
  num_workers: 16

# --- Batching / data loading ---
batch_size: 16
# 9600 = 48 * 200, i.e. a whole multiple of hop_length -- presumably
# required; confirm before changing.
batch_max_steps: 9600
num_workers: 2
pin_memory: true
allow_cache: true
remove_short_samples: false

# --- Training schedule (values are step counts) ---
train_max_steps: 2500000
generator_train_start_steps: 1
discriminator_train_start_steps: 0
# NOTE(review): -1 presumably means "no gradient clipping" -- confirm.
generator_grad_norm: -1
discriminator_grad_norm: -1

# --- Logging / evaluation / checkpoint cadence ---
log_interval_steps: 1000
eval_interval_steps: 10000
save_interval_steps: 20000
num_save_intermediate_results: 4