# Mirror of
# https://www.modelscope.cn/speech_tts/speech_sambert-hifigan_tts_chuangirl_Sichuan_16k.git
# Synced 2026-04-02 10:32:53 +08:00
# Listing metadata for the original file: 132 lines, 3.7 KiB, YAML
# Loss configuration. Every entry carries an `enable` flag; the four enabled
# entries also carry `params` and a scalar `weights` value. `stft_loss` and
# `subband_stft_loss` are disabled.
#
# NOTE(review): the nesting below was reconstructed from a flattened listing
# (keys are alphabetically sorted inside each mapping, which fixes most parent
# boundaries). Under `subband_stft_loss.params`, `win_lengths` and `window`
# are assumed to be children of `params` -- confirm against the upstream file.
Loss:
  discriminator_adv_loss:
    enable: true
    params: {average_by_discriminators: false}
    weights: 1.0
  feat_match_loss:
    enable: true
    params: {average_by_discriminators: false, average_by_layers: false}
    weights: 2.0
  generator_adv_loss:
    enable: true
    params: {average_by_discriminators: false}
    weights: 1.0
  mel_loss:
    enable: true
    # fs, fft_size, hop_size, win_length, num_mels, fmin and fmax carry the
    # same numbers as the corresponding `audio_config` entries in this file.
    params:
      fft_size: 2048
      fmax: 8000
      fmin: 0
      fs: 16000
      hop_size: 200
      log_base: null
      num_mels: 80
      win_length: 1000
      window: hann
    weights: 45.0
  stft_loss: {enable: false}
  subband_stft_loss:
    enable: false
    params:
      fft_sizes: [384, 683, 171]
      hop_sizes: [35, 75, 15]
      win_lengths: [150, 300, 60]
      window: hann_window
# Model configuration: one generator and two discriminators. Each of the three
# entries has the same layout -- `optimizer` (params + type), `params`
# (network hyper-parameters) and `scheduler` (params + type) -- and all three
# use identical optimizer and scheduler values.
#
# NOTE(review): the nesting below was reconstructed from a flattened listing
# using the alphabetical key order inside each mapping; verify against the
# upstream file before relying on it.
Model:
  Generator:
    optimizer:
      params:
        betas: [0.5, 0.9]
        lr: 0.0002
        weight_decay: 0.0
      type: Adam
    params:
      bias: true
      causal: false
      channels: 256
      in_channels: 80
      kernel_size: 7
      nonlinear_activation: LeakyReLU
      nonlinear_activation_params: {negative_slope: 0.1}
      out_channels: 1
      resblock_dilations:
        - [1, 3, 5, 7]
        - [1, 3, 5, 7]
        - [1, 3, 5, 7]
      resblock_kernel_sizes: [3, 7, 11]
      # NOTE(review): "kernal" is spelled this way in the original key. It is
      # kept byte-for-byte because the consumer presumably looks up this exact
      # name -- do not "fix" the spelling without checking the reader code.
      upsample_kernal_sizes: [20, 11, 4, 4]
      # Product of the scales is 10 * 5 * 2 * 2 = 200, the same number as
      # `hop_size` / `hop_length` elsewhere in this file.
      upsample_scales: [10, 5, 2, 2]
      use_weight_norm: true
    scheduler:
      params:
        gamma: 0.5
        milestones: [200000, 400000, 600000, 800000]
      type: MultiStepLR
  MultiPeriodDiscriminator:
    optimizer:
      params:
        betas: [0.5, 0.9]
        lr: 0.0002
        weight_decay: 0.0
      type: Adam
    params:
      discriminator_params:
        bias: true
        channels: 32
        downsample_scales: [3, 3, 3, 3, 1]
        in_channels: 1
        kernel_sizes: [5, 3]
        max_downsample_channels: 1024
        nonlinear_activation: LeakyReLU
        nonlinear_activation_params: {negative_slope: 0.1}
        out_channels: 1
        use_spectral_norm: false
      periods: [2, 3, 5, 7, 11]
    scheduler:
      params:
        gamma: 0.5
        milestones: [200000, 400000, 600000, 800000]
      type: MultiStepLR
  MultiScaleDiscriminator:
    optimizer:
      params:
        betas: [0.5, 0.9]
        lr: 0.0002
        weight_decay: 0.0
      type: Adam
    params:
      discriminator_params:
        bias: true
        channels: 128
        downsample_scales: [4, 4, 4, 4, 1]
        in_channels: 1
        kernel_sizes: [15, 41, 5, 3]
        max_downsample_channels: 1024
        max_groups: 16
        nonlinear_activation: LeakyReLU
        nonlinear_activation_params: {negative_slope: 0.1}
        out_channels: 1
      downsample_pooling: DWT
      downsample_pooling_params: {kernel_size: 4, padding: 2, stride: 2}
      follow_official_norm: true
      scales: 3
    scheduler:
      params:
        gamma: 0.5
        milestones: [200000, 400000, 600000, 800000]
      type: MultiStepLR
# Top-level run settings: audio feature parameters, batching, step counters
# and provenance fields (`create_time`, `git_revision_hash`).
#
# NOTE(review): these keys were recovered from a flattened listing; they are
# placed at the document root because they continue the alphabetical order of
# the root mapping. Verify against the upstream file.
allow_cache: true
audio_config:
  fmax: 8000.0
  fmin: 0.0
  hop_length: 200
  max_norm: 1.0
  min_level_db: -100.0
  n_fft: 2048
  n_mels: 80
  norm_type: mean_std
  # Distinct from the root-level `num_workers` (2) further down.
  num_workers: 16
  phone_level_feature: true
  preemphasize: false
  ref_level_db: 20
  sampling_rate: 16000
  symmetric: false
  trim_silence: true
  trim_silence_threshold_db: 60
  wav_normalize: true
  win_length: 1000
batch_max_steps: 9600
batch_size: 16
# Quoted so the timestamp stays a string rather than being parsed as a date.
create_time: '2022-12-26 11:11:35'
# NOTE(review): -1 presumably disables gradient clipping -- confirm in the
# training code.
discriminator_grad_norm: -1
discriminator_train_start_steps: 0
eval_interval_steps: 10000
generator_grad_norm: -1
generator_train_start_steps: 1
# Quoted defensively: commit hashes should never be subject to implicit typing.
git_revision_hash: '388243c0c173756d1eb34783c02cec4c302cdc25'
log_interval_steps: 1000
model_type: hifigan
num_save_intermediate_results: 4
num_workers: 2
pin_memory: true
remove_short_samples: false
save_interval_steps: 20000
train_max_steps: 2500000