# Mirror of https://www.modelscope.cn/IndexTeam/IndexTTS-2.git
# Synced 2026-04-02 19:52:53 +08:00
# Commit: Upload config.yaml to ModelScope hub
# File: config.yaml (normal file, 120 lines added; hunk @@ -0,0 +1,120 @@)
# Settings grouped under `dataset`.
# NOTE(review): the scraped diff view dropped all indentation. The nesting
# below is reconstructed: the repeated `sample_rate` key means `mel` must be
# its own mapping. Confirm against the upstream file.
dataset:
  bpe_model: bpe.model
  sample_rate: 24000
  squeeze: false
  mel:
    sample_rate: 24000
    n_fft: 1024
    hop_length: 256
    win_length: 1024
    n_mels: 100
    mel_fmin: 0
    normalize: false

# Settings grouped under `gpt`.
# NOTE(review): indentation was lost in the scraped diff view. The two
# `*_condition_module` mappings are reconstructed from their repeated keys
# (`output_size`, `linear_units`, ...). Confirm against the upstream file.
gpt:
  model_dim: 1280
  max_mel_tokens: 1815
  max_text_tokens: 600
  heads: 20
  use_mel_codes_as_input: true
  mel_length_compression: 1024
  layers: 24
  number_text_tokens: 12000
  # 8194 = 8192 + 2; start/stop mel tokens below take ids 8192 and 8193.
  number_mel_codes: 8194
  start_mel_token: 8192
  stop_mel_token: 8193
  start_text_token: 0
  stop_text_token: 1
  train_solo_embeddings: false
  condition_type: "conformer_perceiver"
  condition_module:
    output_size: 512
    linear_units: 2048
    attention_heads: 8
    num_blocks: 6
    input_layer: "conv2d2"
    perceiver_mult: 2
  emo_condition_module:
    output_size: 512
    linear_units: 1024
    attention_heads: 4
    num_blocks: 4
    input_layer: "conv2d2"
    perceiver_mult: 2

# Settings grouped under `semantic_codec`.
# NOTE(review): indentation reconstructed from the flattened diff view; all
# six keys are assumed to be direct children. Confirm against upstream.
semantic_codec:
  codebook_size: 8192
  hidden_size: 1024
  codebook_dim: 8
  vocos_dim: 384
  vocos_intermediate_dim: 2048
  vocos_num_layers: 12

# Settings grouped under `s2mel`.
# NOTE(review): indentation was lost in the scraped diff view. Nesting is
# reconstructed from parent keys with empty values, repeated keys
# (`in_channels`, `f0_condition`, `hidden_dim`, `style_condition`, ...) and
# the blank row after `fmax`. Confirm against the upstream file.
s2mel:
  preprocess_params:
    # sr / n_mels / hop_length match the numbers in the vocoder name
    # ("22khz_80band_256x"); presumably they must stay in sync. TODO confirm.
    sr: 22050
    spect_params:
      n_fft: 1024
      win_length: 1024
      hop_length: 256
      n_mels: 80
      fmin: 0
      # Quoted string "None", not YAML null. Presumably the consumer compares
      # against the literal string; verify before changing.
      fmax: "None"

  dit_type: "DiT"
  reg_loss_type: "l1"
  style_encoder:
    dim: 192
  length_regulator:
    channels: 512
    is_discrete: false
    in_channels: 1024
    content_codebook_size: 2048
    sampling_ratios: [1, 1, 1, 1]
    vector_quantize: false
    n_codebooks: 1
    quantizer_dropout: 0.0
    f0_condition: false
    n_f0_bins: 512
  DiT:
    hidden_dim: 512
    num_heads: 8
    depth: 13
    class_dropout_prob: 0.1
    block_size: 8192
    in_channels: 80
    style_condition: true
    final_layer_type: 'wavenet'
    target: 'mel'
    content_dim: 512
    content_codebook_size: 1024
    content_type: 'discrete'
    f0_condition: false
    n_f0_bins: 512
    content_codebooks: 1
    is_causal: false
    long_skip_connection: true
    zero_prompt_speech_token: false
    time_as_token: false
    style_as_token: false
    uvit_skip_connection: true
    add_resblock_in_transformer: false
  wavenet:
    hidden_dim: 512
    num_layers: 8
    kernel_size: 5
    dilation_rate: 1
    p_dropout: 0.2
    style_condition: true

# Top-level file/directory references. All values are relative names;
# presumably resolved against the model directory (verify against the loader).
gpt_checkpoint: gpt.pth
w2v_stat: wav2vec2bert_stats.pt
s2mel_checkpoint: s2mel.pth
emo_matrix: feat2.pt
spk_matrix: feat1.pt
emo_num: [3, 17, 2, 8, 4, 5, 10, 24]
# Trailing slash is part of the value; keep it as written.
qwen_emo_path: qwen0.6bemo4-merge/
# NOTE(review): indentation reconstructed from the flattened diff view.
# `type` and `name` are assumed to belong to `vocoder` and `version` to be a
# top-level key. Confirm against the upstream file.
vocoder:
  type: "bigvgan"
  name: "nvidia/bigvgan_v2_22khz_80band_256x"
# Unquoted, so this parses as the float 2.0. Left as-is because the type the
# consumer expects is not visible here.
version: 2.0
# Page footer from the mirror's web view (presumably not part of config.yaml):
# "Reference in New Issue" / "Block a user"