mirror of
https://www.modelscope.cn/Tencent-Hunyuan/HunyuanVideo-1.5.git
synced 2026-04-02 22:02:52 +08:00
Upload folder using ModelScope SDK (batch 1/1)
This commit is contained in:
43
transformer/1080p_sr_distilled/config.json
Normal file
43
transformer/1080p_sr_distilled/config.json
Normal file
@ -0,0 +1,43 @@
|
||||
{
|
||||
"_class_name": "HunyuanVideo_1_5_DiffusionTransformer",
|
||||
"_diffusers_version": "0.35.0",
|
||||
"attn_mode": "flash",
|
||||
"attn_param": null,
|
||||
"concat_condition": false,
|
||||
"glyph_byT5_v2": true,
|
||||
"guidance_embed": false,
|
||||
"heads_num": 16,
|
||||
"hidden_size": 2048,
|
||||
"ideal_resolution": "1080p",
|
||||
"ideal_task": null,
|
||||
"in_channels": 98,
|
||||
"is_reshape_temporal_channels": false,
|
||||
"mlp_act_type": "gelu_tanh",
|
||||
"mlp_width_ratio": 4,
|
||||
"mm_double_blocks_depth": 54,
|
||||
"mm_single_blocks_depth": 0,
|
||||
"out_channels": 32,
|
||||
"patch_size": [
|
||||
1,
|
||||
1,
|
||||
1
|
||||
],
|
||||
"qk_norm": true,
|
||||
"qk_norm_type": "rms",
|
||||
"qkv_bias": true,
|
||||
"rope_dim_list": [
|
||||
16,
|
||||
56,
|
||||
56
|
||||
],
|
||||
"rope_theta": 256,
|
||||
"text_pool_type": null,
|
||||
"text_projection": "single_refiner",
|
||||
"text_states_dim": 3584,
|
||||
"text_states_dim_2": null,
|
||||
"use_attention_mask": true,
|
||||
"use_cond_type_embedding": true,
|
||||
"use_meanflow": true,
|
||||
"vision_projection": "linear",
|
||||
"vision_states_dim": 1152
|
||||
}
|
||||
@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:691dc1b81b49d942e2eb95e6d61b91321e17b868536eaa4e843db6e406390411
|
||||
size 33325793672
|
||||
43
transformer/480p_i2v/config.json
Normal file
43
transformer/480p_i2v/config.json
Normal file
@ -0,0 +1,43 @@
|
||||
{
|
||||
"_class_name": "HunyuanVideo_1_5_DiffusionTransformer",
|
||||
"_diffusers_version": "0.35.0",
|
||||
"attn_mode": "flash",
|
||||
"attn_param": null,
|
||||
"concat_condition": true,
|
||||
"glyph_byT5_v2": true,
|
||||
"guidance_embed": false,
|
||||
"heads_num": 16,
|
||||
"hidden_size": 2048,
|
||||
"ideal_resolution": "480p",
|
||||
"ideal_task": "i2v",
|
||||
"in_channels": 32,
|
||||
"is_reshape_temporal_channels": false,
|
||||
"mlp_act_type": "gelu_tanh",
|
||||
"mlp_width_ratio": 4,
|
||||
"mm_double_blocks_depth": 54,
|
||||
"mm_single_blocks_depth": 0,
|
||||
"out_channels": 32,
|
||||
"patch_size": [
|
||||
1,
|
||||
1,
|
||||
1
|
||||
],
|
||||
"qk_norm": true,
|
||||
"qk_norm_type": "rms",
|
||||
"qkv_bias": true,
|
||||
"rope_dim_list": [
|
||||
16,
|
||||
56,
|
||||
56
|
||||
],
|
||||
"rope_theta": 256,
|
||||
"text_pool_type": null,
|
||||
"text_projection": "single_refiner",
|
||||
"text_states_dim": 3584,
|
||||
"text_states_dim_2": null,
|
||||
"use_attention_mask": true,
|
||||
"use_cond_type_embedding": true,
|
||||
"use_meanflow": false,
|
||||
"vision_projection": "linear",
|
||||
"vision_states_dim": 1152
|
||||
}
|
||||
3
transformer/480p_i2v/diffusion_pytorch_model.safetensors
Normal file
3
transformer/480p_i2v/diffusion_pytorch_model.safetensors
Normal file
@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:f4d7d3e61404f5c742b57260f1b6a3bc41bb12fc880438252bf37913487dec56
|
||||
size 33306632192
|
||||
43
transformer/480p_i2v_distilled/config.json
Normal file
43
transformer/480p_i2v_distilled/config.json
Normal file
@ -0,0 +1,43 @@
|
||||
{
|
||||
"_class_name": "HunyuanVideo_1_5_DiffusionTransformer",
|
||||
"_diffusers_version": "0.35.0",
|
||||
"attn_mode": "flash",
|
||||
"attn_param": null,
|
||||
"concat_condition": true,
|
||||
"glyph_byT5_v2": true,
|
||||
"guidance_embed": false,
|
||||
"heads_num": 16,
|
||||
"hidden_size": 2048,
|
||||
"ideal_resolution": "480p",
|
||||
"ideal_task": "i2v",
|
||||
"in_channels": 32,
|
||||
"is_reshape_temporal_channels": false,
|
||||
"mlp_act_type": "gelu_tanh",
|
||||
"mlp_width_ratio": 4,
|
||||
"mm_double_blocks_depth": 54,
|
||||
"mm_single_blocks_depth": 0,
|
||||
"out_channels": 32,
|
||||
"patch_size": [
|
||||
1,
|
||||
1,
|
||||
1
|
||||
],
|
||||
"qk_norm": true,
|
||||
"qk_norm_type": "rms",
|
||||
"qkv_bias": true,
|
||||
"rope_dim_list": [
|
||||
16,
|
||||
56,
|
||||
56
|
||||
],
|
||||
"rope_theta": 256,
|
||||
"text_pool_type": null,
|
||||
"text_projection": "single_refiner",
|
||||
"text_states_dim": 3584,
|
||||
"text_states_dim_2": null,
|
||||
"use_attention_mask": true,
|
||||
"use_cond_type_embedding": true,
|
||||
"use_meanflow": false,
|
||||
"vision_projection": "linear",
|
||||
"vision_states_dim": 1152
|
||||
}
|
||||
@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:f51fe1c4302be44e25afcd1a9186385606482da8a77e2ee7793b0e8385b9cd57
|
||||
size 33306632192
|
||||
43
transformer/480p_t2v/config.json
Normal file
43
transformer/480p_t2v/config.json
Normal file
@ -0,0 +1,43 @@
|
||||
{
|
||||
"_class_name": "HunyuanVideo_1_5_DiffusionTransformer",
|
||||
"_diffusers_version": "0.35.0",
|
||||
"attn_mode": "flash",
|
||||
"attn_param": null,
|
||||
"concat_condition": true,
|
||||
"glyph_byT5_v2": true,
|
||||
"guidance_embed": false,
|
||||
"heads_num": 16,
|
||||
"hidden_size": 2048,
|
||||
"ideal_resolution": "480p",
|
||||
"ideal_task": "t2v",
|
||||
"in_channels": 32,
|
||||
"is_reshape_temporal_channels": false,
|
||||
"mlp_act_type": "gelu_tanh",
|
||||
"mlp_width_ratio": 4,
|
||||
"mm_double_blocks_depth": 54,
|
||||
"mm_single_blocks_depth": 0,
|
||||
"out_channels": 32,
|
||||
"patch_size": [
|
||||
1,
|
||||
1,
|
||||
1
|
||||
],
|
||||
"qk_norm": true,
|
||||
"qk_norm_type": "rms",
|
||||
"qkv_bias": true,
|
||||
"rope_dim_list": [
|
||||
16,
|
||||
56,
|
||||
56
|
||||
],
|
||||
"rope_theta": 256,
|
||||
"text_pool_type": null,
|
||||
"text_projection": "single_refiner",
|
||||
"text_states_dim": 3584,
|
||||
"text_states_dim_2": null,
|
||||
"use_attention_mask": true,
|
||||
"use_cond_type_embedding": true,
|
||||
"use_meanflow": false,
|
||||
"vision_projection": "linear",
|
||||
"vision_states_dim": 1152
|
||||
}
|
||||
3
transformer/480p_t2v/diffusion_pytorch_model.safetensors
Normal file
3
transformer/480p_t2v/diffusion_pytorch_model.safetensors
Normal file
@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:71f9affa1115fef2b14bd41fba30eab966fe80c9ed98e0fcba495dbc6d8fff86
|
||||
size 33306632192
|
||||
43
transformer/480p_t2v_distilled/config.json
Normal file
43
transformer/480p_t2v_distilled/config.json
Normal file
@ -0,0 +1,43 @@
|
||||
{
|
||||
"_class_name": "HunyuanVideo_1_5_DiffusionTransformer",
|
||||
"_diffusers_version": "0.35.0",
|
||||
"attn_mode": "flash",
|
||||
"attn_param": null,
|
||||
"concat_condition": true,
|
||||
"glyph_byT5_v2": true,
|
||||
"guidance_embed": false,
|
||||
"heads_num": 16,
|
||||
"hidden_size": 2048,
|
||||
"ideal_resolution": "480p",
|
||||
"ideal_task": "t2v",
|
||||
"in_channels": 32,
|
||||
"is_reshape_temporal_channels": false,
|
||||
"mlp_act_type": "gelu_tanh",
|
||||
"mlp_width_ratio": 4,
|
||||
"mm_double_blocks_depth": 54,
|
||||
"mm_single_blocks_depth": 0,
|
||||
"out_channels": 32,
|
||||
"patch_size": [
|
||||
1,
|
||||
1,
|
||||
1
|
||||
],
|
||||
"qk_norm": true,
|
||||
"qk_norm_type": "rms",
|
||||
"qkv_bias": true,
|
||||
"rope_dim_list": [
|
||||
16,
|
||||
56,
|
||||
56
|
||||
],
|
||||
"rope_theta": 256,
|
||||
"text_pool_type": null,
|
||||
"text_projection": "single_refiner",
|
||||
"text_states_dim": 3584,
|
||||
"text_states_dim_2": null,
|
||||
"use_attention_mask": true,
|
||||
"use_cond_type_embedding": true,
|
||||
"use_meanflow": false,
|
||||
"vision_projection": "linear",
|
||||
"vision_states_dim": 1152
|
||||
}
|
||||
@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:0f35dc10a4037a618b22fef4ee20f8a9d972b4cf2e684764ed8f442fc7a2583f
|
||||
size 33306632192
|
||||
43
transformer/720p_i2v/config.json
Normal file
43
transformer/720p_i2v/config.json
Normal file
@ -0,0 +1,43 @@
|
||||
{
|
||||
"_class_name": "HunyuanVideo_1_5_DiffusionTransformer",
|
||||
"_diffusers_version": "0.35.0",
|
||||
"attn_mode": "flash",
|
||||
"attn_param": null,
|
||||
"concat_condition": true,
|
||||
"glyph_byT5_v2": true,
|
||||
"guidance_embed": false,
|
||||
"heads_num": 16,
|
||||
"hidden_size": 2048,
|
||||
"ideal_resolution": "720p",
|
||||
"ideal_task": "i2v",
|
||||
"in_channels": 32,
|
||||
"is_reshape_temporal_channels": false,
|
||||
"mlp_act_type": "gelu_tanh",
|
||||
"mlp_width_ratio": 4,
|
||||
"mm_double_blocks_depth": 54,
|
||||
"mm_single_blocks_depth": 0,
|
||||
"out_channels": 32,
|
||||
"patch_size": [
|
||||
1,
|
||||
1,
|
||||
1
|
||||
],
|
||||
"qk_norm": true,
|
||||
"qk_norm_type": "rms",
|
||||
"qkv_bias": true,
|
||||
"rope_dim_list": [
|
||||
16,
|
||||
56,
|
||||
56
|
||||
],
|
||||
"rope_theta": 256,
|
||||
"text_pool_type": null,
|
||||
"text_projection": "single_refiner",
|
||||
"text_states_dim": 3584,
|
||||
"text_states_dim_2": null,
|
||||
"use_attention_mask": true,
|
||||
"use_cond_type_embedding": true,
|
||||
"use_meanflow": false,
|
||||
"vision_projection": "linear",
|
||||
"vision_states_dim": 1152
|
||||
}
|
||||
3
transformer/720p_i2v/diffusion_pytorch_model.safetensors
Normal file
3
transformer/720p_i2v/diffusion_pytorch_model.safetensors
Normal file
@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:0ffd6e2e1c2de585fd011ace1a64105804830aa331ddb25a2fb4a32497f159a4
|
||||
size 33306632192
|
||||
43
transformer/720p_i2v_distilled/config.json
Normal file
43
transformer/720p_i2v_distilled/config.json
Normal file
@ -0,0 +1,43 @@
|
||||
{
|
||||
"_class_name": "HunyuanVideo_1_5_DiffusionTransformer",
|
||||
"_diffusers_version": "0.35.0",
|
||||
"attn_mode": "flash",
|
||||
"attn_param": null,
|
||||
"concat_condition": true,
|
||||
"glyph_byT5_v2": true,
|
||||
"guidance_embed": false,
|
||||
"heads_num": 16,
|
||||
"hidden_size": 2048,
|
||||
"ideal_resolution": "720p",
|
||||
"ideal_task": "i2v",
|
||||
"in_channels": 32,
|
||||
"is_reshape_temporal_channels": false,
|
||||
"mlp_act_type": "gelu_tanh",
|
||||
"mlp_width_ratio": 4,
|
||||
"mm_double_blocks_depth": 54,
|
||||
"mm_single_blocks_depth": 0,
|
||||
"out_channels": 32,
|
||||
"patch_size": [
|
||||
1,
|
||||
1,
|
||||
1
|
||||
],
|
||||
"qk_norm": true,
|
||||
"qk_norm_type": "rms",
|
||||
"qkv_bias": true,
|
||||
"rope_dim_list": [
|
||||
16,
|
||||
56,
|
||||
56
|
||||
],
|
||||
"rope_theta": 256,
|
||||
"text_pool_type": null,
|
||||
"text_projection": "single_refiner",
|
||||
"text_states_dim": 3584,
|
||||
"text_states_dim_2": null,
|
||||
"use_attention_mask": true,
|
||||
"use_cond_type_embedding": true,
|
||||
"use_meanflow": false,
|
||||
"vision_projection": "linear",
|
||||
"vision_states_dim": 1152
|
||||
}
|
||||
@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:542a9a4367ebfe5c584d925308f63f11070d044e3615b07af95edd4e0500240a
|
||||
size 33306632192
|
||||
67
transformer/720p_i2v_distilled_sparse/config.json
Normal file
67
transformer/720p_i2v_distilled_sparse/config.json
Normal file
@ -0,0 +1,67 @@
|
||||
{
|
||||
"_class_name": "HunyuanVideo_1_5_DiffusionTransformer",
|
||||
"_diffusers_version": "0.35.0",
|
||||
"attn_mode": "flex-block-attn",
|
||||
"attn_param": {
|
||||
"attn_mask_share_within_head": 0,
|
||||
"attn_pad_type": "zero",
|
||||
"attn_sparse_type": "ssta",
|
||||
"attn_use_text_mask": 1,
|
||||
"ssta_adaptive_pool": null,
|
||||
"ssta_lambda": 0.7,
|
||||
"ssta_sampling_type": "importance",
|
||||
"ssta_threshold": 0.0,
|
||||
"ssta_topk": 64,
|
||||
"tile_size": [
|
||||
6,
|
||||
8,
|
||||
8
|
||||
],
|
||||
"win_ratio": 10,
|
||||
"win_size": [
|
||||
[
|
||||
3,
|
||||
3,
|
||||
3
|
||||
]
|
||||
],
|
||||
"win_type": "fixed"
|
||||
},
|
||||
"concat_condition": true,
|
||||
"glyph_byT5_v2": true,
|
||||
"guidance_embed": false,
|
||||
"heads_num": 16,
|
||||
"hidden_size": 2048,
|
||||
"ideal_resolution": "720p",
|
||||
"ideal_task": "i2v",
|
||||
"in_channels": 32,
|
||||
"is_reshape_temporal_channels": false,
|
||||
"mlp_act_type": "gelu_tanh",
|
||||
"mlp_width_ratio": 4,
|
||||
"mm_double_blocks_depth": 54,
|
||||
"mm_single_blocks_depth": 0,
|
||||
"out_channels": 32,
|
||||
"patch_size": [
|
||||
1,
|
||||
1,
|
||||
1
|
||||
],
|
||||
"qk_norm": true,
|
||||
"qk_norm_type": "rms",
|
||||
"qkv_bias": true,
|
||||
"rope_dim_list": [
|
||||
16,
|
||||
56,
|
||||
56
|
||||
],
|
||||
"rope_theta": 256,
|
||||
"text_pool_type": null,
|
||||
"text_projection": "single_refiner",
|
||||
"text_states_dim": 3584,
|
||||
"text_states_dim_2": null,
|
||||
"use_attention_mask": true,
|
||||
"use_cond_type_embedding": true,
|
||||
"use_meanflow": false,
|
||||
"vision_projection": "linear",
|
||||
"vision_states_dim": 1152
|
||||
}
|
||||
@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:47345844cc15df1e64a38cc9715fa31471c2c1c68a2857404eda027d60f355a6
|
||||
size 33306632192
|
||||
43
transformer/720p_sr_distilled/config.json
Normal file
43
transformer/720p_sr_distilled/config.json
Normal file
@ -0,0 +1,43 @@
|
||||
{
|
||||
"_class_name": "HunyuanVideo_1_5_DiffusionTransformer",
|
||||
"_diffusers_version": "0.35.0",
|
||||
"attn_mode": "flash",
|
||||
"attn_param": null,
|
||||
"concat_condition": false,
|
||||
"glyph_byT5_v2": true,
|
||||
"guidance_embed": false,
|
||||
"heads_num": 16,
|
||||
"hidden_size": 2048,
|
||||
"ideal_resolution": "720p",
|
||||
"ideal_task": null,
|
||||
"in_channels": 98,
|
||||
"is_reshape_temporal_channels": false,
|
||||
"mlp_act_type": "gelu_tanh",
|
||||
"mlp_width_ratio": 4,
|
||||
"mm_double_blocks_depth": 54,
|
||||
"mm_single_blocks_depth": 0,
|
||||
"out_channels": 32,
|
||||
"patch_size": [
|
||||
1,
|
||||
1,
|
||||
1
|
||||
],
|
||||
"qk_norm": true,
|
||||
"qk_norm_type": "rms",
|
||||
"qkv_bias": true,
|
||||
"rope_dim_list": [
|
||||
16,
|
||||
56,
|
||||
56
|
||||
],
|
||||
"rope_theta": 256,
|
||||
"text_pool_type": null,
|
||||
"text_projection": "single_refiner",
|
||||
"text_states_dim": 3584,
|
||||
"text_states_dim_2": null,
|
||||
"use_attention_mask": true,
|
||||
"use_cond_type_embedding": true,
|
||||
"use_meanflow": true,
|
||||
"vision_projection": "linear",
|
||||
"vision_states_dim": 1152
|
||||
}
|
||||
@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:8fcab24a2404731a5e42d5f4adf7731a69771a3b2fc7786f90233f599947794a
|
||||
size 33325793672
|
||||
43
transformer/720p_t2v/config.json
Normal file
43
transformer/720p_t2v/config.json
Normal file
@ -0,0 +1,43 @@
|
||||
{
|
||||
"_class_name": "HunyuanVideo_1_5_DiffusionTransformer",
|
||||
"_diffusers_version": "0.35.0",
|
||||
"attn_mode": "flash",
|
||||
"attn_param": null,
|
||||
"concat_condition": true,
|
||||
"glyph_byT5_v2": true,
|
||||
"guidance_embed": false,
|
||||
"heads_num": 16,
|
||||
"hidden_size": 2048,
|
||||
"ideal_resolution": "720p",
|
||||
"ideal_task": "t2v",
|
||||
"in_channels": 32,
|
||||
"is_reshape_temporal_channels": false,
|
||||
"mlp_act_type": "gelu_tanh",
|
||||
"mlp_width_ratio": 4,
|
||||
"mm_double_blocks_depth": 54,
|
||||
"mm_single_blocks_depth": 0,
|
||||
"out_channels": 32,
|
||||
"patch_size": [
|
||||
1,
|
||||
1,
|
||||
1
|
||||
],
|
||||
"qk_norm": true,
|
||||
"qk_norm_type": "rms",
|
||||
"qkv_bias": true,
|
||||
"rope_dim_list": [
|
||||
16,
|
||||
56,
|
||||
56
|
||||
],
|
||||
"rope_theta": 256,
|
||||
"text_pool_type": null,
|
||||
"text_projection": "single_refiner",
|
||||
"text_states_dim": 3584,
|
||||
"text_states_dim_2": null,
|
||||
"use_attention_mask": true,
|
||||
"use_cond_type_embedding": true,
|
||||
"use_meanflow": false,
|
||||
"vision_projection": "linear",
|
||||
"vision_states_dim": 1152
|
||||
}
|
||||
3
transformer/720p_t2v/diffusion_pytorch_model.safetensors
Normal file
3
transformer/720p_t2v/diffusion_pytorch_model.safetensors
Normal file
@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:acb0a23ccd0b6c662a22bcc9783544fd917418227a5bdf5e2cbecb22a142c3cc
|
||||
size 33306632192
|
||||
Reference in New Issue
Block a user