Upload folder using ModelScope SDK (batch 1/1)

This commit is contained in:
Cherrytest
2025-11-21 04:21:48 +00:00
parent f0cb438d0f
commit b4a1193832
39 changed files with 1468 additions and 38 deletions

View File

@ -0,0 +1,43 @@
{
"_class_name": "HunyuanVideo_1_5_DiffusionTransformer",
"_diffusers_version": "0.35.0",
"attn_mode": "flash",
"attn_param": null,
"concat_condition": false,
"glyph_byT5_v2": true,
"guidance_embed": false,
"heads_num": 16,
"hidden_size": 2048,
"ideal_resolution": "1080p",
"ideal_task": null,
"in_channels": 98,
"is_reshape_temporal_channels": false,
"mlp_act_type": "gelu_tanh",
"mlp_width_ratio": 4,
"mm_double_blocks_depth": 54,
"mm_single_blocks_depth": 0,
"out_channels": 32,
"patch_size": [
1,
1,
1
],
"qk_norm": true,
"qk_norm_type": "rms",
"qkv_bias": true,
"rope_dim_list": [
16,
56,
56
],
"rope_theta": 256,
"text_pool_type": null,
"text_projection": "single_refiner",
"text_states_dim": 3584,
"text_states_dim_2": null,
"use_attention_mask": true,
"use_cond_type_embedding": true,
"use_meanflow": true,
"vision_projection": "linear",
"vision_states_dim": 1152
}

View File

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:691dc1b81b49d942e2eb95e6d61b91321e17b868536eaa4e843db6e406390411
size 33325793672

View File

@ -0,0 +1,43 @@
{
"_class_name": "HunyuanVideo_1_5_DiffusionTransformer",
"_diffusers_version": "0.35.0",
"attn_mode": "flash",
"attn_param": null,
"concat_condition": true,
"glyph_byT5_v2": true,
"guidance_embed": false,
"heads_num": 16,
"hidden_size": 2048,
"ideal_resolution": "480p",
"ideal_task": "i2v",
"in_channels": 32,
"is_reshape_temporal_channels": false,
"mlp_act_type": "gelu_tanh",
"mlp_width_ratio": 4,
"mm_double_blocks_depth": 54,
"mm_single_blocks_depth": 0,
"out_channels": 32,
"patch_size": [
1,
1,
1
],
"qk_norm": true,
"qk_norm_type": "rms",
"qkv_bias": true,
"rope_dim_list": [
16,
56,
56
],
"rope_theta": 256,
"text_pool_type": null,
"text_projection": "single_refiner",
"text_states_dim": 3584,
"text_states_dim_2": null,
"use_attention_mask": true,
"use_cond_type_embedding": true,
"use_meanflow": false,
"vision_projection": "linear",
"vision_states_dim": 1152
}

View File

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:f4d7d3e61404f5c742b57260f1b6a3bc41bb12fc880438252bf37913487dec56
size 33306632192

View File

@ -0,0 +1,43 @@
{
"_class_name": "HunyuanVideo_1_5_DiffusionTransformer",
"_diffusers_version": "0.35.0",
"attn_mode": "flash",
"attn_param": null,
"concat_condition": true,
"glyph_byT5_v2": true,
"guidance_embed": false,
"heads_num": 16,
"hidden_size": 2048,
"ideal_resolution": "480p",
"ideal_task": "i2v",
"in_channels": 32,
"is_reshape_temporal_channels": false,
"mlp_act_type": "gelu_tanh",
"mlp_width_ratio": 4,
"mm_double_blocks_depth": 54,
"mm_single_blocks_depth": 0,
"out_channels": 32,
"patch_size": [
1,
1,
1
],
"qk_norm": true,
"qk_norm_type": "rms",
"qkv_bias": true,
"rope_dim_list": [
16,
56,
56
],
"rope_theta": 256,
"text_pool_type": null,
"text_projection": "single_refiner",
"text_states_dim": 3584,
"text_states_dim_2": null,
"use_attention_mask": true,
"use_cond_type_embedding": true,
"use_meanflow": false,
"vision_projection": "linear",
"vision_states_dim": 1152
}

View File

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:f51fe1c4302be44e25afcd1a9186385606482da8a77e2ee7793b0e8385b9cd57
size 33306632192

View File

@ -0,0 +1,43 @@
{
"_class_name": "HunyuanVideo_1_5_DiffusionTransformer",
"_diffusers_version": "0.35.0",
"attn_mode": "flash",
"attn_param": null,
"concat_condition": true,
"glyph_byT5_v2": true,
"guidance_embed": false,
"heads_num": 16,
"hidden_size": 2048,
"ideal_resolution": "480p",
"ideal_task": "t2v",
"in_channels": 32,
"is_reshape_temporal_channels": false,
"mlp_act_type": "gelu_tanh",
"mlp_width_ratio": 4,
"mm_double_blocks_depth": 54,
"mm_single_blocks_depth": 0,
"out_channels": 32,
"patch_size": [
1,
1,
1
],
"qk_norm": true,
"qk_norm_type": "rms",
"qkv_bias": true,
"rope_dim_list": [
16,
56,
56
],
"rope_theta": 256,
"text_pool_type": null,
"text_projection": "single_refiner",
"text_states_dim": 3584,
"text_states_dim_2": null,
"use_attention_mask": true,
"use_cond_type_embedding": true,
"use_meanflow": false,
"vision_projection": "linear",
"vision_states_dim": 1152
}

View File

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:71f9affa1115fef2b14bd41fba30eab966fe80c9ed98e0fcba495dbc6d8fff86
size 33306632192

View File

@ -0,0 +1,43 @@
{
"_class_name": "HunyuanVideo_1_5_DiffusionTransformer",
"_diffusers_version": "0.35.0",
"attn_mode": "flash",
"attn_param": null,
"concat_condition": true,
"glyph_byT5_v2": true,
"guidance_embed": false,
"heads_num": 16,
"hidden_size": 2048,
"ideal_resolution": "480p",
"ideal_task": "t2v",
"in_channels": 32,
"is_reshape_temporal_channels": false,
"mlp_act_type": "gelu_tanh",
"mlp_width_ratio": 4,
"mm_double_blocks_depth": 54,
"mm_single_blocks_depth": 0,
"out_channels": 32,
"patch_size": [
1,
1,
1
],
"qk_norm": true,
"qk_norm_type": "rms",
"qkv_bias": true,
"rope_dim_list": [
16,
56,
56
],
"rope_theta": 256,
"text_pool_type": null,
"text_projection": "single_refiner",
"text_states_dim": 3584,
"text_states_dim_2": null,
"use_attention_mask": true,
"use_cond_type_embedding": true,
"use_meanflow": false,
"vision_projection": "linear",
"vision_states_dim": 1152
}

View File

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:0f35dc10a4037a618b22fef4ee20f8a9d972b4cf2e684764ed8f442fc7a2583f
size 33306632192

View File

@ -0,0 +1,43 @@
{
"_class_name": "HunyuanVideo_1_5_DiffusionTransformer",
"_diffusers_version": "0.35.0",
"attn_mode": "flash",
"attn_param": null,
"concat_condition": true,
"glyph_byT5_v2": true,
"guidance_embed": false,
"heads_num": 16,
"hidden_size": 2048,
"ideal_resolution": "720p",
"ideal_task": "i2v",
"in_channels": 32,
"is_reshape_temporal_channels": false,
"mlp_act_type": "gelu_tanh",
"mlp_width_ratio": 4,
"mm_double_blocks_depth": 54,
"mm_single_blocks_depth": 0,
"out_channels": 32,
"patch_size": [
1,
1,
1
],
"qk_norm": true,
"qk_norm_type": "rms",
"qkv_bias": true,
"rope_dim_list": [
16,
56,
56
],
"rope_theta": 256,
"text_pool_type": null,
"text_projection": "single_refiner",
"text_states_dim": 3584,
"text_states_dim_2": null,
"use_attention_mask": true,
"use_cond_type_embedding": true,
"use_meanflow": false,
"vision_projection": "linear",
"vision_states_dim": 1152
}

View File

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:0ffd6e2e1c2de585fd011ace1a64105804830aa331ddb25a2fb4a32497f159a4
size 33306632192

View File

@ -0,0 +1,43 @@
{
"_class_name": "HunyuanVideo_1_5_DiffusionTransformer",
"_diffusers_version": "0.35.0",
"attn_mode": "flash",
"attn_param": null,
"concat_condition": true,
"glyph_byT5_v2": true,
"guidance_embed": false,
"heads_num": 16,
"hidden_size": 2048,
"ideal_resolution": "720p",
"ideal_task": "i2v",
"in_channels": 32,
"is_reshape_temporal_channels": false,
"mlp_act_type": "gelu_tanh",
"mlp_width_ratio": 4,
"mm_double_blocks_depth": 54,
"mm_single_blocks_depth": 0,
"out_channels": 32,
"patch_size": [
1,
1,
1
],
"qk_norm": true,
"qk_norm_type": "rms",
"qkv_bias": true,
"rope_dim_list": [
16,
56,
56
],
"rope_theta": 256,
"text_pool_type": null,
"text_projection": "single_refiner",
"text_states_dim": 3584,
"text_states_dim_2": null,
"use_attention_mask": true,
"use_cond_type_embedding": true,
"use_meanflow": false,
"vision_projection": "linear",
"vision_states_dim": 1152
}

View File

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:542a9a4367ebfe5c584d925308f63f11070d044e3615b07af95edd4e0500240a
size 33306632192

View File

@ -0,0 +1,67 @@
{
"_class_name": "HunyuanVideo_1_5_DiffusionTransformer",
"_diffusers_version": "0.35.0",
"attn_mode": "flex-block-attn",
"attn_param": {
"attn_mask_share_within_head": 0,
"attn_pad_type": "zero",
"attn_sparse_type": "ssta",
"attn_use_text_mask": 1,
"ssta_adaptive_pool": null,
"ssta_lambda": 0.7,
"ssta_sampling_type": "importance",
"ssta_threshold": 0.0,
"ssta_topk": 64,
"tile_size": [
6,
8,
8
],
"win_ratio": 10,
"win_size": [
[
3,
3,
3
]
],
"win_type": "fixed"
},
"concat_condition": true,
"glyph_byT5_v2": true,
"guidance_embed": false,
"heads_num": 16,
"hidden_size": 2048,
"ideal_resolution": "720p",
"ideal_task": "i2v",
"in_channels": 32,
"is_reshape_temporal_channels": false,
"mlp_act_type": "gelu_tanh",
"mlp_width_ratio": 4,
"mm_double_blocks_depth": 54,
"mm_single_blocks_depth": 0,
"out_channels": 32,
"patch_size": [
1,
1,
1
],
"qk_norm": true,
"qk_norm_type": "rms",
"qkv_bias": true,
"rope_dim_list": [
16,
56,
56
],
"rope_theta": 256,
"text_pool_type": null,
"text_projection": "single_refiner",
"text_states_dim": 3584,
"text_states_dim_2": null,
"use_attention_mask": true,
"use_cond_type_embedding": true,
"use_meanflow": false,
"vision_projection": "linear",
"vision_states_dim": 1152
}

View File

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:47345844cc15df1e64a38cc9715fa31471c2c1c68a2857404eda027d60f355a6
size 33306632192

View File

@ -0,0 +1,43 @@
{
"_class_name": "HunyuanVideo_1_5_DiffusionTransformer",
"_diffusers_version": "0.35.0",
"attn_mode": "flash",
"attn_param": null,
"concat_condition": false,
"glyph_byT5_v2": true,
"guidance_embed": false,
"heads_num": 16,
"hidden_size": 2048,
"ideal_resolution": "720p",
"ideal_task": null,
"in_channels": 98,
"is_reshape_temporal_channels": false,
"mlp_act_type": "gelu_tanh",
"mlp_width_ratio": 4,
"mm_double_blocks_depth": 54,
"mm_single_blocks_depth": 0,
"out_channels": 32,
"patch_size": [
1,
1,
1
],
"qk_norm": true,
"qk_norm_type": "rms",
"qkv_bias": true,
"rope_dim_list": [
16,
56,
56
],
"rope_theta": 256,
"text_pool_type": null,
"text_projection": "single_refiner",
"text_states_dim": 3584,
"text_states_dim_2": null,
"use_attention_mask": true,
"use_cond_type_embedding": true,
"use_meanflow": true,
"vision_projection": "linear",
"vision_states_dim": 1152
}

View File

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:8fcab24a2404731a5e42d5f4adf7731a69771a3b2fc7786f90233f599947794a
size 33325793672

View File

@ -0,0 +1,43 @@
{
"_class_name": "HunyuanVideo_1_5_DiffusionTransformer",
"_diffusers_version": "0.35.0",
"attn_mode": "flash",
"attn_param": null,
"concat_condition": true,
"glyph_byT5_v2": true,
"guidance_embed": false,
"heads_num": 16,
"hidden_size": 2048,
"ideal_resolution": "720p",
"ideal_task": "t2v",
"in_channels": 32,
"is_reshape_temporal_channels": false,
"mlp_act_type": "gelu_tanh",
"mlp_width_ratio": 4,
"mm_double_blocks_depth": 54,
"mm_single_blocks_depth": 0,
"out_channels": 32,
"patch_size": [
1,
1,
1
],
"qk_norm": true,
"qk_norm_type": "rms",
"qkv_bias": true,
"rope_dim_list": [
16,
56,
56
],
"rope_theta": 256,
"text_pool_type": null,
"text_projection": "single_refiner",
"text_states_dim": 3584,
"text_states_dim_2": null,
"use_attention_mask": true,
"use_cond_type_embedding": true,
"use_meanflow": false,
"vision_projection": "linear",
"vision_states_dim": 1152
}

View File

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:acb0a23ccd0b6c662a22bcc9783544fd917418227a5bdf5e2cbecb22a142c3cc
size 33306632192