diff --git a/direct3d-s2-v-1-1/config.yaml b/direct3d-s2-v-1-1/config.yaml new file mode 100644 index 0000000..930585c --- /dev/null +++ b/direct3d-s2-v-1-1/config.yaml @@ -0,0 +1,161 @@ +dense_vae: + target: direct3d_s2.models.autoencoders.dense_vae.DenseShapeVAE + params: + use_checkpoint: true + embed_dim: 8 + in_channels: 1 + out_channels: 1 + model_channels_encoder: [32, 128, 512] + num_res_blocks_encoder: 2 + num_res_blocks_middle_encoder: 2 + model_channels_decoder: [512, 128, 64] + num_res_blocks_decoder: 4 + num_res_blocks_middle_decoder: 4 + use_fp16: true + latents_scale: 1.0 + latents_shift: 0.0 + +dense_dit: + target: direct3d_s2.models.transformers.dense_dit.DenseDiT + params: + resolution: 16 + in_channels: 8 + out_channels: 8 + model_channels: 1024 + cond_channels: 1024 + num_blocks: 24 + num_heads: 16 + mlp_ratio: 4 + patch_size: 1 + pe_mode: ape + qk_rms_norm: true + use_checkpoint: true + use_fp16: true + latent_shape: [8, 16, 16, 16] + +dense_image_encoder: + target: direct3d_s2.models.conditioner.DinoEncoder + params: + model: facebookresearch/dinov2 + version: dinov2_vitl14_reg + size: 518 + +dense_scheduler: + target: diffusers.schedulers.FlowMatchEulerDiscreteScheduler + params: + num_train_timesteps: 1000 + shift: 6.0 + +sparse_vae_512: + target: direct3d_s2.models.autoencoders.ss_vae.SparseSDFVAE + params: + use_checkpoint: true + embed_dim: 16 + num_head_channels_encoder: 64 + model_channels_encoder: 512 + num_heads_encoder: 8 + num_blocks_encoder: 4 + num_head_channels_decoder: 64 + model_channels_decoder: 512 + num_heads_decoder: 8 + num_blocks_decoder: 4 + resolution: 64 + out_channels: 1 + use_fp16: true + latents_scale: 1.0 + latents_shift: 0.0 + +sparse_dit_512: + target: direct3d_s2.models.transformers.sparse_dit.SparseDiT + params: + resolution: 64 + in_channels: 16 + out_channels: 16 + model_channels: 1024 + cond_channels: 1024 + num_blocks: 24 + num_heads: 32 + num_kv_heads: 2 + compression_block_size: 8 + selection_block_size: 8 + topk: 8 + compression_version: v1 + pe_mode: ape + factor: 1.0 + sparse_conditions: false + qk_rms_norm: true + use_shift: true + use_checkpoint: true + use_fp16: true + +sparse_scheduler_512: + target: diffusers.schedulers.FlowMatchEulerDiscreteScheduler + params: + num_train_timesteps: 1000 + shift: 6.0 + +sparse_vae_1024: + target: direct3d_s2.models.autoencoders.ss_vae.SparseSDFVAE + params: + use_checkpoint: true + embed_dim: 16 + num_head_channels_encoder: 64 + model_channels_encoder: 512 + num_heads_encoder: 8 + num_blocks_encoder: 4 + num_head_channels_decoder: 64 + model_channels_decoder: 512 + num_heads_decoder: 8 + num_blocks_decoder: 4 + resolution: 128 + out_channels: 1 + use_fp16: true + latents_scale: 1.0 + latents_shift: 0.0 + chunk_size: 4 + +sparse_dit_1024: + target: direct3d_s2.models.transformers.sparse_dit.SparseDiT + params: + resolution: 128 + in_channels: 16 + out_channels: 16 + model_channels: 1024 + cond_channels: 1024 + num_blocks: 24 + num_heads: 32 + num_kv_heads: 2 + compression_block_size: 8 + selection_block_size: 8 + topk: 8 + compression_version: v1 + pe_mode: ape + factor: 0.5 + sparse_conditions: false + qk_rms_norm: true + use_shift: true + use_checkpoint: true + use_fp16: true + +sparse_scheduler_1024: + target: diffusers.schedulers.FlowMatchEulerDiscreteScheduler + params: + num_train_timesteps: 1000 + shift: 8.0 + +sparse_image_encoder: + target: direct3d_s2.models.conditioner.DinoEncoder + params: + model: facebookresearch/dinov2 + version: dinov2_vitl14_reg + size: 518 + +refiner: + target: direct3d_s2.models.refiner.unet_refiner.Voxel_RefinerXL + params: + in_channels: 1 + out_channels: 1 + layers_per_block: 2 + layers_mid_block: 2 + patch_size: 192 + use_fp16: true \ No newline at end of file diff --git a/direct3d-s2-v-1-1/model_dense.ckpt b/direct3d-s2-v-1-1/model_dense.ckpt new file mode 100644 index 0000000..0d7b623 --- /dev/null +++ b/direct3d-s2-v-1-1/model_dense.ckpt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5e6598155f907ca15eb9e4fb96905f9df4d10ebc319f9567ea947db8c684d8fd +size 1505767932 diff --git a/direct3d-s2-v-1-1/model_refiner.ckpt b/direct3d-s2-v-1-1/model_refiner.ckpt new file mode 100644 index 0000000..38d2e00 --- /dev/null +++ b/direct3d-s2-v-1-1/model_refiner.ckpt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2eb0f70d323a43d63ab37c91790c8a7c2b9ada1d7771ffe99ef7a647bdbc99e4 +size 269562302 diff --git a/direct3d-s2-v-1-1/model_sparse_1024.ckpt b/direct3d-s2-v-1-1/model_sparse_1024.ckpt new file mode 100644 index 0000000..68ff8ac --- /dev/null +++ b/direct3d-s2-v-1-1/model_sparse_1024.ckpt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c3349a91b1ee0d5843bfca4077e4d526f8e0856bd8a03c9f097b3312f386642d +size 1094546938 diff --git a/direct3d-s2-v-1-1/model_sparse_512.ckpt b/direct3d-s2-v-1-1/model_sparse_512.ckpt new file mode 100644 index 0000000..bc66e75 --- /dev/null +++ b/direct3d-s2-v-1-1/model_sparse_512.ckpt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c54cd5527d6b040a931e00f7f6edd9460180726bce21cde03c7fe5c4f646e151 +size 1094546234