mirror of
https://hf-mirror.com/tencent/Hunyuan3D-2.1
synced 2026-04-03 01:42:55 +08:00
Upload folder using huggingface_hub
This commit is contained in:
53
hunyuan3d-paintpbr-v2-1/README.md
Normal file
53
hunyuan3d-paintpbr-v2-1/README.md
Normal file
@ -0,0 +1,53 @@
|
|||||||
|
---
|
||||||
|
license: openrail++
|
||||||
|
tags:
|
||||||
|
- stable-diffusion
|
||||||
|
- text-to-image
|
||||||
|
---
|
||||||
|
|
||||||
|
# SD v2.1-base with Zero Terminal SNR (LAION Aesthetic 6+)
|
||||||
|
|
||||||
|
This model is used in the [Diffusion Model with Perceptual Loss](https://arxiv.org/abs/2401.00110) paper as the MSE baseline.
|
||||||
|
|
||||||
|
This model is trained on LAION aesthetic 6+ data using a zero terminal SNR schedule, following the [Common Diffusion Noise Schedules and Sample Steps are Flawed](https://arxiv.org/abs/2305.08891) paper.
|
||||||
|
|
||||||
|
This model is fine-tuned from [stabilityai/stable-diffusion-2-1-base](https://huggingface.co/stabilityai/stable-diffusion-2-1-base).
|
||||||
|
|
||||||
|
This model is meant for research demonstration, not for production use.
|
||||||
|
|
||||||
|
## Usage
|
||||||
|
|
||||||
|
```python
|
||||||
|
from diffusers import StableDiffusionPipeline
|
||||||
|
prompt = "A young girl smiling"
|
||||||
|
pipe = StableDiffusionPipeline.from_pretrained("ByteDance/sd2.1-base-zsnr-laionaes6").to("cuda")
|
||||||
|
pipe(prompt, guidance_scale=7.5, guidance_rescale=0.7).images[0].save("out.jpg")
|
||||||
|
```
|
||||||
|
|
||||||
|
## Related Models
|
||||||
|
|
||||||
|
* [bytedance/sd2.1-base-zsnr-laionaes5](https://huggingface.co/ByteDance/sd2.1-base-zsnr-laionaes5)
|
||||||
|
* [bytedance/sd2.1-base-zsnr-laionaes6](https://huggingface.co/ByteDance/sd2.1-base-zsnr-laionaes6)
|
||||||
|
* [bytedance/sd2.1-base-zsnr-laionaes6-perceptual](https://huggingface.co/ByteDance/sd2.1-base-zsnr-laionaes6-perceptual)
|
||||||
|
|
||||||
|
|
||||||
|
## Cite as
|
||||||
|
```
|
||||||
|
@misc{lin2024diffusion,
|
||||||
|
title={Diffusion Model with Perceptual Loss},
|
||||||
|
author={Shanchuan Lin and Xiao Yang},
|
||||||
|
year={2024},
|
||||||
|
eprint={2401.00110},
|
||||||
|
archivePrefix={arXiv},
|
||||||
|
primaryClass={cs.CV}
|
||||||
|
}
|
||||||
|
|
||||||
|
@misc{lin2023common,
|
||||||
|
title={Common Diffusion Noise Schedules and Sample Steps are Flawed},
|
||||||
|
author={Shanchuan Lin and Bingchen Liu and Jiashi Li and Xiao Yang},
|
||||||
|
year={2023},
|
||||||
|
eprint={2305.08891},
|
||||||
|
archivePrefix={arXiv},
|
||||||
|
primaryClass={cs.CV}
|
||||||
|
}
|
||||||
|
```
|
||||||
@ -0,0 +1,20 @@
|
|||||||
|
{
|
||||||
|
"crop_size": 224,
|
||||||
|
"do_center_crop": true,
|
||||||
|
"do_convert_rgb": true,
|
||||||
|
"do_normalize": true,
|
||||||
|
"do_resize": true,
|
||||||
|
"feature_extractor_type": "CLIPFeatureExtractor",
|
||||||
|
"image_mean": [
|
||||||
|
0.48145466,
|
||||||
|
0.4578275,
|
||||||
|
0.40821073
|
||||||
|
],
|
||||||
|
"image_std": [
|
||||||
|
0.26862954,
|
||||||
|
0.26130258,
|
||||||
|
0.27577711
|
||||||
|
],
|
||||||
|
"resample": 3,
|
||||||
|
"size": 224
|
||||||
|
}
|
||||||
23
hunyuan3d-paintpbr-v2-1/image_encoder/config.json
Normal file
23
hunyuan3d-paintpbr-v2-1/image_encoder/config.json
Normal file
@ -0,0 +1,23 @@
|
|||||||
|
{
|
||||||
|
"_name_or_path": "vision_encoder",
|
||||||
|
"architectures": [
|
||||||
|
"CLIPVisionModelWithProjection"
|
||||||
|
],
|
||||||
|
"attention_dropout": 0.0,
|
||||||
|
"dropout": 0.0,
|
||||||
|
"hidden_act": "gelu",
|
||||||
|
"hidden_size": 1280,
|
||||||
|
"image_size": 224,
|
||||||
|
"initializer_factor": 1.0,
|
||||||
|
"initializer_range": 0.02,
|
||||||
|
"intermediate_size": 5120,
|
||||||
|
"layer_norm_eps": 1e-05,
|
||||||
|
"model_type": "clip_vision_model",
|
||||||
|
"num_attention_heads": 16,
|
||||||
|
"num_channels": 3,
|
||||||
|
"num_hidden_layers": 32,
|
||||||
|
"patch_size": 14,
|
||||||
|
"projection_dim": 1024,
|
||||||
|
"torch_dtype": "float16",
|
||||||
|
"transformers_version": "4.36.0"
|
||||||
|
}
|
||||||
BIN
hunyuan3d-paintpbr-v2-1/image_encoder/model.safetensors
(Stored with Git LFS)
Normal file
BIN
hunyuan3d-paintpbr-v2-1/image_encoder/model.safetensors
(Stored with Git LFS)
Normal file
Binary file not shown.
37
hunyuan3d-paintpbr-v2-1/model_index.json
Normal file
37
hunyuan3d-paintpbr-v2-1/model_index.json
Normal file
@ -0,0 +1,37 @@
|
|||||||
|
{
|
||||||
|
"_class_name": "HunyuanPaintPipeline",
|
||||||
|
"_diffusers_version": "0.24.0",
|
||||||
|
"feature_extractor": [
|
||||||
|
"transformers",
|
||||||
|
"CLIPImageProcessor"
|
||||||
|
],
|
||||||
|
"requires_safety_checker": false,
|
||||||
|
"safety_checker": [
|
||||||
|
null,
|
||||||
|
null
|
||||||
|
],
|
||||||
|
"scheduler": [
|
||||||
|
"diffusers",
|
||||||
|
"DDIMScheduler"
|
||||||
|
],
|
||||||
|
"text_encoder": [
|
||||||
|
"transformers",
|
||||||
|
"CLIPTextModel"
|
||||||
|
],
|
||||||
|
"tokenizer": [
|
||||||
|
"transformers",
|
||||||
|
"CLIPTokenizer"
|
||||||
|
],
|
||||||
|
"unet": [
|
||||||
|
"modules",
|
||||||
|
"UNet2p5DConditionModel"
|
||||||
|
],
|
||||||
|
"vae": [
|
||||||
|
"diffusers",
|
||||||
|
"AutoencoderKL"
|
||||||
|
],
|
||||||
|
"image_encoder": [
|
||||||
|
"transformers",
|
||||||
|
"CLIPVisionModelWithProjection"
|
||||||
|
]
|
||||||
|
}
|
||||||
15
hunyuan3d-paintpbr-v2-1/scheduler/scheduler_config.json
Normal file
15
hunyuan3d-paintpbr-v2-1/scheduler/scheduler_config.json
Normal file
@ -0,0 +1,15 @@
|
|||||||
|
{
|
||||||
|
"_class_name": "DDIMScheduler",
|
||||||
|
"_diffusers_version": "0.23.1",
|
||||||
|
"beta_end": 0.012,
|
||||||
|
"beta_schedule": "scaled_linear",
|
||||||
|
"beta_start": 0.00085,
|
||||||
|
"clip_sample": false,
|
||||||
|
"num_train_timesteps": 1000,
|
||||||
|
"prediction_type": "v_prediction",
|
||||||
|
"set_alpha_to_one": true,
|
||||||
|
"steps_offset": 1,
|
||||||
|
"trained_betas": null,
|
||||||
|
"timestep_spacing": "trailing",
|
||||||
|
"rescale_betas_zero_snr": true
|
||||||
|
}
|
||||||
25
hunyuan3d-paintpbr-v2-1/text_encoder/config.json
Normal file
25
hunyuan3d-paintpbr-v2-1/text_encoder/config.json
Normal file
@ -0,0 +1,25 @@
|
|||||||
|
{
|
||||||
|
"_name_or_path": "stabilityai/stable-diffusion-2",
|
||||||
|
"architectures": [
|
||||||
|
"CLIPTextModel"
|
||||||
|
],
|
||||||
|
"attention_dropout": 0.0,
|
||||||
|
"bos_token_id": 0,
|
||||||
|
"dropout": 0.0,
|
||||||
|
"eos_token_id": 2,
|
||||||
|
"hidden_act": "gelu",
|
||||||
|
"hidden_size": 1024,
|
||||||
|
"initializer_factor": 1.0,
|
||||||
|
"initializer_range": 0.02,
|
||||||
|
"intermediate_size": 4096,
|
||||||
|
"layer_norm_eps": 1e-05,
|
||||||
|
"max_position_embeddings": 77,
|
||||||
|
"model_type": "clip_text_model",
|
||||||
|
"num_attention_heads": 16,
|
||||||
|
"num_hidden_layers": 23,
|
||||||
|
"pad_token_id": 1,
|
||||||
|
"projection_dim": 512,
|
||||||
|
"torch_dtype": "float32",
|
||||||
|
"transformers_version": "4.25.0.dev0",
|
||||||
|
"vocab_size": 49408
|
||||||
|
}
|
||||||
BIN
hunyuan3d-paintpbr-v2-1/text_encoder/pytorch_model.bin
(Stored with Git LFS)
Normal file
BIN
hunyuan3d-paintpbr-v2-1/text_encoder/pytorch_model.bin
(Stored with Git LFS)
Normal file
Binary file not shown.
48895
hunyuan3d-paintpbr-v2-1/tokenizer/merges.txt
Normal file
48895
hunyuan3d-paintpbr-v2-1/tokenizer/merges.txt
Normal file
File diff suppressed because it is too large
Load Diff
24
hunyuan3d-paintpbr-v2-1/tokenizer/special_tokens_map.json
Normal file
24
hunyuan3d-paintpbr-v2-1/tokenizer/special_tokens_map.json
Normal file
@ -0,0 +1,24 @@
|
|||||||
|
{
|
||||||
|
"bos_token": {
|
||||||
|
"content": "<|startoftext|>",
|
||||||
|
"lstrip": false,
|
||||||
|
"normalized": true,
|
||||||
|
"rstrip": false,
|
||||||
|
"single_word": false
|
||||||
|
},
|
||||||
|
"eos_token": {
|
||||||
|
"content": "<|endoftext|>",
|
||||||
|
"lstrip": false,
|
||||||
|
"normalized": true,
|
||||||
|
"rstrip": false,
|
||||||
|
"single_word": false
|
||||||
|
},
|
||||||
|
"pad_token": "!",
|
||||||
|
"unk_token": {
|
||||||
|
"content": "<|endoftext|>",
|
||||||
|
"lstrip": false,
|
||||||
|
"normalized": true,
|
||||||
|
"rstrip": false,
|
||||||
|
"single_word": false
|
||||||
|
}
|
||||||
|
}
|
||||||
34
hunyuan3d-paintpbr-v2-1/tokenizer/tokenizer_config.json
Normal file
34
hunyuan3d-paintpbr-v2-1/tokenizer/tokenizer_config.json
Normal file
@ -0,0 +1,34 @@
|
|||||||
|
{
|
||||||
|
"add_prefix_space": false,
|
||||||
|
"bos_token": {
|
||||||
|
"__type": "AddedToken",
|
||||||
|
"content": "<|startoftext|>",
|
||||||
|
"lstrip": false,
|
||||||
|
"normalized": true,
|
||||||
|
"rstrip": false,
|
||||||
|
"single_word": false
|
||||||
|
},
|
||||||
|
"do_lower_case": true,
|
||||||
|
"eos_token": {
|
||||||
|
"__type": "AddedToken",
|
||||||
|
"content": "<|endoftext|>",
|
||||||
|
"lstrip": false,
|
||||||
|
"normalized": true,
|
||||||
|
"rstrip": false,
|
||||||
|
"single_word": false
|
||||||
|
},
|
||||||
|
"errors": "replace",
|
||||||
|
"model_max_length": 77,
|
||||||
|
"name_or_path": "stabilityai/stable-diffusion-2",
|
||||||
|
"pad_token": "<|endoftext|>",
|
||||||
|
"special_tokens_map_file": "./special_tokens_map.json",
|
||||||
|
"tokenizer_class": "CLIPTokenizer",
|
||||||
|
"unk_token": {
|
||||||
|
"__type": "AddedToken",
|
||||||
|
"content": "<|endoftext|>",
|
||||||
|
"lstrip": false,
|
||||||
|
"normalized": true,
|
||||||
|
"rstrip": false,
|
||||||
|
"single_word": false
|
||||||
|
}
|
||||||
|
}
|
||||||
49410
hunyuan3d-paintpbr-v2-1/tokenizer/vocab.json
Normal file
49410
hunyuan3d-paintpbr-v2-1/tokenizer/vocab.json
Normal file
File diff suppressed because it is too large
Load Diff
45
hunyuan3d-paintpbr-v2-1/unet/config.json
Normal file
45
hunyuan3d-paintpbr-v2-1/unet/config.json
Normal file
@ -0,0 +1,45 @@
|
|||||||
|
{
|
||||||
|
"_class_name": "UNet2DConditionModel",
|
||||||
|
"_diffusers_version": "0.10.0.dev0",
|
||||||
|
"act_fn": "silu",
|
||||||
|
"attention_head_dim": [
|
||||||
|
5,
|
||||||
|
10,
|
||||||
|
20,
|
||||||
|
20
|
||||||
|
],
|
||||||
|
"block_out_channels": [
|
||||||
|
320,
|
||||||
|
640,
|
||||||
|
1280,
|
||||||
|
1280
|
||||||
|
],
|
||||||
|
"center_input_sample": false,
|
||||||
|
"cross_attention_dim": 1024,
|
||||||
|
"down_block_types": [
|
||||||
|
"CrossAttnDownBlock2D",
|
||||||
|
"CrossAttnDownBlock2D",
|
||||||
|
"CrossAttnDownBlock2D",
|
||||||
|
"DownBlock2D"
|
||||||
|
],
|
||||||
|
"downsample_padding": 1,
|
||||||
|
"dual_cross_attention": false,
|
||||||
|
"flip_sin_to_cos": true,
|
||||||
|
"freq_shift": 0,
|
||||||
|
"in_channels": 4,
|
||||||
|
"layers_per_block": 2,
|
||||||
|
"mid_block_scale_factor": 1,
|
||||||
|
"norm_eps": 1e-05,
|
||||||
|
"norm_num_groups": 32,
|
||||||
|
"num_class_embeds": null,
|
||||||
|
"only_cross_attention": false,
|
||||||
|
"out_channels": 4,
|
||||||
|
"sample_size": 64,
|
||||||
|
"up_block_types": [
|
||||||
|
"UpBlock2D",
|
||||||
|
"CrossAttnUpBlock2D",
|
||||||
|
"CrossAttnUpBlock2D",
|
||||||
|
"CrossAttnUpBlock2D"
|
||||||
|
],
|
||||||
|
"use_linear_projection": true
|
||||||
|
}
|
||||||
BIN
hunyuan3d-paintpbr-v2-1/unet/diffusion_pytorch_model.bin
(Stored with Git LFS)
Normal file
BIN
hunyuan3d-paintpbr-v2-1/unet/diffusion_pytorch_model.bin
(Stored with Git LFS)
Normal file
Binary file not shown.
29
hunyuan3d-paintpbr-v2-1/vae/config.json
Normal file
29
hunyuan3d-paintpbr-v2-1/vae/config.json
Normal file
@ -0,0 +1,29 @@
|
|||||||
|
{
|
||||||
|
"_class_name": "AutoencoderKL",
|
||||||
|
"_diffusers_version": "0.10.0.dev0",
|
||||||
|
"act_fn": "silu",
|
||||||
|
"block_out_channels": [
|
||||||
|
128,
|
||||||
|
256,
|
||||||
|
512,
|
||||||
|
512
|
||||||
|
],
|
||||||
|
"down_block_types": [
|
||||||
|
"DownEncoderBlock2D",
|
||||||
|
"DownEncoderBlock2D",
|
||||||
|
"DownEncoderBlock2D",
|
||||||
|
"DownEncoderBlock2D"
|
||||||
|
],
|
||||||
|
"in_channels": 3,
|
||||||
|
"latent_channels": 4,
|
||||||
|
"layers_per_block": 2,
|
||||||
|
"norm_num_groups": 32,
|
||||||
|
"out_channels": 3,
|
||||||
|
"sample_size": 768,
|
||||||
|
"up_block_types": [
|
||||||
|
"UpDecoderBlock2D",
|
||||||
|
"UpDecoderBlock2D",
|
||||||
|
"UpDecoderBlock2D",
|
||||||
|
"UpDecoderBlock2D"
|
||||||
|
]
|
||||||
|
}
|
||||||
BIN
hunyuan3d-paintpbr-v2-1/vae/diffusion_pytorch_model.bin
(Stored with Git LFS)
Normal file
BIN
hunyuan3d-paintpbr-v2-1/vae/diffusion_pytorch_model.bin
(Stored with Git LFS)
Normal file
Binary file not shown.
Reference in New Issue
Block a user