Upload folder using huggingface_hub

This commit is contained in:
huiwenshi
2025-06-13 16:29:32 +00:00
committed by system
parent 07d6dc9694
commit b1fedf5fb7
16 changed files with 98622 additions and 0 deletions

View File

@ -0,0 +1,53 @@
---
license: openrail++
tags:
- stable-diffusion
- text-to-image
---
# SD v2.1-base with Zero Terminal SNR (LAION Aesthetic 6+)
This model is used in the [Diffusion Model with Perceptual Loss](https://arxiv.org/abs/2401.00110) paper as the MSE baseline.
This model is trained on LAION aesthetic 6+ data using a zero terminal SNR schedule, following the [Common Diffusion Noise Schedules and Sample Steps are Flawed](https://arxiv.org/abs/2305.08891) paper.
This model is finetuned from [stabilityai/stable-diffusion-2-1-base](https://huggingface.co/stabilityai/stable-diffusion-2-1-base).
This model is meant for research demonstration, not for production use.
## Usage
```python
from diffusers import StableDiffusionPipeline
prompt = "A young girl smiling"
pipe = StableDiffusionPipeline.from_pretrained("ByteDance/sd2.1-base-zsnr-laionaes6").to("cuda")
pipe(prompt, guidance_scale=7.5, guidance_rescale=0.7).images[0].save("out.jpg")
```
## Related Models
* [ByteDance/sd2.1-base-zsnr-laionaes5](https://huggingface.co/ByteDance/sd2.1-base-zsnr-laionaes5)
* [ByteDance/sd2.1-base-zsnr-laionaes6](https://huggingface.co/ByteDance/sd2.1-base-zsnr-laionaes6)
* [ByteDance/sd2.1-base-zsnr-laionaes6-perceptual](https://huggingface.co/ByteDance/sd2.1-base-zsnr-laionaes6-perceptual)
## Cite as
```
@misc{lin2024diffusion,
title={Diffusion Model with Perceptual Loss},
author={Shanchuan Lin and Xiao Yang},
year={2024},
eprint={2401.00110},
archivePrefix={arXiv},
primaryClass={cs.CV}
}
@misc{lin2023common,
title={Common Diffusion Noise Schedules and Sample Steps are Flawed},
author={Shanchuan Lin and Bingchen Liu and Jiashi Li and Xiao Yang},
year={2023},
eprint={2305.08891},
archivePrefix={arXiv},
primaryClass={cs.CV}
}
```

View File

@ -0,0 +1,20 @@
{
"crop_size": 224,
"do_center_crop": true,
"do_convert_rgb": true,
"do_normalize": true,
"do_resize": true,
"feature_extractor_type": "CLIPFeatureExtractor",
"image_mean": [
0.48145466,
0.4578275,
0.40821073
],
"image_std": [
0.26862954,
0.26130258,
0.27577711
],
"resample": 3,
"size": 224
}

View File

@ -0,0 +1,23 @@
{
"_name_or_path": "vision_encoder",
"architectures": [
"CLIPVisionModelWithProjection"
],
"attention_dropout": 0.0,
"dropout": 0.0,
"hidden_act": "gelu",
"hidden_size": 1280,
"image_size": 224,
"initializer_factor": 1.0,
"initializer_range": 0.02,
"intermediate_size": 5120,
"layer_norm_eps": 1e-05,
"model_type": "clip_vision_model",
"num_attention_heads": 16,
"num_channels": 3,
"num_hidden_layers": 32,
"patch_size": 14,
"projection_dim": 1024,
"torch_dtype": "float16",
"transformers_version": "4.36.0"
}

BIN
hunyuan3d-paintpbr-v2-1/image_encoder/model.safetensors (Stored with Git LFS) Normal file

Binary file not shown.

View File

@ -0,0 +1,37 @@
{
"_class_name": "HunyuanPaintPipeline",
"_diffusers_version": "0.24.0",
"feature_extractor": [
"transformers",
"CLIPImageProcessor"
],
"requires_safety_checker": false,
"safety_checker": [
null,
null
],
"scheduler": [
"diffusers",
"DDIMScheduler"
],
"text_encoder": [
"transformers",
"CLIPTextModel"
],
"tokenizer": [
"transformers",
"CLIPTokenizer"
],
"unet": [
"modules",
"UNet2p5DConditionModel"
],
"vae": [
"diffusers",
"AutoencoderKL"
],
"image_encoder": [
"transformers",
"CLIPVisionModelWithProjection"
]
}

View File

@ -0,0 +1,15 @@
{
"_class_name": "DDIMScheduler",
"_diffusers_version": "0.23.1",
"beta_end": 0.012,
"beta_schedule": "scaled_linear",
"beta_start": 0.00085,
"clip_sample": false,
"num_train_timesteps": 1000,
"prediction_type": "v_prediction",
"set_alpha_to_one": true,
"steps_offset": 1,
"trained_betas": null,
"timestep_spacing": "trailing",
"rescale_betas_zero_snr": true
}

View File

@ -0,0 +1,25 @@
{
"_name_or_path": "stabilityai/stable-diffusion-2",
"architectures": [
"CLIPTextModel"
],
"attention_dropout": 0.0,
"bos_token_id": 0,
"dropout": 0.0,
"eos_token_id": 2,
"hidden_act": "gelu",
"hidden_size": 1024,
"initializer_factor": 1.0,
"initializer_range": 0.02,
"intermediate_size": 4096,
"layer_norm_eps": 1e-05,
"max_position_embeddings": 77,
"model_type": "clip_text_model",
"num_attention_heads": 16,
"num_hidden_layers": 23,
"pad_token_id": 1,
"projection_dim": 512,
"torch_dtype": "float32",
"transformers_version": "4.25.0.dev0",
"vocab_size": 49408
}

BIN
hunyuan3d-paintpbr-v2-1/text_encoder/pytorch_model.bin (Stored with Git LFS) Normal file

Binary file not shown.

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,24 @@
{
"bos_token": {
"content": "<|startoftext|>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false
},
"eos_token": {
"content": "<|endoftext|>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false
},
"pad_token": "!",
"unk_token": {
"content": "<|endoftext|>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false
}
}

View File

@ -0,0 +1,34 @@
{
"add_prefix_space": false,
"bos_token": {
"__type": "AddedToken",
"content": "<|startoftext|>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false
},
"do_lower_case": true,
"eos_token": {
"__type": "AddedToken",
"content": "<|endoftext|>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false
},
"errors": "replace",
"model_max_length": 77,
"name_or_path": "stabilityai/stable-diffusion-2",
"pad_token": "<|endoftext|>",
"special_tokens_map_file": "./special_tokens_map.json",
"tokenizer_class": "CLIPTokenizer",
"unk_token": {
"__type": "AddedToken",
"content": "<|endoftext|>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false
}
}

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,45 @@
{
"_class_name": "UNet2DConditionModel",
"_diffusers_version": "0.10.0.dev0",
"act_fn": "silu",
"attention_head_dim": [
5,
10,
20,
20
],
"block_out_channels": [
320,
640,
1280,
1280
],
"center_input_sample": false,
"cross_attention_dim": 1024,
"down_block_types": [
"CrossAttnDownBlock2D",
"CrossAttnDownBlock2D",
"CrossAttnDownBlock2D",
"DownBlock2D"
],
"downsample_padding": 1,
"dual_cross_attention": false,
"flip_sin_to_cos": true,
"freq_shift": 0,
"in_channels": 4,
"layers_per_block": 2,
"mid_block_scale_factor": 1,
"norm_eps": 1e-05,
"norm_num_groups": 32,
"num_class_embeds": null,
"only_cross_attention": false,
"out_channels": 4,
"sample_size": 64,
"up_block_types": [
"UpBlock2D",
"CrossAttnUpBlock2D",
"CrossAttnUpBlock2D",
"CrossAttnUpBlock2D"
],
"use_linear_projection": true
}

BIN
hunyuan3d-paintpbr-v2-1/unet/diffusion_pytorch_model.bin (Stored with Git LFS) Normal file

Binary file not shown.

View File

@ -0,0 +1,29 @@
{
"_class_name": "AutoencoderKL",
"_diffusers_version": "0.10.0.dev0",
"act_fn": "silu",
"block_out_channels": [
128,
256,
512,
512
],
"down_block_types": [
"DownEncoderBlock2D",
"DownEncoderBlock2D",
"DownEncoderBlock2D",
"DownEncoderBlock2D"
],
"in_channels": 3,
"latent_channels": 4,
"layers_per_block": 2,
"norm_num_groups": 32,
"out_channels": 3,
"sample_size": 768,
"up_block_types": [
"UpDecoderBlock2D",
"UpDecoderBlock2D",
"UpDecoderBlock2D",
"UpDecoderBlock2D"
]
}

BIN
hunyuan3d-paintpbr-v2-1/vae/diffusion_pytorch_model.bin (Stored with Git LFS) Normal file

Binary file not shown.