mirror of
https://www.modelscope.cn/deepseek-ai/DeepSeek-OCR.git
synced 2026-04-02 21:02:54 +08:00
Upload folder using ModelScope SDK
This commit is contained in:
3
.gitattributes
vendored
3
.gitattributes
vendored
@ -45,3 +45,6 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
||||
*.wasm filter=lfs diff=lfs merge=lfs -text
|
||||
*.zst filter=lfs diff=lfs merge=lfs -text
|
||||
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
||||
|
||||
model-00001-of-000001.safetensors filter=lfs diff=lfs merge=lfs -text
|
||||
tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
||||
122
.ipynb_checkpoints/README-checkpoint.md
Normal file
122
.ipynb_checkpoints/README-checkpoint.md
Normal file
@ -0,0 +1,122 @@
|
||||
---
|
||||
pipeline_tag: image-text-to-text
|
||||
language:
|
||||
- multilingual
|
||||
tags:
|
||||
- deepseek
|
||||
- vision-language
|
||||
- ocr
|
||||
- custom_code
|
||||
license: mit
|
||||
---
|
||||
<div align="center">
|
||||
<img src="https://github.com/deepseek-ai/DeepSeek-V2/blob/main/figures/logo.svg?raw=true" width="60%" alt="DeepSeek AI" />
|
||||
</div>
|
||||
<hr>
|
||||
<div align="center">
|
||||
<a href="https://www.deepseek.com/" target="_blank">
|
||||
<img alt="Homepage" src="https://github.com/deepseek-ai/DeepSeek-V2/blob/main/figures/badge.svg?raw=true" />
|
||||
</a>
|
||||
<a href="https://huggingface.co/deepseek-ai/DeepSeek-OCR" target="_blank">
|
||||
<img alt="Hugging Face" src="https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-DeepSeek%20AI-ffc107?color=ffc107&logoColor=white" />
|
||||
</a>
|
||||
|
||||
</div>
|
||||
|
||||
<div align="center">
|
||||
|
||||
<a href="https://discord.gg/Tc7c45Zzu5" target="_blank">
|
||||
<img alt="Discord" src="https://img.shields.io/badge/Discord-DeepSeek%20AI-7289da?logo=discord&logoColor=white&color=7289da" />
|
||||
</a>
|
||||
<a href="https://twitter.com/deepseek_ai" target="_blank">
|
||||
<img alt="Twitter Follow" src="https://img.shields.io/badge/Twitter-deepseek_ai-white?logo=x&logoColor=white" />
|
||||
</a>
|
||||
|
||||
</div>
|
||||
|
||||
|
||||
|
||||
<p align="center">
|
||||
<a href="https://github.com/deepseek-ai/DeepSeek-OCR"><b>🌟 Github</b></a> |
|
||||
<a href="https://huggingface.co/deepseek-ai/DeepSeek-OCR"><b>📥 Model Download</b></a> |
|
||||
<a href="https://github.com/deepseek-ai/DeepSeek-OCR/blob/main/DeepSeek_OCR_paper.pdf"><b>📄 Paper Link</b></a> |
|
||||
<a href=""><b>📄 Arxiv Paper Link</b></a> |
|
||||
</p>
|
||||
<h2>
|
||||
<p align="center">
|
||||
<a href="">DeepSeek-OCR: Contexts Optical Compression</a>
|
||||
</p>
|
||||
</h2>
|
||||
<p align="center">
|
||||
<img src="assets/fig1.png" style="width: 1000px" align=center>
|
||||
</p>
|
||||
<p align="center">
|
||||
<a href="">Explore the boundaries of visual-text compression.</a>
|
||||
</p>
|
||||
|
||||
## Usage
|
||||
Inference using Hugging Face Transformers on NVIDIA GPUs. Requirements tested on Python 3.12.9 + CUDA 11.8:
|
||||
|
||||
```
|
||||
torch==2.6.0
|
||||
transformers==4.46.3
|
||||
tokenizers==0.20.3
|
||||
einops
|
||||
addict
|
||||
easydict
|
||||
pip install flash-attn==2.7.3 --no-build-isolation
|
||||
```
|
||||
|
||||
```python
|
||||
from transformers import AutoModel, AutoTokenizer
|
||||
import torch
|
||||
import os
|
||||
os.environ["CUDA_VISIBLE_DEVICES"] = '0'
|
||||
model_name = 'deepseek-ai/DeepSeek-OCR'
|
||||
|
||||
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
|
||||
model = AutoModel.from_pretrained(model_name, _attn_implementation='flash_attention_2', trust_remote_code=True, use_safetensors=True)
|
||||
model = model.eval().cuda().to(torch.bfloat16)
|
||||
|
||||
# prompt = "<image>\nFree OCR. "
|
||||
prompt = "<image>\n<|grounding|>Convert the document to markdown. "
|
||||
image_file = 'your_image.jpg'
|
||||
output_path = 'your/output/dir'
|
||||
|
||||
# infer(self, tokenizer, prompt='', image_file='', output_path = ' ', base_size = 1024, image_size = 640, crop_mode = True, test_compress = False, save_results = False):
|
||||
|
||||
# Tiny: base_size = 512, image_size = 512, crop_mode = False
|
||||
# Small: base_size = 640, image_size = 640, crop_mode = False
|
||||
# Base: base_size = 1024, image_size = 1024, crop_mode = False
|
||||
# Large: base_size = 1280, image_size = 1280, crop_mode = False
|
||||
|
||||
# Gundam: base_size = 1024, image_size = 640, crop_mode = True
|
||||
|
||||
res = model.infer(tokenizer, prompt=prompt, image_file=image_file, output_path = output_path, base_size = 1024, image_size = 640, crop_mode=True, save_results = True, test_compress = True)
|
||||
```
|
||||
|
||||
## vLLM
|
||||
Refer to [🌟GitHub](https://github.com/deepseek-ai/DeepSeek-OCR/) for guidance on model inference acceleration and PDF processing, etc.<!-- -->
|
||||
|
||||
## Visualizations
|
||||
<table>
|
||||
<tr>
|
||||
<td><img src="assets/show1.jpg" style="width: 500px"></td>
|
||||
<td><img src="assets/show2.jpg" style="width: 500px"></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><img src="assets/show3.jpg" style="width: 500px"></td>
|
||||
<td><img src="assets/show4.jpg" style="width: 500px"></td>
|
||||
</tr>
|
||||
</table>
|
||||
|
||||
|
||||
## Acknowledgement
|
||||
|
||||
We would like to thank [Vary](https://github.com/Ucas-HaoranWei/Vary/), [GOT-OCR2.0](https://github.com/Ucas-HaoranWei/GOT-OCR2.0/), [MinerU](https://github.com/opendatalab/MinerU), [PaddleOCR](https://github.com/PaddlePaddle/PaddleOCR), [OneChart](https://github.com/LingyvKong/OneChart), [Slow Perception](https://github.com/Ucas-HaoranWei/Slow-Perception) for their valuable models and ideas.
|
||||
|
||||
We also appreciate the benchmarks: [Fox](https://github.com/ucaslcl/Fox), [OmniDocBench](https://github.com/opendatalab/OmniDocBench).
|
||||
|
||||
|
||||
## Citation
|
||||
Coming soon!
|
||||
21
LICENSE
Normal file
21
LICENSE
Normal file
@ -0,0 +1,21 @@
|
||||
MIT License
|
||||
|
||||
Copyright (c) 2023 DeepSeek
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
||||
156
README.md
156
README.md
@ -1,48 +1,122 @@
|
||||
---
|
||||
license: Apache License 2.0
|
||||
tags: []
|
||||
|
||||
#model-type:
|
||||
##如 gpt、phi、llama、chatglm、baichuan 等
|
||||
#- gpt
|
||||
|
||||
#domain:
|
||||
##如 nlp、cv、audio、multi-modal
|
||||
#- nlp
|
||||
|
||||
#language:
|
||||
##语言代码列表 https://help.aliyun.com/document_detail/215387.html?spm=a2c4g.11186623.0.0.9f8d7467kni6Aa
|
||||
#- cn
|
||||
|
||||
#metrics:
|
||||
##如 CIDEr、Blue、ROUGE 等
|
||||
#- CIDEr
|
||||
|
||||
#tags:
|
||||
##各种自定义,包括 pretrained、fine-tuned、instruction-tuned、RL-tuned 等训练方法和其他
|
||||
#- pretrained
|
||||
|
||||
#tools:
|
||||
##如 vllm、fastchat、llamacpp、AdaSeq 等
|
||||
#- vllm
|
||||
pipeline_tag: image-text-to-text
|
||||
language:
|
||||
- multilingual
|
||||
tags:
|
||||
- deepseek
|
||||
- vision-language
|
||||
- ocr
|
||||
- custom_code
|
||||
license: mit
|
||||
---
|
||||
### 当前模型的贡献者未提供更加详细的模型介绍。模型文件和权重,可浏览“模型文件”页面获取。
|
||||
#### 您可以通过如下git clone命令,或者ModelScope SDK来下载模型
|
||||
<div align="center">
|
||||
<img src="https://github.com/deepseek-ai/DeepSeek-V2/blob/main/figures/logo.svg?raw=true" width="60%" alt="DeepSeek AI" />
|
||||
</div>
|
||||
<hr>
|
||||
<div align="center">
|
||||
<a href="https://www.deepseek.com/" target="_blank">
|
||||
<img alt="Homepage" src="https://github.com/deepseek-ai/DeepSeek-V2/blob/main/figures/badge.svg?raw=true" />
|
||||
</a>
|
||||
<a href="https://huggingface.co/deepseek-ai/DeepSeek-OCR" target="_blank">
|
||||
<img alt="Hugging Face" src="https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-DeepSeek%20AI-ffc107?color=ffc107&logoColor=white" />
|
||||
</a>
|
||||
|
||||
</div>
|
||||
|
||||
<div align="center">
|
||||
|
||||
<a href="https://discord.gg/Tc7c45Zzu5" target="_blank">
|
||||
<img alt="Discord" src="https://img.shields.io/badge/Discord-DeepSeek%20AI-7289da?logo=discord&logoColor=white&color=7289da" />
|
||||
</a>
|
||||
<a href="https://twitter.com/deepseek_ai" target="_blank">
|
||||
<img alt="Twitter Follow" src="https://img.shields.io/badge/Twitter-deepseek_ai-white?logo=x&logoColor=white" />
|
||||
</a>
|
||||
|
||||
</div>
|
||||
|
||||
|
||||
|
||||
<p align="center">
|
||||
<a href="https://github.com/deepseek-ai/DeepSeek-OCR"><b>🌟 Github</b></a> |
|
||||
<a href="https://huggingface.co/deepseek-ai/DeepSeek-OCR"><b>📥 Model Download</b></a> |
|
||||
<a href="https://github.com/deepseek-ai/DeepSeek-OCR/blob/main/DeepSeek_OCR_paper.pdf"><b>📄 Paper Link</b></a> |
|
||||
<a href=""><b>📄 Arxiv Paper Link</b></a> |
|
||||
</p>
|
||||
<h2>
|
||||
<p align="center">
|
||||
<a href="">DeepSeek-OCR: Contexts Optical Compression</a>
|
||||
</p>
|
||||
</h2>
|
||||
<p align="center">
|
||||
<img src="assets/fig1.png" style="width: 1000px" align=center>
|
||||
</p>
|
||||
<p align="center">
|
||||
<a href="">Explore the boundaries of visual-text compression.</a>
|
||||
</p>
|
||||
|
||||
## Usage
|
||||
Inference using Hugging Face Transformers on NVIDIA GPUs. Requirements tested on Python 3.12.9 + CUDA 11.8:
|
||||
|
||||
SDK下载
|
||||
```bash
|
||||
#安装ModelScope
|
||||
pip install modelscope
|
||||
```
|
||||
torch==2.6.0
|
||||
transformers==4.46.3
|
||||
tokenizers==0.20.3
|
||||
einops
|
||||
addict
|
||||
easydict
|
||||
pip install flash-attn==2.7.3 --no-build-isolation
|
||||
```
|
||||
|
||||
```python
|
||||
#SDK模型下载
|
||||
from modelscope import snapshot_download
|
||||
model_dir = snapshot_download('deepseek-ai/DeepSeek-OCR')
|
||||
```
|
||||
Git下载
|
||||
```
|
||||
#Git模型下载
|
||||
git clone https://www.modelscope.cn/deepseek-ai/DeepSeek-OCR.git
|
||||
from transformers import AutoModel, AutoTokenizer
|
||||
import torch
|
||||
import os
|
||||
os.environ["CUDA_VISIBLE_DEVICES"] = '0'
|
||||
model_name = 'deepseek-ai/DeepSeek-OCR'
|
||||
|
||||
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
|
||||
model = AutoModel.from_pretrained(model_name, _attn_implementation='flash_attention_2', trust_remote_code=True, use_safetensors=True)
|
||||
model = model.eval().cuda().to(torch.bfloat16)
|
||||
|
||||
# prompt = "<image>\nFree OCR. "
|
||||
prompt = "<image>\n<|grounding|>Convert the document to markdown. "
|
||||
image_file = 'your_image.jpg'
|
||||
output_path = 'your/output/dir'
|
||||
|
||||
# infer(self, tokenizer, prompt='', image_file='', output_path = ' ', base_size = 1024, image_size = 640, crop_mode = True, test_compress = False, save_results = False):
|
||||
|
||||
# Tiny: base_size = 512, image_size = 512, crop_mode = False
|
||||
# Small: base_size = 640, image_size = 640, crop_mode = False
|
||||
# Base: base_size = 1024, image_size = 1024, crop_mode = False
|
||||
# Large: base_size = 1280, image_size = 1280, crop_mode = False
|
||||
|
||||
# Gundam: base_size = 1024, image_size = 640, crop_mode = True
|
||||
|
||||
res = model.infer(tokenizer, prompt=prompt, image_file=image_file, output_path = output_path, base_size = 1024, image_size = 640, crop_mode=True, save_results = True, test_compress = True)
|
||||
```
|
||||
|
||||
<p style="color: lightgrey;">如果您是本模型的贡献者,我们邀请您根据<a href="https://modelscope.cn/docs/ModelScope%E6%A8%A1%E5%9E%8B%E6%8E%A5%E5%85%A5%E6%B5%81%E7%A8%8B%E6%A6%82%E8%A7%88" style="color: lightgrey; text-decoration: underline;">模型贡献文档</a>,及时完善模型卡片内容。</p>
|
||||
## vLLM
|
||||
Refer to [🌟GitHub](https://github.com/deepseek-ai/DeepSeek-OCR/) for guidance on model inference acceleration and PDF processing, etc.<!-- -->
|
||||
|
||||
## Visualizations
|
||||
<table>
|
||||
<tr>
|
||||
<td><img src="assets/show1.jpg" style="width: 500px"></td>
|
||||
<td><img src="assets/show2.jpg" style="width: 500px"></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><img src="assets/show3.jpg" style="width: 500px"></td>
|
||||
<td><img src="assets/show4.jpg" style="width: 500px"></td>
|
||||
</tr>
|
||||
</table>
|
||||
|
||||
|
||||
## Acknowledgement
|
||||
|
||||
We would like to thank [Vary](https://github.com/Ucas-HaoranWei/Vary/), [GOT-OCR2.0](https://github.com/Ucas-HaoranWei/GOT-OCR2.0/), [MinerU](https://github.com/opendatalab/MinerU), [PaddleOCR](https://github.com/PaddlePaddle/PaddleOCR), [OneChart](https://github.com/LingyvKong/OneChart), [Slow Perception](https://github.com/Ucas-HaoranWei/Slow-Perception) for their valuable models and ideas.
|
||||
|
||||
We also appreciate the benchmarks: [Fox](https://github.com/ucaslcl/Fox), [OmniDocBench](https://github.com/opendatalab/OmniDocBench).
|
||||
|
||||
|
||||
## Citation
|
||||
Coming soon!
|
||||
|
||||
BIN
assets/fig1.png
Normal file
BIN
assets/fig1.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 387 KiB |
BIN
assets/show1.jpg
Normal file
BIN
assets/show1.jpg
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 114 KiB |
BIN
assets/show2.jpg
Normal file
BIN
assets/show2.jpg
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 211 KiB |
BIN
assets/show3.jpg
Normal file
BIN
assets/show3.jpg
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 241 KiB |
BIN
assets/show4.jpg
Normal file
BIN
assets/show4.jpg
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 262 KiB |
118
config.json
Normal file
118
config.json
Normal file
@ -0,0 +1,118 @@
|
||||
{
|
||||
"_name_or_path": "deepseek-ai/DeepSeek-OCR",
|
||||
"candidate_resolutions": [
|
||||
[
|
||||
1024,
|
||||
1024
|
||||
]
|
||||
],
|
||||
"global_view_pos": "head",
|
||||
"architectures": [
|
||||
"DeepseekOCRForCausalLM"
|
||||
],
|
||||
"auto_map": {
|
||||
"AutoConfig": "modeling_deepseekocr.DeepseekOCRConfig",
|
||||
"AutoModel": "modeling_deepseekocr.DeepseekOCRForCausalLM"
|
||||
},
|
||||
"language_config": {
|
||||
"architectures": [
|
||||
"DeepseekV2ForCausalLM"
|
||||
],
|
||||
"auto_map": {
|
||||
"AutoConfig": "configuration_deepseekv2.DeepseekV2Config",
|
||||
"AutoModel": "modeling_deepseek.DeepseekV2Model",
|
||||
"AutoModelForCausalLM": "modeling_deepseek.DeepseekV2ForCausalLM"
|
||||
},
|
||||
"bos_token_id": 0,
|
||||
"eos_token_id": 1,
|
||||
"first_k_dense_replace": 1,
|
||||
"hidden_size": 1280,
|
||||
"intermediate_size": 6848,
|
||||
"kv_lora_rank": null,
|
||||
"lm_head": true,
|
||||
"max_position_embeddings": 8192,
|
||||
"moe_intermediate_size": 896,
|
||||
"n_group": 1,
|
||||
"n_routed_experts": 64,
|
||||
"n_shared_experts": 2,
|
||||
"num_attention_heads": 10,
|
||||
"num_experts_per_tok": 6,
|
||||
"num_hidden_layers": 12,
|
||||
"num_key_value_heads": 10,
|
||||
"q_lora_rank": null,
|
||||
"qk_nope_head_dim": 0,
|
||||
"qk_rope_head_dim": 0,
|
||||
"rm_head": false,
|
||||
"topk_group": 1,
|
||||
"topk_method": "greedy",
|
||||
"torch_dtype": "bfloat16",
|
||||
"use_mla": false,
|
||||
"v_head_dim": 0,
|
||||
"vocab_size": 129280
|
||||
},
|
||||
"model_type": "deepseek_vl_v2",
|
||||
"projector_config": {
|
||||
"input_dim": 2048,
|
||||
"model_type": "mlp_projector",
|
||||
"n_embed": 1280,
|
||||
"projector_type": "linear"
|
||||
},
|
||||
"tile_tag": "2D",
|
||||
"torch_dtype": "bfloat16",
|
||||
"transformers_version": "4.46.3",
|
||||
"vision_config": {
|
||||
"image_size": 1024,
|
||||
"mlp_ratio": 3.7362,
|
||||
"model_name": "deeplip_b_l",
|
||||
"model_type": "vision",
|
||||
"width": {
|
||||
"clip-l-14-224": {
|
||||
"heads": 16,
|
||||
"image_size": 224,
|
||||
"layers": 24,
|
||||
"patch_size": 14,
|
||||
"width": 1024
|
||||
},
|
||||
"sam_vit_b": {
|
||||
"downsample_channels": [
|
||||
512,
|
||||
1024
|
||||
],
|
||||
"global_attn_indexes": [
|
||||
2,
|
||||
5,
|
||||
8,
|
||||
11
|
||||
],
|
||||
"heads": 12,
|
||||
"layers": 12,
|
||||
"width": 768
|
||||
}
|
||||
}
|
||||
},
|
||||
"bos_token_id": 0,
|
||||
"eos_token_id": 1,
|
||||
"first_k_dense_replace": 1,
|
||||
"hidden_size": 1280,
|
||||
"intermediate_size": 6848,
|
||||
"kv_lora_rank": null,
|
||||
"lm_head": true,
|
||||
"max_position_embeddings": 8192,
|
||||
"moe_intermediate_size": 896,
|
||||
"n_group": 1,
|
||||
"n_routed_experts": 64,
|
||||
"n_shared_experts": 2,
|
||||
"num_attention_heads": 10,
|
||||
"num_experts_per_tok": 6,
|
||||
"num_hidden_layers": 12,
|
||||
"num_key_value_heads": 10,
|
||||
"q_lora_rank": null,
|
||||
"qk_nope_head_dim": 0,
|
||||
"qk_rope_head_dim": 0,
|
||||
"rm_head": false,
|
||||
"topk_group": 1,
|
||||
"topk_method": "greedy",
|
||||
"use_mla": false,
|
||||
"v_head_dim": 0,
|
||||
"vocab_size": 129280
|
||||
}
|
||||
1
configuration.json
Normal file
1
configuration.json
Normal file
@ -0,0 +1 @@
|
||||
{"framework": "pytorch", "task": "image-text-to-text", "allow_remote": true}
|
||||
210
configuration_deepseek_v2.py
Normal file
210
configuration_deepseek_v2.py
Normal file
@ -0,0 +1,210 @@
|
||||
from transformers.configuration_utils import PretrainedConfig
|
||||
from transformers.utils import logging
|
||||
|
||||
# Module-level logger, obtained through the transformers logging utilities.
logger = logging.get_logger(__name__)

# Archive map for pretrained configurations; left empty here.
DEEPSEEK_PRETRAINED_CONFIG_ARCHIVE_MAP = {}
|
||||
class DeepseekV2Config(PretrainedConfig):
    r"""
    This is the configuration class to store the configuration of a [`DeepseekV2Model`]. It is used to instantiate a
    DeepSeek model according to the specified arguments, defining the model architecture. Instantiating a
    configuration with the defaults will yield a similar configuration to that of the DeepSeek-V2 with multi-latent
    attention.

    Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read the
    documentation from [`PretrainedConfig`] for more information.


    Args:
        vocab_size (`int`, *optional*, defaults to 102400):
            Vocabulary size of the DeepSeek model. Defines the number of different tokens that can be represented by
            the `inputs_ids` passed when calling [`DeepseekV2Model`]
        hidden_size (`int`, *optional*, defaults to 4096):
            Dimension of the hidden representations.
        intermediate_size (`int`, *optional*, defaults to 11008):
            Dimension of the MLP representations.
        moe_intermediate_size (`int`, *optional*, defaults to 1407):
            Dimension of the MoE representations.
        num_hidden_layers (`int`, *optional*, defaults to 30):
            Number of hidden layers in the Transformer decoder.
        num_attention_heads (`int`, *optional*, defaults to 32):
            Number of attention heads for each attention layer in the Transformer decoder.
        num_key_value_heads (`int`, *optional*, defaults to 32):
            This is the number of key_value heads that should be used to implement Grouped Query Attention. If
            `num_key_value_heads=num_attention_heads`, the model will use Multi Head Attention (MHA), if
            `num_key_value_heads=1` the model will use Multi Query Attention (MQA) otherwise GQA is used. When
            converting a multi-head checkpoint to a GQA checkpoint, each group key and value head should be
            constructed by meanpooling all the original heads within that group. For more details checkout [this
            paper](https://arxiv.org/pdf/2305.13245.pdf). If `None` is passed explicitly, it falls back to
            `num_attention_heads`.
        n_shared_experts (`int`, *optional*, defaults to None):
            Number of shared experts, None means dense model.
        n_routed_experts (`int`, *optional*, defaults to None):
            Number of routed experts, None means dense model.
        ep_size (`int`, *optional*, defaults to 1):
            Stored on the config unchanged. Presumably the expert-parallel group size; confirm against the modeling
            code.
        routed_scaling_factor (`float`, *optional*, defaults to 1.0):
            Scaling factor for routed experts.
        kv_lora_rank (`int`, *optional*, defaults to 512):
            Stored on the config unchanged. Presumably the low-rank dimension of the compressed key/value projection
            used by multi-latent attention; confirm against the modeling code.
        q_lora_rank (`int`, *optional*, defaults to 1536):
            Stored on the config unchanged. Presumably the low-rank dimension of the compressed query projection;
            confirm against the modeling code.
        qk_rope_head_dim (`int`, *optional*, defaults to 64):
            Stored on the config unchanged. Presumably the per-head query/key dimension that receives rotary
            embeddings; confirm against the modeling code.
        v_head_dim (`int`, *optional*, defaults to 128):
            Stored on the config unchanged. Presumably the per-head value dimension; confirm against the modeling
            code.
        qk_nope_head_dim (`int`, *optional*, defaults to 128):
            Stored on the config unchanged. Presumably the per-head query/key dimension without rotary embeddings;
            confirm against the modeling code.
        topk_method (`str`, *optional*, defaults to `'gready'`):
            Topk method used in routed gate. NOTE(review): the signature default really is the literal `'gready'`
            (sic); checkpoints appear to set this explicitly (e.g. `"greedy"`), so verify which spellings the gate
            implementation accepts before relying on the default.
        n_group (`int`, *optional*, defaults to None):
            Number of groups for routed experts.
        topk_group (`int`, *optional*, defaults to None):
            Number of selected groups for each token (for each token, ensuring the selected experts is only within
            `topk_group` groups).
        num_experts_per_tok (`int`, *optional*, defaults to None):
            Number of selected experts, None means dense model.
        moe_layer_freq (`int`, *optional*, defaults to 1):
            The frequency of the MoE layer: one expert layer for every `moe_layer_freq - 1` dense layers.
        first_k_dense_replace (`int`, *optional*, defaults to 0):
            Number of dense layers in shallow layers(embed->dense->dense->...->dense->moe->moe...->lm_head).
                                                            \--k dense layers--/
        norm_topk_prob (`bool`, *optional*, defaults to False):
            Whether to normalize the weights of the routed experts.
        scoring_func (`str`, *optional*, defaults to 'softmax'):
            Method of computing expert weights.
        aux_loss_alpha (`float`, *optional*, defaults to 0.001):
            Auxiliary loss weight coefficient.
        seq_aux (`bool`, *optional*, defaults to True):
            Whether to compute the auxiliary loss for each individual sample.
        hidden_act (`str` or `function`, *optional*, defaults to `"silu"`):
            The non-linear activation function (function or string) in the decoder.
        max_position_embeddings (`int`, *optional*, defaults to 2048):
            The maximum sequence length that this model might ever be used with.
        initializer_range (`float`, *optional*, defaults to 0.02):
            The standard deviation of the truncated_normal_initializer for initializing all weight matrices.
        rms_norm_eps (`float`, *optional*, defaults to 1e-06):
            The epsilon used by the rms normalization layers. The value is converted with `float(...)` before being
            stored.
        use_cache (`bool`, *optional*, defaults to `True`):
            Whether or not the model should return the last key/values attentions (not used by all models). Only
            relevant if `config.is_decoder=True`.
        pad_token_id (`int`, *optional*):
            Padding token id.
        bos_token_id (`int`, *optional*, defaults to 100000):
            Beginning of stream token id.
        eos_token_id (`int`, *optional*, defaults to 100001):
            End of stream token id.
        pretraining_tp (`int`, *optional*, defaults to 1):
            Experimental feature. Tensor parallelism rank used during pretraining. Please refer to [this
            document](https://huggingface.co/docs/transformers/parallelism) to understand more about it. This value
            is necessary to ensure exact reproducibility of the pretraining results. Please refer to [this
            issue](https://github.com/pytorch/pytorch/issues/76232).
        tie_word_embeddings (`bool`, *optional*, defaults to `False`):
            Whether to tie weight embeddings
        rope_theta (`float`, *optional*, defaults to 10000.0):
            The base period of the RoPE embeddings.
        rope_scaling (`Dict`, *optional*):
            Dictionary containing the scaling configuration for the RoPE embeddings. Currently supports two scaling
            strategies: linear and dynamic. Their scaling factor must be a float greater than 1. The expected format
            is `{"type": strategy name, "factor": scaling factor}`. When using this flag, don't update
            `max_position_embeddings` to the expected new maximum.
        attention_bias (`bool`, *optional*, defaults to `False`):
            Whether to use a bias in the query, key, value and output projection layers during self-attention.
        attention_dropout (`float`, *optional*, defaults to 0.0):
            The dropout ratio for the attention probabilities.
        use_mla (`bool`, *optional*, defaults to `True`):
            Use multi-latent attention or multi-head attention. If True, the model will use multi-latent attention,
            otherwise, it will use multi-head attention.
        **kwargs:
            Remaining keyword arguments, forwarded unchanged to `PretrainedConfig.__init__`.

    ```python
    >>> from transformers import DeepseekV2Model, DeepseekV2Config

    >>> # Initializing a Deepseek-V2 style configuration
    >>> configuration = DeepseekV2Config()

    >>> # Initializing a model from the configuration
    >>> model = DeepseekV2Model(configuration)

    >>> # Accessing the model configuration
    >>> configuration = model.config
    ```"""

    model_type = "deepseek_v2"
    keys_to_ignore_at_inference = ["past_key_values"]

    def __init__(
        self,
        vocab_size=102400,
        hidden_size=4096,
        intermediate_size=11008,
        moe_intermediate_size = 1407,
        num_hidden_layers=30,
        num_attention_heads=32,
        num_key_value_heads=32,
        n_shared_experts = None,
        n_routed_experts = None,
        ep_size = 1,
        routed_scaling_factor = 1.0,
        kv_lora_rank = 512,
        q_lora_rank = 1536,
        qk_rope_head_dim = 64,
        v_head_dim = 128,
        qk_nope_head_dim = 128,
        topk_method = 'gready',
        n_group = None,
        topk_group = None,
        num_experts_per_tok = None,
        moe_layer_freq = 1,
        first_k_dense_replace = 0,
        norm_topk_prob = False,
        scoring_func = 'softmax',
        aux_loss_alpha = 0.001,
        seq_aux = True,
        hidden_act="silu",
        max_position_embeddings=2048,
        initializer_range=0.02,
        rms_norm_eps=1e-6,
        use_cache=True,
        pad_token_id=None,
        bos_token_id=100000,
        eos_token_id=100001,
        pretraining_tp=1,
        tie_word_embeddings=False,
        rope_theta=10000.0,
        rope_scaling=None,
        attention_bias=False,
        attention_dropout=0.0,
        use_mla=True,
        **kwargs,
    ):
        # Model-specific hyperparameters are stored as plain attributes before
        # the base-class initializer runs.
        self.vocab_size = vocab_size
        self.max_position_embeddings = max_position_embeddings
        self.hidden_size = hidden_size
        self.intermediate_size = intermediate_size
        self.moe_intermediate_size = moe_intermediate_size
        self.num_hidden_layers = num_hidden_layers
        self.num_attention_heads = num_attention_heads
        self.n_shared_experts = n_shared_experts
        self.n_routed_experts = n_routed_experts
        self.ep_size = ep_size
        self.routed_scaling_factor = routed_scaling_factor
        self.kv_lora_rank = kv_lora_rank
        self.q_lora_rank = q_lora_rank
        self.qk_rope_head_dim = qk_rope_head_dim
        self.v_head_dim = v_head_dim
        self.qk_nope_head_dim = qk_nope_head_dim
        self.topk_method = topk_method
        self.n_group = n_group
        self.topk_group = topk_group
        self.num_experts_per_tok = num_experts_per_tok
        self.moe_layer_freq = moe_layer_freq
        self.first_k_dense_replace = first_k_dense_replace
        self.norm_topk_prob = norm_topk_prob
        self.scoring_func = scoring_func
        self.aux_loss_alpha = aux_loss_alpha
        self.seq_aux = seq_aux
        # for backward compatibility: an explicit None means "one KV head per
        # attention head"
        if num_key_value_heads is None:
            num_key_value_heads = num_attention_heads

        self.num_key_value_heads = num_key_value_heads
        self.hidden_act = hidden_act
        self.initializer_range = initializer_range
        # Coerce to float so values such as the string "1e-6" are stored numerically.
        self.rms_norm_eps = float(rms_norm_eps)
        self.pretraining_tp = pretraining_tp
        self.use_cache = use_cache
        self.rope_theta = rope_theta
        self.rope_scaling = rope_scaling
        self.attention_bias = attention_bias
        self.attention_dropout = attention_dropout
        self.use_mla = use_mla

        # Token ids, embedding tying and any unrecognized kwargs are handled
        # by PretrainedConfig.
        super().__init__(
            pad_token_id=pad_token_id,
            bos_token_id=bos_token_id,
            eos_token_id=eos_token_id,
            tie_word_embeddings=tie_word_embeddings,
            **kwargs,
        )
|
||||
280
conversation.py
Normal file
280
conversation.py
Normal file
@ -0,0 +1,280 @@
|
||||
"""
|
||||
From https://github.com/lm-sys/FastChat/blob/main/fastchat/conversation.py
|
||||
"""
|
||||
|
||||
import dataclasses
|
||||
from enum import IntEnum, auto
|
||||
from typing import Any, Dict, List
|
||||
|
||||
|
||||
class SeparatorStyle(IntEnum):
    """Integer-valued identifiers for the supported prompt separator layouts."""

    # Values are spelled out explicitly; they match what ``auto()`` assigns
    # to an IntEnum (consecutive integers starting at 1).
    DeepSeek = 1
    DeepSeekV2 = 2
    PLAIN = 3
    ALIGNMENT = 4
|
||||
|
||||
|
||||
@dataclasses.dataclass
|
||||
class Conversation:
|
||||
"""A class that manages prompt templates and keeps all conversation history."""
|
||||
|
||||
# The name of this template
|
||||
name: str
|
||||
# The template of the system prompt
|
||||
system_template: str = "{system_message}"
|
||||
# The system message
|
||||
system_message: str = ""
|
||||
# The names of two roles
|
||||
roles: List[str] = (("USER", "ASSISTANT"),)
|
||||
# All messages. Each item is (role, message).
|
||||
messages: List[List[str]] = ()
|
||||
# The number of few shot examples
|
||||
offset: int = 0
|
||||
# The separator style and configurations
|
||||
sep_style: SeparatorStyle = SeparatorStyle.DeepSeek
|
||||
sep: str = "\n"
|
||||
sep2: str = None
|
||||
# Stop criteria (the default one is EOS token)
|
||||
stop_str: str = None
|
||||
# Stops generation if meeting any token in this list
|
||||
stop_token_ids: List[int] = None
|
||||
|
||||
def get_prompt(self) -> str:
|
||||
"""Get the prompt for generation."""
|
||||
system_prompt = self.system_template.format(system_message=self.system_message)
|
||||
if self.sep_style == SeparatorStyle.DeepSeek:
|
||||
seps = [self.sep, self.sep2]
|
||||
if system_prompt == "" or system_prompt is None:
|
||||
ret = ""
|
||||
else:
|
||||
ret = system_prompt + seps[0]
|
||||
for i, (role, message) in enumerate(self.messages):
|
||||
if message:
|
||||
ret += role + ": " + message + seps[i % 2]
|
||||
else:
|
||||
ret += role + ":"
|
||||
return ret
|
||||
elif self.sep_style == SeparatorStyle.DeepSeekV2:
|
||||
seps = [self.sep, self.sep2]
|
||||
if system_prompt == "" or system_prompt is None:
|
||||
ret = ""
|
||||
else:
|
||||
ret = system_prompt + seps[0]
|
||||
for i, (role, message) in enumerate(self.messages):
|
||||
if message:
|
||||
if role == "User":
|
||||
ret += "<|sft▁begin|>\n" + message + self.sep #<|sft▁begin|>User Input<|sft▁end|>\nResponse<|end▁of▁sentence|>
|
||||
else:
|
||||
ret += message + self.sep2
|
||||
else:
|
||||
ret = ret
|
||||
return ret
|
||||
|
||||
elif self.sep_style == SeparatorStyle.PLAIN:
|
||||
seps = [self.sep, self.sep2]
|
||||
ret = ""
|
||||
for i, (role, message) in enumerate(self.messages):
|
||||
if message:
|
||||
if type(message) is tuple:
|
||||
message, _, _ = message
|
||||
if i % 2 == 0:
|
||||
ret += message + seps[i % 2]
|
||||
else:
|
||||
ret += message + seps[i % 2]
|
||||
else:
|
||||
ret += ""
|
||||
return ret
|
||||
elif self.sep_style == SeparatorStyle.ALIGNMENT:
|
||||
seps = [self.sep, self.sep2]
|
||||
ret = ""
|
||||
for i, (role, message) in enumerate(self.messages):
|
||||
if message:
|
||||
if type(message) is tuple:
|
||||
message, _, _ = message
|
||||
if i % 2 == 0:
|
||||
ret += '<image>\n' + seps[i % 2]
|
||||
else:
|
||||
ret += message + seps[i % 2]
|
||||
else:
|
||||
ret += ""
|
||||
return ret
|
||||
else:
|
||||
raise ValueError(f"Invalid style: {self.sep_style}")
|
||||
|
||||
def set_system_message(self, system_message: str):
|
||||
"""Set the system message."""
|
||||
self.system_message = system_message
|
||||
|
||||
def append_message(self, role: str, message: str):
|
||||
"""Append a new message."""
|
||||
self.messages.append([role, message])
|
||||
|
||||
def update_last_message(self, message: str):
    """Update the last output.

    The last message is typically set to be None when constructing the prompt,
    so we need to update it in-place after getting the response from a model.

    Args:
        message: Text that replaces the content of the most recent turn.

    Raises:
        IndexError: If no message has been appended yet.
    """
    # Only the text slot (index 1) is overwritten; the role is left untouched.
    self.messages[-1][1] = message
|
||||
|
||||
def reset_message(self):
    """Discard every stored message, leaving an empty conversation."""
    # Rebinds to a fresh list instead of clearing in place, so a list object
    # previously held by another reference is not mutated.
    self.messages = []
|
||||
|
||||
def to_gradio_chatbot(self):
    """Convert the conversation to gradio chatbot format.

    Messages before ``self.offset`` are skipped. The remaining ones are
    paired positionally: even positions open a new ``[user, None]`` pair and
    odd positions fill in the reply slot of the pair opened just before.

    Returns:
        A list of ``[user_message, assistant_message]`` lists; the second
        entry stays ``None`` when the trailing user turn has no reply.
    """
    ret = []
    for i, (_role, msg) in enumerate(self.messages[self.offset:]):
        if i % 2 == 0:
            ret.append([msg, None])
        else:
            ret[-1][-1] = msg
    return ret
|
||||
|
||||
def to_openai_api_messages(self):
    """Convert the conversation to OpenAI chat completion format.

    Returns:
        A list of ``{"role": ..., "content": ...}`` dicts. The first entry is
        always the system message (``self.system_template`` formatted with
        ``self.system_message``), even when that text is empty. Messages
        from ``self.offset`` on follow, labelled by position: even positions
        become "user" and odd positions become "assistant".
    """
    system_prompt = self.system_template.format(system_message=self.system_message)
    ret = [{"role": "system", "content": system_prompt}]

    for i, (_, msg) in enumerate(self.messages[self.offset:]):
        if i % 2 == 0:
            ret.append({"role": "user", "content": msg})
        elif msg is not None:
            # A None reply is the placeholder for a pending answer; skip it.
            ret.append({"role": "assistant", "content": msg})
    return ret
|
||||
|
||||
def copy(self):
    """Return a new ``Conversation`` carrying the same settings.

    Each ``[role, message]`` pair is rebuilt as a new list, so appending to
    or updating messages on the copy does not affect this instance. All
    other fields (including ``roles``, ``stop_str`` and ``stop_token_ids``)
    are passed through by reference.
    """
    return Conversation(
        name=self.name,
        system_template=self.system_template,
        system_message=self.system_message,
        roles=self.roles,
        messages=[[x, y] for x, y in self.messages],
        offset=self.offset,
        sep_style=self.sep_style,
        sep=self.sep,
        sep2=self.sep2,
        stop_str=self.stop_str,
        stop_token_ids=self.stop_token_ids,
    )
|
||||
|
||||
def dict(self):
    """Return a plain-dict summary of the conversation.

    Only the name (under the key ``"template_name"``), system message,
    roles, messages and offset are included; separator and stop settings
    are not. ``"messages"`` is the live ``self.messages`` list, not a copy.
    """
    return {
        "template_name": self.name,
        "system_message": self.system_message,
        "roles": self.roles,
        "messages": self.messages,
        "offset": self.offset,
    }
|
||||
|
||||
|
||||
# A global registry for all conversation templates, keyed by template name.
# Filled by register_conv_template(); read by get_conv_template().
conv_templates: Dict[str, Conversation] = {}
|
||||
|
||||
|
||||
def register_conv_template(template: Conversation, override: bool = False):
    """Register a new conversation template.

    Args:
        template: Template stored in ``conv_templates`` under ``template.name``.
        override: When true, an existing template with the same name is
            replaced without complaint.

    Raises:
        AssertionError: If ``override`` is false and ``template.name`` is
            already registered.
    """
    # Raised explicitly rather than via ``assert`` so the duplicate check
    # still runs under ``python -O``; exception type and message are the same.
    if not override and template.name in conv_templates:
        raise AssertionError(f"{template.name} has been registered.")

    conv_templates[template.name] = template
|
||||
|
||||
|
||||
def get_conv_template(name: str) -> Conversation:
    """Get a conversation template.

    Args:
        name: Name the template was registered under.

    Returns:
        The result of calling ``copy()`` on the registered template, so the
        caller does not work on the registry entry itself.

    Raises:
        KeyError: If no template is registered under ``name``.
    """
    return conv_templates[name].copy()
|
||||
|
||||
|
||||
# "deepseek" template: <|User|>/<|Assistant|> role tags, empty system message,
# turns joined with "\n\n" (sep) and the end-of-sentence token (sep2).
register_conv_template(
    Conversation(
        name="deepseek",
        system_template="{system_message}",
        # system_message="You are a helpful assistant. Please answer truthfully and write out your "
        # "thinking step by step to be sure you get the right answer.",
        system_message="",
        roles=("<|User|>", "<|Assistant|>"),
        messages=(),
        offset=0,
        sep_style=SeparatorStyle.DeepSeek,
        sep="\n\n",
        sep2="<|end▁of▁sentence|>",
        stop_token_ids=[100001],
        stop_str=["User:", "<|end▁of▁sentence|>"],
    )
)
|
||||
# "deepseekv2" template: same fields as "deepseek" except that sep is empty.
# NOTE(review): this template is named "deepseekv2" but selects
# SeparatorStyle.DeepSeek, not SeparatorStyle.DeepSeekV2 (which get_prompt
# handles in a separate branch) -- confirm this is intentional.
register_conv_template(
    Conversation(
        name="deepseekv2",
        system_template="{system_message}",
        # system_message="You are a helpful assistant. Please answer truthfully and write out your "
        # "thinking step by step to be sure you get the right answer.",
        system_message="",
        roles=("<|User|>", "<|Assistant|>"),
        messages=(),
        offset=0,
        sep_style=SeparatorStyle.DeepSeek,
        sep="",
        sep2="<|end▁of▁sentence|>",
        stop_token_ids=[100001],
        stop_str=["User:", "<|end▁of▁sentence|>"],
    )
)
|
||||
|
||||
|
||||
# "plain" template: no system template, empty role names and empty separators.
register_conv_template(
    Conversation(
        name="plain",
        system_template="",
        system_message="",
        roles=("", ""),
        messages=(),
        offset=0,
        sep_style=SeparatorStyle.PLAIN,
        sep="",
        sep2="",
        stop_token_ids=[100001],
        stop_str=['</s>'],
    )
)
|
||||
|
||||
|
||||
# "alignment" template: same fields as "plain" but with the ALIGNMENT style,
# for which get_prompt emits '<image>\n' in place of even-indexed messages.
register_conv_template(
    Conversation(
        name="alignment",
        system_template="",
        system_message="",
        roles=("", ""),
        messages=(),
        offset=0,
        sep_style=SeparatorStyle.ALIGNMENT,
        sep="",
        sep2="",
        stop_token_ids=[100001],
        stop_str=['</s>'],
    )
)
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Manual smoke test: build the same short dialogue with each DeepSeek
    # template and print the resulting prompt. The final assistant turn is
    # None, i.e. the reply slot is left open.
    for template_name in ("deepseek", "deepseekv2"):
        print(f"{template_name} template:")
        conv = get_conv_template(template_name)
        conv.append_message(conv.roles[0], "Hello!")
        conv.append_message(conv.roles[1], "Hi! This is Tony.")
        conv.append_message(conv.roles[0], "Who are you?")
        conv.append_message(conv.roles[1], "I am a helpful assistant.")
        conv.append_message(conv.roles[0], "How are you?")
        conv.append_message(conv.roles[1], None)
        print(conv.get_prompt())
|
||||
1058
deepencoder.py
Normal file
1058
deepencoder.py
Normal file
File diff suppressed because it is too large
Load Diff
3
model-00001-of-000001.safetensors
Normal file
3
model-00001-of-000001.safetensors
Normal file
@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:1169e7cdc28ff2fb6186556acb2175db148ad26a62097df4c45a17e523180d3f
|
||||
size 6672547120
|
||||
2717
model.safetensors.index.json
Normal file
2717
model.safetensors.index.json
Normal file
File diff suppressed because it is too large
Load Diff
1037
modeling_deepseekocr.py
Normal file
1037
modeling_deepseekocr.py
Normal file
File diff suppressed because it is too large
Load Diff
1992
modeling_deepseekv2.py
Normal file
1992
modeling_deepseekv2.py
Normal file
File diff suppressed because it is too large
Load Diff
28
processor_config.json
Normal file
28
processor_config.json
Normal file
@ -0,0 +1,28 @@
|
||||
{
|
||||
"add_special_token": false,
|
||||
"candidate_resolutions": [
|
||||
[
|
||||
1024,
|
||||
1024
|
||||
]
|
||||
],
|
||||
"downsample_ratio": 4,
|
||||
"ignore_id": -100,
|
||||
"image_mean": [
|
||||
0.5,
|
||||
0.5,
|
||||
0.5
|
||||
],
|
||||
"image_std": [
|
||||
0.5,
|
||||
0.5,
|
||||
0.5
|
||||
],
|
||||
"image_token": "<image>",
|
||||
"mask_prompt": false,
|
||||
"normalize": true,
|
||||
"pad_token": "<\uff5c\u2581pad\u2581\uff5c>",
|
||||
"patch_size": 16,
|
||||
"processor_class": "DeepseekVLV2Processor",
|
||||
"sft_format": "deepseek"
|
||||
}
|
||||
39
special_tokens_map.json
Normal file
39
special_tokens_map.json
Normal file
@ -0,0 +1,39 @@
|
||||
{
|
||||
"additional_special_tokens": [
|
||||
{
|
||||
"content": "<|User|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false
|
||||
},
|
||||
{
|
||||
"content": "<|Assistant|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false
|
||||
}
|
||||
],
|
||||
"bos_token": {
|
||||
"content": "<|begin▁of▁sentence|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false
|
||||
},
|
||||
"eos_token": {
|
||||
"content": "<|end▁of▁sentence|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false
|
||||
},
|
||||
"pad_token": {
|
||||
"content": "<|▁pad▁|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false
|
||||
}
|
||||
}
|
||||
3
tokenizer.json
Normal file
3
tokenizer.json
Normal file
@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:a02f8fd5228c90256bb4f6554c34a579d48f909e5beb232dc4afad870b55a8b4
|
||||
size 9979544
|
||||
6661
tokenizer_config.json
Normal file
6661
tokenizer_config.json
Normal file
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user