mirror of
https://www.modelscope.cn/deepseek-ai/DeepSeek-OCR.git
synced 2026-04-02 21:02:54 +08:00
Upload folder using ModelScope SDK
This commit is contained in:
3
.gitattributes
vendored
3
.gitattributes
vendored
@ -45,3 +45,6 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|||||||
*.wasm filter=lfs diff=lfs merge=lfs -text
|
*.wasm filter=lfs diff=lfs merge=lfs -text
|
||||||
*.zst filter=lfs diff=lfs merge=lfs -text
|
*.zst filter=lfs diff=lfs merge=lfs -text
|
||||||
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
||||||
|
|
||||||
|
model-00001-of-000001.safetensors filter=lfs diff=lfs merge=lfs -text
|
||||||
|
tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
||||||
122
.ipynb_checkpoints/README-checkpoint.md
Normal file
122
.ipynb_checkpoints/README-checkpoint.md
Normal file
@ -0,0 +1,122 @@
|
|||||||
|
---
|
||||||
|
pipeline_tag: image-text-to-text
|
||||||
|
language:
|
||||||
|
- multilingual
|
||||||
|
tags:
|
||||||
|
- deepseek
|
||||||
|
- vision-language
|
||||||
|
- ocr
|
||||||
|
- custom_code
|
||||||
|
license: mit
|
||||||
|
---
|
||||||
|
<div align="center">
|
||||||
|
<img src="https://github.com/deepseek-ai/DeepSeek-V2/blob/main/figures/logo.svg?raw=true" width="60%" alt="DeepSeek AI" />
|
||||||
|
</div>
|
||||||
|
<hr>
|
||||||
|
<div align="center">
|
||||||
|
<a href="https://www.deepseek.com/" target="_blank">
|
||||||
|
<img alt="Homepage" src="https://github.com/deepseek-ai/DeepSeek-V2/blob/main/figures/badge.svg?raw=true" />
|
||||||
|
</a>
|
||||||
|
<a href="https://huggingface.co/deepseek-ai/DeepSeek-OCR" target="_blank">
|
||||||
|
<img alt="Hugging Face" src="https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-DeepSeek%20AI-ffc107?color=ffc107&logoColor=white" />
|
||||||
|
</a>
|
||||||
|
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div align="center">
|
||||||
|
|
||||||
|
<a href="https://discord.gg/Tc7c45Zzu5" target="_blank">
|
||||||
|
<img alt="Discord" src="https://img.shields.io/badge/Discord-DeepSeek%20AI-7289da?logo=discord&logoColor=white&color=7289da" />
|
||||||
|
</a>
|
||||||
|
<a href="https://twitter.com/deepseek_ai" target="_blank">
|
||||||
|
<img alt="Twitter Follow" src="https://img.shields.io/badge/Twitter-deepseek_ai-white?logo=x&logoColor=white" />
|
||||||
|
</a>
|
||||||
|
|
||||||
|
</div>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
<p align="center">
|
||||||
|
<a href="https://github.com/deepseek-ai/DeepSeek-OCR"><b>🌟 Github</b></a> |
|
||||||
|
<a href="https://huggingface.co/deepseek-ai/DeepSeek-OCR"><b>📥 Model Download</b></a> |
|
||||||
|
<a href="https://github.com/deepseek-ai/DeepSeek-OCR/blob/main/DeepSeek_OCR_paper.pdf"><b>📄 Paper Link</b></a> |
|
||||||
|
<a href=""><b>📄 Arxiv Paper Link</b></a> |
|
||||||
|
</p>
|
||||||
|
<h2>
|
||||||
|
<p align="center">
|
||||||
|
<a href="">DeepSeek-OCR: Contexts Optical Compression</a>
|
||||||
|
</p>
|
||||||
|
</h2>
|
||||||
|
<p align="center">
|
||||||
|
<img src="assets/fig1.png" style="width: 1000px" align=center>
|
||||||
|
</p>
|
||||||
|
<p align="center">
|
||||||
|
<a href="">Explore the boundaries of visual-text compression.</a>
|
||||||
|
</p>
|
||||||
|
|
||||||
|
## Usage
|
||||||
|
Inference using Hugging Face Transformers on NVIDIA GPUs. Requirements were tested with Python 3.12.9 and CUDA 11.8:
|
||||||
|
|
||||||
|
```
|
||||||
|
torch==2.6.0
|
||||||
|
transformers==4.46.3
|
||||||
|
tokenizers==0.20.3
|
||||||
|
einops
|
||||||
|
addict
|
||||||
|
easydict
|
||||||
|
pip install flash-attn==2.7.3 --no-build-isolation
|
||||||
|
```
|
||||||
|
|
||||||
|
```python
|
||||||
|
from transformers import AutoModel, AutoTokenizer
|
||||||
|
import torch
|
||||||
|
import os
|
||||||
|
os.environ["CUDA_VISIBLE_DEVICES"] = '0'
|
||||||
|
model_name = 'deepseek-ai/DeepSeek-OCR'
|
||||||
|
|
||||||
|
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
|
||||||
|
model = AutoModel.from_pretrained(model_name, _attn_implementation='flash_attention_2', trust_remote_code=True, use_safetensors=True)
|
||||||
|
model = model.eval().cuda().to(torch.bfloat16)
|
||||||
|
|
||||||
|
# prompt = "<image>\nFree OCR. "
|
||||||
|
prompt = "<image>\n<|grounding|>Convert the document to markdown. "
|
||||||
|
image_file = 'your_image.jpg'
|
||||||
|
output_path = 'your/output/dir'
|
||||||
|
|
||||||
|
# infer(self, tokenizer, prompt='', image_file='', output_path = ' ', base_size = 1024, image_size = 640, crop_mode = True, test_compress = False, save_results = False):
|
||||||
|
|
||||||
|
# Tiny: base_size = 512, image_size = 512, crop_mode = False
|
||||||
|
# Small: base_size = 640, image_size = 640, crop_mode = False
|
||||||
|
# Base: base_size = 1024, image_size = 1024, crop_mode = False
|
||||||
|
# Large: base_size = 1280, image_size = 1280, crop_mode = False
|
||||||
|
|
||||||
|
# Gundam: base_size = 1024, image_size = 640, crop_mode = True
|
||||||
|
|
||||||
|
res = model.infer(tokenizer, prompt=prompt, image_file=image_file, output_path = output_path, base_size = 1024, image_size = 640, crop_mode=True, save_results = True, test_compress = True)
|
||||||
|
```
|
||||||
|
|
||||||
|
## vLLM
|
||||||
|
Refer to [🌟GitHub](https://github.com/deepseek-ai/DeepSeek-OCR/) for guidance on model inference acceleration and PDF processing, etc.<!-- -->
|
||||||
|
|
||||||
|
## Visualizations
|
||||||
|
<table>
|
||||||
|
<tr>
|
||||||
|
<td><img src="assets/show1.jpg" style="width: 500px"></td>
|
||||||
|
<td><img src="assets/show2.jpg" style="width: 500px"></td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td><img src="assets/show3.jpg" style="width: 500px"></td>
|
||||||
|
<td><img src="assets/show4.jpg" style="width: 500px"></td>
|
||||||
|
</tr>
|
||||||
|
</table>
|
||||||
|
|
||||||
|
|
||||||
|
## Acknowledgement
|
||||||
|
|
||||||
|
We would like to thank [Vary](https://github.com/Ucas-HaoranWei/Vary/), [GOT-OCR2.0](https://github.com/Ucas-HaoranWei/GOT-OCR2.0/), [MinerU](https://github.com/opendatalab/MinerU), [PaddleOCR](https://github.com/PaddlePaddle/PaddleOCR), [OneChart](https://github.com/LingyvKong/OneChart), [Slow Perception](https://github.com/Ucas-HaoranWei/Slow-Perception) for their valuable models and ideas.
|
||||||
|
|
||||||
|
We also appreciate the benchmarks: [Fox](https://github.com/ucaslcl/Fox), [OmniDocBench](https://github.com/opendatalab/OmniDocBench).
|
||||||
|
|
||||||
|
|
||||||
|
## Citation
|
||||||
|
Coming soon!
|
||||||
21
LICENSE
Normal file
21
LICENSE
Normal file
@ -0,0 +1,21 @@
|
|||||||
|
MIT License
|
||||||
|
|
||||||
|
Copyright (c) 2023 DeepSeek
|
||||||
|
|
||||||
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
of this software and associated documentation files (the "Software"), to deal
|
||||||
|
in the Software without restriction, including without limitation the rights
|
||||||
|
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
copies of the Software, and to permit persons to whom the Software is
|
||||||
|
furnished to do so, subject to the following conditions:
|
||||||
|
|
||||||
|
The above copyright notice and this permission notice shall be included in all
|
||||||
|
copies or substantial portions of the Software.
|
||||||
|
|
||||||
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||||
|
SOFTWARE.
|
||||||
156
README.md
156
README.md
@ -1,48 +1,122 @@
|
|||||||
---
|
---
|
||||||
license: Apache License 2.0
|
pipeline_tag: image-text-to-text
|
||||||
tags: []
|
language:
|
||||||
|
- multilingual
|
||||||
#model-type:
|
tags:
|
||||||
##如 gpt、phi、llama、chatglm、baichuan 等
|
- deepseek
|
||||||
#- gpt
|
- vision-language
|
||||||
|
- ocr
|
||||||
#domain:
|
- custom_code
|
||||||
##如 nlp、cv、audio、multi-modal
|
license: mit
|
||||||
#- nlp
|
|
||||||
|
|
||||||
#language:
|
|
||||||
##语言代码列表 https://help.aliyun.com/document_detail/215387.html?spm=a2c4g.11186623.0.0.9f8d7467kni6Aa
|
|
||||||
#- cn
|
|
||||||
|
|
||||||
#metrics:
|
|
||||||
##如 CIDEr、Blue、ROUGE 等
|
|
||||||
#- CIDEr
|
|
||||||
|
|
||||||
#tags:
|
|
||||||
##各种自定义,包括 pretrained、fine-tuned、instruction-tuned、RL-tuned 等训练方法和其他
|
|
||||||
#- pretrained
|
|
||||||
|
|
||||||
#tools:
|
|
||||||
##如 vllm、fastchat、llamacpp、AdaSeq 等
|
|
||||||
#- vllm
|
|
||||||
---
|
---
|
||||||
### 当前模型的贡献者未提供更加详细的模型介绍。模型文件和权重,可浏览“模型文件”页面获取。
|
<div align="center">
|
||||||
#### 您可以通过如下git clone命令,或者ModelScope SDK来下载模型
|
<img src="https://github.com/deepseek-ai/DeepSeek-V2/blob/main/figures/logo.svg?raw=true" width="60%" alt="DeepSeek AI" />
|
||||||
|
</div>
|
||||||
|
<hr>
|
||||||
|
<div align="center">
|
||||||
|
<a href="https://www.deepseek.com/" target="_blank">
|
||||||
|
<img alt="Homepage" src="https://github.com/deepseek-ai/DeepSeek-V2/blob/main/figures/badge.svg?raw=true" />
|
||||||
|
</a>
|
||||||
|
<a href="https://huggingface.co/deepseek-ai/DeepSeek-OCR" target="_blank">
|
||||||
|
<img alt="Hugging Face" src="https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-DeepSeek%20AI-ffc107?color=ffc107&logoColor=white" />
|
||||||
|
</a>
|
||||||
|
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div align="center">
|
||||||
|
|
||||||
|
<a href="https://discord.gg/Tc7c45Zzu5" target="_blank">
|
||||||
|
<img alt="Discord" src="https://img.shields.io/badge/Discord-DeepSeek%20AI-7289da?logo=discord&logoColor=white&color=7289da" />
|
||||||
|
</a>
|
||||||
|
<a href="https://twitter.com/deepseek_ai" target="_blank">
|
||||||
|
<img alt="Twitter Follow" src="https://img.shields.io/badge/Twitter-deepseek_ai-white?logo=x&logoColor=white" />
|
||||||
|
</a>
|
||||||
|
|
||||||
|
</div>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
<p align="center">
|
||||||
|
<a href="https://github.com/deepseek-ai/DeepSeek-OCR"><b>🌟 Github</b></a> |
|
||||||
|
<a href="https://huggingface.co/deepseek-ai/DeepSeek-OCR"><b>📥 Model Download</b></a> |
|
||||||
|
<a href="https://github.com/deepseek-ai/DeepSeek-OCR/blob/main/DeepSeek_OCR_paper.pdf"><b>📄 Paper Link</b></a> |
|
||||||
|
<a href=""><b>📄 Arxiv Paper Link</b></a> |
|
||||||
|
</p>
|
||||||
|
<h2>
|
||||||
|
<p align="center">
|
||||||
|
<a href="">DeepSeek-OCR: Contexts Optical Compression</a>
|
||||||
|
</p>
|
||||||
|
</h2>
|
||||||
|
<p align="center">
|
||||||
|
<img src="assets/fig1.png" style="width: 1000px" align=center>
|
||||||
|
</p>
|
||||||
|
<p align="center">
|
||||||
|
<a href="">Explore the boundaries of visual-text compression.</a>
|
||||||
|
</p>
|
||||||
|
|
||||||
|
## Usage
|
||||||
|
Inference using Hugging Face Transformers on NVIDIA GPUs. Requirements were tested with Python 3.12.9 and CUDA 11.8:
|
||||||
|
|
||||||
SDK下载
|
|
||||||
```bash
|
|
||||||
#安装ModelScope
|
|
||||||
pip install modelscope
|
|
||||||
```
|
```
|
||||||
|
torch==2.6.0
|
||||||
|
transformers==4.46.3
|
||||||
|
tokenizers==0.20.3
|
||||||
|
einops
|
||||||
|
addict
|
||||||
|
easydict
|
||||||
|
pip install flash-attn==2.7.3 --no-build-isolation
|
||||||
|
```
|
||||||
|
|
||||||
```python
|
```python
|
||||||
#SDK模型下载
|
from transformers import AutoModel, AutoTokenizer
|
||||||
from modelscope import snapshot_download
|
import torch
|
||||||
model_dir = snapshot_download('deepseek-ai/DeepSeek-OCR')
|
import os
|
||||||
```
|
os.environ["CUDA_VISIBLE_DEVICES"] = '0'
|
||||||
Git下载
|
model_name = 'deepseek-ai/DeepSeek-OCR'
|
||||||
```
|
|
||||||
#Git模型下载
|
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
|
||||||
git clone https://www.modelscope.cn/deepseek-ai/DeepSeek-OCR.git
|
model = AutoModel.from_pretrained(model_name, _attn_implementation='flash_attention_2', trust_remote_code=True, use_safetensors=True)
|
||||||
|
model = model.eval().cuda().to(torch.bfloat16)
|
||||||
|
|
||||||
|
# prompt = "<image>\nFree OCR. "
|
||||||
|
prompt = "<image>\n<|grounding|>Convert the document to markdown. "
|
||||||
|
image_file = 'your_image.jpg'
|
||||||
|
output_path = 'your/output/dir'
|
||||||
|
|
||||||
|
# infer(self, tokenizer, prompt='', image_file='', output_path = ' ', base_size = 1024, image_size = 640, crop_mode = True, test_compress = False, save_results = False):
|
||||||
|
|
||||||
|
# Tiny: base_size = 512, image_size = 512, crop_mode = False
|
||||||
|
# Small: base_size = 640, image_size = 640, crop_mode = False
|
||||||
|
# Base: base_size = 1024, image_size = 1024, crop_mode = False
|
||||||
|
# Large: base_size = 1280, image_size = 1280, crop_mode = False
|
||||||
|
|
||||||
|
# Gundam: base_size = 1024, image_size = 640, crop_mode = True
|
||||||
|
|
||||||
|
res = model.infer(tokenizer, prompt=prompt, image_file=image_file, output_path = output_path, base_size = 1024, image_size = 640, crop_mode=True, save_results = True, test_compress = True)
|
||||||
```
|
```
|
||||||
|
|
||||||
<p style="color: lightgrey;">如果您是本模型的贡献者,我们邀请您根据<a href="https://modelscope.cn/docs/ModelScope%E6%A8%A1%E5%9E%8B%E6%8E%A5%E5%85%A5%E6%B5%81%E7%A8%8B%E6%A6%82%E8%A7%88" style="color: lightgrey; text-decoration: underline;">模型贡献文档</a>,及时完善模型卡片内容。</p>
|
## vLLM
|
||||||
|
Refer to [🌟GitHub](https://github.com/deepseek-ai/DeepSeek-OCR/) for guidance on model inference acceleration and PDF processing, etc.<!-- -->
|
||||||
|
|
||||||
|
## Visualizations
|
||||||
|
<table>
|
||||||
|
<tr>
|
||||||
|
<td><img src="assets/show1.jpg" style="width: 500px"></td>
|
||||||
|
<td><img src="assets/show2.jpg" style="width: 500px"></td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td><img src="assets/show3.jpg" style="width: 500px"></td>
|
||||||
|
<td><img src="assets/show4.jpg" style="width: 500px"></td>
|
||||||
|
</tr>
|
||||||
|
</table>
|
||||||
|
|
||||||
|
|
||||||
|
## Acknowledgement
|
||||||
|
|
||||||
|
We would like to thank [Vary](https://github.com/Ucas-HaoranWei/Vary/), [GOT-OCR2.0](https://github.com/Ucas-HaoranWei/GOT-OCR2.0/), [MinerU](https://github.com/opendatalab/MinerU), [PaddleOCR](https://github.com/PaddlePaddle/PaddleOCR), [OneChart](https://github.com/LingyvKong/OneChart), [Slow Perception](https://github.com/Ucas-HaoranWei/Slow-Perception) for their valuable models and ideas.
|
||||||
|
|
||||||
|
We also appreciate the benchmarks: [Fox](https://github.com/ucaslcl/Fox), [OmniDocBench](https://github.com/opendatalab/OmniDocBench).
|
||||||
|
|
||||||
|
|
||||||
|
## Citation
|
||||||
|
Coming soon!
|
||||||
|
|||||||
BIN
assets/fig1.png
Normal file
BIN
assets/fig1.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 387 KiB |
BIN
assets/show1.jpg
Normal file
BIN
assets/show1.jpg
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 114 KiB |
BIN
assets/show2.jpg
Normal file
BIN
assets/show2.jpg
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 211 KiB |
BIN
assets/show3.jpg
Normal file
BIN
assets/show3.jpg
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 241 KiB |
BIN
assets/show4.jpg
Normal file
BIN
assets/show4.jpg
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 262 KiB |
118
config.json
Normal file
118
config.json
Normal file
@ -0,0 +1,118 @@
|
|||||||
|
{
|
||||||
|
"_name_or_path": "deepseek-ai/DeepSeek-OCR",
|
||||||
|
"candidate_resolutions": [
|
||||||
|
[
|
||||||
|
1024,
|
||||||
|
1024
|
||||||
|
]
|
||||||
|
],
|
||||||
|
"global_view_pos": "head",
|
||||||
|
"architectures": [
|
||||||
|
"DeepseekOCRForCausalLM"
|
||||||
|
],
|
||||||
|
"auto_map": {
|
||||||
|
"AutoConfig": "modeling_deepseekocr.DeepseekOCRConfig",
|
||||||
|
"AutoModel": "modeling_deepseekocr.DeepseekOCRForCausalLM"
|
||||||
|
},
|
||||||
|
"language_config": {
|
||||||
|
"architectures": [
|
||||||
|
"DeepseekV2ForCausalLM"
|
||||||
|
],
|
||||||
|
"auto_map": {
|
||||||
|
"AutoConfig": "configuration_deepseekv2.DeepseekV2Config",
|
||||||
|
"AutoModel": "modeling_deepseek.DeepseekV2Model",
|
||||||
|
"AutoModelForCausalLM": "modeling_deepseek.DeepseekV2ForCausalLM"
|
||||||
|
},
|
||||||
|
"bos_token_id": 0,
|
||||||
|
"eos_token_id": 1,
|
||||||
|
"first_k_dense_replace": 1,
|
||||||
|
"hidden_size": 1280,
|
||||||
|
"intermediate_size": 6848,
|
||||||
|
"kv_lora_rank": null,
|
||||||
|
"lm_head": true,
|
||||||
|
"max_position_embeddings": 8192,
|
||||||
|
"moe_intermediate_size": 896,
|
||||||
|
"n_group": 1,
|
||||||
|
"n_routed_experts": 64,
|
||||||
|
"n_shared_experts": 2,
|
||||||
|
"num_attention_heads": 10,
|
||||||
|
"num_experts_per_tok": 6,
|
||||||
|
"num_hidden_layers": 12,
|
||||||
|
"num_key_value_heads": 10,
|
||||||
|
"q_lora_rank": null,
|
||||||
|
"qk_nope_head_dim": 0,
|
||||||
|
"qk_rope_head_dim": 0,
|
||||||
|
"rm_head": false,
|
||||||
|
"topk_group": 1,
|
||||||
|
"topk_method": "greedy",
|
||||||
|
"torch_dtype": "bfloat16",
|
||||||
|
"use_mla": false,
|
||||||
|
"v_head_dim": 0,
|
||||||
|
"vocab_size": 129280
|
||||||
|
},
|
||||||
|
"model_type": "deepseek_vl_v2",
|
||||||
|
"projector_config": {
|
||||||
|
"input_dim": 2048,
|
||||||
|
"model_type": "mlp_projector",
|
||||||
|
"n_embed": 1280,
|
||||||
|
"projector_type": "linear"
|
||||||
|
},
|
||||||
|
"tile_tag": "2D",
|
||||||
|
"torch_dtype": "bfloat16",
|
||||||
|
"transformers_version": "4.46.3",
|
||||||
|
"vision_config": {
|
||||||
|
"image_size": 1024,
|
||||||
|
"mlp_ratio": 3.7362,
|
||||||
|
"model_name": "deeplip_b_l",
|
||||||
|
"model_type": "vision",
|
||||||
|
"width": {
|
||||||
|
"clip-l-14-224": {
|
||||||
|
"heads": 16,
|
||||||
|
"image_size": 224,
|
||||||
|
"layers": 24,
|
||||||
|
"patch_size": 14,
|
||||||
|
"width": 1024
|
||||||
|
},
|
||||||
|
"sam_vit_b": {
|
||||||
|
"downsample_channels": [
|
||||||
|
512,
|
||||||
|
1024
|
||||||
|
],
|
||||||
|
"global_attn_indexes": [
|
||||||
|
2,
|
||||||
|
5,
|
||||||
|
8,
|
||||||
|
11
|
||||||
|
],
|
||||||
|
"heads": 12,
|
||||||
|
"layers": 12,
|
||||||
|
"width": 768
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"bos_token_id": 0,
|
||||||
|
"eos_token_id": 1,
|
||||||
|
"first_k_dense_replace": 1,
|
||||||
|
"hidden_size": 1280,
|
||||||
|
"intermediate_size": 6848,
|
||||||
|
"kv_lora_rank": null,
|
||||||
|
"lm_head": true,
|
||||||
|
"max_position_embeddings": 8192,
|
||||||
|
"moe_intermediate_size": 896,
|
||||||
|
"n_group": 1,
|
||||||
|
"n_routed_experts": 64,
|
||||||
|
"n_shared_experts": 2,
|
||||||
|
"num_attention_heads": 10,
|
||||||
|
"num_experts_per_tok": 6,
|
||||||
|
"num_hidden_layers": 12,
|
||||||
|
"num_key_value_heads": 10,
|
||||||
|
"q_lora_rank": null,
|
||||||
|
"qk_nope_head_dim": 0,
|
||||||
|
"qk_rope_head_dim": 0,
|
||||||
|
"rm_head": false,
|
||||||
|
"topk_group": 1,
|
||||||
|
"topk_method": "greedy",
|
||||||
|
"use_mla": false,
|
||||||
|
"v_head_dim": 0,
|
||||||
|
"vocab_size": 129280
|
||||||
|
}
|
||||||
1
configuration.json
Normal file
1
configuration.json
Normal file
@ -0,0 +1 @@
|
|||||||
|
{"framework": "pytorch", "task": "image-text-to-text", "allow_remote": true}
|
||||||
210
configuration_deepseek_v2.py
Normal file
210
configuration_deepseek_v2.py
Normal file
@ -0,0 +1,210 @@
|
|||||||
|
from transformers.configuration_utils import PretrainedConfig
|
||||||
|
from transformers.utils import logging
|
||||||
|
|
||||||
|
logger = logging.get_logger(__name__)
|
||||||
|
|
||||||
|
DEEPSEEK_PRETRAINED_CONFIG_ARCHIVE_MAP = {}
|
||||||
|
class DeepseekV2Config(PretrainedConfig):
    r"""
    Configuration container for a [`DeepseekV2Model`].

    Every constructor argument is stored on the instance under the same name;
    the token ids and `tie_word_embeddings` are forwarded to
    [`PretrainedConfig`] together with any extra keyword arguments. See the
    [`PretrainedConfig`] documentation for the behaviour inherited from the
    base class.

    Args:
        vocab_size (`int`, *optional*, defaults to 102400):
            Vocabulary size, i.e. the number of distinct tokens that can be
            represented by the `input_ids` passed to [`DeepseekV2Model`].
        hidden_size (`int`, *optional*, defaults to 4096):
            Dimension of the hidden representations.
        intermediate_size (`int`, *optional*, defaults to 11008):
            Dimension of the MLP representations.
        moe_intermediate_size (`int`, *optional*, defaults to 1407):
            Dimension of the MoE representations.
        num_hidden_layers (`int`, *optional*, defaults to 30):
            Number of hidden layers in the Transformer decoder.
        num_attention_heads (`int`, *optional*, defaults to 32):
            Number of attention heads for each attention layer in the
            Transformer decoder.
        num_key_value_heads (`int`, *optional*, defaults to 32):
            Number of key/value heads used to implement Grouped Query
            Attention. If `num_key_value_heads=num_attention_heads` the model
            uses Multi Head Attention (MHA); if `num_key_value_heads=1` it
            uses Multi Query Attention (MQA); otherwise GQA is used. When
            converting a multi-head checkpoint to a GQA checkpoint, each group
            key and value head should be constructed by mean-pooling all the
            original heads within that group; see
            [this paper](https://arxiv.org/pdf/2305.13245.pdf). Passing `None`
            falls back to `num_attention_heads`.
        n_shared_experts (`int`, *optional*, defaults to `None`):
            Number of shared experts; `None` means dense model.
        n_routed_experts (`int`, *optional*, defaults to `None`):
            Number of routed experts; `None` means dense model.
        ep_size (`int`, *optional*, defaults to 1):
            Stored unchanged. Presumably the expert-parallel group size --
            TODO confirm against the modeling code.
        routed_scaling_factor (`float`, *optional*, defaults to 1.0):
            Scaling factor of routed experts.
        kv_lora_rank (`int`, *optional*, defaults to 512):
            Stored unchanged. Presumably the low-rank dimension of the
            key/value projection used by multi-latent attention -- TODO confirm.
        q_lora_rank (`int`, *optional*, defaults to 1536):
            Stored unchanged. Presumably the low-rank dimension of the query
            projection used by multi-latent attention -- TODO confirm.
        qk_rope_head_dim (`int`, *optional*, defaults to 64):
            Stored unchanged. Presumably the per-head query/key width that
            receives rotary embeddings -- TODO confirm.
        v_head_dim (`int`, *optional*, defaults to 128):
            Stored unchanged. Presumably the per-head value width -- TODO confirm.
        qk_nope_head_dim (`int`, *optional*, defaults to 128):
            Stored unchanged. Presumably the per-head query/key width without
            rotary embeddings -- TODO confirm.
        topk_method (`str`, *optional*, defaults to `'gready'`):
            Top-k method used in the routed gate.
            NOTE(review): the default is spelled `'gready'`, whereas shipped
            checkpoint configs pass `"greedy"`; this looks like a misspelling.
            Pass the value explicitly and verify what the gate implementation
            compares against before relying on the default.
        n_group (`int`, *optional*, defaults to `None`):
            Number of groups for routed experts.
        topk_group (`int`, *optional*, defaults to `None`):
            Number of selected groups for each token (the experts selected for
            a token are restricted to `topk_group` groups).
        num_experts_per_tok (`int`, *optional*, defaults to `None`):
            Number of selected experts; `None` means dense model.
        moe_layer_freq (`int`, *optional*, defaults to 1):
            Frequency of the MoE layer: one expert layer for every
            `moe_layer_freq - 1` dense layers.
        first_k_dense_replace (`int`, *optional*, defaults to 0):
            Number of dense layers at the shallow end of the stack
            (embed -> k dense layers -> moe -> moe ... -> lm_head).
        norm_topk_prob (`bool`, *optional*, defaults to `False`):
            Whether to normalize the weights of the routed experts.
        scoring_func (`str`, *optional*, defaults to `'softmax'`):
            Method of computing expert weights.
        aux_loss_alpha (`float`, *optional*, defaults to 0.001):
            Auxiliary loss weight coefficient.
        seq_aux (`bool`, *optional*, defaults to `True`):
            Whether to compute the auxiliary loss for each individual sample.
        hidden_act (`str` or `function`, *optional*, defaults to `"silu"`):
            The non-linear activation function (function or string) in the
            decoder.
        max_position_embeddings (`int`, *optional*, defaults to 2048):
            The maximum sequence length that this model might ever be used
            with.
        initializer_range (`float`, *optional*, defaults to 0.02):
            The standard deviation of the truncated_normal_initializer for
            initializing all weight matrices.
        rms_norm_eps (`float`, *optional*, defaults to 1e-06):
            The epsilon used by the RMS normalization layers. The value is
            converted with `float()` before being stored.
        use_cache (`bool`, *optional*, defaults to `True`):
            Whether or not the model should return the last key/values
            attentions (not used by all models). Only relevant if
            `config.is_decoder=True`.
        pad_token_id (`int`, *optional*, defaults to `None`):
            Padding token id.
        bos_token_id (`int`, *optional*, defaults to 100000):
            Beginning of stream token id.
        eos_token_id (`int`, *optional*, defaults to 100001):
            End of stream token id.
        pretraining_tp (`int`, *optional*, defaults to 1):
            Experimental feature. Tensor parallelism rank used during
            pretraining. Please refer to
            [this document](https://huggingface.co/docs/transformers/parallelism)
            to understand more about it. This value is necessary to ensure
            exact reproducibility of the pretraining results. Please refer to
            [this issue](https://github.com/pytorch/pytorch/issues/76232).
        tie_word_embeddings (`bool`, *optional*, defaults to `False`):
            Whether to tie weight embeddings.
        rope_theta (`float`, *optional*, defaults to 10000.0):
            The base period of the RoPE embeddings.
        rope_scaling (`Dict`, *optional*, defaults to `None`):
            Dictionary containing the scaling configuration for the RoPE
            embeddings. Currently supports two scaling strategies: linear and
            dynamic. Their scaling factor must be a float greater than 1. The
            expected format is
            `{"type": strategy name, "factor": scaling factor}`. When using
            this flag, don't update `max_position_embeddings` to the expected
            new maximum.
        attention_bias (`bool`, *optional*, defaults to `False`):
            Whether to use a bias in the query, key, value and output
            projection layers during self-attention.
        attention_dropout (`float`, *optional*, defaults to 0.0):
            The dropout ratio for the attention probabilities.
        use_mla (`bool`, *optional*, defaults to `True`):
            If `True` the model uses multi-latent attention, otherwise it uses
            multi-head attention.

    ```python
    >>> # Initializing a DeepSeek-V2 style configuration
    >>> configuration = DeepseekV2Config()

    >>> # Reading a value back from the configuration
    >>> configuration.hidden_size
    4096
    ```"""

    model_type = "deepseek_v2"
    keys_to_ignore_at_inference = ["past_key_values"]

    def __init__(
        self,
        vocab_size=102400,
        hidden_size=4096,
        intermediate_size=11008,
        moe_intermediate_size=1407,
        num_hidden_layers=30,
        num_attention_heads=32,
        num_key_value_heads=32,
        n_shared_experts=None,
        n_routed_experts=None,
        ep_size=1,
        routed_scaling_factor=1.0,
        kv_lora_rank=512,
        q_lora_rank=1536,
        qk_rope_head_dim=64,
        v_head_dim=128,
        qk_nope_head_dim=128,
        topk_method='gready',
        n_group=None,
        topk_group=None,
        num_experts_per_tok=None,
        moe_layer_freq=1,
        first_k_dense_replace=0,
        norm_topk_prob=False,
        scoring_func='softmax',
        aux_loss_alpha=0.001,
        seq_aux=True,
        hidden_act="silu",
        max_position_embeddings=2048,
        initializer_range=0.02,
        rms_norm_eps=1e-6,
        use_cache=True,
        pad_token_id=None,
        bos_token_id=100000,
        eos_token_id=100001,
        pretraining_tp=1,
        tie_word_embeddings=False,
        rope_theta=10000.0,
        rope_scaling=None,
        attention_bias=False,
        attention_dropout=0.0,
        use_mla=True,
        **kwargs,
    ):
        # Vocabulary, context length and layer sizes.
        self.vocab_size = vocab_size
        self.max_position_embeddings = max_position_embeddings
        self.hidden_size = hidden_size
        self.intermediate_size = intermediate_size
        self.moe_intermediate_size = moe_intermediate_size
        self.num_hidden_layers = num_hidden_layers
        self.num_attention_heads = num_attention_heads

        # Expert counts and scaling.
        self.n_shared_experts = n_shared_experts
        self.n_routed_experts = n_routed_experts
        self.ep_size = ep_size
        self.routed_scaling_factor = routed_scaling_factor

        # Attention projection ranks and per-head dimensions.
        self.kv_lora_rank = kv_lora_rank
        self.q_lora_rank = q_lora_rank
        self.qk_rope_head_dim = qk_rope_head_dim
        self.v_head_dim = v_head_dim
        self.qk_nope_head_dim = qk_nope_head_dim

        # Expert routing and auxiliary-loss settings.
        self.topk_method = topk_method
        self.n_group = n_group
        self.topk_group = topk_group
        self.num_experts_per_tok = num_experts_per_tok
        self.moe_layer_freq = moe_layer_freq
        self.first_k_dense_replace = first_k_dense_replace
        self.norm_topk_prob = norm_topk_prob
        self.scoring_func = scoring_func
        self.aux_loss_alpha = aux_loss_alpha
        self.seq_aux = seq_aux

        # Backward compatibility: an explicit None means "same as the number
        # of attention heads".
        self.num_key_value_heads = (
            num_attention_heads if num_key_value_heads is None else num_key_value_heads
        )

        # Remaining model hyper-parameters.
        self.hidden_act = hidden_act
        self.initializer_range = initializer_range
        # Coerced so that values that are not already floats are stored as one.
        self.rms_norm_eps = float(rms_norm_eps)
        self.pretraining_tp = pretraining_tp
        self.use_cache = use_cache
        self.rope_theta = rope_theta
        self.rope_scaling = rope_scaling
        self.attention_bias = attention_bias
        self.attention_dropout = attention_dropout
        self.use_mla = use_mla

        # The base class receives the special-token ids, the embedding-tying
        # flag and every keyword argument not consumed above.
        super().__init__(
            pad_token_id=pad_token_id,
            bos_token_id=bos_token_id,
            eos_token_id=eos_token_id,
            tie_word_embeddings=tie_word_embeddings,
            **kwargs,
        )
|
||||||
280
conversation.py
Normal file
280
conversation.py
Normal file
@ -0,0 +1,280 @@
|
|||||||
|
"""
|
||||||
|
From https://github.com/lm-sys/FastChat/blob/main/fastchat/conversation.py
|
||||||
|
"""
|
||||||
|
|
||||||
|
import dataclasses
|
||||||
|
from enum import IntEnum, auto
|
||||||
|
from typing import Any, Dict, List
|
||||||
|
|
||||||
|
|
||||||
|
class SeparatorStyle(IntEnum):
    """Identifiers for the supported prompt separator layouts."""

    # Values are written out explicitly; they are the same integers that
    # consecutive auto() calls assign to an IntEnum (starting at 1).
    DeepSeek = 1
    DeepSeekV2 = 2
    PLAIN = 3
    ALIGNMENT = 4
|
||||||
|
|
||||||
|
|
||||||
|
@dataclasses.dataclass
|
||||||
|
class Conversation:
|
||||||
|
"""A class that manages prompt templates and keeps all conversation history."""
|
||||||
|
|
||||||
|
# The name of this template
|
||||||
|
name: str
|
||||||
|
# The template of the system prompt
|
||||||
|
system_template: str = "{system_message}"
|
||||||
|
# The system message
|
||||||
|
system_message: str = ""
|
||||||
|
# The names of two roles
|
||||||
|
roles: List[str] = (("USER", "ASSISTANT"),)
|
||||||
|
# All messages. Each item is (role, message).
|
||||||
|
messages: List[List[str]] = ()
|
||||||
|
# The number of few shot examples
|
||||||
|
offset: int = 0
|
||||||
|
# The separator style and configurations
|
||||||
|
sep_style: SeparatorStyle = SeparatorStyle.DeepSeek
|
||||||
|
sep: str = "\n"
|
||||||
|
sep2: str = None
|
||||||
|
# Stop criteria (the default one is EOS token)
|
||||||
|
stop_str: str = None
|
||||||
|
# Stops generation if meeting any token in this list
|
||||||
|
stop_token_ids: List[int] = None
|
||||||
|
|
||||||
|
def get_prompt(self) -> str:
|
||||||
|
"""Get the prompt for generation."""
|
||||||
|
system_prompt = self.system_template.format(system_message=self.system_message)
|
||||||
|
if self.sep_style == SeparatorStyle.DeepSeek:
|
||||||
|
seps = [self.sep, self.sep2]
|
||||||
|
if system_prompt == "" or system_prompt is None:
|
||||||
|
ret = ""
|
||||||
|
else:
|
||||||
|
ret = system_prompt + seps[0]
|
||||||
|
for i, (role, message) in enumerate(self.messages):
|
||||||
|
if message:
|
||||||
|
ret += role + ": " + message + seps[i % 2]
|
||||||
|
else:
|
||||||
|
ret += role + ":"
|
||||||
|
return ret
|
||||||
|
elif self.sep_style == SeparatorStyle.DeepSeekV2:
|
||||||
|
seps = [self.sep, self.sep2]
|
||||||
|
if system_prompt == "" or system_prompt is None:
|
||||||
|
ret = ""
|
||||||
|
else:
|
||||||
|
ret = system_prompt + seps[0]
|
||||||
|
for i, (role, message) in enumerate(self.messages):
|
||||||
|
if message:
|
||||||
|
if role == "User":
|
||||||
|
ret += "<|sft▁begin|>\n" + message + self.sep #<|sft▁begin|>User Input<|sft▁end|>\nResponse<|end▁of▁sentence|>
|
||||||
|
else:
|
||||||
|
ret += message + self.sep2
|
||||||
|
else:
|
||||||
|
ret = ret
|
||||||
|
return ret
|
||||||
|
|
||||||
|
elif self.sep_style == SeparatorStyle.PLAIN:
|
||||||
|
seps = [self.sep, self.sep2]
|
||||||
|
ret = ""
|
||||||
|
for i, (role, message) in enumerate(self.messages):
|
||||||
|
if message:
|
||||||
|
if type(message) is tuple:
|
||||||
|
message, _, _ = message
|
||||||
|
if i % 2 == 0:
|
||||||
|
ret += message + seps[i % 2]
|
||||||
|
else:
|
||||||
|
ret += message + seps[i % 2]
|
||||||
|
else:
|
||||||
|
ret += ""
|
||||||
|
return ret
|
||||||
|
elif self.sep_style == SeparatorStyle.ALIGNMENT:
|
||||||
|
seps = [self.sep, self.sep2]
|
||||||
|
ret = ""
|
||||||
|
for i, (role, message) in enumerate(self.messages):
|
||||||
|
if message:
|
||||||
|
if type(message) is tuple:
|
||||||
|
message, _, _ = message
|
||||||
|
if i % 2 == 0:
|
||||||
|
ret += '<image>\n' + seps[i % 2]
|
||||||
|
else:
|
||||||
|
ret += message + seps[i % 2]
|
||||||
|
else:
|
||||||
|
ret += ""
|
||||||
|
return ret
|
||||||
|
else:
|
||||||
|
raise ValueError(f"Invalid style: {self.sep_style}")
|
||||||
|
|
||||||
|
def set_system_message(self, system_message: str):
|
||||||
|
"""Set the system message."""
|
||||||
|
self.system_message = system_message
|
||||||
|
|
||||||
|
def append_message(self, role: str, message: str):
|
||||||
|
"""Append a new message."""
|
||||||
|
self.messages.append([role, message])
|
||||||
|
|
||||||
|
def update_last_message(self, message: str):
|
||||||
|
"""Update the last output.
|
||||||
|
|
||||||
|
The last message is typically set to be None when constructing the prompt,
|
||||||
|
so we need to update it in-place after getting the response from a model.
|
||||||
|
"""
|
||||||
|
self.messages[-1][1] = message
|
||||||
|
|
||||||
|
def reset_message(self):
|
||||||
|
"""Reset a new message."""
|
||||||
|
self.messages = []
|
||||||
|
|
||||||
|
def to_gradio_chatbot(self):
|
||||||
|
"""Convert the conversation to gradio chatbot format."""
|
||||||
|
ret = []
|
||||||
|
for i, (role, msg) in enumerate(self.messages[self.offset :]):
|
||||||
|
if i % 2 == 0:
|
||||||
|
ret.append([msg, None])
|
||||||
|
else:
|
||||||
|
ret[-1][-1] = msg
|
||||||
|
return ret
|
||||||
|
|
||||||
|
def to_openai_api_messages(self):
|
||||||
|
"""Convert the conversation to OpenAI chat completion format."""
|
||||||
|
system_prompt = self.system_template.format(system_message=self.system_message)
|
||||||
|
ret = [{"role": "system", "content": system_prompt}]
|
||||||
|
|
||||||
|
for i, (_, msg) in enumerate(self.messages[self.offset :]):
|
||||||
|
if i % 2 == 0:
|
||||||
|
ret.append({"role": "user", "content": msg})
|
||||||
|
else:
|
||||||
|
if msg is not None:
|
||||||
|
ret.append({"role": "assistant", "content": msg})
|
||||||
|
return ret
|
||||||
|
|
||||||
|
def copy(self):
|
||||||
|
return Conversation(
|
||||||
|
name=self.name,
|
||||||
|
system_template=self.system_template,
|
||||||
|
system_message=self.system_message,
|
||||||
|
roles=self.roles,
|
||||||
|
messages=[[x, y] for x, y in self.messages],
|
||||||
|
offset=self.offset,
|
||||||
|
sep_style=self.sep_style,
|
||||||
|
sep=self.sep,
|
||||||
|
sep2=self.sep2,
|
||||||
|
stop_str=self.stop_str,
|
||||||
|
stop_token_ids=self.stop_token_ids,
|
||||||
|
)
|
||||||
|
|
||||||
|
def dict(self):
|
||||||
|
return {
|
||||||
|
"template_name": self.name,
|
||||||
|
"system_message": self.system_message,
|
||||||
|
"roles": self.roles,
|
||||||
|
"messages": self.messages,
|
||||||
|
"offset": self.offset,
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
# A global registry for all conversation templates, keyed by template name
conv_templates: Dict[str, "Conversation"] = {}


def register_conv_template(template: "Conversation", override: bool = False):
    """Register a new conversation template.

    Args:
        template: The template to store under ``template.name``.
        override: When true, silently replace an existing template that has
            the same name.

    Raises:
        AssertionError: If a template with the same name is already
            registered and ``override`` is false.
    """
    if not override and template.name in conv_templates:
        # Raised explicitly instead of via ``assert`` so the duplicate check
        # still runs under ``python -O``; the exception type is unchanged.
        raise AssertionError(f"{template.name} has been registered.")

    conv_templates[template.name] = template


def get_conv_template(name: str) -> "Conversation":
    """Get a conversation template.

    Returns the result of ``copy()`` on the registered template rather than
    the registered object itself.

    Raises:
        KeyError: If no template is registered under ``name``.
    """
    return conv_templates[name].copy()
|
||||||
|
|
||||||
|
|
||||||
|
# "deepseek": role-prefixed turns ("<role>: <message>"); even-indexed turns end
# with a blank line and odd-indexed turns with the end-of-sentence token.
# No default system prompt is configured.
register_conv_template(
    Conversation(
        name="deepseek",
        roles=("<|User|>", "<|Assistant|>"),
        system_template="{system_message}",
        system_message="",
        messages=(),
        offset=0,
        sep_style=SeparatorStyle.DeepSeek,
        sep="\n\n",
        sep2="<|end▁of▁sentence|>",
        stop_str=["User:", "<|end▁of▁sentence|>"],
        stop_token_ids=[100001],
    )
)

# "deepseekv2": same layout as "deepseek" but with an empty separator after
# even-indexed turns. No default system prompt is configured.
# NOTE(review): this template uses SeparatorStyle.DeepSeek rather than
# SeparatorStyle.DeepSeekV2 - confirm that is intentional.
register_conv_template(
    Conversation(
        name="deepseekv2",
        roles=("<|User|>", "<|Assistant|>"),
        system_template="{system_message}",
        system_message="",
        messages=(),
        offset=0,
        sep_style=SeparatorStyle.DeepSeek,
        sep="",
        sep2="<|end▁of▁sentence|>",
        stop_str=["User:", "<|end▁of▁sentence|>"],
        stop_token_ids=[100001],
    )
)

# "plain": messages are concatenated verbatim, with no role names, system
# prompt or separators.
register_conv_template(
    Conversation(
        name="plain",
        roles=("", ""),
        system_template="",
        system_message="",
        messages=(),
        offset=0,
        sep_style=SeparatorStyle.PLAIN,
        sep="",
        sep2="",
        stop_str=["</s>"],
        stop_token_ids=[100001],
    )
)

# "alignment": like "plain", except that even-indexed turns are rendered as a
# bare "<image>\n" placeholder.
register_conv_template(
    Conversation(
        name="alignment",
        roles=("", ""),
        system_template="",
        system_message="",
        messages=(),
        offset=0,
        sep_style=SeparatorStyle.ALIGNMENT,
        sep="",
        sep2="",
        stop_str=["</s>"],
        stop_token_ids=[100001],
    )
)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Smoke test: render the same short dialogue with both chat templates and
    # print the resulting prompts. The final assistant turn is left as None so
    # the prompt ends where the model would start generating.
    for template_name in ("deepseek", "deepseekv2"):
        print(f"{template_name} template:")
        conv = get_conv_template(template_name)
        dialogue = (
            (conv.roles[0], "Hello!"),
            (conv.roles[1], "Hi! This is Tony."),
            (conv.roles[0], "Who are you?"),
            (conv.roles[1], "I am a helpful assistant."),
            (conv.roles[0], "How are you?"),
            (conv.roles[1], None),
        )
        for speaker, text in dialogue:
            conv.append_message(speaker, text)
        print(conv.get_prompt())
|
||||||
1058
deepencoder.py
Normal file
1058
deepencoder.py
Normal file
File diff suppressed because it is too large
Load Diff
3
model-00001-of-000001.safetensors
Normal file
3
model-00001-of-000001.safetensors
Normal file
@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:1169e7cdc28ff2fb6186556acb2175db148ad26a62097df4c45a17e523180d3f
|
||||||
|
size 6672547120
|
||||||
2717
model.safetensors.index.json
Normal file
2717
model.safetensors.index.json
Normal file
File diff suppressed because it is too large
Load Diff
1037
modeling_deepseekocr.py
Normal file
1037
modeling_deepseekocr.py
Normal file
File diff suppressed because it is too large
Load Diff
1992
modeling_deepseekv2.py
Normal file
1992
modeling_deepseekv2.py
Normal file
File diff suppressed because it is too large
Load Diff
28
processor_config.json
Normal file
28
processor_config.json
Normal file
@ -0,0 +1,28 @@
|
|||||||
|
{
|
||||||
|
"add_special_token": false,
|
||||||
|
"candidate_resolutions": [
|
||||||
|
[
|
||||||
|
1024,
|
||||||
|
1024
|
||||||
|
]
|
||||||
|
],
|
||||||
|
"downsample_ratio": 4,
|
||||||
|
"ignore_id": -100,
|
||||||
|
"image_mean": [
|
||||||
|
0.5,
|
||||||
|
0.5,
|
||||||
|
0.5
|
||||||
|
],
|
||||||
|
"image_std": [
|
||||||
|
0.5,
|
||||||
|
0.5,
|
||||||
|
0.5
|
||||||
|
],
|
||||||
|
"image_token": "<image>",
|
||||||
|
"mask_prompt": false,
|
||||||
|
"normalize": true,
|
||||||
|
"pad_token": "<\uff5c\u2581pad\u2581\uff5c>",
|
||||||
|
"patch_size": 16,
|
||||||
|
"processor_class": "DeepseekVLV2Processor",
|
||||||
|
"sft_format": "deepseek"
|
||||||
|
}
|
||||||
39
special_tokens_map.json
Normal file
39
special_tokens_map.json
Normal file
@ -0,0 +1,39 @@
|
|||||||
|
{
|
||||||
|
"additional_special_tokens": [
|
||||||
|
{
|
||||||
|
"content": "<|User|>",
|
||||||
|
"lstrip": false,
|
||||||
|
"normalized": false,
|
||||||
|
"rstrip": false,
|
||||||
|
"single_word": false
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"content": "<|Assistant|>",
|
||||||
|
"lstrip": false,
|
||||||
|
"normalized": false,
|
||||||
|
"rstrip": false,
|
||||||
|
"single_word": false
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"bos_token": {
|
||||||
|
"content": "<|begin▁of▁sentence|>",
|
||||||
|
"lstrip": false,
|
||||||
|
"normalized": false,
|
||||||
|
"rstrip": false,
|
||||||
|
"single_word": false
|
||||||
|
},
|
||||||
|
"eos_token": {
|
||||||
|
"content": "<|end▁of▁sentence|>",
|
||||||
|
"lstrip": false,
|
||||||
|
"normalized": false,
|
||||||
|
"rstrip": false,
|
||||||
|
"single_word": false
|
||||||
|
},
|
||||||
|
"pad_token": {
|
||||||
|
"content": "<|▁pad▁|>",
|
||||||
|
"lstrip": false,
|
||||||
|
"normalized": false,
|
||||||
|
"rstrip": false,
|
||||||
|
"single_word": false
|
||||||
|
}
|
||||||
|
}
|
||||||
3
tokenizer.json
Normal file
3
tokenizer.json
Normal file
@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:a02f8fd5228c90256bb4f6554c34a579d48f909e5beb232dc4afad870b55a8b4
|
||||||
|
size 9979544
|
||||||
6661
tokenizer_config.json
Normal file
6661
tokenizer_config.json
Normal file
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user