diff --git a/.gitattributes b/.gitattributes index 15ba2c6..dfb884e 100644 --- a/.gitattributes +++ b/.gitattributes @@ -45,3 +45,6 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text *.wasm filter=lfs diff=lfs merge=lfs -text *.zst filter=lfs diff=lfs merge=lfs -text *tfevents* filter=lfs diff=lfs merge=lfs -text + +model-00001-of-000001.safetensors filter=lfs diff=lfs merge=lfs -text +tokenizer.json filter=lfs diff=lfs merge=lfs -text \ No newline at end of file diff --git a/.ipynb_checkpoints/README-checkpoint.md b/.ipynb_checkpoints/README-checkpoint.md new file mode 100644 index 0000000..e0795e7 --- /dev/null +++ b/.ipynb_checkpoints/README-checkpoint.md @@ -0,0 +1,122 @@ +--- +pipeline_tag: image-text-to-text +language: +- multilingual +tags: +- deepseek +- vision-language +- ocr +- custom_code +license: mit +--- +
+ DeepSeek AI +
+
+
+ + Homepage + + + Hugging Face + + +
+ +
+ + + Discord + + + Twitter Follow + + +
+ + + +

+ 🌟 Github | + šŸ“„ Model Download | + šŸ“„ Paper Link | + šŸ“„ Arxiv Paper Link | +

+

+

+ DeepSeek-OCR: Contexts Optical Compression +

+

+

+ +

+

+Explore the boundaries of visual-text compression. +

+ +## Usage +Inference using Huggingface transformers on NVIDIA GPUs. Requirements tested on python 3.12.9 + CUDA11.8: + +``` +torch==2.6.0 +transformers==4.46.3 +tokenizers==0.20.3 +einops +addict +easydict +pip install flash-attn==2.7.3 --no-build-isolation +``` + +```python +from transformers import AutoModel, AutoTokenizer +import torch +import os +os.environ["CUDA_VISIBLE_DEVICES"] = '0' +model_name = 'deepseek-ai/DeepSeek-OCR' + +tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True) +model = AutoModel.from_pretrained(model_name, _attn_implementation='flash_attention_2', trust_remote_code=True, use_safetensors=True) +model = model.eval().cuda().to(torch.bfloat16) + +# prompt = "\nFree OCR. " +prompt = "\n<|grounding|>Convert the document to markdown. " +image_file = 'your_image.jpg' +output_path = 'your/output/dir' + +# infer(self, tokenizer, prompt='', image_file='', output_path = ' ', base_size = 1024, image_size = 640, crop_mode = True, test_compress = False, save_results = False): + +# Tiny: base_size = 512, image_size = 512, crop_mode = False +# Small: base_size = 640, image_size = 640, crop_mode = False +# Base: base_size = 1024, image_size = 1024, crop_mode = False +# Large: base_size = 1280, image_size = 1280, crop_mode = False + +# Gundam: base_size = 1024, image_size = 640, crop_mode = True + +res = model.infer(tokenizer, prompt=prompt, image_file=image_file, output_path = output_path, base_size = 1024, image_size = 640, crop_mode=True, save_results = True, test_compress = True) +``` + +## vLLM +Refer to [🌟GitHub](https://github.com/deepseek-ai/DeepSeek-OCR/) for guidance on model inference acceleration and PDF processing, etc. + +## Visualizations + + + + + + + + + +
+ + +## Acknowledgement + +We would like to thank [Vary](https://github.com/Ucas-HaoranWei/Vary/), [GOT-OCR2.0](https://github.com/Ucas-HaoranWei/GOT-OCR2.0/), [MinerU](https://github.com/opendatalab/MinerU), [PaddleOCR](https://github.com/PaddlePaddle/PaddleOCR), [OneChart](https://github.com/LingyvKong/OneChart), [Slow Perception](https://github.com/Ucas-HaoranWei/Slow-Perception) for their valuable models and ideas. + +We also appreciate the benchmarks: [Fox](https://github.com/ucaslcl/Fox), [OmniDocBench](https://github.com/opendatalab/OmniDocBench). + + +## Citation +Coming soon! diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..d42fae9 --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2023 DeepSeek + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE.
\ No newline at end of file diff --git a/README.md b/README.md index 8df7ca2..e0795e7 100644 --- a/README.md +++ b/README.md @@ -1,48 +1,122 @@ --- -license: Apache License 2.0 -tags: [] - -#model-type: -##如 gpt态phi态llama态chatglm态baichuan ē­‰ -#- gpt - -#domain: -##如 nlp态cv态audio态multi-modal -#- nlp - -#language: -##čÆ­čØ€ä»£ē åˆ—č”Ø https://help.aliyun.com/document_detail/215387.html?spm=a2c4g.11186623.0.0.9f8d7467kni6Aa -#- cn - -#metrics: -##如 CIDEr态Blue态ROUGE ē­‰ -#- CIDEr - -#tags: -##å„ē§č‡Ŗå®šä¹‰ļ¼ŒåŒ…ę‹¬ pretrained态fine-tuned态instruction-tuned态RL-tuned ē­‰č®­ē»ƒę–¹ę³•å’Œå…¶ä»– -#- pretrained - -#tools: -##如 vllm态fastchat态llamacpp态AdaSeq ē­‰ -#- vllm +pipeline_tag: image-text-to-text +language: +- multilingual +tags: +- deepseek +- vision-language +- ocr +- custom_code +license: mit --- -### å½“å‰ęØ”åž‹ēš„č“”ēŒ®č€…ęœŖęä¾›ę›“åŠ čÆ¦ē»†ēš„ęØ”åž‹ä»‹ē»ć€‚ęØ”åž‹ę–‡ä»¶å’Œęƒé‡ļ¼ŒåÆęµč§ˆā€œęØ”åž‹ę–‡ä»¶ā€é”µé¢čŽ·å–ć€‚ -#### ę‚ØåÆä»„é€ščæ‡å¦‚äø‹git cloneå‘½ä»¤ļ¼Œęˆ–č€…ModelScope SDKę„äø‹č½½ęØ”åž‹ +
+ DeepSeek AI +
+
+
+ + Homepage + + + Hugging Face + + +
+ +
+ + + Discord + + + Twitter Follow + + +
+ + + +

+ 🌟 Github | + šŸ“„ Model Download | + šŸ“„ Paper Link | + šŸ“„ Arxiv Paper Link | +

+

+

+ DeepSeek-OCR: Contexts Optical Compression +

+

+

+ +

+

+Explore the boundaries of visual-text compression. +

+ +## Usage +Inference using Huggingface transformers on NVIDIA GPUs. Requirements tested on python 3.12.9 + CUDA11.8: -SDKäø‹č½½ -```bash -#安装ModelScope -pip install modelscope ``` +torch==2.6.0 +transformers==4.46.3 +tokenizers==0.20.3 +einops +addict +easydict +pip install flash-attn==2.7.3 --no-build-isolation +``` + ```python -#SDKęØ”åž‹äø‹č½½ -from modelscope import snapshot_download -model_dir = snapshot_download('deepseek-ai/DeepSeek-OCR') -``` -Gitäø‹č½½ -``` -#GitęØ”åž‹äø‹č½½ -git clone https://www.modelscope.cn/deepseek-ai/DeepSeek-OCR.git +from transformers import AutoModel, AutoTokenizer +import torch +import os +os.environ["CUDA_VISIBLE_DEVICES"] = '0' +model_name = 'deepseek-ai/DeepSeek-OCR' + +tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True) +model = AutoModel.from_pretrained(model_name, _attn_implementation='flash_attention_2', trust_remote_code=True, use_safetensors=True) +model = model.eval().cuda().to(torch.bfloat16) + +# prompt = "\nFree OCR. " +prompt = "\n<|grounding|>Convert the document to markdown. " +image_file = 'your_image.jpg' +output_path = 'your/output/dir' + +# infer(self, tokenizer, prompt='', image_file='', output_path = ' ', base_size = 1024, image_size = 640, crop_mode = True, test_compress = False, save_results = False): + +# Tiny: base_size = 512, image_size = 512, crop_mode = False +# Small: base_size = 640, image_size = 640, crop_mode = False +# Base: base_size = 1024, image_size = 1024, crop_mode = False +# Large: base_size = 1280, image_size = 1280, crop_mode = False + +# Gundam: base_size = 1024, image_size = 640, crop_mode = True + +res = model.infer(tokenizer, prompt=prompt, image_file=image_file, output_path = output_path, base_size = 1024, image_size = 640, crop_mode=True, save_results = True, test_compress = True) ``` -

å¦‚ęžœę‚Øę˜Æęœ¬ęØ”åž‹ēš„č“”ēŒ®č€…ļ¼Œęˆ‘ä»¬é‚€čÆ·ę‚Øę ¹ę®ęØ”åž‹č“”ēŒ®ę–‡ę”£ļ¼ŒåŠę—¶å®Œå–„ęØ”åž‹å”ē‰‡å†…å®¹ć€‚

\ No newline at end of file +## vLLM +Refer to [🌟GitHub](https://github.com/deepseek-ai/DeepSeek-OCR/) for guidance on model inference acceleration and PDF processing, etc. + +## Visualizations + + + + + + + + + +
+ + +## Acknowledgement + +We would like to thank [Vary](https://github.com/Ucas-HaoranWei/Vary/), [GOT-OCR2.0](https://github.com/Ucas-HaoranWei/GOT-OCR2.0/), [MinerU](https://github.com/opendatalab/MinerU), [PaddleOCR](https://github.com/PaddlePaddle/PaddleOCR), [OneChart](https://github.com/LingyvKong/OneChart), [Slow Perception](https://github.com/Ucas-HaoranWei/Slow-Perception) for their valuable models and ideas. + +We also appreciate the benchmarks: [Fox](https://github.com/ucaslcl/Fox), [OmniDocBench](https://github.com/opendatalab/OmniDocBench). + + +## Citation +Coming soon! diff --git a/assets/fig1.png b/assets/fig1.png new file mode 100644 index 0000000..723836e Binary files /dev/null and b/assets/fig1.png differ diff --git a/assets/show1.jpg b/assets/show1.jpg new file mode 100644 index 0000000..06c7b12 Binary files /dev/null and b/assets/show1.jpg differ diff --git a/assets/show2.jpg b/assets/show2.jpg new file mode 100644 index 0000000..75759db Binary files /dev/null and b/assets/show2.jpg differ diff --git a/assets/show3.jpg b/assets/show3.jpg new file mode 100644 index 0000000..b8607ee Binary files /dev/null and b/assets/show3.jpg differ diff --git a/assets/show4.jpg b/assets/show4.jpg new file mode 100644 index 0000000..aa214a8 Binary files /dev/null and b/assets/show4.jpg differ diff --git a/config.json b/config.json new file mode 100644 index 0000000..0bc764c --- /dev/null +++ b/config.json @@ -0,0 +1,118 @@ +{ + "_name_or_path": "deepseek-ai/DeepSeek-OCR", + "candidate_resolutions": [ + [ + 1024, + 1024 + ] + ], + "global_view_pos": "head", + "architectures": [ + "DeepseekOCRForCausalLM" + ], + "auto_map": { + "AutoConfig": "modeling_deepseekocr.DeepseekOCRConfig", + "AutoModel": "modeling_deepseekocr.DeepseekOCRForCausalLM" + }, + "language_config": { + "architectures": [ + "DeepseekV2ForCausalLM" + ], + "auto_map": { + "AutoConfig": "configuration_deepseekv2.DeepseekV2Config", + "AutoModel": "modeling_deepseek.DeepseekV2Model", +
"AutoModelForCausalLM": "modeling_deepseek.DeepseekV2ForCausalLM" + }, + "bos_token_id": 0, + "eos_token_id": 1, + "first_k_dense_replace": 1, + "hidden_size": 1280, + "intermediate_size": 6848, + "kv_lora_rank": null, + "lm_head": true, + "max_position_embeddings": 8192, + "moe_intermediate_size": 896, + "n_group": 1, + "n_routed_experts": 64, + "n_shared_experts": 2, + "num_attention_heads": 10, + "num_experts_per_tok": 6, + "num_hidden_layers": 12, + "num_key_value_heads": 10, + "q_lora_rank": null, + "qk_nope_head_dim": 0, + "qk_rope_head_dim": 0, + "rm_head": false, + "topk_group": 1, + "topk_method": "greedy", + "torch_dtype": "bfloat16", + "use_mla": false, + "v_head_dim": 0, + "vocab_size": 129280 + }, + "model_type": "deepseek_vl_v2", + "projector_config": { + "input_dim": 2048, + "model_type": "mlp_projector", + "n_embed": 1280, + "projector_type": "linear" + }, + "tile_tag": "2D", + "torch_dtype": "bfloat16", + "transformers_version": "4.46.3", + "vision_config": { + "image_size": 1024, + "mlp_ratio": 3.7362, + "model_name": "deeplip_b_l", + "model_type": "vision", + "width": { + "clip-l-14-224": { + "heads": 16, + "image_size": 224, + "layers": 24, + "patch_size": 14, + "width": 1024 + }, + "sam_vit_b": { + "downsample_channels": [ + 512, + 1024 + ], + "global_attn_indexes": [ + 2, + 5, + 8, + 11 + ], + "heads": 12, + "layers": 12, + "width": 768 + } + } + }, + "bos_token_id": 0, + "eos_token_id": 1, + "first_k_dense_replace": 1, + "hidden_size": 1280, + "intermediate_size": 6848, + "kv_lora_rank": null, + "lm_head": true, + "max_position_embeddings": 8192, + "moe_intermediate_size": 896, + "n_group": 1, + "n_routed_experts": 64, + "n_shared_experts": 2, + "num_attention_heads": 10, + "num_experts_per_tok": 6, + "num_hidden_layers": 12, + "num_key_value_heads": 10, + "q_lora_rank": null, + "qk_nope_head_dim": 0, + "qk_rope_head_dim": 0, + "rm_head": false, + "topk_group": 1, + "topk_method": "greedy", + "use_mla": false, + "v_head_dim": 0, + 
"vocab_size": 129280 +} \ No newline at end of file diff --git a/configuration.json b/configuration.json new file mode 100644 index 0000000..4aef15d --- /dev/null +++ b/configuration.json @@ -0,0 +1 @@ +{"framework": "pytorch", "task": "image-text-to-text", "allow_remote": true} \ No newline at end of file diff --git a/configuration_deepseek_v2.py b/configuration_deepseek_v2.py new file mode 100644 index 0000000..a8622c2 --- /dev/null +++ b/configuration_deepseek_v2.py @@ -0,0 +1,210 @@ +from transformers.configuration_utils import PretrainedConfig +from transformers.utils import logging + +logger = logging.get_logger(__name__) + +DEEPSEEK_PRETRAINED_CONFIG_ARCHIVE_MAP = {} +class DeepseekV2Config(PretrainedConfig): + r""" + This is the configuration class to store the configuration of a [`DeepseekV2Model`]. It is used to instantiate an DeepSeek + model according to the specified arguments, defining the model architecture. Instantiating a configuration with the + defaults will yield a similar configuration to that of the DeepSeek-V2 with multi-latent attention. + + Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read the + documentation from [`PretrainedConfig`] for more information. + + + Args: + vocab_size (`int`, *optional*, defaults to 102400): + Vocabulary size of the Deep model. Defines the number of different tokens that can be represented by the + `inputs_ids` passed when calling [`DeepseekV2Model`] + hidden_size (`int`, *optional*, defaults to 4096): + Dimension of the hidden representations. + intermediate_size (`int`, *optional*, defaults to 11008): + Dimension of the MLP representations. + moe_intermediate_size (`int`, *optional*, defaults to 1407): + Dimension of the MoE representations. + num_hidden_layers (`int`, *optional*, defaults to 32): + Number of hidden layers in the Transformer decoder. 
class DeepseekV2Config(PretrainedConfig):
    r"""
    Configuration class for a [`DeepseekV2Model`].

    It stores every constructor argument as an attribute of the same name and forwards the token ids and
    `tie_word_embeddings` to [`PretrainedConfig`]. Configuration objects inherit from [`PretrainedConfig`]; read the
    documentation of [`PretrainedConfig`] for the options handled by the base class.

    All defaults listed below are the ones in the `__init__` signature.

    Args:
        vocab_size (`int`, *optional*, defaults to 102400):
            Vocabulary size of the DeepSeek model. Defines the number of different tokens that can be represented by
            the `inputs_ids` passed when calling [`DeepseekV2Model`].
        hidden_size (`int`, *optional*, defaults to 4096):
            Dimension of the hidden representations.
        intermediate_size (`int`, *optional*, defaults to 11008):
            Dimension of the MLP representations.
        moe_intermediate_size (`int`, *optional*, defaults to 1407):
            Dimension of the MoE representations.
        num_hidden_layers (`int`, *optional*, defaults to 30):
            Number of hidden layers in the Transformer decoder.
        num_attention_heads (`int`, *optional*, defaults to 32):
            Number of attention heads for each attention layer in the Transformer decoder.
        num_key_value_heads (`int`, *optional*, defaults to 32):
            This is the number of key_value heads that should be used to implement Grouped Query Attention. If
            `num_key_value_heads=num_attention_heads`, the model will use Multi Head Attention (MHA), if
            `num_key_value_heads=1` the model will use Multi Query Attention (MQA), otherwise GQA is used. When
            converting a multi-head checkpoint to a GQA checkpoint, each group key and value head should be
            constructed by meanpooling all the original heads within that group. For more details check out [this
            paper](https://arxiv.org/pdf/2305.13245.pdf). If `None` is passed explicitly, `num_attention_heads` is
            used instead.
        n_shared_experts (`int`, *optional*, defaults to `None`):
            Number of shared experts, None means dense model.
        n_routed_experts (`int`, *optional*, defaults to `None`):
            Number of routed experts, None means dense model.
        ep_size (`int`, *optional*, defaults to 1):
            Stored unchanged on the config. Presumably the expert-parallel group size -- confirm against the
            modeling code.
        routed_scaling_factor (`float`, *optional*, defaults to 1.0):
            Scaling factor of routed experts.
        kv_lora_rank (`int`, *optional*, defaults to 512):
            Stored unchanged on the config. Presumably the key/value low-rank dimension used by multi-latent
            attention -- confirm against the modeling code.
        q_lora_rank (`int`, *optional*, defaults to 1536):
            Stored unchanged on the config. Presumably the query low-rank dimension used by multi-latent
            attention -- confirm against the modeling code.
        qk_rope_head_dim (`int`, *optional*, defaults to 64):
            Stored unchanged on the config. Presumably the rotary part of the query/key head dimension -- confirm
            against the modeling code.
        v_head_dim (`int`, *optional*, defaults to 128):
            Stored unchanged on the config. Presumably the value head dimension -- confirm against the modeling
            code.
        qk_nope_head_dim (`int`, *optional*, defaults to 128):
            Stored unchanged on the config. Presumably the non-rotary part of the query/key head dimension --
            confirm against the modeling code.
        topk_method (`str`, *optional*, defaults to `gready`):
            Topk method used in routed gate. NOTE(review): the default is spelled `gready` (sic); verify which
            spelling the gate implementation actually compares against before relying on the default.
        n_group (`int`, *optional*, defaults to `None`):
            Number of groups for routed experts.
        topk_group (`int`, *optional*, defaults to `None`):
            Number of selected groups for each token (for each token, ensuring the selected experts is only within
            `topk_group` groups).
        num_experts_per_tok (`int`, *optional*, defaults to `None`):
            Number of selected experts, None means dense model.
        moe_layer_freq (`int`, *optional*, defaults to 1):
            The frequency of the MoE layer: one expert layer for every `moe_layer_freq - 1` dense layers.
        first_k_dense_replace (`int`, *optional*, defaults to 0):
            Number of dense layers in shallow layers(embed->dense->dense->...->dense->moe->moe...->lm_head).
                                                            \--k dense layers--/
        norm_topk_prob (`bool`, *optional*, defaults to `False`):
            Whether to normalize the weights of the routed experts.
        scoring_func (`str`, *optional*, defaults to `'softmax'`):
            Method of computing expert weights.
        aux_loss_alpha (`float`, *optional*, defaults to 0.001):
            Auxiliary loss weight coefficient.
        seq_aux (`bool`, *optional*, defaults to `True`):
            Whether to compute the auxiliary loss for each individual sample.
        hidden_act (`str` or `function`, *optional*, defaults to `"silu"`):
            The non-linear activation function (function or string) in the decoder.
        max_position_embeddings (`int`, *optional*, defaults to 2048):
            The maximum sequence length that this model might ever be used with.
        initializer_range (`float`, *optional*, defaults to 0.02):
            The standard deviation of the truncated_normal_initializer for initializing all weight matrices.
        rms_norm_eps (`float`, *optional*, defaults to 1e-06):
            The epsilon used by the rms normalization layers. The value is coerced with `float()` before being
            stored.
        use_cache (`bool`, *optional*, defaults to `True`):
            Whether or not the model should return the last key/values attentions (not used by all models). Only
            relevant if `config.is_decoder=True`.
        pad_token_id (`int`, *optional*):
            Padding token id.
        bos_token_id (`int`, *optional*, defaults to 100000):
            Beginning of stream token id.
        eos_token_id (`int`, *optional*, defaults to 100001):
            End of stream token id.
        pretraining_tp (`int`, *optional*, defaults to 1):
            Experimental feature. Tensor parallelism rank used during pretraining. Please refer to [this
            document](https://huggingface.co/docs/transformers/parallelism) to understand more about it. This value
            is necessary to ensure exact reproducibility of the pretraining results. Please refer to [this
            issue](https://github.com/pytorch/pytorch/issues/76232).
        tie_word_embeddings (`bool`, *optional*, defaults to `False`):
            Whether to tie weight embeddings.
        rope_theta (`float`, *optional*, defaults to 10000.0):
            The base period of the RoPE embeddings.
        rope_scaling (`Dict`, *optional*):
            Dictionary containing the scaling configuration for the RoPE embeddings. Currently supports two scaling
            strategies: linear and dynamic. Their scaling factor must be a float greater than 1. The expected format
            is `{"type": strategy name, "factor": scaling factor}`. When using this flag, don't update
            `max_position_embeddings` to the expected new maximum.
        attention_bias (`bool`, *optional*, defaults to `False`):
            Whether to use a bias in the query, key, value and output projection layers during self-attention.
        attention_dropout (`float`, *optional*, defaults to 0.0):
            The dropout ratio for the attention probabilities.
        use_mla (`bool`, *optional*, defaults to `True`):
            Use multi-latent attention or multi-head attention. If True, the model will use multi-latent attention,
            otherwise, it will use multi-head attention.

    ```python
    >>> from configuration_deepseek_v2 import DeepseekV2Config

    >>> # Initializing a Deepseek-V2 style configuration with the default arguments
    >>> configuration = DeepseekV2Config()

    >>> # Accessing a stored value
    >>> configuration.use_mla
    True
    ```"""

    model_type = "deepseek_v2"
    keys_to_ignore_at_inference = ["past_key_values"]

    def __init__(
        self,
        vocab_size=102400,
        hidden_size=4096,
        intermediate_size=11008,
        moe_intermediate_size = 1407,
        num_hidden_layers=30,
        num_attention_heads=32,
        num_key_value_heads=32,
        n_shared_experts = None,
        n_routed_experts = None,
        ep_size = 1,
        routed_scaling_factor = 1.0,
        kv_lora_rank = 512,
        q_lora_rank = 1536,
        qk_rope_head_dim = 64,
        v_head_dim = 128,
        qk_nope_head_dim = 128,
        topk_method = 'gready',
        n_group = None,
        topk_group = None,
        num_experts_per_tok = None,
        moe_layer_freq = 1,
        first_k_dense_replace = 0,
        norm_topk_prob = False,
        scoring_func = 'softmax',
        aux_loss_alpha = 0.001,
        seq_aux = True,
        hidden_act="silu",
        max_position_embeddings=2048,
        initializer_range=0.02,
        rms_norm_eps=1e-6,
        use_cache=True,
        pad_token_id=None,
        bos_token_id=100000,
        eos_token_id=100001,
        pretraining_tp=1,
        tie_word_embeddings=False,
        rope_theta=10000.0,
        rope_scaling=None,
        attention_bias=False,
        attention_dropout=0.0,
        use_mla=True,
        **kwargs,
    ):
        # Every argument is stored verbatim under its own name; only
        # `num_key_value_heads` and `rms_norm_eps` are normalised below.
        self.vocab_size = vocab_size
        self.max_position_embeddings = max_position_embeddings
        self.hidden_size = hidden_size
        self.intermediate_size = intermediate_size
        self.moe_intermediate_size = moe_intermediate_size
        self.num_hidden_layers = num_hidden_layers
        self.num_attention_heads = num_attention_heads
        self.n_shared_experts = n_shared_experts
        self.n_routed_experts = n_routed_experts
        self.ep_size = ep_size
        self.routed_scaling_factor = routed_scaling_factor
        self.kv_lora_rank = kv_lora_rank
        self.q_lora_rank = q_lora_rank
        self.qk_rope_head_dim = qk_rope_head_dim
        self.v_head_dim = v_head_dim
        self.qk_nope_head_dim = qk_nope_head_dim
        self.topk_method = topk_method
        self.n_group = n_group
        self.topk_group = topk_group
        self.num_experts_per_tok = num_experts_per_tok
        self.moe_layer_freq = moe_layer_freq
        self.first_k_dense_replace = first_k_dense_replace
        self.norm_topk_prob = norm_topk_prob
        self.scoring_func = scoring_func
        self.aux_loss_alpha = aux_loss_alpha
        self.seq_aux = seq_aux
        # for backward compatibility: an explicit None means "one KV head per
        # attention head"
        if num_key_value_heads is None:
            num_key_value_heads = num_attention_heads

        self.num_key_value_heads = num_key_value_heads
        self.hidden_act = hidden_act
        self.initializer_range = initializer_range
        # float() so a value given as a string or int is stored as a float
        self.rms_norm_eps = float(rms_norm_eps)
        self.pretraining_tp = pretraining_tp
        self.use_cache = use_cache
        self.rope_theta = rope_theta
        self.rope_scaling = rope_scaling
        self.attention_bias = attention_bias
        self.attention_dropout = attention_dropout
        self.use_mla = use_mla

        # The base class is initialised last, after all model attributes are
        # set; it receives the token ids, the embedding-tying flag and any
        # extra keyword arguments.
        super().__init__(
            pad_token_id=pad_token_id,
            bos_token_id=bos_token_id,
            eos_token_id=eos_token_id,
            tie_word_embeddings=tie_word_embeddings,
            **kwargs,
        )
class SeparatorStyle(IntEnum):
    """Separator styles understood by ``Conversation.get_prompt``."""

    DeepSeek = auto()
    DeepSeekV2 = auto()
    PLAIN = auto()
    ALIGNMENT = auto()


@dataclasses.dataclass
class Conversation:
    """A prompt template plus the running history of one conversation.

    ``messages`` is a list of ``[role, message]`` pairs. The last message is
    typically appended as ``None`` while the prompt is built and filled in
    afterwards with ``update_last_message``.
    """

    # The name of this template
    name: str
    # The template of the system prompt
    system_template: str = "{system_message}"
    # The system message
    system_message: str = ""
    # The names of the two roles (user first, assistant second)
    roles: List[str] = ("USER", "ASSISTANT")
    # All messages. Each item is [role, message].
    messages: List[List[str]] = dataclasses.field(default_factory=list)
    # The number of few shot examples
    offset: int = 0
    # The separator style and configurations
    sep_style: SeparatorStyle = SeparatorStyle.DeepSeek
    sep: str = "\n"
    sep2: str = None
    # Stop criteria (the default one is EOS token)
    stop_str: str = None
    # Stops generation if meeting any token in this list
    stop_token_ids: List[int] = None

    def __post_init__(self):
        """Make sure ``messages`` is a mutable list of mutable pairs.

        Templates are commonly declared with ``messages=()``; without this
        normalisation ``append_message`` would fail on the tuple. A list that
        the caller passes in is kept as the very same object.
        """
        if not isinstance(self.messages, list):
            self.messages = [list(entry) for entry in (self.messages or ())]

    def get_prompt(self) -> str:
        """Get the prompt for generation.

        Returns:
            The system prompt (if any) followed by every message, rendered
            according to ``sep_style``.

        Raises:
            ValueError: if ``sep_style`` is not a known ``SeparatorStyle``.
        """
        system_prompt = self.system_template.format(system_message=self.system_message)
        seps = [self.sep, self.sep2]
        style = self.sep_style

        if style == SeparatorStyle.DeepSeek:
            parts = [system_prompt + seps[0]] if system_prompt else []
            for i, (role, message) in enumerate(self.messages):
                if message:
                    # even turns end with ``sep``, odd turns with ``sep2``
                    parts.append(role + ": " + message + seps[i % 2])
                else:
                    # open turn: the model continues right after the role tag
                    parts.append(role + ":")
            return "".join(parts)

        if style == SeparatorStyle.DeepSeekV2:
            # Layout: <|sft▁begin|>User Input<|sft▁end|>\nResponse<|end▁of▁sentence|>
            parts = [system_prompt + seps[0]] if system_prompt else []
            for role, message in self.messages:
                if not message:
                    continue
                if role == "User":
                    parts.append("<|sft▁begin|>\n" + message + self.sep)
                else:
                    parts.append(message + self.sep2)
            return "".join(parts)

        if style == SeparatorStyle.PLAIN:
            parts = []
            for i, (_, message) in enumerate(self.messages):
                if not message:
                    continue
                if isinstance(message, tuple):
                    # (text, extra, extra) triples: only the text is rendered
                    message, _, _ = message
                parts.append(message + seps[i % 2])
            return "".join(parts)

        if style == SeparatorStyle.ALIGNMENT:
            parts = []
            for i, (_, message) in enumerate(self.messages):
                if not message:
                    continue
                if isinstance(message, tuple):
                    message, _, _ = message
                if i % 2 == 0:
                    # NOTE(review): even turns emit only a newline and the
                    # separator; the message text itself is not rendered.
                    # Confirm this is intended.
                    parts.append('\n' + seps[i % 2])
                else:
                    parts.append(message + seps[i % 2])
            return "".join(parts)

        raise ValueError(f"Invalid style: {self.sep_style}")

    def set_system_message(self, system_message: str):
        """Set the system message."""
        self.system_message = system_message

    def append_message(self, role: str, message: str):
        """Append a new message."""
        self.messages.append([role, message])

    def update_last_message(self, message: str):
        """Update the last output.

        The last message is typically set to be None when constructing the prompt,
        so we need to update it in-place after getting the response from a model.
        """
        self.messages[-1][1] = message

    def reset_message(self):
        """Drop the whole message history."""
        self.messages = []

    def to_gradio_chatbot(self):
        """Convert the conversation to gradio chatbot format.

        Returns:
            A list of ``[user_message, assistant_message]`` pairs built from
            the messages after ``offset``; the assistant slot is ``None``
            until the matching reply exists.
        """
        ret = []
        for i, (role, msg) in enumerate(self.messages[self.offset :]):
            if i % 2 == 0:
                ret.append([msg, None])
            else:
                ret[-1][-1] = msg
        return ret

    def to_openai_api_messages(self):
        """Convert the conversation to OpenAI chat completion format.

        Returns:
            A list of ``{"role", "content"}`` dicts starting with the system
            prompt. Assistant turns whose message is ``None`` are skipped.
        """
        system_prompt = self.system_template.format(system_message=self.system_message)
        ret = [{"role": "system", "content": system_prompt}]

        for i, (_, msg) in enumerate(self.messages[self.offset :]):
            if i % 2 == 0:
                ret.append({"role": "user", "content": msg})
            elif msg is not None:
                ret.append({"role": "assistant", "content": msg})
        return ret

    def copy(self):
        """Return a new ``Conversation`` whose message list is independent of this one."""
        return Conversation(
            name=self.name,
            system_template=self.system_template,
            system_message=self.system_message,
            roles=self.roles,
            messages=[[x, y] for x, y in self.messages],
            offset=self.offset,
            sep_style=self.sep_style,
            sep=self.sep,
            sep2=self.sep2,
            stop_str=self.stop_str,
            stop_token_ids=self.stop_token_ids,
        )

    def dict(self):
        """Return the template name, system message, roles, messages and offset as a dict."""
        return {
            "template_name": self.name,
            "system_message": self.system_message,
            "roles": self.roles,
            "messages": self.messages,
            "offset": self.offset,
        }
# Global registry mapping a template name to its Conversation prototype
conv_templates: Dict[str, Conversation] = {}


def register_conv_template(template: Conversation, override: bool = False):
    """Store ``template`` in the registry under ``template.name``.

    Unless ``override`` is true, registering a name twice trips an assertion.
    """
    key = template.name
    assert override or key not in conv_templates, f"{key} has been registered."
    conv_templates[key] = template


def get_conv_template(name: str) -> Conversation:
    """Return a fresh copy of the template registered as ``name``."""
    prototype = conv_templates[name]
    return prototype.copy()


# DeepSeek chat format: "<role>: <message>" turns, blank line after user turns.
register_conv_template(
    Conversation(
        name="deepseek",
        system_template="{system_message}",
        system_message="",
        roles=("<|User|>", "<|Assistant|>"),
        messages=(),
        offset=0,
        sep_style=SeparatorStyle.DeepSeek,
        sep="\n\n",
        sep2="<|end▁of▁sentence|>",
        stop_token_ids=[100001],
        stop_str=["User:", "<|end▁of▁sentence|>"],
    )
)

# Same roles and separator style as "deepseek", but no separator after user turns.
register_conv_template(
    Conversation(
        name="deepseekv2",
        system_template="{system_message}",
        system_message="",
        roles=("<|User|>", "<|Assistant|>"),
        messages=(),
        offset=0,
        sep_style=SeparatorStyle.DeepSeek,
        sep="",
        sep2="<|end▁of▁sentence|>",
        stop_token_ids=[100001],
        stop_str=["User:", "<|end▁of▁sentence|>"],
    )
)

# Raw concatenation of messages, no role tags.
register_conv_template(
    Conversation(
        name="plain",
        system_template="",
        system_message="",
        roles=("", ""),
        messages=(),
        offset=0,
        sep_style=SeparatorStyle.PLAIN,
        sep="",
        sep2="",
        stop_token_ids=[100001],
        stop_str=[''],
    )
)

# Alignment-stage format, rendered by the ALIGNMENT separator style.
register_conv_template(
    Conversation(
        name="alignment",
        system_template="",
        system_message="",
        roles=("", ""),
        messages=(),
        offset=0,
        sep_style=SeparatorStyle.ALIGNMENT,
        sep="",
        sep2="",
        stop_token_ids=[100001],
        stop_str=[''],
    )
)


if __name__ == "__main__":
    # Render the same three-turn dialogue with both chat templates; the last
    # assistant turn is left open (None).
    demo_turns = (
        ("Hello!", "Hi! This is Tony."),
        ("Who are you?", "I am a helpful assistant."),
        ("How are you?", None),
    )
    for template_name in ("deepseek", "deepseekv2"):
        print(f"{template_name} template:")
        conv = get_conv_template(template_name)
        for question, answer in demo_turns:
            conv.append_message(conv.roles[0], question)
            conv.append_message(conv.roles[1], answer)
        print(conv.get_prompt())
class MlpProjector(nn.Module):
    """Projects vision features to the embedding width ``cfg.n_embed``.

    The projection stack is selected by ``cfg.projector_type`` and stored in
    ``self.layers``. ``cfg`` must support both attribute access
    (``cfg.input_dim``) and ``cfg.get(key, default)``.
    """

    def __init__(self, cfg):
        """Build the sub-modules for ``cfg.projector_type``.

        Args:
            cfg: projector configuration. Always read: ``projector_type``.
                Depending on the type also ``input_dim``, ``n_embed``,
                ``depth``, ``mlp_ratio``, ``downsample_ratio``,
                ``channel_div``, plus the optional switches ``token_pooling``
                and ``conv_fusion_high_low_features``.

        Raises:
            ValueError: if ``cfg.projector_type`` is not one of the handled
                names.
        """

        super().__init__()

        self.cfg = cfg

        if cfg.projector_type == "identity":
            modules = nn.Identity()

        elif cfg.projector_type == "linear":
            modules = nn.Linear(cfg.input_dim, cfg.n_embed)

        elif cfg.projector_type == "mlp_gelu":
            # Linear followed by (depth - 1) x [GELU, Linear]
            mlp_depth = cfg.get("depth", 1)
            modules = [nn.Linear(cfg.input_dim, cfg.n_embed)]
            for _ in range(1, mlp_depth):
                modules.append(nn.GELU())
                modules.append(nn.Linear(cfg.n_embed, cfg.n_embed))
            modules = nn.Sequential(*modules)

        elif cfg.projector_type == "normlayer_downsample_mlp_gelu":
            # Input width is input_dim * ratio^2 because forward() folds each
            # ratio x ratio neighbourhood into the channel axis first.
            mlp_depth = cfg.get("depth", 1)
            mlp_ratio = cfg.get("mlp_ratio", 1)
            modules = [
                nn.LayerNorm(cfg.input_dim * cfg.downsample_ratio * cfg.downsample_ratio),
                nn.Linear(cfg.input_dim * cfg.downsample_ratio * cfg.downsample_ratio, cfg.n_embed * mlp_ratio)
            ]
            for _ in range(1, mlp_depth - 1):
                modules.append(nn.GELU())
                modules.append(nn.Linear(cfg.n_embed * mlp_ratio, cfg.n_embed * mlp_ratio))
            # The final GELU + Linear back to n_embed is appended for every depth.
            modules.append(nn.GELU())
            modules.append(nn.Linear(cfg.n_embed * mlp_ratio, cfg.n_embed))
            modules = nn.Sequential(*modules)

        elif cfg.projector_type == "downsample_mlp_gelu":
            # Same as the branch above without the leading LayerNorm.
            mlp_depth = cfg.get("depth", 1)
            mlp_ratio = cfg.get("mlp_ratio", 1)
            modules = [nn.Linear(cfg.input_dim * cfg.downsample_ratio * cfg.downsample_ratio, cfg.n_embed * mlp_ratio)]
            for _ in range(1, mlp_depth - 1):
                modules.append(nn.GELU())
                modules.append(nn.Linear(cfg.n_embed * mlp_ratio, cfg.n_embed * mlp_ratio))
            modules.append(nn.GELU())
            modules.append(nn.Linear(cfg.n_embed * mlp_ratio, cfg.n_embed))
            modules = nn.Sequential(*modules)

        elif cfg.projector_type == "low_high_hybrid_split_mlp_gelu":
            # Two up-projections, each to half of n_embed; forward()
            # concatenates their outputs before the shared MLP.
            mlp_depth = cfg.get("depth", 1)
            self.high_up_proj = nn.Linear(cfg.input_dim, cfg.n_embed // 2)
            self.low_up_proj = nn.Linear(cfg.input_dim, cfg.n_embed // 2)

            modules = []
            for _ in range(1, mlp_depth):
                modules.append(nn.GELU())
                modules.append(nn.Linear(cfg.n_embed, cfg.n_embed))
            modules = nn.Sequential(*modules)

        elif cfg.projector_type == "hybrid_split_feature_mlp_gelu":
            # cfg.input_dim is indexed here: [0] is the width of the "high"
            # slice, [1] of the "low" slice. Output widths sum to n_embed.
            mlp_depth = cfg.get("depth", 1)
            channel_div = cfg.get("channel_div", 0.5)
            self.high_up_proj = nn.Linear(cfg.input_dim[0], int(cfg.n_embed * channel_div))
            self.low_up_proj = nn.Linear(cfg.input_dim[1], cfg.n_embed - int(cfg.n_embed * channel_div))

            modules = []
            for _ in range(1, mlp_depth):
                modules.append(nn.GELU())
                modules.append(nn.Linear(cfg.n_embed, cfg.n_embed))
            modules = nn.Sequential(*modules)

        elif cfg.projector_type == "low_high_split_mlp_gelu":
            mlp_depth = cfg.get("depth", 1)
            modules = []
            for _ in range(1, mlp_depth):
                modules.append(nn.GELU())
                modules.append(nn.Linear(cfg.n_embed // 2, cfg.n_embed // 2))
            modules = nn.Sequential(*modules)
            # high_layers re-wraps the same sub-module objects as `modules`
            # (so they share weights with self.layers); low_layers is an
            # independent deep copy.
            self.high_layers = nn.Sequential(*modules)
            self.low_layers = copy.deepcopy(modules)

        else:
            raise ValueError(f"Unknown projector type: {cfg.projector_type}")

        if cfg.get("token_pooling", False):
            # Merges each 2x2 token neighbourhood (4 * input_dim) back to input_dim.
            self.token_pooling_layer = nn.Linear(cfg.input_dim * 4, cfg.input_dim)

        if cfg.get("conv_fusion_high_low_features", False):
            self.fusion_layer = nn.Linear(cfg.input_dim, cfg.input_dim)
        self.layers = modules

    def forward(self, x):
        """Apply the optional pre-processing steps and the projection stack.

        Args:
            x: input features. With ``token_pooling`` or a ``*downsample*``
                projector it is unpacked as a 3-D ``(batch, tokens, channels)``
                tensor whose token count is treated as a square grid. For the
                ``low_high_*`` projectors it is indexed as ``x[0]`` / ``x[1]``
                (presumably a pair of high/low feature tensors -- confirm with
                the caller).

        Returns:
            ``self.layers(x)``, except for ``low_high_split_mlp_gelu`` which
            returns the concatenation of its two branches directly.
        """
        if self.cfg.get("token_pooling", False):
            batch_size, wxh, channels = x.shape
            # int() truncates, so a non-square token count makes the view below fail
            w = h = int(wxh**0.5)
            x = x.view(batch_size, w, h, channels)
            x = x.permute(0, 3, 1, 2)
            # Cut the grid into non-overlapping 2x2 windows.
            patches = x.unfold(2, 2, 2).unfold(3, 2, 2)
            batch_size, channels, h_patches, w_patches, _, _ = patches.size()
            # Concatenate along the channel dimension
            patches = patches.contiguous().view(batch_size, channels, h_patches * w_patches, -1)

            # Pass through the linear layer
            patches = patches.permute(0, 2, 1, 3).contiguous()
            patches = patches.view(batch_size, h_patches * w_patches, channels * 4)

            x = self.token_pooling_layer(patches)

        if self.cfg.get("conv_fusion_high_low_features", False):
            # Index 0 on dim 1 goes through the fusion layer and is added to index 1.
            x = self.fusion_layer(x[:, 0]) + x[:, 1]

        if self.cfg.projector_type == 'low_high_hybrid_split_mlp_gelu':
            high_x, low_x = x[0], x[1]
            high_x = self.high_up_proj(high_x)
            low_x = self.low_up_proj(low_x)
            x = torch.concat([high_x, low_x], dim=-1)

        if self.cfg.projector_type == 'hybrid_split_feature_mlp_gelu':
            # Split the last axis at input_dim[0] into the high and low slices.
            high_x = x[...,:self.cfg.input_dim[0]]
            low_x = x[...,self.cfg.input_dim[0]:]
            high_x = self.high_up_proj(high_x)
            low_x = self.low_up_proj(low_x)
            x = torch.concat([high_x, low_x], dim=-1)

        if self.cfg.projector_type == 'low_high_split_mlp_gelu':
            high_x, low_x = x[0], x[1]
            high_x = self.high_layers(high_x)
            low_x = self.low_layers(low_x)
            x = torch.concat([high_x, low_x], dim=-1)
            # Early return: self.layers is not applied on this path.
            return x

        if self.cfg.projector_type == 'downsample_mlp_gelu' or self.cfg.projector_type == 'normlayer_downsample_mlp_gelu':
            bs, hw, input_dim = x.shape
            h = w = int((hw) ** 0.5)

            """compute padding"""
            # Pad the grid up to the next multiple of downsample_ratio.
            if h % self.cfg.downsample_ratio:
                pad = self.cfg.downsample_ratio - h % self.cfg.downsample_ratio
            else:
                pad = 0
            x = x.reshape(bs, h, w, input_dim)
            if pad > 0:
                # F.pad lists pads from the last dim backwards: channels
                # untouched, then `pad` zeros at the end of w and of h.
                x = F.pad(x, (0, 0, 0, pad, 0, pad), "constant", 0)

            """4 to 1 concat"""
            x = x.permute(0, 3, 1, 2)  # B, C, H, W
            # Each ratio x ratio window becomes one token with C * ratio^2
            # features (the "C*4, HW // 4" below is the ratio == 2 case).
            x = F.unfold(x, kernel_size=self.cfg.downsample_ratio, stride=self.cfg.downsample_ratio, padding=0) # B, C*4, HW // 4
            x = x.permute(0, 2, 1)

        return self.layers(x)

    @staticmethod
    def get_flops_per_sample(cfg):
        """Estimate the FLOPs of the projector for one input token.

        ``linear`` counts ``2 * input_dim * n_embed``. Any type containing
        ``"mlp_gelu"`` counts the first Linear (with ``input_dim`` summed if
        it is a list and scaled by ``downsample_ratio ** 2``) plus
        ``depth - 1`` square ``n_embed`` layers; ``mlp_ratio`` is not taken
        into account. Other types count 0.

        Returns:
            Three times the forward estimate (presumably forward plus a
            backward pass costed at twice the forward -- confirm).
        """
        if cfg.projector_type == "linear":
            fwd = 2 * cfg.input_dim * cfg.n_embed

        elif "mlp_gelu" in cfg.projector_type :
            mlp_depth = cfg.get("depth", 1)
            downsample_ratio = cfg.get("downsample_ratio", 1)
            input_dim = sum(cfg.input_dim) if isinstance(cfg.input_dim, list) else cfg.input_dim
            input_dim = input_dim * downsample_ratio * downsample_ratio
            fwd = 2 * input_dim * cfg.n_embed + (mlp_depth - 1) * 2 * cfg.n_embed * cfg.n_embed
        else:
            fwd = 0

        return fwd * 3
stride=self.cfg.downsample_ratio, padding=0) # B, C*4, HW // 4 + x = x.permute(0, 2, 1) + + return self.layers(x) + + @staticmethod + def get_flops_per_sample(cfg): + if cfg.projector_type == "linear": + fwd = 2 * cfg.input_dim * cfg.n_embed + + elif "mlp_gelu" in cfg.projector_type : + mlp_depth = cfg.get("depth", 1) + downsample_ratio = cfg.get("downsample_ratio", 1) + input_dim = sum(cfg.input_dim) if isinstance(cfg.input_dim, list) else cfg.input_dim + input_dim = input_dim * downsample_ratio * downsample_ratio + fwd = 2 * input_dim * cfg.n_embed + (mlp_depth - 1) * 2 * cfg.n_embed * cfg.n_embed + else: + fwd = 0 + + return fwd * 3 + + +#===================clip============================================================ + +class LayerNormfp32(torch.nn.LayerNorm): + """Subclass torch's LayerNorm to handle fp16.""" + + def forward(self, x: torch.Tensor): + orig_type = x.dtype + ret = super().forward(x.type(torch.float32)) + return ret.type(orig_type) + + +def get_abs_pos(abs_pos, tgt_size): + # abs_pos: L, C + # tgt_size: M + # return: M, C + + # print(tgt_size) + # print(abs_pos.shape) + # exit() + dim = abs_pos.size(-1) + # print(dim) + abs_pos_new = abs_pos.squeeze(0) + cls_token, old_pos_embed = abs_pos_new[:1], abs_pos_new[1:] + + + + src_size = int(math.sqrt(abs_pos_new.shape[0] - 1)) + tgt_size = int(math.sqrt(tgt_size)) + dtype = abs_pos.dtype + + if src_size != tgt_size: + old_pos_embed = old_pos_embed.view(1, src_size, src_size, dim).permute(0, 3, 1, + 2).contiguous() + old_pos_embed = old_pos_embed.to(torch.float32) + new_pos_embed = F.interpolate( + old_pos_embed, + size=(tgt_size, tgt_size), + mode='bicubic', + antialias=True, + align_corners=False, + ).to(dtype) + new_pos_embed = new_pos_embed.permute(0, 2, 3, 1) + new_pos_embed = new_pos_embed.view(tgt_size * tgt_size, dim) + vision_pos_embed = torch.cat([cls_token, new_pos_embed], dim=0) + vision_pos_embed = vision_pos_embed.view(1, tgt_size * tgt_size + 1, dim) + return vision_pos_embed + else: + 
return abs_pos + +@torch.jit.script +def quick_gelu(x): + return x * torch.sigmoid(1.702 * x) + + + +class CLIPVisionEmbeddings(nn.Module): + def __init__(self, hidden_size=1024, image_size=224, patch_size=14, num_channels=3): + super().__init__() + self.embed_dim = hidden_size + self.image_size = image_size + self.patch_size = patch_size + + self.class_embedding = torch.nn.Parameter(torch.randn(self.embed_dim)) + + self.patch_embedding = torch.nn.Conv2d( + in_channels=num_channels, + out_channels=self.embed_dim, + kernel_size=self.patch_size, + stride=self.patch_size, + bias=False, + ) + + self.num_patches = (self.image_size // self.patch_size) ** 2 + self.num_positions = self.num_patches + 1 + self.position_embedding = torch.nn.Embedding(self.num_positions, self.embed_dim) + self.register_buffer( + "position_ids", torch.arange(self.num_positions).expand((1, -1)) + ) + + def forward(self, pixel_values, patch_embeds): + batch_size = pixel_values.shape[0] + # patch_embeds = self.patch_embedding( + # pixel_values + # ) # shape = [*, width, grid, grid] + + + if patch_embeds is not None: + patch_embeds = patch_embeds + # print(patch_embeds.shape) + else: + patch_embeds = self.patch_embedding(pixel_values) + # print(111111) + # shape = [*, width, grid, grid] + # patch_embeds = patch_embeds.flatten(2).transpose(1, 2) + + patch_embeds = patch_embeds.flatten(2).transpose(1, 2) + + + class_embeds = self.class_embedding.expand(batch_size, 1, -1) + embeddings = torch.cat([class_embeds, patch_embeds], dim=1) + + # x = torch.cat([cls_token, x], dim=1) + embeddings = embeddings + get_abs_pos(self.position_embedding(self.position_ids), embeddings.size(1)) + # embeddings = embeddings + self.position_embedding(self.position_ids) + return embeddings + + +class NoTPFeedForward(nn.Module): + def __init__( + self, + cfg, + dim: int, + hidden_dim: int, + ): + super().__init__() + + self.fc1 = torch.nn.Linear(dim, hidden_dim, bias=True) + self.fc2 = torch.nn.Linear(hidden_dim, dim, 
bias=True) + + def forward(self, x): + output = self.fc2(quick_gelu(self.fc1(x))) + return output + + + + +class NoTPAttention(torch.nn.Module): + def __init__(self, cfg): + super().__init__() + self.num_heads = cfg.num_attention_heads + self.n_local_heads = cfg.num_attention_heads + self.head_dim = cfg.hidden_size // cfg.num_attention_heads + self.max_seq_len = cfg.seq_length + self.use_flash_attention = cfg.use_flash_attn + + self.qkv_proj = torch.nn.Linear(cfg.hidden_size, cfg.hidden_size * 3, bias=True) + self.out_proj = torch.nn.Linear(cfg.hidden_size, cfg.hidden_size, bias=True) + + # self.core_attention = CoreAttention(cfg, AttnType.self_attn) + + self.attn_drop = cfg.attention_dropout + + def forward( + self, + x: torch.Tensor, + ): + bsz, seqlen, _ = x.shape + xqkv = self.qkv_proj(x) + xqkv = xqkv.view(bsz, seqlen, 3, self.num_heads, self.head_dim) + + if self.use_flash_attention: + + xq, xk, xv = torch.split(xqkv, 1, dim=2) + xq = xq.squeeze(2) + xk = xk.squeeze(2) + xv = xv.squeeze(2) + # xq, xk, xv = xqkv[:, :, 0, ...], xqkv[:, :, 1, ...], xqkv[:, :, 2, ...] + + # (B, num_head, S, head_size) + xq = xq.permute(0, 2, 1, 3) + xk = xk.permute(0, 2, 1, 3) + xv = xv.permute(0, 2, 1, 3) + # with torch.backends.cuda.sdp_kernel(enable_flash=True, enable_math=False, enable_mem_efficient=False): + output = torch.nn.functional.scaled_dot_product_attention(xq, xk, xv, attn_mask=None) + output = output.permute(0, 2, 1, 3).reshape(bsz, seqlen, -1) + # output = output.permute(0, 2, 1, 3).contiguous().view(bsz, seqlen, -1) + else: + # print(22222) + xq, xk, xv = torch.split(xqkv, 1, dim=2) + xq = xq.squeeze(2) + xk = xk.squeeze(2) + xv = xv.squeeze(2) + # xq, xk, xv = xqkv[:, :, 0, ...], xqkv[:, :, 1, ...], xqkv[:, :, 2, ...] 
+ + # (B, num_head, S, head_size) + xq = xq.permute(0, 2, 1, 3) + xk = xk.permute(0, 2, 1, 3) + xv = xv.permute(0, 2, 1, 3) + # with torch.backends.cuda.sdp_kernel(enable_flash=True, enable_math=False, enable_mem_efficient=False): + output = torch.nn.functional.scaled_dot_product_attention(xq, xk, xv, attn_mask=None) + output = output.permute(0, 2, 1, 3).reshape(bsz, seqlen, -1) + # output = output.permute(0, 2, 1, 3).contiguous().view(bsz, seqlen, -1) + output = self.out_proj(output) + return output + +class NoTPTransformerBlock(nn.Module): + def __init__(self, cfg, layer_id: int, multiple_of=256): + super().__init__() + + self.n_heads = cfg.num_attention_heads + self.dim = cfg.hidden_size + self.head_dim = cfg.hidden_size // cfg.num_attention_heads + self.self_attn = NoTPAttention(cfg) + self.mlp = NoTPFeedForward( + cfg, dim=cfg.hidden_size, hidden_dim=cfg.ffn_hidden_size + ) + self.layer_id = layer_id + self.layer_norm1 = torch.nn.LayerNorm( + cfg.hidden_size, eps=cfg.layernorm_epsilon + ) + self.layer_norm2 = torch.nn.LayerNorm( + cfg.hidden_size, eps=cfg.layernorm_epsilon + ) + + def forward(self, x: torch.Tensor): + residual = self.self_attn.forward(self.layer_norm1(x)) + h = x + residual + out = h + self.mlp.forward(self.layer_norm2(h)) + return out + + +class NoTPTransformer(nn.Module): + def __init__(self, cfg): + super().__init__() + + self.cfg = cfg + # self.recompute_list = self.cfg.get("recompute_list", []) + self.num_layers = cfg.num_layers # _get_num_layers(cfg) + + self.layers = torch.nn.ModuleList() + for layer_id in range(self.num_layers): + self.layers.append( + NoTPTransformerBlock( + cfg, + layer_id + 1, + ) + ) + + def forward( + self, + hidden_states, + ): + + for lid, layer in enumerate(self.layers): + # if lid in self.recompute_list: + # def custom(layer_id): + # def custom_forward(*args, **kwargs): + # x_ = self.layers[layer_id](*args, **kwargs) + # return x_ + + # return custom_forward + + # assert hidden_states.requires_grad == True, 
logger.warning( + # "When using recalculation, the input must have grad fn" + # ) + # hidden_states = tensor_parallel.checkpoint( + # custom(lid), + # False, + # hidden_states.contiguous() + # ) + # else: + hidden_states = layer(hidden_states) + + return hidden_states + + +# from megatron.core.tensor_parallel.layers import non_tensor_paralleled, local_dp_reduce, local_dp_scatter + +class VitModel(nn.Module): + def __init__( + self, + cfg, + freeze_embed=False, + freeze_pre_norm=False + ) -> None: + super().__init__() + + self.embeddings = CLIPVisionEmbeddings(hidden_size=cfg.hidden_size, image_size=cfg.image_size, patch_size=cfg.patch_size) + + if freeze_embed: + for name, param in self.embeddings.named_parameters(): + param.requires_grad = False + + self.transformer = NoTPTransformer(cfg=cfg) + + if cfg.get("fp32norm", False): + logger.info("Load fp32 layernorm for ViT.") + self.pre_layrnorm = LayerNormfp32( + cfg.hidden_size, + eps=cfg.get("pre_layernorm_epsilon", 1e-5), + ) + else: + self.pre_layrnorm = torch.nn.LayerNorm( + cfg.hidden_size, + eps=cfg.get("pre_layernorm_epsilon", 1e-5), + ) + + # self.pre_layrnorm = RMSNorm( + # cfg.hidden_size, + # eps=cfg.get("pre_layernorm_epsilon", 1e-5), + # sequence_parallel=False, + # use_fp32=True, + # use_optimus=True, + # ) + + if freeze_pre_norm: + for name, param in self.pre_layrnorm.named_parameters(): + param.requires_grad = False + + for p in self.parameters(): + p.micro_dp = True + + def set_input_tensor(self, input_tensor): + if not isinstance(input_tensor, list): + input_tensor = [input_tensor] + self.transformer.set_input_tensor(input_tensor[0]) + + def __str__(self) -> str: + return "open_clip" + + def forward( + self, + x, + patch_embeds + ): + x = self.embeddings(x, patch_embeds) + hidden_states = self.pre_layrnorm(x) + + # hidden_states, dis = local_dp_scatter(hidden_states) + output = self.transformer(hidden_states) + + # output = local_dp_reduce(output, dis) + + return output + + +vit_model_cfg = adict( 
+ num_layers=24, + hidden_size=1024, + num_heads = 16, + num_attention_heads=16, + ffn_hidden_size=4096, + seq_length=256, + max_position_embeddings=256, + use_flash_attn=False, + understand_projector_stride=2, + hidden_dropout = 0.0, + attention_dropout = 0.0, + no_persist_layer_norm = False, + layernorm_epsilon = 1e-5, + pre_layernorm_epsilon = 1e-5, + image_size = 224, + patch_size = 14, + recompute_list = [] +) + +def build_clip_l(): + return VitModel( + cfg=vit_model_cfg, + freeze_embed=False, + freeze_pre_norm=False, + ) + + + + + +#=========================Sam-Vary================================= + + +def get_abs_pos_sam(abs_pos, tgt_size): + + dtype = abs_pos.dtype + + src_size = abs_pos.size(1) + + if src_size != tgt_size: + old_pos_embed = abs_pos.permute(0, 3, 1, 2) + old_pos_embed = old_pos_embed.to(torch.float32) + new_pos_embed = F.interpolate( + old_pos_embed, + size=(tgt_size, tgt_size), + mode='bicubic', + antialias=True, + align_corners=False, + ).to(dtype) + new_pos_embed = new_pos_embed.permute(0, 2, 3, 1) + return new_pos_embed + else: + return abs_pos + + + + +class MLPBlock(nn.Module): + def __init__( + self, + embedding_dim: int, + mlp_dim: int, + act: Type[nn.Module] = nn.GELU, + ) -> None: + super().__init__() + self.lin1 = nn.Linear(embedding_dim, mlp_dim) + self.lin2 = nn.Linear(mlp_dim, embedding_dim) + self.act = act() + + def forward(self, x: torch.Tensor) -> torch.Tensor: + return self.lin2(self.act(self.lin1(x))) + + +# From https://github.com/facebookresearch/detectron2/blob/main/detectron2/layers/batch_norm.py # noqa +# Itself from https://github.com/facebookresearch/ConvNeXt/blob/d1fa8f6fef0a165b27399986cc2bdacc92777e40/models/convnext.py#L119 # noqa +class LayerNorm2d(nn.Module): + def __init__(self, num_channels: int, eps: float = 1e-6) -> None: + super().__init__() + self.weight = nn.Parameter(torch.ones(num_channels)) + self.bias = nn.Parameter(torch.zeros(num_channels)) + self.eps = eps + + def forward(self, x: 
torch.Tensor) -> torch.Tensor: + u = x.mean(1, keepdim=True) + s = (x - u).pow(2).mean(1, keepdim=True) + x = (x - u) / torch.sqrt(s + self.eps) + x = self.weight[:, None, None] * x + self.bias[:, None, None] + return x + + +# This class and its supporting functions below lightly adapted from the ViTDet backbone available at: https://github.com/facebookresearch/detectron2/blob/main/detectron2/modeling/backbone/vit.py # noqa +class ImageEncoderViT(nn.Module): + def __init__( + self, + img_size: int = 1024, + patch_size: int = 16, + in_chans: int = 3, + embed_dim: int = 768, + depth: int = 12, + num_heads: int = 12, + mlp_ratio: float = 4.0, + out_chans: int = 256, + qkv_bias: bool = True, + norm_layer: Type[nn.Module] = nn.LayerNorm, + act_layer: Type[nn.Module] = nn.GELU, + use_abs_pos: bool = True, + use_rel_pos: bool = False, + rel_pos_zero_init: bool = True, + window_size: int = 0, + global_attn_indexes: Tuple[int, ...] = (), + ) -> None: + """ + Args: + img_size (int): Input image size. + patch_size (int): Patch size. + in_chans (int): Number of input image channels. + embed_dim (int): Patch embedding dimension. + depth (int): Depth of ViT. + num_heads (int): Number of attention heads in each ViT block. + mlp_ratio (float): Ratio of mlp hidden dim to embedding dim. + qkv_bias (bool): If True, add a learnable bias to query, key, value. + norm_layer (nn.Module): Normalization layer. + act_layer (nn.Module): Activation layer. + use_abs_pos (bool): If True, use absolute positional embeddings. + use_rel_pos (bool): If True, add relative positional embeddings to the attention map. + rel_pos_zero_init (bool): If True, zero initialize relative positional parameters. + window_size (int): Window size for window attention blocks. + global_attn_indexes (list): Indexes for blocks using global attention. 
+ """ + super().__init__() + self.img_size = img_size + + self.patch_embed = PatchEmbed( + kernel_size=(patch_size, patch_size), + stride=(patch_size, patch_size), + in_chans=in_chans, + embed_dim=embed_dim, + ) + + self.pos_embed: Optional[nn.Parameter] = None + if use_abs_pos: + # Initialize absolute positional embedding with pretrain image size. + self.pos_embed = nn.Parameter( + torch.zeros(1, img_size // patch_size, img_size // patch_size, embed_dim) + ) + + self.blocks = nn.ModuleList() + for i in range(depth): + block = Block( + dim=embed_dim, + num_heads=num_heads, + mlp_ratio=mlp_ratio, + qkv_bias=qkv_bias, + norm_layer=norm_layer, + act_layer=act_layer, + use_rel_pos=use_rel_pos, + rel_pos_zero_init=rel_pos_zero_init, + window_size=window_size if i not in global_attn_indexes else 0, + input_size=(img_size // patch_size, img_size // patch_size), + ) + self.blocks.append(block) + + self.neck = nn.Sequential( + nn.Conv2d( + embed_dim, + out_chans, + kernel_size=1, + bias=False, + ), + LayerNorm2d(out_chans), + nn.Conv2d( + out_chans, + out_chans, + kernel_size=3, + padding=1, + bias=False, + ), + LayerNorm2d(out_chans), + ) + + self.net_2 = nn.Conv2d(256, 512, kernel_size=3, stride=2, padding=1, bias=False) + self.net_3 = nn.Conv2d(512, 1024, kernel_size=3, stride=2, padding=1, bias=False) + + def forward(self, x: torch.Tensor) -> torch.Tensor: + x = self.patch_embed(x) + if self.pos_embed is not None: + # x = x + self.pos_embed + x = x + get_abs_pos_sam(self.pos_embed, x.size(1)) + + for blk in self.blocks: + x = blk(x) + + x = self.neck(x.permute(0, 3, 1, 2)) + x2 = self.net_2(x) + x3 = self.net_3(x2.clone()) + + return x3 + + +class Block(nn.Module): + """Transformer blocks with support of window attention and residual propagation blocks""" + + def __init__( + self, + dim: int, + num_heads: int, + mlp_ratio: float = 4.0, + qkv_bias: bool = True, + norm_layer: Type[nn.Module] = nn.LayerNorm, + act_layer: Type[nn.Module] = nn.GELU, + use_rel_pos: bool = 
False, + rel_pos_zero_init: bool = True, + window_size: int = 0, + input_size: Optional[Tuple[int, int]] = None, + ) -> None: + """ + Args: + dim (int): Number of input channels. + num_heads (int): Number of attention heads in each ViT block. + mlp_ratio (float): Ratio of mlp hidden dim to embedding dim. + qkv_bias (bool): If True, add a learnable bias to query, key, value. + norm_layer (nn.Module): Normalization layer. + act_layer (nn.Module): Activation layer. + use_rel_pos (bool): If True, add relative positional embeddings to the attention map. + rel_pos_zero_init (bool): If True, zero initialize relative positional parameters. + window_size (int): Window size for window attention blocks. If it equals 0, then + use global attention. + input_size (tuple(int, int) or None): Input resolution for calculating the relative + positional parameter size. + """ + super().__init__() + self.norm1 = norm_layer(dim) + self.attn = Attention( + dim, + num_heads=num_heads, + qkv_bias=qkv_bias, + use_rel_pos=use_rel_pos, + rel_pos_zero_init=rel_pos_zero_init, + input_size=input_size if window_size == 0 else (window_size, window_size), + ) + + self.norm2 = norm_layer(dim) + self.mlp = MLPBlock(embedding_dim=dim, mlp_dim=int(dim * mlp_ratio), act=act_layer) + + self.window_size = window_size + + def forward(self, x: torch.Tensor) -> torch.Tensor: + shortcut = x + x = self.norm1(x) + # Window partition + if self.window_size > 0: + H, W = x.shape[1], x.shape[2] + x, pad_hw = window_partition(x, self.window_size) + + x = self.attn(x) + # Reverse window partition + if self.window_size > 0: + x = window_unpartition(x, self.window_size, pad_hw, (H, W)) + + x = shortcut + x + x = x + self.mlp(self.norm2(x)) + + return x + + +class Attention(nn.Module): + """Multi-head Attention block with relative position embeddings.""" + + def __init__( + self, + dim: int, + num_heads: int = 8, + qkv_bias: bool = True, + use_rel_pos: bool = False, + rel_pos_zero_init: bool = True, + input_size: 
Optional[Tuple[int, int]] = None, + ) -> None: + """ + Args: + dim (int): Number of input channels. + num_heads (int): Number of attention heads. + qkv_bias (bool): If True, add a learnable bias to query, key, value. + rel_pos (bool): If True, add relative positional embeddings to the attention map. + rel_pos_zero_init (bool): If True, zero initialize relative positional parameters. + input_size (tuple(int, int) or None): Input resolution for calculating the relative + positional parameter size. + """ + super().__init__() + self.num_heads = num_heads + head_dim = dim // num_heads + self.scale = head_dim**-0.5 + + self.qkv = nn.Linear(dim, dim * 3, bias=qkv_bias) + self.proj = nn.Linear(dim, dim) + + self.use_rel_pos = use_rel_pos + if self.use_rel_pos: + assert ( + input_size is not None + ), "Input size must be provided if using relative positional encoding." + # initialize relative positional embeddings + self.rel_pos_h = nn.Parameter(torch.zeros(2 * input_size[0] - 1, head_dim)) + self.rel_pos_w = nn.Parameter(torch.zeros(2 * input_size[1] - 1, head_dim)) + + def forward(self, x: torch.Tensor) -> torch.Tensor: + B, H, W, _ = x.shape + # qkv with shape (3, B, nHead, H * W, C) + qkv = self.qkv(x).reshape(B, H * W, 3, self.num_heads, -1).permute(2, 0, 3, 1, 4) + # q, k, v with shape (B * nHead, H * W, C) + q, k, v = qkv.reshape(3, B * self.num_heads, H * W, -1).unbind(0) + + rel_h, rel_w = None, None + if self.use_rel_pos: + rel_h, rel_w = add_decomposed_rel_pos(q, self.rel_pos_h, self.rel_pos_w, (H, W), (H, W)) + + q = q.view(B, self.num_heads, H * W, -1) + k = k.view(B, self.num_heads, H * W, -1) + v = v.view(B, self.num_heads, H * W, -1) + + if self.use_rel_pos: + rel_h = rel_h.view(B, self.num_heads, rel_h.size(1), rel_h.size(2), rel_h.size(3)) + rel_w = rel_w.view(B, self.num_heads, rel_w.size(1), rel_w.size(2), rel_w.size(3)) + attn_bias = (rel_h + rel_w).view(B, self.num_heads, rel_h.size(2), rel_h.size(3) * rel_w.size(4)) + x = 
torch.nn.functional.scaled_dot_product_attention(q, k, v, attn_mask=attn_bias) + # x = _attention_rel_h_rel_w(q, k, v, rel_h, rel_w) + else: + x = torch.nn.functional.scaled_dot_product_attention(q, k, v) + + x = x.view(B, self.num_heads, H, W, -1).permute(0, 2, 3, 1, 4).reshape(B, H, W, -1) + + x = self.proj(x) + + return x + + +def window_partition(x: torch.Tensor, window_size: int) -> Tuple[torch.Tensor, Tuple[int, int]]: + """ + Partition into non-overlapping windows with padding if needed. + Args: + x (tensor): input tokens with [B, H, W, C]. + window_size (int): window size. + + Returns: + windows: windows after partition with [B * num_windows, window_size, window_size, C]. + (Hp, Wp): padded height and width before partition + """ + B, H, W, C = x.shape + + pad_h = (window_size - H % window_size) % window_size + pad_w = (window_size - W % window_size) % window_size + if pad_h > 0 or pad_w > 0: + x = F.pad(x, (0, 0, 0, pad_w, 0, pad_h)) + Hp, Wp = H + pad_h, W + pad_w + + x = x.view(B, Hp // window_size, window_size, Wp // window_size, window_size, C) + windows = x.permute(0, 1, 3, 2, 4, 5).contiguous().view(-1, window_size, window_size, C) + return windows, (Hp, Wp) + + +def window_unpartition( + windows: torch.Tensor, window_size: int, pad_hw: Tuple[int, int], hw: Tuple[int, int] +) -> torch.Tensor: + """ + Window unpartition into original sequences and removing padding. + Args: + windows (tensor): input tokens with [B * num_windows, window_size, window_size, C]. + window_size (int): window size. + pad_hw (Tuple): padded height and width (Hp, Wp). + hw (Tuple): original height and width (H, W) before padding. + + Returns: + x: unpartitioned sequences with [B, H, W, C]. 
+ """ + Hp, Wp = pad_hw + H, W = hw + B = windows.shape[0] // (Hp * Wp // window_size // window_size) + x = windows.view(B, Hp // window_size, Wp // window_size, window_size, window_size, -1) + x = x.permute(0, 1, 3, 2, 4, 5).contiguous().view(B, Hp, Wp, -1) + + if Hp > H or Wp > W: + x = x[:, :H, :W, :].contiguous() + return x + + +def get_rel_pos(q_size: int, k_size: int, rel_pos: torch.Tensor) -> torch.Tensor: + """ + Get relative positional embeddings according to the relative positions of + query and key sizes. + Args: + q_size (int): size of query q. + k_size (int): size of key k. + rel_pos (Tensor): relative position embeddings (L, C). + + Returns: + Extracted positional embeddings according to relative positions. + """ + max_rel_dist = int(2 * max(q_size, k_size) - 1) + # Interpolate rel pos if needed. + if rel_pos.shape[0] != max_rel_dist: + # Interpolate rel pos. + dtype = rel_pos.dtype + rel_pos = rel_pos.to(torch.float32) + rel_pos_resized = F.interpolate( + rel_pos.reshape(1, rel_pos.shape[0], -1).permute(0, 2, 1), + size=max_rel_dist, + mode="linear", + ).to(dtype) + rel_pos_resized = rel_pos_resized.reshape(-1, max_rel_dist).permute(1, 0) + else: + rel_pos_resized = rel_pos + + # Scale the coords with short length if shapes for q and k are different. + q_coords = torch.arange(q_size, device=rel_pos.device)[:, None] * max(k_size / q_size, 1.0) + k_coords = torch.arange(k_size, device=rel_pos.device)[None, :] * max(q_size / k_size, 1.0) + relative_coords = (q_coords - k_coords) + (k_size - 1) * max(q_size / k_size, 1.0) + + return rel_pos_resized[relative_coords.long()] + + +def add_decomposed_rel_pos( + q: torch.Tensor, + rel_pos_h: torch.Tensor, + rel_pos_w: torch.Tensor, + q_size: Tuple[int, int], + k_size: Tuple[int, int], +) -> torch.Tensor: + """ + Calculate decomposed Relative Positional Embeddings from :paper:`mvitv2`. 
+ https://github.com/facebookresearch/mvit/blob/19786631e330df9f3622e5402b4a419a263a2c80/mvit/models/attention.py # noqa B950 + Args: + q (Tensor): query q in the attention layer with shape (B, q_h * q_w, C). + rel_pos_h (Tensor): relative position embeddings (Lh, C) for height axis. + rel_pos_w (Tensor): relative position embeddings (Lw, C) for width axis. + q_size (Tuple): spatial sequence size of query q with (q_h, q_w). + k_size (Tuple): spatial sequence size of key k with (k_h, k_w). + + Returns: + attn (Tensor): attention map with added relative positional embeddings. + """ + q_h, q_w = q_size + k_h, k_w = k_size + Rh = get_rel_pos(q_h, k_h, rel_pos_h) + Rw = get_rel_pos(q_w, k_w, rel_pos_w) + + B, _, dim = q.shape + r_q = q.reshape(B, q_h, q_w, dim) + rel_h = torch.einsum("bhwc,hkc->bhwk", r_q, Rh) + rel_w = torch.einsum("bhwc,wkc->bhwk", r_q, Rw) + rel_h = rel_h.unsqueeze(-1) + rel_w = rel_w.unsqueeze(-2) + rel_h = rel_h.reshape(B, q_h * q_w, k_h, 1) + rel_w = rel_w.reshape(B, q_h * q_w, 1, k_w) + + return rel_h, rel_w + + +class PatchEmbed(nn.Module): + """ + Image to Patch Embedding. + """ + + def __init__( + self, + kernel_size: Tuple[int, int] = (16, 16), + stride: Tuple[int, int] = (16, 16), + padding: Tuple[int, int] = (0, 0), + in_chans: int = 3, + embed_dim: int = 768, + ) -> None: + """ + Args: + kernel_size (Tuple): kernel size of the projection layer. + stride (Tuple): stride of the projection layer. + padding (Tuple): padding size of the projection layer. + in_chans (int): Number of input image channels. + embed_dim (int): Patch embedding dimension. 
+ """ + super().__init__() + + self.proj = nn.Conv2d( + in_chans, embed_dim, kernel_size=kernel_size, stride=stride, padding=padding + ) + + def forward(self, x: torch.Tensor) -> torch.Tensor: + x = self.proj(x) + # B C H W -> B H W C + x = x.permute(0, 2, 3, 1) + return x + + +def build_sam_vit_b(checkpoint=None): + return _build_sam( + encoder_embed_dim=768, + encoder_depth=12, + encoder_num_heads=12, + encoder_global_attn_indexes=[2, 5, 8, 11], + checkpoint=checkpoint, + ) + +def build_sam_fast_vit_b(checkpoint=None, compile_mode='max-autotune', dtype=torch.bfloat16): + image_encoder = build_sam_vit_b(checkpoint).eval().to(dtype) + # sam = _apply_eval_dtype_sam(sam, dtype) + image_encoder = torch.compile(image_encoder, mode=compile_mode) + return image_encoder + + +def _build_sam( + encoder_embed_dim, + encoder_depth, + encoder_num_heads, + encoder_global_attn_indexes, + checkpoint=None, +): + prompt_embed_dim = 256 + image_size = 1024 + vit_patch_size = 16 + image_embedding_size = image_size // vit_patch_size + image_encoder=ImageEncoderViT( + depth=encoder_depth, + embed_dim=encoder_embed_dim, + img_size=image_size, + mlp_ratio=4, + norm_layer=partial(torch.nn.LayerNorm, eps=1e-6), + num_heads=encoder_num_heads, + patch_size=vit_patch_size, + qkv_bias=True, + use_rel_pos=True, + global_attn_indexes=encoder_global_attn_indexes, + window_size=14, + out_chans=prompt_embed_dim, + ) + image_encoder.eval() + if checkpoint is not None: + # with open(checkpoint, "rb") as f: + state_dict = torch.load(checkpoint) + # print(state_dict.keys()) + # for key in state_dict: + # image_encoder.load_state_dict({k[14:]: v for k, v in state_dict.items() if 'image_encoder' in k}, strict=False) + # ocr-anyting + # image_encoder.load_state_dict(state_dict, strict=True) + # tob + image_encoder.load_state_dict({k[30:]: v for k, v in state_dict.items() if 'vision_tower_high' in k}, strict=True) + print(checkpoint) + return image_encoder \ No newline at end of file diff --git 
a/model-00001-of-000001.safetensors b/model-00001-of-000001.safetensors new file mode 100644 index 0000000..a97d009 --- /dev/null +++ b/model-00001-of-000001.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1169e7cdc28ff2fb6186556acb2175db148ad26a62097df4c45a17e523180d3f +size 6672547120 diff --git a/model.safetensors.index.json b/model.safetensors.index.json new file mode 100644 index 0000000..76a9e99 --- /dev/null +++ b/model.safetensors.index.json @@ -0,0 +1,2717 @@ +{ + "metadata": { + "total_size": 6672212480 + }, + "weight_map": { + "model.sam_model.pos_embed": "model-00001-of-000001.safetensors", + "model.sam_model.patch_embed.proj.weight": "model-00001-of-000001.safetensors", + "model.sam_model.patch_embed.proj.bias": "model-00001-of-000001.safetensors", + "model.sam_model.blocks.0.norm1.weight": "model-00001-of-000001.safetensors", + "model.sam_model.blocks.0.norm1.bias": "model-00001-of-000001.safetensors", + "model.sam_model.blocks.0.attn.rel_pos_h": "model-00001-of-000001.safetensors", + "model.sam_model.blocks.0.attn.rel_pos_w": "model-00001-of-000001.safetensors", + "model.sam_model.blocks.0.attn.qkv.weight": "model-00001-of-000001.safetensors", + "model.sam_model.blocks.0.attn.qkv.bias": "model-00001-of-000001.safetensors", + "model.sam_model.blocks.0.attn.proj.weight": "model-00001-of-000001.safetensors", + "model.sam_model.blocks.0.attn.proj.bias": "model-00001-of-000001.safetensors", + "model.sam_model.blocks.0.norm2.weight": "model-00001-of-000001.safetensors", + "model.sam_model.blocks.0.norm2.bias": "model-00001-of-000001.safetensors", + "model.sam_model.blocks.0.mlp.lin1.weight": "model-00001-of-000001.safetensors", + "model.sam_model.blocks.0.mlp.lin1.bias": "model-00001-of-000001.safetensors", + "model.sam_model.blocks.0.mlp.lin2.weight": "model-00001-of-000001.safetensors", + "model.sam_model.blocks.0.mlp.lin2.bias": "model-00001-of-000001.safetensors", + "model.sam_model.blocks.1.norm1.weight": 
"model-00001-of-000001.safetensors", + "model.sam_model.blocks.1.norm1.bias": "model-00001-of-000001.safetensors", + "model.sam_model.blocks.1.attn.rel_pos_h": "model-00001-of-000001.safetensors", + "model.sam_model.blocks.1.attn.rel_pos_w": "model-00001-of-000001.safetensors", + "model.sam_model.blocks.1.attn.qkv.weight": "model-00001-of-000001.safetensors", + "model.sam_model.blocks.1.attn.qkv.bias": "model-00001-of-000001.safetensors", + "model.sam_model.blocks.1.attn.proj.weight": "model-00001-of-000001.safetensors", + "model.sam_model.blocks.1.attn.proj.bias": "model-00001-of-000001.safetensors", + "model.sam_model.blocks.1.norm2.weight": "model-00001-of-000001.safetensors", + "model.sam_model.blocks.1.norm2.bias": "model-00001-of-000001.safetensors", + "model.sam_model.blocks.1.mlp.lin1.weight": "model-00001-of-000001.safetensors", + "model.sam_model.blocks.1.mlp.lin1.bias": "model-00001-of-000001.safetensors", + "model.sam_model.blocks.1.mlp.lin2.weight": "model-00001-of-000001.safetensors", + "model.sam_model.blocks.1.mlp.lin2.bias": "model-00001-of-000001.safetensors", + "model.sam_model.blocks.2.norm1.weight": "model-00001-of-000001.safetensors", + "model.sam_model.blocks.2.norm1.bias": "model-00001-of-000001.safetensors", + "model.sam_model.blocks.2.attn.rel_pos_h": "model-00001-of-000001.safetensors", + "model.sam_model.blocks.2.attn.rel_pos_w": "model-00001-of-000001.safetensors", + "model.sam_model.blocks.2.attn.qkv.weight": "model-00001-of-000001.safetensors", + "model.sam_model.blocks.2.attn.qkv.bias": "model-00001-of-000001.safetensors", + "model.sam_model.blocks.2.attn.proj.weight": "model-00001-of-000001.safetensors", + "model.sam_model.blocks.2.attn.proj.bias": "model-00001-of-000001.safetensors", + "model.sam_model.blocks.2.norm2.weight": "model-00001-of-000001.safetensors", + "model.sam_model.blocks.2.norm2.bias": "model-00001-of-000001.safetensors", + "model.sam_model.blocks.2.mlp.lin1.weight": "model-00001-of-000001.safetensors", + 
"model.sam_model.blocks.2.mlp.lin1.bias": "model-00001-of-000001.safetensors", + "model.sam_model.blocks.2.mlp.lin2.weight": "model-00001-of-000001.safetensors", + "model.sam_model.blocks.2.mlp.lin2.bias": "model-00001-of-000001.safetensors", + "model.sam_model.blocks.3.norm1.weight": "model-00001-of-000001.safetensors", + "model.sam_model.blocks.3.norm1.bias": "model-00001-of-000001.safetensors", + "model.sam_model.blocks.3.attn.rel_pos_h": "model-00001-of-000001.safetensors", + "model.sam_model.blocks.3.attn.rel_pos_w": "model-00001-of-000001.safetensors", + "model.sam_model.blocks.3.attn.qkv.weight": "model-00001-of-000001.safetensors", + "model.sam_model.blocks.3.attn.qkv.bias": "model-00001-of-000001.safetensors", + "model.sam_model.blocks.3.attn.proj.weight": "model-00001-of-000001.safetensors", + "model.sam_model.blocks.3.attn.proj.bias": "model-00001-of-000001.safetensors", + "model.sam_model.blocks.3.norm2.weight": "model-00001-of-000001.safetensors", + "model.sam_model.blocks.3.norm2.bias": "model-00001-of-000001.safetensors", + "model.sam_model.blocks.3.mlp.lin1.weight": "model-00001-of-000001.safetensors", + "model.sam_model.blocks.3.mlp.lin1.bias": "model-00001-of-000001.safetensors", + "model.sam_model.blocks.3.mlp.lin2.weight": "model-00001-of-000001.safetensors", + "model.sam_model.blocks.3.mlp.lin2.bias": "model-00001-of-000001.safetensors", + "model.sam_model.blocks.4.norm1.weight": "model-00001-of-000001.safetensors", + "model.sam_model.blocks.4.norm1.bias": "model-00001-of-000001.safetensors", + "model.sam_model.blocks.4.attn.rel_pos_h": "model-00001-of-000001.safetensors", + "model.sam_model.blocks.4.attn.rel_pos_w": "model-00001-of-000001.safetensors", + "model.sam_model.blocks.4.attn.qkv.weight": "model-00001-of-000001.safetensors", + "model.sam_model.blocks.4.attn.qkv.bias": "model-00001-of-000001.safetensors", + "model.sam_model.blocks.4.attn.proj.weight": "model-00001-of-000001.safetensors", + "model.sam_model.blocks.4.attn.proj.bias": 
"model-00001-of-000001.safetensors", + "model.sam_model.blocks.4.norm2.weight": "model-00001-of-000001.safetensors", + "model.sam_model.blocks.4.norm2.bias": "model-00001-of-000001.safetensors", + "model.sam_model.blocks.4.mlp.lin1.weight": "model-00001-of-000001.safetensors", + "model.sam_model.blocks.4.mlp.lin1.bias": "model-00001-of-000001.safetensors", + "model.sam_model.blocks.4.mlp.lin2.weight": "model-00001-of-000001.safetensors", + "model.sam_model.blocks.4.mlp.lin2.bias": "model-00001-of-000001.safetensors", + "model.sam_model.blocks.5.norm1.weight": "model-00001-of-000001.safetensors", + "model.sam_model.blocks.5.norm1.bias": "model-00001-of-000001.safetensors", + "model.sam_model.blocks.5.attn.rel_pos_h": "model-00001-of-000001.safetensors", + "model.sam_model.blocks.5.attn.rel_pos_w": "model-00001-of-000001.safetensors", + "model.sam_model.blocks.5.attn.qkv.weight": "model-00001-of-000001.safetensors", + "model.sam_model.blocks.5.attn.qkv.bias": "model-00001-of-000001.safetensors", + "model.sam_model.blocks.5.attn.proj.weight": "model-00001-of-000001.safetensors", + "model.sam_model.blocks.5.attn.proj.bias": "model-00001-of-000001.safetensors", + "model.sam_model.blocks.5.norm2.weight": "model-00001-of-000001.safetensors", + "model.sam_model.blocks.5.norm2.bias": "model-00001-of-000001.safetensors", + "model.sam_model.blocks.5.mlp.lin1.weight": "model-00001-of-000001.safetensors", + "model.sam_model.blocks.5.mlp.lin1.bias": "model-00001-of-000001.safetensors", + "model.sam_model.blocks.5.mlp.lin2.weight": "model-00001-of-000001.safetensors", + "model.sam_model.blocks.5.mlp.lin2.bias": "model-00001-of-000001.safetensors", + "model.sam_model.blocks.6.norm1.weight": "model-00001-of-000001.safetensors", + "model.sam_model.blocks.6.norm1.bias": "model-00001-of-000001.safetensors", + "model.sam_model.blocks.6.attn.rel_pos_h": "model-00001-of-000001.safetensors", + "model.sam_model.blocks.6.attn.rel_pos_w": "model-00001-of-000001.safetensors", + 
"model.sam_model.blocks.6.attn.qkv.weight": "model-00001-of-000001.safetensors", + "model.sam_model.blocks.6.attn.qkv.bias": "model-00001-of-000001.safetensors", + "model.sam_model.blocks.6.attn.proj.weight": "model-00001-of-000001.safetensors", + "model.sam_model.blocks.6.attn.proj.bias": "model-00001-of-000001.safetensors", + "model.sam_model.blocks.6.norm2.weight": "model-00001-of-000001.safetensors", + "model.sam_model.blocks.6.norm2.bias": "model-00001-of-000001.safetensors", + "model.sam_model.blocks.6.mlp.lin1.weight": "model-00001-of-000001.safetensors", + "model.sam_model.blocks.6.mlp.lin1.bias": "model-00001-of-000001.safetensors", + "model.sam_model.blocks.6.mlp.lin2.weight": "model-00001-of-000001.safetensors", + "model.sam_model.blocks.6.mlp.lin2.bias": "model-00001-of-000001.safetensors", + "model.sam_model.blocks.7.norm1.weight": "model-00001-of-000001.safetensors", + "model.sam_model.blocks.7.norm1.bias": "model-00001-of-000001.safetensors", + "model.sam_model.blocks.7.attn.rel_pos_h": "model-00001-of-000001.safetensors", + "model.sam_model.blocks.7.attn.rel_pos_w": "model-00001-of-000001.safetensors", + "model.sam_model.blocks.7.attn.qkv.weight": "model-00001-of-000001.safetensors", + "model.sam_model.blocks.7.attn.qkv.bias": "model-00001-of-000001.safetensors", + "model.sam_model.blocks.7.attn.proj.weight": "model-00001-of-000001.safetensors", + "model.sam_model.blocks.7.attn.proj.bias": "model-00001-of-000001.safetensors", + "model.sam_model.blocks.7.norm2.weight": "model-00001-of-000001.safetensors", + "model.sam_model.blocks.7.norm2.bias": "model-00001-of-000001.safetensors", + "model.sam_model.blocks.7.mlp.lin1.weight": "model-00001-of-000001.safetensors", + "model.sam_model.blocks.7.mlp.lin1.bias": "model-00001-of-000001.safetensors", + "model.sam_model.blocks.7.mlp.lin2.weight": "model-00001-of-000001.safetensors", + "model.sam_model.blocks.7.mlp.lin2.bias": "model-00001-of-000001.safetensors", + "model.sam_model.blocks.8.norm1.weight": 
"model-00001-of-000001.safetensors", + "model.sam_model.blocks.8.norm1.bias": "model-00001-of-000001.safetensors", + "model.sam_model.blocks.8.attn.rel_pos_h": "model-00001-of-000001.safetensors", + "model.sam_model.blocks.8.attn.rel_pos_w": "model-00001-of-000001.safetensors", + "model.sam_model.blocks.8.attn.qkv.weight": "model-00001-of-000001.safetensors", + "model.sam_model.blocks.8.attn.qkv.bias": "model-00001-of-000001.safetensors", + "model.sam_model.blocks.8.attn.proj.weight": "model-00001-of-000001.safetensors", + "model.sam_model.blocks.8.attn.proj.bias": "model-00001-of-000001.safetensors", + "model.sam_model.blocks.8.norm2.weight": "model-00001-of-000001.safetensors", + "model.sam_model.blocks.8.norm2.bias": "model-00001-of-000001.safetensors", + "model.sam_model.blocks.8.mlp.lin1.weight": "model-00001-of-000001.safetensors", + "model.sam_model.blocks.8.mlp.lin1.bias": "model-00001-of-000001.safetensors", + "model.sam_model.blocks.8.mlp.lin2.weight": "model-00001-of-000001.safetensors", + "model.sam_model.blocks.8.mlp.lin2.bias": "model-00001-of-000001.safetensors", + "model.sam_model.blocks.9.norm1.weight": "model-00001-of-000001.safetensors", + "model.sam_model.blocks.9.norm1.bias": "model-00001-of-000001.safetensors", + "model.sam_model.blocks.9.attn.rel_pos_h": "model-00001-of-000001.safetensors", + "model.sam_model.blocks.9.attn.rel_pos_w": "model-00001-of-000001.safetensors", + "model.sam_model.blocks.9.attn.qkv.weight": "model-00001-of-000001.safetensors", + "model.sam_model.blocks.9.attn.qkv.bias": "model-00001-of-000001.safetensors", + "model.sam_model.blocks.9.attn.proj.weight": "model-00001-of-000001.safetensors", + "model.sam_model.blocks.9.attn.proj.bias": "model-00001-of-000001.safetensors", + "model.sam_model.blocks.9.norm2.weight": "model-00001-of-000001.safetensors", + "model.sam_model.blocks.9.norm2.bias": "model-00001-of-000001.safetensors", + "model.sam_model.blocks.9.mlp.lin1.weight": "model-00001-of-000001.safetensors", + 
"model.sam_model.blocks.9.mlp.lin1.bias": "model-00001-of-000001.safetensors", + "model.sam_model.blocks.9.mlp.lin2.weight": "model-00001-of-000001.safetensors", + "model.sam_model.blocks.9.mlp.lin2.bias": "model-00001-of-000001.safetensors", + "model.sam_model.blocks.10.norm1.weight": "model-00001-of-000001.safetensors", + "model.sam_model.blocks.10.norm1.bias": "model-00001-of-000001.safetensors", + "model.sam_model.blocks.10.attn.rel_pos_h": "model-00001-of-000001.safetensors", + "model.sam_model.blocks.10.attn.rel_pos_w": "model-00001-of-000001.safetensors", + "model.sam_model.blocks.10.attn.qkv.weight": "model-00001-of-000001.safetensors", + "model.sam_model.blocks.10.attn.qkv.bias": "model-00001-of-000001.safetensors", + "model.sam_model.blocks.10.attn.proj.weight": "model-00001-of-000001.safetensors", + "model.sam_model.blocks.10.attn.proj.bias": "model-00001-of-000001.safetensors", + "model.sam_model.blocks.10.norm2.weight": "model-00001-of-000001.safetensors", + "model.sam_model.blocks.10.norm2.bias": "model-00001-of-000001.safetensors", + "model.sam_model.blocks.10.mlp.lin1.weight": "model-00001-of-000001.safetensors", + "model.sam_model.blocks.10.mlp.lin1.bias": "model-00001-of-000001.safetensors", + "model.sam_model.blocks.10.mlp.lin2.weight": "model-00001-of-000001.safetensors", + "model.sam_model.blocks.10.mlp.lin2.bias": "model-00001-of-000001.safetensors", + "model.sam_model.blocks.11.norm1.weight": "model-00001-of-000001.safetensors", + "model.sam_model.blocks.11.norm1.bias": "model-00001-of-000001.safetensors", + "model.sam_model.blocks.11.attn.rel_pos_h": "model-00001-of-000001.safetensors", + "model.sam_model.blocks.11.attn.rel_pos_w": "model-00001-of-000001.safetensors", + "model.sam_model.blocks.11.attn.qkv.weight": "model-00001-of-000001.safetensors", + "model.sam_model.blocks.11.attn.qkv.bias": "model-00001-of-000001.safetensors", + "model.sam_model.blocks.11.attn.proj.weight": "model-00001-of-000001.safetensors", + 
"model.sam_model.blocks.11.attn.proj.bias": "model-00001-of-000001.safetensors", + "model.sam_model.blocks.11.norm2.weight": "model-00001-of-000001.safetensors", + "model.sam_model.blocks.11.norm2.bias": "model-00001-of-000001.safetensors", + "model.sam_model.blocks.11.mlp.lin1.weight": "model-00001-of-000001.safetensors", + "model.sam_model.blocks.11.mlp.lin1.bias": "model-00001-of-000001.safetensors", + "model.sam_model.blocks.11.mlp.lin2.weight": "model-00001-of-000001.safetensors", + "model.sam_model.blocks.11.mlp.lin2.bias": "model-00001-of-000001.safetensors", + "model.sam_model.neck.0.weight": "model-00001-of-000001.safetensors", + "model.sam_model.neck.1.weight": "model-00001-of-000001.safetensors", + "model.sam_model.neck.1.bias": "model-00001-of-000001.safetensors", + "model.sam_model.neck.2.weight": "model-00001-of-000001.safetensors", + "model.sam_model.neck.3.weight": "model-00001-of-000001.safetensors", + "model.sam_model.neck.3.bias": "model-00001-of-000001.safetensors", + "model.sam_model.net_2.weight": "model-00001-of-000001.safetensors", + "model.sam_model.net_3.weight": "model-00001-of-000001.safetensors", + "model.vision_model.embeddings.class_embedding": "model-00001-of-000001.safetensors", + "model.vision_model.embeddings.patch_embedding.weight": "model-00001-of-000001.safetensors", + "model.vision_model.embeddings.position_embedding.weight": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.0.self_attn.qkv_proj.weight": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.0.self_attn.qkv_proj.bias": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.0.self_attn.out_proj.weight": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.0.self_attn.out_proj.bias": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.0.mlp.fc1.weight": "model-00001-of-000001.safetensors", + 
"model.vision_model.transformer.layers.0.mlp.fc1.bias": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.0.mlp.fc2.weight": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.0.mlp.fc2.bias": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.0.layer_norm1.weight": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.0.layer_norm1.bias": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.0.layer_norm2.weight": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.0.layer_norm2.bias": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.1.self_attn.qkv_proj.weight": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.1.self_attn.qkv_proj.bias": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.1.self_attn.out_proj.weight": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.1.self_attn.out_proj.bias": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.1.mlp.fc1.weight": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.1.mlp.fc1.bias": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.1.mlp.fc2.weight": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.1.mlp.fc2.bias": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.1.layer_norm1.weight": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.1.layer_norm1.bias": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.1.layer_norm2.weight": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.1.layer_norm2.bias": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.2.self_attn.qkv_proj.weight": 
"model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.2.self_attn.qkv_proj.bias": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.2.self_attn.out_proj.weight": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.2.self_attn.out_proj.bias": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.2.mlp.fc1.weight": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.2.mlp.fc1.bias": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.2.mlp.fc2.weight": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.2.mlp.fc2.bias": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.2.layer_norm1.weight": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.2.layer_norm1.bias": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.2.layer_norm2.weight": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.2.layer_norm2.bias": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.3.self_attn.qkv_proj.weight": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.3.self_attn.qkv_proj.bias": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.3.self_attn.out_proj.weight": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.3.self_attn.out_proj.bias": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.3.mlp.fc1.weight": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.3.mlp.fc1.bias": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.3.mlp.fc2.weight": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.3.mlp.fc2.bias": "model-00001-of-000001.safetensors", + 
"model.vision_model.transformer.layers.3.layer_norm1.weight": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.3.layer_norm1.bias": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.3.layer_norm2.weight": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.3.layer_norm2.bias": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.4.self_attn.qkv_proj.weight": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.4.self_attn.qkv_proj.bias": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.4.self_attn.out_proj.weight": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.4.self_attn.out_proj.bias": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.4.mlp.fc1.weight": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.4.mlp.fc1.bias": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.4.mlp.fc2.weight": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.4.mlp.fc2.bias": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.4.layer_norm1.weight": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.4.layer_norm1.bias": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.4.layer_norm2.weight": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.4.layer_norm2.bias": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.5.self_attn.qkv_proj.weight": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.5.self_attn.qkv_proj.bias": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.5.self_attn.out_proj.weight": "model-00001-of-000001.safetensors", + 
"model.vision_model.transformer.layers.5.self_attn.out_proj.bias": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.5.mlp.fc1.weight": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.5.mlp.fc1.bias": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.5.mlp.fc2.weight": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.5.mlp.fc2.bias": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.5.layer_norm1.weight": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.5.layer_norm1.bias": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.5.layer_norm2.weight": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.5.layer_norm2.bias": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.6.self_attn.qkv_proj.weight": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.6.self_attn.qkv_proj.bias": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.6.self_attn.out_proj.weight": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.6.self_attn.out_proj.bias": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.6.mlp.fc1.weight": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.6.mlp.fc1.bias": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.6.mlp.fc2.weight": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.6.mlp.fc2.bias": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.6.layer_norm1.weight": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.6.layer_norm1.bias": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.6.layer_norm2.weight": 
"model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.6.layer_norm2.bias": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.7.self_attn.qkv_proj.weight": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.7.self_attn.qkv_proj.bias": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.7.self_attn.out_proj.weight": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.7.self_attn.out_proj.bias": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.7.mlp.fc1.weight": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.7.mlp.fc1.bias": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.7.mlp.fc2.weight": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.7.mlp.fc2.bias": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.7.layer_norm1.weight": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.7.layer_norm1.bias": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.7.layer_norm2.weight": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.7.layer_norm2.bias": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.8.self_attn.qkv_proj.weight": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.8.self_attn.qkv_proj.bias": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.8.self_attn.out_proj.weight": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.8.self_attn.out_proj.bias": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.8.mlp.fc1.weight": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.8.mlp.fc1.bias": "model-00001-of-000001.safetensors", + 
"model.vision_model.transformer.layers.8.mlp.fc2.weight": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.8.mlp.fc2.bias": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.8.layer_norm1.weight": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.8.layer_norm1.bias": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.8.layer_norm2.weight": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.8.layer_norm2.bias": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.9.self_attn.qkv_proj.weight": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.9.self_attn.qkv_proj.bias": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.9.self_attn.out_proj.weight": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.9.self_attn.out_proj.bias": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.9.mlp.fc1.weight": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.9.mlp.fc1.bias": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.9.mlp.fc2.weight": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.9.mlp.fc2.bias": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.9.layer_norm1.weight": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.9.layer_norm1.bias": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.9.layer_norm2.weight": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.9.layer_norm2.bias": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.10.self_attn.qkv_proj.weight": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.10.self_attn.qkv_proj.bias": 
"model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.10.self_attn.out_proj.weight": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.10.self_attn.out_proj.bias": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.10.mlp.fc1.weight": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.10.mlp.fc1.bias": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.10.mlp.fc2.weight": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.10.mlp.fc2.bias": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.10.layer_norm1.weight": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.10.layer_norm1.bias": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.10.layer_norm2.weight": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.10.layer_norm2.bias": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.11.self_attn.qkv_proj.weight": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.11.self_attn.qkv_proj.bias": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.11.self_attn.out_proj.weight": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.11.self_attn.out_proj.bias": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.11.mlp.fc1.weight": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.11.mlp.fc1.bias": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.11.mlp.fc2.weight": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.11.mlp.fc2.bias": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.11.layer_norm1.weight": "model-00001-of-000001.safetensors", + 
"model.vision_model.transformer.layers.11.layer_norm1.bias": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.11.layer_norm2.weight": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.11.layer_norm2.bias": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.12.self_attn.qkv_proj.weight": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.12.self_attn.qkv_proj.bias": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.12.self_attn.out_proj.weight": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.12.self_attn.out_proj.bias": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.12.mlp.fc1.weight": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.12.mlp.fc1.bias": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.12.mlp.fc2.weight": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.12.mlp.fc2.bias": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.12.layer_norm1.weight": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.12.layer_norm1.bias": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.12.layer_norm2.weight": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.12.layer_norm2.bias": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.13.self_attn.qkv_proj.weight": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.13.self_attn.qkv_proj.bias": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.13.self_attn.out_proj.weight": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.13.self_attn.out_proj.bias": "model-00001-of-000001.safetensors", + 
"model.vision_model.transformer.layers.13.mlp.fc1.weight": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.13.mlp.fc1.bias": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.13.mlp.fc2.weight": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.13.mlp.fc2.bias": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.13.layer_norm1.weight": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.13.layer_norm1.bias": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.13.layer_norm2.weight": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.13.layer_norm2.bias": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.14.self_attn.qkv_proj.weight": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.14.self_attn.qkv_proj.bias": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.14.self_attn.out_proj.weight": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.14.self_attn.out_proj.bias": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.14.mlp.fc1.weight": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.14.mlp.fc1.bias": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.14.mlp.fc2.weight": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.14.mlp.fc2.bias": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.14.layer_norm1.weight": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.14.layer_norm1.bias": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.14.layer_norm2.weight": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.14.layer_norm2.bias": 
"model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.15.self_attn.qkv_proj.weight": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.15.self_attn.qkv_proj.bias": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.15.self_attn.out_proj.weight": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.15.self_attn.out_proj.bias": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.15.mlp.fc1.weight": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.15.mlp.fc1.bias": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.15.mlp.fc2.weight": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.15.mlp.fc2.bias": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.15.layer_norm1.weight": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.15.layer_norm1.bias": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.15.layer_norm2.weight": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.15.layer_norm2.bias": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.16.self_attn.qkv_proj.weight": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.16.self_attn.qkv_proj.bias": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.16.self_attn.out_proj.weight": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.16.self_attn.out_proj.bias": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.16.mlp.fc1.weight": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.16.mlp.fc1.bias": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.16.mlp.fc2.weight": "model-00001-of-000001.safetensors", + 
"model.vision_model.transformer.layers.16.mlp.fc2.bias": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.16.layer_norm1.weight": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.16.layer_norm1.bias": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.16.layer_norm2.weight": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.16.layer_norm2.bias": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.17.self_attn.qkv_proj.weight": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.17.self_attn.qkv_proj.bias": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.17.self_attn.out_proj.weight": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.17.self_attn.out_proj.bias": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.17.mlp.fc1.weight": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.17.mlp.fc1.bias": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.17.mlp.fc2.weight": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.17.mlp.fc2.bias": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.17.layer_norm1.weight": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.17.layer_norm1.bias": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.17.layer_norm2.weight": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.17.layer_norm2.bias": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.18.self_attn.qkv_proj.weight": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.18.self_attn.qkv_proj.bias": "model-00001-of-000001.safetensors", + 
"model.vision_model.transformer.layers.18.self_attn.out_proj.weight": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.18.self_attn.out_proj.bias": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.18.mlp.fc1.weight": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.18.mlp.fc1.bias": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.18.mlp.fc2.weight": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.18.mlp.fc2.bias": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.18.layer_norm1.weight": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.18.layer_norm1.bias": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.18.layer_norm2.weight": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.18.layer_norm2.bias": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.19.self_attn.qkv_proj.weight": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.19.self_attn.qkv_proj.bias": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.19.self_attn.out_proj.weight": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.19.self_attn.out_proj.bias": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.19.mlp.fc1.weight": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.19.mlp.fc1.bias": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.19.mlp.fc2.weight": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.19.mlp.fc2.bias": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.19.layer_norm1.weight": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.19.layer_norm1.bias": 
"model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.19.layer_norm2.weight": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.19.layer_norm2.bias": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.20.self_attn.qkv_proj.weight": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.20.self_attn.qkv_proj.bias": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.20.self_attn.out_proj.weight": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.20.self_attn.out_proj.bias": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.20.mlp.fc1.weight": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.20.mlp.fc1.bias": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.20.mlp.fc2.weight": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.20.mlp.fc2.bias": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.20.layer_norm1.weight": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.20.layer_norm1.bias": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.20.layer_norm2.weight": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.20.layer_norm2.bias": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.21.self_attn.qkv_proj.weight": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.21.self_attn.qkv_proj.bias": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.21.self_attn.out_proj.weight": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.21.self_attn.out_proj.bias": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.21.mlp.fc1.weight": "model-00001-of-000001.safetensors", + 
"model.vision_model.transformer.layers.21.mlp.fc1.bias": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.21.mlp.fc2.weight": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.21.mlp.fc2.bias": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.21.layer_norm1.weight": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.21.layer_norm1.bias": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.21.layer_norm2.weight": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.21.layer_norm2.bias": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.22.self_attn.qkv_proj.weight": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.22.self_attn.qkv_proj.bias": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.22.self_attn.out_proj.weight": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.22.self_attn.out_proj.bias": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.22.mlp.fc1.weight": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.22.mlp.fc1.bias": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.22.mlp.fc2.weight": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.22.mlp.fc2.bias": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.22.layer_norm1.weight": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.22.layer_norm1.bias": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.22.layer_norm2.weight": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.22.layer_norm2.bias": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.23.self_attn.qkv_proj.weight": 
"model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.23.self_attn.qkv_proj.bias": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.23.self_attn.out_proj.weight": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.23.self_attn.out_proj.bias": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.23.mlp.fc1.weight": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.23.mlp.fc1.bias": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.23.mlp.fc2.weight": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.23.mlp.fc2.bias": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.23.layer_norm1.weight": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.23.layer_norm1.bias": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.23.layer_norm2.weight": "model-00001-of-000001.safetensors", + "model.vision_model.transformer.layers.23.layer_norm2.bias": "model-00001-of-000001.safetensors", + "model.vision_model.pre_layrnorm.weight": "model-00001-of-000001.safetensors", + "model.vision_model.pre_layrnorm.bias": "model-00001-of-000001.safetensors", + "model.projector.layers.weight": "model-00001-of-000001.safetensors", + "model.projector.layers.bias": "model-00001-of-000001.safetensors", + "model.image_newline": "model-00001-of-000001.safetensors", + "model.view_seperator": "model-00001-of-000001.safetensors", + "model.embed_tokens.weight": "model-00001-of-000001.safetensors", + "model.norm.weight": "model-00001-of-000001.safetensors", + "lm_head.weight": "model-00001-of-000001.safetensors", + "model.layers.0.self_attn.q_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.0.self_attn.k_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.0.self_attn.v_proj.weight": 
"model-00001-of-000001.safetensors", + "model.layers.0.self_attn.o_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.0.mlp.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.0.mlp.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.0.mlp.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.0.input_layernorm.weight": "model-00001-of-000001.safetensors", + "model.layers.0.post_attention_layernorm.weight": "model-00001-of-000001.safetensors", + "model.layers.1.self_attn.q_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.1.self_attn.k_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.1.self_attn.v_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.1.self_attn.o_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.1.mlp.gate.weight": "model-00001-of-000001.safetensors", + "model.layers.1.mlp.shared_experts.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.1.mlp.shared_experts.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.1.mlp.shared_experts.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.1.mlp.experts.0.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.1.mlp.experts.0.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.1.mlp.experts.0.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.1.mlp.experts.1.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.1.mlp.experts.1.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.1.mlp.experts.1.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.1.mlp.experts.2.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.1.mlp.experts.2.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.1.mlp.experts.2.down_proj.weight": "model-00001-of-000001.safetensors", + 
"model.layers.1.mlp.experts.3.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.1.mlp.experts.3.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.1.mlp.experts.3.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.1.mlp.experts.4.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.1.mlp.experts.4.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.1.mlp.experts.4.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.1.mlp.experts.5.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.1.mlp.experts.5.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.1.mlp.experts.5.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.1.mlp.experts.6.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.1.mlp.experts.6.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.1.mlp.experts.6.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.1.mlp.experts.7.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.1.mlp.experts.7.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.1.mlp.experts.7.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.1.mlp.experts.8.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.1.mlp.experts.8.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.1.mlp.experts.8.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.1.mlp.experts.9.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.1.mlp.experts.9.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.1.mlp.experts.9.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.1.mlp.experts.10.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.1.mlp.experts.10.up_proj.weight": 
"model-00001-of-000001.safetensors", + "model.layers.1.mlp.experts.10.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.1.mlp.experts.11.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.1.mlp.experts.11.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.1.mlp.experts.11.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.1.mlp.experts.12.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.1.mlp.experts.12.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.1.mlp.experts.12.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.1.mlp.experts.13.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.1.mlp.experts.13.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.1.mlp.experts.13.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.1.mlp.experts.14.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.1.mlp.experts.14.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.1.mlp.experts.14.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.1.mlp.experts.15.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.1.mlp.experts.15.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.1.mlp.experts.15.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.1.mlp.experts.16.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.1.mlp.experts.16.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.1.mlp.experts.16.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.1.mlp.experts.17.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.1.mlp.experts.17.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.1.mlp.experts.17.down_proj.weight": "model-00001-of-000001.safetensors", + 
"model.layers.1.mlp.experts.18.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.1.mlp.experts.18.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.1.mlp.experts.18.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.1.mlp.experts.19.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.1.mlp.experts.19.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.1.mlp.experts.19.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.1.mlp.experts.20.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.1.mlp.experts.20.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.1.mlp.experts.20.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.1.mlp.experts.21.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.1.mlp.experts.21.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.1.mlp.experts.21.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.1.mlp.experts.22.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.1.mlp.experts.22.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.1.mlp.experts.22.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.1.mlp.experts.23.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.1.mlp.experts.23.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.1.mlp.experts.23.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.1.mlp.experts.24.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.1.mlp.experts.24.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.1.mlp.experts.24.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.1.mlp.experts.25.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.1.mlp.experts.25.up_proj.weight": 
"model-00001-of-000001.safetensors", + "model.layers.1.mlp.experts.25.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.1.mlp.experts.26.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.1.mlp.experts.26.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.1.mlp.experts.26.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.1.mlp.experts.27.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.1.mlp.experts.27.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.1.mlp.experts.27.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.1.mlp.experts.28.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.1.mlp.experts.28.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.1.mlp.experts.28.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.1.mlp.experts.29.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.1.mlp.experts.29.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.1.mlp.experts.29.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.1.mlp.experts.30.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.1.mlp.experts.30.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.1.mlp.experts.30.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.1.mlp.experts.31.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.1.mlp.experts.31.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.1.mlp.experts.31.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.1.mlp.experts.32.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.1.mlp.experts.32.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.1.mlp.experts.32.down_proj.weight": "model-00001-of-000001.safetensors", + 
"model.layers.1.mlp.experts.33.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.1.mlp.experts.33.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.1.mlp.experts.33.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.1.mlp.experts.34.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.1.mlp.experts.34.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.1.mlp.experts.34.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.1.mlp.experts.35.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.1.mlp.experts.35.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.1.mlp.experts.35.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.1.mlp.experts.36.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.1.mlp.experts.36.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.1.mlp.experts.36.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.1.mlp.experts.37.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.1.mlp.experts.37.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.1.mlp.experts.37.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.1.mlp.experts.38.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.1.mlp.experts.38.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.1.mlp.experts.38.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.1.mlp.experts.39.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.1.mlp.experts.39.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.1.mlp.experts.39.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.1.mlp.experts.40.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.1.mlp.experts.40.up_proj.weight": 
"model-00001-of-000001.safetensors", + "model.layers.1.mlp.experts.40.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.1.mlp.experts.41.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.1.mlp.experts.41.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.1.mlp.experts.41.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.1.mlp.experts.42.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.1.mlp.experts.42.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.1.mlp.experts.42.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.1.mlp.experts.43.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.1.mlp.experts.43.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.1.mlp.experts.43.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.1.mlp.experts.44.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.1.mlp.experts.44.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.1.mlp.experts.44.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.1.mlp.experts.45.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.1.mlp.experts.45.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.1.mlp.experts.45.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.1.mlp.experts.46.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.1.mlp.experts.46.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.1.mlp.experts.46.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.1.mlp.experts.47.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.1.mlp.experts.47.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.1.mlp.experts.47.down_proj.weight": "model-00001-of-000001.safetensors", + 
"model.layers.1.mlp.experts.48.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.1.mlp.experts.48.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.1.mlp.experts.48.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.1.mlp.experts.49.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.1.mlp.experts.49.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.1.mlp.experts.49.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.1.mlp.experts.50.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.1.mlp.experts.50.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.1.mlp.experts.50.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.1.mlp.experts.51.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.1.mlp.experts.51.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.1.mlp.experts.51.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.1.mlp.experts.52.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.1.mlp.experts.52.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.1.mlp.experts.52.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.1.mlp.experts.53.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.1.mlp.experts.53.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.1.mlp.experts.53.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.1.mlp.experts.54.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.1.mlp.experts.54.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.1.mlp.experts.54.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.1.mlp.experts.55.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.1.mlp.experts.55.up_proj.weight": 
"model-00001-of-000001.safetensors", + "model.layers.1.mlp.experts.55.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.1.mlp.experts.56.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.1.mlp.experts.56.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.1.mlp.experts.56.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.1.mlp.experts.57.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.1.mlp.experts.57.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.1.mlp.experts.57.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.1.mlp.experts.58.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.1.mlp.experts.58.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.1.mlp.experts.58.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.1.mlp.experts.59.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.1.mlp.experts.59.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.1.mlp.experts.59.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.1.mlp.experts.60.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.1.mlp.experts.60.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.1.mlp.experts.60.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.1.mlp.experts.61.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.1.mlp.experts.61.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.1.mlp.experts.61.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.1.mlp.experts.62.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.1.mlp.experts.62.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.1.mlp.experts.62.down_proj.weight": "model-00001-of-000001.safetensors", + 
"model.layers.1.mlp.experts.63.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.1.mlp.experts.63.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.1.mlp.experts.63.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.1.input_layernorm.weight": "model-00001-of-000001.safetensors", + "model.layers.1.post_attention_layernorm.weight": "model-00001-of-000001.safetensors", + "model.layers.2.self_attn.q_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.2.self_attn.k_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.2.self_attn.v_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.2.self_attn.o_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.2.mlp.gate.weight": "model-00001-of-000001.safetensors", + "model.layers.2.mlp.shared_experts.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.2.mlp.shared_experts.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.2.mlp.shared_experts.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.2.mlp.experts.0.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.2.mlp.experts.0.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.2.mlp.experts.0.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.2.mlp.experts.1.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.2.mlp.experts.1.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.2.mlp.experts.1.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.2.mlp.experts.2.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.2.mlp.experts.2.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.2.mlp.experts.2.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.2.mlp.experts.3.gate_proj.weight": "model-00001-of-000001.safetensors", + 
"model.layers.2.mlp.experts.3.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.2.mlp.experts.3.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.2.mlp.experts.4.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.2.mlp.experts.4.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.2.mlp.experts.4.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.2.mlp.experts.5.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.2.mlp.experts.5.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.2.mlp.experts.5.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.2.mlp.experts.6.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.2.mlp.experts.6.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.2.mlp.experts.6.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.2.mlp.experts.7.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.2.mlp.experts.7.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.2.mlp.experts.7.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.2.mlp.experts.8.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.2.mlp.experts.8.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.2.mlp.experts.8.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.2.mlp.experts.9.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.2.mlp.experts.9.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.2.mlp.experts.9.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.2.mlp.experts.10.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.2.mlp.experts.10.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.2.mlp.experts.10.down_proj.weight": 
"model-00001-of-000001.safetensors", + "model.layers.2.mlp.experts.11.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.2.mlp.experts.11.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.2.mlp.experts.11.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.2.mlp.experts.12.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.2.mlp.experts.12.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.2.mlp.experts.12.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.2.mlp.experts.13.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.2.mlp.experts.13.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.2.mlp.experts.13.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.2.mlp.experts.14.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.2.mlp.experts.14.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.2.mlp.experts.14.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.2.mlp.experts.15.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.2.mlp.experts.15.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.2.mlp.experts.15.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.2.mlp.experts.16.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.2.mlp.experts.16.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.2.mlp.experts.16.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.2.mlp.experts.17.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.2.mlp.experts.17.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.2.mlp.experts.17.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.2.mlp.experts.18.gate_proj.weight": "model-00001-of-000001.safetensors", + 
"model.layers.2.mlp.experts.18.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.2.mlp.experts.18.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.2.mlp.experts.19.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.2.mlp.experts.19.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.2.mlp.experts.19.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.2.mlp.experts.20.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.2.mlp.experts.20.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.2.mlp.experts.20.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.2.mlp.experts.21.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.2.mlp.experts.21.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.2.mlp.experts.21.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.2.mlp.experts.22.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.2.mlp.experts.22.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.2.mlp.experts.22.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.2.mlp.experts.23.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.2.mlp.experts.23.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.2.mlp.experts.23.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.2.mlp.experts.24.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.2.mlp.experts.24.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.2.mlp.experts.24.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.2.mlp.experts.25.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.2.mlp.experts.25.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.2.mlp.experts.25.down_proj.weight": 
"model-00001-of-000001.safetensors", + "model.layers.2.mlp.experts.26.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.2.mlp.experts.26.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.2.mlp.experts.26.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.2.mlp.experts.27.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.2.mlp.experts.27.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.2.mlp.experts.27.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.2.mlp.experts.28.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.2.mlp.experts.28.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.2.mlp.experts.28.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.2.mlp.experts.29.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.2.mlp.experts.29.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.2.mlp.experts.29.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.2.mlp.experts.30.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.2.mlp.experts.30.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.2.mlp.experts.30.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.2.mlp.experts.31.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.2.mlp.experts.31.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.2.mlp.experts.31.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.2.mlp.experts.32.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.2.mlp.experts.32.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.2.mlp.experts.32.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.2.mlp.experts.33.gate_proj.weight": "model-00001-of-000001.safetensors", + 
"model.layers.2.mlp.experts.33.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.2.mlp.experts.33.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.2.mlp.experts.34.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.2.mlp.experts.34.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.2.mlp.experts.34.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.2.mlp.experts.35.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.2.mlp.experts.35.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.2.mlp.experts.35.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.2.mlp.experts.36.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.2.mlp.experts.36.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.2.mlp.experts.36.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.2.mlp.experts.37.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.2.mlp.experts.37.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.2.mlp.experts.37.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.2.mlp.experts.38.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.2.mlp.experts.38.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.2.mlp.experts.38.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.2.mlp.experts.39.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.2.mlp.experts.39.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.2.mlp.experts.39.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.2.mlp.experts.40.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.2.mlp.experts.40.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.2.mlp.experts.40.down_proj.weight": 
"model-00001-of-000001.safetensors", + "model.layers.2.mlp.experts.41.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.2.mlp.experts.41.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.2.mlp.experts.41.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.2.mlp.experts.42.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.2.mlp.experts.42.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.2.mlp.experts.42.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.2.mlp.experts.43.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.2.mlp.experts.43.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.2.mlp.experts.43.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.2.mlp.experts.44.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.2.mlp.experts.44.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.2.mlp.experts.44.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.2.mlp.experts.45.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.2.mlp.experts.45.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.2.mlp.experts.45.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.2.mlp.experts.46.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.2.mlp.experts.46.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.2.mlp.experts.46.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.2.mlp.experts.47.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.2.mlp.experts.47.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.2.mlp.experts.47.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.2.mlp.experts.48.gate_proj.weight": "model-00001-of-000001.safetensors", + 
"model.layers.2.mlp.experts.48.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.2.mlp.experts.48.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.2.mlp.experts.49.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.2.mlp.experts.49.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.2.mlp.experts.49.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.2.mlp.experts.50.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.2.mlp.experts.50.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.2.mlp.experts.50.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.2.mlp.experts.51.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.2.mlp.experts.51.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.2.mlp.experts.51.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.2.mlp.experts.52.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.2.mlp.experts.52.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.2.mlp.experts.52.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.2.mlp.experts.53.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.2.mlp.experts.53.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.2.mlp.experts.53.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.2.mlp.experts.54.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.2.mlp.experts.54.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.2.mlp.experts.54.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.2.mlp.experts.55.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.2.mlp.experts.55.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.2.mlp.experts.55.down_proj.weight": 
"model-00001-of-000001.safetensors", + "model.layers.2.mlp.experts.56.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.2.mlp.experts.56.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.2.mlp.experts.56.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.2.mlp.experts.57.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.2.mlp.experts.57.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.2.mlp.experts.57.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.2.mlp.experts.58.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.2.mlp.experts.58.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.2.mlp.experts.58.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.2.mlp.experts.59.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.2.mlp.experts.59.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.2.mlp.experts.59.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.2.mlp.experts.60.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.2.mlp.experts.60.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.2.mlp.experts.60.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.2.mlp.experts.61.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.2.mlp.experts.61.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.2.mlp.experts.61.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.2.mlp.experts.62.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.2.mlp.experts.62.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.2.mlp.experts.62.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.2.mlp.experts.63.gate_proj.weight": "model-00001-of-000001.safetensors", + 
"model.layers.2.mlp.experts.63.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.2.mlp.experts.63.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.2.input_layernorm.weight": "model-00001-of-000001.safetensors", + "model.layers.2.post_attention_layernorm.weight": "model-00001-of-000001.safetensors", + "model.layers.3.self_attn.q_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.3.self_attn.k_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.3.self_attn.v_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.3.self_attn.o_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.3.mlp.gate.weight": "model-00001-of-000001.safetensors", + "model.layers.3.mlp.shared_experts.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.3.mlp.shared_experts.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.3.mlp.shared_experts.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.3.mlp.experts.0.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.3.mlp.experts.0.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.3.mlp.experts.0.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.3.mlp.experts.1.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.3.mlp.experts.1.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.3.mlp.experts.1.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.3.mlp.experts.2.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.3.mlp.experts.2.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.3.mlp.experts.2.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.3.mlp.experts.3.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.3.mlp.experts.3.up_proj.weight": "model-00001-of-000001.safetensors", + 
"model.layers.3.mlp.experts.3.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.3.mlp.experts.4.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.3.mlp.experts.4.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.3.mlp.experts.4.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.3.mlp.experts.5.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.3.mlp.experts.5.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.3.mlp.experts.5.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.3.mlp.experts.6.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.3.mlp.experts.6.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.3.mlp.experts.6.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.3.mlp.experts.7.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.3.mlp.experts.7.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.3.mlp.experts.7.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.3.mlp.experts.8.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.3.mlp.experts.8.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.3.mlp.experts.8.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.3.mlp.experts.9.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.3.mlp.experts.9.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.3.mlp.experts.9.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.3.mlp.experts.10.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.3.mlp.experts.10.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.3.mlp.experts.10.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.3.mlp.experts.11.gate_proj.weight": 
"model-00001-of-000001.safetensors", + "model.layers.3.mlp.experts.11.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.3.mlp.experts.11.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.3.mlp.experts.12.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.3.mlp.experts.12.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.3.mlp.experts.12.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.3.mlp.experts.13.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.3.mlp.experts.13.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.3.mlp.experts.13.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.3.mlp.experts.14.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.3.mlp.experts.14.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.3.mlp.experts.14.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.3.mlp.experts.15.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.3.mlp.experts.15.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.3.mlp.experts.15.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.3.mlp.experts.16.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.3.mlp.experts.16.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.3.mlp.experts.16.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.3.mlp.experts.17.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.3.mlp.experts.17.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.3.mlp.experts.17.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.3.mlp.experts.18.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.3.mlp.experts.18.up_proj.weight": "model-00001-of-000001.safetensors", + 
"model.layers.3.mlp.experts.18.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.3.mlp.experts.19.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.3.mlp.experts.19.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.3.mlp.experts.19.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.3.mlp.experts.20.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.3.mlp.experts.20.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.3.mlp.experts.20.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.3.mlp.experts.21.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.3.mlp.experts.21.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.3.mlp.experts.21.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.3.mlp.experts.22.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.3.mlp.experts.22.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.3.mlp.experts.22.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.3.mlp.experts.23.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.3.mlp.experts.23.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.3.mlp.experts.23.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.3.mlp.experts.24.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.3.mlp.experts.24.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.3.mlp.experts.24.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.3.mlp.experts.25.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.3.mlp.experts.25.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.3.mlp.experts.25.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.3.mlp.experts.26.gate_proj.weight": 
"model-00001-of-000001.safetensors", + "model.layers.3.mlp.experts.26.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.3.mlp.experts.26.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.3.mlp.experts.27.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.3.mlp.experts.27.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.3.mlp.experts.27.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.3.mlp.experts.28.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.3.mlp.experts.28.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.3.mlp.experts.28.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.3.mlp.experts.29.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.3.mlp.experts.29.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.3.mlp.experts.29.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.3.mlp.experts.30.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.3.mlp.experts.30.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.3.mlp.experts.30.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.3.mlp.experts.31.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.3.mlp.experts.31.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.3.mlp.experts.31.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.3.mlp.experts.32.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.3.mlp.experts.32.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.3.mlp.experts.32.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.3.mlp.experts.33.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.3.mlp.experts.33.up_proj.weight": "model-00001-of-000001.safetensors", + 
"model.layers.3.mlp.experts.33.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.3.mlp.experts.34.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.3.mlp.experts.34.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.3.mlp.experts.34.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.3.mlp.experts.35.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.3.mlp.experts.35.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.3.mlp.experts.35.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.3.mlp.experts.36.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.3.mlp.experts.36.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.3.mlp.experts.36.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.3.mlp.experts.37.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.3.mlp.experts.37.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.3.mlp.experts.37.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.3.mlp.experts.38.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.3.mlp.experts.38.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.3.mlp.experts.38.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.3.mlp.experts.39.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.3.mlp.experts.39.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.3.mlp.experts.39.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.3.mlp.experts.40.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.3.mlp.experts.40.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.3.mlp.experts.40.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.3.mlp.experts.41.gate_proj.weight": 
"model-00001-of-000001.safetensors", + "model.layers.3.mlp.experts.41.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.3.mlp.experts.41.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.3.mlp.experts.42.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.3.mlp.experts.42.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.3.mlp.experts.42.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.3.mlp.experts.43.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.3.mlp.experts.43.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.3.mlp.experts.43.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.3.mlp.experts.44.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.3.mlp.experts.44.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.3.mlp.experts.44.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.3.mlp.experts.45.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.3.mlp.experts.45.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.3.mlp.experts.45.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.3.mlp.experts.46.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.3.mlp.experts.46.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.3.mlp.experts.46.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.3.mlp.experts.47.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.3.mlp.experts.47.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.3.mlp.experts.47.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.3.mlp.experts.48.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.3.mlp.experts.48.up_proj.weight": "model-00001-of-000001.safetensors", + 
"model.layers.3.mlp.experts.48.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.3.mlp.experts.49.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.3.mlp.experts.49.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.3.mlp.experts.49.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.3.mlp.experts.50.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.3.mlp.experts.50.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.3.mlp.experts.50.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.3.mlp.experts.51.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.3.mlp.experts.51.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.3.mlp.experts.51.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.3.mlp.experts.52.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.3.mlp.experts.52.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.3.mlp.experts.52.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.3.mlp.experts.53.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.3.mlp.experts.53.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.3.mlp.experts.53.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.3.mlp.experts.54.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.3.mlp.experts.54.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.3.mlp.experts.54.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.3.mlp.experts.55.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.3.mlp.experts.55.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.3.mlp.experts.55.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.3.mlp.experts.56.gate_proj.weight": 
"model-00001-of-000001.safetensors", + "model.layers.3.mlp.experts.56.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.3.mlp.experts.56.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.3.mlp.experts.57.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.3.mlp.experts.57.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.3.mlp.experts.57.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.3.mlp.experts.58.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.3.mlp.experts.58.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.3.mlp.experts.58.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.3.mlp.experts.59.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.3.mlp.experts.59.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.3.mlp.experts.59.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.3.mlp.experts.60.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.3.mlp.experts.60.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.3.mlp.experts.60.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.3.mlp.experts.61.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.3.mlp.experts.61.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.3.mlp.experts.61.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.3.mlp.experts.62.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.3.mlp.experts.62.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.3.mlp.experts.62.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.3.mlp.experts.63.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.3.mlp.experts.63.up_proj.weight": "model-00001-of-000001.safetensors", + 
"model.layers.3.mlp.experts.63.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.3.input_layernorm.weight": "model-00001-of-000001.safetensors", + "model.layers.3.post_attention_layernorm.weight": "model-00001-of-000001.safetensors", + "model.layers.4.self_attn.q_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.4.self_attn.k_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.4.self_attn.v_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.4.self_attn.o_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.4.mlp.gate.weight": "model-00001-of-000001.safetensors", + "model.layers.4.mlp.shared_experts.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.4.mlp.shared_experts.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.4.mlp.shared_experts.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.4.mlp.experts.0.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.4.mlp.experts.0.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.4.mlp.experts.0.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.4.mlp.experts.1.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.4.mlp.experts.1.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.4.mlp.experts.1.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.4.mlp.experts.2.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.4.mlp.experts.2.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.4.mlp.experts.2.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.4.mlp.experts.3.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.4.mlp.experts.3.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.4.mlp.experts.3.down_proj.weight": "model-00001-of-000001.safetensors", + 
"model.layers.4.mlp.experts.4.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.4.mlp.experts.4.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.4.mlp.experts.4.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.4.mlp.experts.5.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.4.mlp.experts.5.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.4.mlp.experts.5.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.4.mlp.experts.6.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.4.mlp.experts.6.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.4.mlp.experts.6.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.4.mlp.experts.7.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.4.mlp.experts.7.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.4.mlp.experts.7.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.4.mlp.experts.8.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.4.mlp.experts.8.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.4.mlp.experts.8.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.4.mlp.experts.9.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.4.mlp.experts.9.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.4.mlp.experts.9.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.4.mlp.experts.10.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.4.mlp.experts.10.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.4.mlp.experts.10.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.4.mlp.experts.11.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.4.mlp.experts.11.up_proj.weight": 
"model-00001-of-000001.safetensors", + "model.layers.4.mlp.experts.11.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.4.mlp.experts.12.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.4.mlp.experts.12.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.4.mlp.experts.12.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.4.mlp.experts.13.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.4.mlp.experts.13.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.4.mlp.experts.13.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.4.mlp.experts.14.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.4.mlp.experts.14.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.4.mlp.experts.14.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.4.mlp.experts.15.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.4.mlp.experts.15.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.4.mlp.experts.15.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.4.mlp.experts.16.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.4.mlp.experts.16.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.4.mlp.experts.16.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.4.mlp.experts.17.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.4.mlp.experts.17.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.4.mlp.experts.17.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.4.mlp.experts.18.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.4.mlp.experts.18.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.4.mlp.experts.18.down_proj.weight": "model-00001-of-000001.safetensors", + 
"model.layers.4.mlp.experts.19.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.4.mlp.experts.19.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.4.mlp.experts.19.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.4.mlp.experts.20.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.4.mlp.experts.20.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.4.mlp.experts.20.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.4.mlp.experts.21.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.4.mlp.experts.21.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.4.mlp.experts.21.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.4.mlp.experts.22.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.4.mlp.experts.22.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.4.mlp.experts.22.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.4.mlp.experts.23.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.4.mlp.experts.23.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.4.mlp.experts.23.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.4.mlp.experts.24.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.4.mlp.experts.24.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.4.mlp.experts.24.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.4.mlp.experts.25.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.4.mlp.experts.25.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.4.mlp.experts.25.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.4.mlp.experts.26.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.4.mlp.experts.26.up_proj.weight": 
"model-00001-of-000001.safetensors", + "model.layers.4.mlp.experts.26.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.4.mlp.experts.27.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.4.mlp.experts.27.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.4.mlp.experts.27.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.4.mlp.experts.28.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.4.mlp.experts.28.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.4.mlp.experts.28.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.4.mlp.experts.29.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.4.mlp.experts.29.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.4.mlp.experts.29.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.4.mlp.experts.30.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.4.mlp.experts.30.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.4.mlp.experts.30.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.4.mlp.experts.31.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.4.mlp.experts.31.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.4.mlp.experts.31.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.4.mlp.experts.32.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.4.mlp.experts.32.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.4.mlp.experts.32.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.4.mlp.experts.33.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.4.mlp.experts.33.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.4.mlp.experts.33.down_proj.weight": "model-00001-of-000001.safetensors", + 
"model.layers.4.mlp.experts.34.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.4.mlp.experts.34.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.4.mlp.experts.34.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.4.mlp.experts.35.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.4.mlp.experts.35.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.4.mlp.experts.35.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.4.mlp.experts.36.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.4.mlp.experts.36.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.4.mlp.experts.36.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.4.mlp.experts.37.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.4.mlp.experts.37.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.4.mlp.experts.37.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.4.mlp.experts.38.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.4.mlp.experts.38.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.4.mlp.experts.38.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.4.mlp.experts.39.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.4.mlp.experts.39.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.4.mlp.experts.39.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.4.mlp.experts.40.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.4.mlp.experts.40.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.4.mlp.experts.40.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.4.mlp.experts.41.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.4.mlp.experts.41.up_proj.weight": 
"model-00001-of-000001.safetensors", + "model.layers.4.mlp.experts.41.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.4.mlp.experts.42.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.4.mlp.experts.42.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.4.mlp.experts.42.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.4.mlp.experts.43.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.4.mlp.experts.43.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.4.mlp.experts.43.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.4.mlp.experts.44.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.4.mlp.experts.44.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.4.mlp.experts.44.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.4.mlp.experts.45.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.4.mlp.experts.45.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.4.mlp.experts.45.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.4.mlp.experts.46.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.4.mlp.experts.46.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.4.mlp.experts.46.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.4.mlp.experts.47.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.4.mlp.experts.47.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.4.mlp.experts.47.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.4.mlp.experts.48.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.4.mlp.experts.48.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.4.mlp.experts.48.down_proj.weight": "model-00001-of-000001.safetensors", + 
"model.layers.4.mlp.experts.49.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.4.mlp.experts.49.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.4.mlp.experts.49.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.4.mlp.experts.50.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.4.mlp.experts.50.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.4.mlp.experts.50.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.4.mlp.experts.51.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.4.mlp.experts.51.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.4.mlp.experts.51.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.4.mlp.experts.52.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.4.mlp.experts.52.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.4.mlp.experts.52.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.4.mlp.experts.53.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.4.mlp.experts.53.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.4.mlp.experts.53.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.4.mlp.experts.54.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.4.mlp.experts.54.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.4.mlp.experts.54.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.4.mlp.experts.55.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.4.mlp.experts.55.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.4.mlp.experts.55.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.4.mlp.experts.56.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.4.mlp.experts.56.up_proj.weight": 
"model-00001-of-000001.safetensors", + "model.layers.4.mlp.experts.56.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.4.mlp.experts.57.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.4.mlp.experts.57.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.4.mlp.experts.57.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.4.mlp.experts.58.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.4.mlp.experts.58.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.4.mlp.experts.58.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.4.mlp.experts.59.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.4.mlp.experts.59.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.4.mlp.experts.59.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.4.mlp.experts.60.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.4.mlp.experts.60.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.4.mlp.experts.60.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.4.mlp.experts.61.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.4.mlp.experts.61.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.4.mlp.experts.61.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.4.mlp.experts.62.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.4.mlp.experts.62.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.4.mlp.experts.62.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.4.mlp.experts.63.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.4.mlp.experts.63.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.4.mlp.experts.63.down_proj.weight": "model-00001-of-000001.safetensors", + 
"model.layers.4.input_layernorm.weight": "model-00001-of-000001.safetensors", + "model.layers.4.post_attention_layernorm.weight": "model-00001-of-000001.safetensors", + "model.layers.5.self_attn.q_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.5.self_attn.k_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.5.self_attn.v_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.5.self_attn.o_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.5.mlp.gate.weight": "model-00001-of-000001.safetensors", + "model.layers.5.mlp.shared_experts.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.5.mlp.shared_experts.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.5.mlp.shared_experts.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.5.mlp.experts.0.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.5.mlp.experts.0.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.5.mlp.experts.0.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.5.mlp.experts.1.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.5.mlp.experts.1.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.5.mlp.experts.1.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.5.mlp.experts.2.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.5.mlp.experts.2.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.5.mlp.experts.2.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.5.mlp.experts.3.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.5.mlp.experts.3.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.5.mlp.experts.3.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.5.mlp.experts.4.gate_proj.weight": "model-00001-of-000001.safetensors", + 
"model.layers.5.mlp.experts.4.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.5.mlp.experts.4.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.5.mlp.experts.5.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.5.mlp.experts.5.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.5.mlp.experts.5.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.5.mlp.experts.6.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.5.mlp.experts.6.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.5.mlp.experts.6.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.5.mlp.experts.7.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.5.mlp.experts.7.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.5.mlp.experts.7.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.5.mlp.experts.8.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.5.mlp.experts.8.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.5.mlp.experts.8.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.5.mlp.experts.9.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.5.mlp.experts.9.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.5.mlp.experts.9.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.5.mlp.experts.10.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.5.mlp.experts.10.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.5.mlp.experts.10.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.5.mlp.experts.11.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.5.mlp.experts.11.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.5.mlp.experts.11.down_proj.weight": 
"model-00001-of-000001.safetensors", + "model.layers.5.mlp.experts.12.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.5.mlp.experts.12.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.5.mlp.experts.12.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.5.mlp.experts.13.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.5.mlp.experts.13.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.5.mlp.experts.13.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.5.mlp.experts.14.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.5.mlp.experts.14.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.5.mlp.experts.14.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.5.mlp.experts.15.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.5.mlp.experts.15.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.5.mlp.experts.15.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.5.mlp.experts.16.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.5.mlp.experts.16.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.5.mlp.experts.16.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.5.mlp.experts.17.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.5.mlp.experts.17.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.5.mlp.experts.17.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.5.mlp.experts.18.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.5.mlp.experts.18.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.5.mlp.experts.18.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.5.mlp.experts.19.gate_proj.weight": "model-00001-of-000001.safetensors", + 
"model.layers.5.mlp.experts.19.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.5.mlp.experts.19.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.5.mlp.experts.20.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.5.mlp.experts.20.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.5.mlp.experts.20.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.5.mlp.experts.21.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.5.mlp.experts.21.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.5.mlp.experts.21.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.5.mlp.experts.22.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.5.mlp.experts.22.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.5.mlp.experts.22.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.5.mlp.experts.23.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.5.mlp.experts.23.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.5.mlp.experts.23.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.5.mlp.experts.24.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.5.mlp.experts.24.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.5.mlp.experts.24.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.5.mlp.experts.25.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.5.mlp.experts.25.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.5.mlp.experts.25.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.5.mlp.experts.26.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.5.mlp.experts.26.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.5.mlp.experts.26.down_proj.weight": 
"model-00001-of-000001.safetensors", + "model.layers.5.mlp.experts.27.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.5.mlp.experts.27.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.5.mlp.experts.27.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.5.mlp.experts.28.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.5.mlp.experts.28.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.5.mlp.experts.28.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.5.mlp.experts.29.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.5.mlp.experts.29.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.5.mlp.experts.29.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.5.mlp.experts.30.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.5.mlp.experts.30.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.5.mlp.experts.30.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.5.mlp.experts.31.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.5.mlp.experts.31.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.5.mlp.experts.31.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.5.mlp.experts.32.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.5.mlp.experts.32.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.5.mlp.experts.32.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.5.mlp.experts.33.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.5.mlp.experts.33.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.5.mlp.experts.33.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.5.mlp.experts.34.gate_proj.weight": "model-00001-of-000001.safetensors", + 
"model.layers.5.mlp.experts.34.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.5.mlp.experts.34.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.5.mlp.experts.35.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.5.mlp.experts.35.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.5.mlp.experts.35.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.5.mlp.experts.36.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.5.mlp.experts.36.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.5.mlp.experts.36.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.5.mlp.experts.37.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.5.mlp.experts.37.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.5.mlp.experts.37.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.5.mlp.experts.38.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.5.mlp.experts.38.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.5.mlp.experts.38.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.5.mlp.experts.39.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.5.mlp.experts.39.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.5.mlp.experts.39.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.5.mlp.experts.40.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.5.mlp.experts.40.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.5.mlp.experts.40.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.5.mlp.experts.41.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.5.mlp.experts.41.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.5.mlp.experts.41.down_proj.weight": 
"model-00001-of-000001.safetensors", + "model.layers.5.mlp.experts.42.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.5.mlp.experts.42.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.5.mlp.experts.42.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.5.mlp.experts.43.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.5.mlp.experts.43.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.5.mlp.experts.43.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.5.mlp.experts.44.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.5.mlp.experts.44.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.5.mlp.experts.44.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.5.mlp.experts.45.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.5.mlp.experts.45.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.5.mlp.experts.45.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.5.mlp.experts.46.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.5.mlp.experts.46.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.5.mlp.experts.46.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.5.mlp.experts.47.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.5.mlp.experts.47.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.5.mlp.experts.47.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.5.mlp.experts.48.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.5.mlp.experts.48.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.5.mlp.experts.48.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.5.mlp.experts.49.gate_proj.weight": "model-00001-of-000001.safetensors", + 
"model.layers.5.mlp.experts.49.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.5.mlp.experts.49.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.5.mlp.experts.50.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.5.mlp.experts.50.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.5.mlp.experts.50.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.5.mlp.experts.51.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.5.mlp.experts.51.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.5.mlp.experts.51.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.5.mlp.experts.52.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.5.mlp.experts.52.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.5.mlp.experts.52.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.5.mlp.experts.53.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.5.mlp.experts.53.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.5.mlp.experts.53.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.5.mlp.experts.54.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.5.mlp.experts.54.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.5.mlp.experts.54.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.5.mlp.experts.55.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.5.mlp.experts.55.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.5.mlp.experts.55.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.5.mlp.experts.56.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.5.mlp.experts.56.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.5.mlp.experts.56.down_proj.weight": 
"model-00001-of-000001.safetensors", + "model.layers.5.mlp.experts.57.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.5.mlp.experts.57.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.5.mlp.experts.57.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.5.mlp.experts.58.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.5.mlp.experts.58.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.5.mlp.experts.58.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.5.mlp.experts.59.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.5.mlp.experts.59.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.5.mlp.experts.59.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.5.mlp.experts.60.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.5.mlp.experts.60.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.5.mlp.experts.60.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.5.mlp.experts.61.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.5.mlp.experts.61.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.5.mlp.experts.61.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.5.mlp.experts.62.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.5.mlp.experts.62.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.5.mlp.experts.62.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.5.mlp.experts.63.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.5.mlp.experts.63.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.5.mlp.experts.63.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.5.input_layernorm.weight": "model-00001-of-000001.safetensors", + 
"model.layers.5.post_attention_layernorm.weight": "model-00001-of-000001.safetensors", + "model.layers.6.self_attn.q_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.6.self_attn.k_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.6.self_attn.v_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.6.self_attn.o_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.6.mlp.gate.weight": "model-00001-of-000001.safetensors", + "model.layers.6.mlp.shared_experts.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.6.mlp.shared_experts.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.6.mlp.shared_experts.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.6.mlp.experts.0.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.6.mlp.experts.0.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.6.mlp.experts.0.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.6.mlp.experts.1.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.6.mlp.experts.1.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.6.mlp.experts.1.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.6.mlp.experts.2.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.6.mlp.experts.2.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.6.mlp.experts.2.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.6.mlp.experts.3.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.6.mlp.experts.3.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.6.mlp.experts.3.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.6.mlp.experts.4.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.6.mlp.experts.4.up_proj.weight": "model-00001-of-000001.safetensors", + 
"model.layers.6.mlp.experts.4.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.6.mlp.experts.5.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.6.mlp.experts.5.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.6.mlp.experts.5.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.6.mlp.experts.6.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.6.mlp.experts.6.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.6.mlp.experts.6.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.6.mlp.experts.7.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.6.mlp.experts.7.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.6.mlp.experts.7.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.6.mlp.experts.8.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.6.mlp.experts.8.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.6.mlp.experts.8.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.6.mlp.experts.9.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.6.mlp.experts.9.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.6.mlp.experts.9.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.6.mlp.experts.10.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.6.mlp.experts.10.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.6.mlp.experts.10.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.6.mlp.experts.11.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.6.mlp.experts.11.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.6.mlp.experts.11.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.6.mlp.experts.12.gate_proj.weight": 
"model-00001-of-000001.safetensors", + "model.layers.6.mlp.experts.12.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.6.mlp.experts.12.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.6.mlp.experts.13.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.6.mlp.experts.13.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.6.mlp.experts.13.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.6.mlp.experts.14.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.6.mlp.experts.14.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.6.mlp.experts.14.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.6.mlp.experts.15.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.6.mlp.experts.15.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.6.mlp.experts.15.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.6.mlp.experts.16.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.6.mlp.experts.16.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.6.mlp.experts.16.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.6.mlp.experts.17.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.6.mlp.experts.17.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.6.mlp.experts.17.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.6.mlp.experts.18.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.6.mlp.experts.18.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.6.mlp.experts.18.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.6.mlp.experts.19.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.6.mlp.experts.19.up_proj.weight": "model-00001-of-000001.safetensors", + 
"model.layers.6.mlp.experts.19.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.6.mlp.experts.20.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.6.mlp.experts.20.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.6.mlp.experts.20.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.6.mlp.experts.21.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.6.mlp.experts.21.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.6.mlp.experts.21.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.6.mlp.experts.22.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.6.mlp.experts.22.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.6.mlp.experts.22.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.6.mlp.experts.23.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.6.mlp.experts.23.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.6.mlp.experts.23.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.6.mlp.experts.24.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.6.mlp.experts.24.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.6.mlp.experts.24.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.6.mlp.experts.25.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.6.mlp.experts.25.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.6.mlp.experts.25.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.6.mlp.experts.26.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.6.mlp.experts.26.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.6.mlp.experts.26.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.6.mlp.experts.27.gate_proj.weight": 
"model-00001-of-000001.safetensors", + "model.layers.6.mlp.experts.27.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.6.mlp.experts.27.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.6.mlp.experts.28.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.6.mlp.experts.28.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.6.mlp.experts.28.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.6.mlp.experts.29.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.6.mlp.experts.29.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.6.mlp.experts.29.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.6.mlp.experts.30.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.6.mlp.experts.30.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.6.mlp.experts.30.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.6.mlp.experts.31.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.6.mlp.experts.31.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.6.mlp.experts.31.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.6.mlp.experts.32.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.6.mlp.experts.32.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.6.mlp.experts.32.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.6.mlp.experts.33.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.6.mlp.experts.33.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.6.mlp.experts.33.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.6.mlp.experts.34.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.6.mlp.experts.34.up_proj.weight": "model-00001-of-000001.safetensors", + 
"model.layers.6.mlp.experts.34.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.6.mlp.experts.35.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.6.mlp.experts.35.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.6.mlp.experts.35.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.6.mlp.experts.36.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.6.mlp.experts.36.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.6.mlp.experts.36.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.6.mlp.experts.37.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.6.mlp.experts.37.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.6.mlp.experts.37.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.6.mlp.experts.38.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.6.mlp.experts.38.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.6.mlp.experts.38.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.6.mlp.experts.39.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.6.mlp.experts.39.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.6.mlp.experts.39.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.6.mlp.experts.40.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.6.mlp.experts.40.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.6.mlp.experts.40.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.6.mlp.experts.41.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.6.mlp.experts.41.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.6.mlp.experts.41.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.6.mlp.experts.42.gate_proj.weight": 
"model-00001-of-000001.safetensors", + "model.layers.6.mlp.experts.42.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.6.mlp.experts.42.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.6.mlp.experts.43.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.6.mlp.experts.43.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.6.mlp.experts.43.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.6.mlp.experts.44.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.6.mlp.experts.44.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.6.mlp.experts.44.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.6.mlp.experts.45.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.6.mlp.experts.45.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.6.mlp.experts.45.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.6.mlp.experts.46.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.6.mlp.experts.46.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.6.mlp.experts.46.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.6.mlp.experts.47.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.6.mlp.experts.47.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.6.mlp.experts.47.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.6.mlp.experts.48.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.6.mlp.experts.48.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.6.mlp.experts.48.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.6.mlp.experts.49.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.6.mlp.experts.49.up_proj.weight": "model-00001-of-000001.safetensors", + 
"model.layers.6.mlp.experts.49.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.6.mlp.experts.50.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.6.mlp.experts.50.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.6.mlp.experts.50.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.6.mlp.experts.51.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.6.mlp.experts.51.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.6.mlp.experts.51.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.6.mlp.experts.52.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.6.mlp.experts.52.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.6.mlp.experts.52.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.6.mlp.experts.53.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.6.mlp.experts.53.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.6.mlp.experts.53.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.6.mlp.experts.54.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.6.mlp.experts.54.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.6.mlp.experts.54.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.6.mlp.experts.55.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.6.mlp.experts.55.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.6.mlp.experts.55.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.6.mlp.experts.56.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.6.mlp.experts.56.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.6.mlp.experts.56.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.6.mlp.experts.57.gate_proj.weight": 
"model-00001-of-000001.safetensors", + "model.layers.6.mlp.experts.57.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.6.mlp.experts.57.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.6.mlp.experts.58.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.6.mlp.experts.58.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.6.mlp.experts.58.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.6.mlp.experts.59.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.6.mlp.experts.59.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.6.mlp.experts.59.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.6.mlp.experts.60.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.6.mlp.experts.60.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.6.mlp.experts.60.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.6.mlp.experts.61.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.6.mlp.experts.61.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.6.mlp.experts.61.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.6.mlp.experts.62.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.6.mlp.experts.62.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.6.mlp.experts.62.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.6.mlp.experts.63.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.6.mlp.experts.63.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.6.mlp.experts.63.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.6.input_layernorm.weight": "model-00001-of-000001.safetensors", + "model.layers.6.post_attention_layernorm.weight": "model-00001-of-000001.safetensors", + 
"model.layers.7.self_attn.q_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.7.self_attn.k_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.7.self_attn.v_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.7.self_attn.o_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.7.mlp.gate.weight": "model-00001-of-000001.safetensors", + "model.layers.7.mlp.shared_experts.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.7.mlp.shared_experts.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.7.mlp.shared_experts.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.7.mlp.experts.0.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.7.mlp.experts.0.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.7.mlp.experts.0.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.7.mlp.experts.1.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.7.mlp.experts.1.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.7.mlp.experts.1.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.7.mlp.experts.2.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.7.mlp.experts.2.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.7.mlp.experts.2.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.7.mlp.experts.3.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.7.mlp.experts.3.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.7.mlp.experts.3.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.7.mlp.experts.4.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.7.mlp.experts.4.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.7.mlp.experts.4.down_proj.weight": "model-00001-of-000001.safetensors", + 
"model.layers.7.mlp.experts.5.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.7.mlp.experts.5.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.7.mlp.experts.5.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.7.mlp.experts.6.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.7.mlp.experts.6.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.7.mlp.experts.6.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.7.mlp.experts.7.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.7.mlp.experts.7.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.7.mlp.experts.7.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.7.mlp.experts.8.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.7.mlp.experts.8.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.7.mlp.experts.8.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.7.mlp.experts.9.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.7.mlp.experts.9.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.7.mlp.experts.9.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.7.mlp.experts.10.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.7.mlp.experts.10.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.7.mlp.experts.10.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.7.mlp.experts.11.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.7.mlp.experts.11.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.7.mlp.experts.11.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.7.mlp.experts.12.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.7.mlp.experts.12.up_proj.weight": 
"model-00001-of-000001.safetensors", + "model.layers.7.mlp.experts.12.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.7.mlp.experts.13.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.7.mlp.experts.13.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.7.mlp.experts.13.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.7.mlp.experts.14.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.7.mlp.experts.14.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.7.mlp.experts.14.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.7.mlp.experts.15.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.7.mlp.experts.15.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.7.mlp.experts.15.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.7.mlp.experts.16.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.7.mlp.experts.16.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.7.mlp.experts.16.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.7.mlp.experts.17.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.7.mlp.experts.17.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.7.mlp.experts.17.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.7.mlp.experts.18.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.7.mlp.experts.18.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.7.mlp.experts.18.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.7.mlp.experts.19.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.7.mlp.experts.19.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.7.mlp.experts.19.down_proj.weight": "model-00001-of-000001.safetensors", + 
"model.layers.7.mlp.experts.20.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.7.mlp.experts.20.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.7.mlp.experts.20.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.7.mlp.experts.21.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.7.mlp.experts.21.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.7.mlp.experts.21.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.7.mlp.experts.22.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.7.mlp.experts.22.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.7.mlp.experts.22.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.7.mlp.experts.23.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.7.mlp.experts.23.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.7.mlp.experts.23.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.7.mlp.experts.24.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.7.mlp.experts.24.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.7.mlp.experts.24.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.7.mlp.experts.25.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.7.mlp.experts.25.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.7.mlp.experts.25.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.7.mlp.experts.26.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.7.mlp.experts.26.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.7.mlp.experts.26.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.7.mlp.experts.27.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.7.mlp.experts.27.up_proj.weight": 
"model-00001-of-000001.safetensors", + "model.layers.7.mlp.experts.27.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.7.mlp.experts.28.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.7.mlp.experts.28.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.7.mlp.experts.28.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.7.mlp.experts.29.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.7.mlp.experts.29.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.7.mlp.experts.29.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.7.mlp.experts.30.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.7.mlp.experts.30.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.7.mlp.experts.30.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.7.mlp.experts.31.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.7.mlp.experts.31.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.7.mlp.experts.31.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.7.mlp.experts.32.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.7.mlp.experts.32.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.7.mlp.experts.32.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.7.mlp.experts.33.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.7.mlp.experts.33.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.7.mlp.experts.33.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.7.mlp.experts.34.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.7.mlp.experts.34.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.7.mlp.experts.34.down_proj.weight": "model-00001-of-000001.safetensors", + 
"model.layers.7.mlp.experts.35.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.7.mlp.experts.35.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.7.mlp.experts.35.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.7.mlp.experts.36.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.7.mlp.experts.36.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.7.mlp.experts.36.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.7.mlp.experts.37.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.7.mlp.experts.37.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.7.mlp.experts.37.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.7.mlp.experts.38.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.7.mlp.experts.38.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.7.mlp.experts.38.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.7.mlp.experts.39.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.7.mlp.experts.39.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.7.mlp.experts.39.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.7.mlp.experts.40.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.7.mlp.experts.40.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.7.mlp.experts.40.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.7.mlp.experts.41.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.7.mlp.experts.41.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.7.mlp.experts.41.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.7.mlp.experts.42.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.7.mlp.experts.42.up_proj.weight": 
"model-00001-of-000001.safetensors", + "model.layers.7.mlp.experts.42.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.7.mlp.experts.43.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.7.mlp.experts.43.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.7.mlp.experts.43.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.7.mlp.experts.44.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.7.mlp.experts.44.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.7.mlp.experts.44.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.7.mlp.experts.45.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.7.mlp.experts.45.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.7.mlp.experts.45.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.7.mlp.experts.46.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.7.mlp.experts.46.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.7.mlp.experts.46.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.7.mlp.experts.47.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.7.mlp.experts.47.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.7.mlp.experts.47.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.7.mlp.experts.48.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.7.mlp.experts.48.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.7.mlp.experts.48.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.7.mlp.experts.49.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.7.mlp.experts.49.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.7.mlp.experts.49.down_proj.weight": "model-00001-of-000001.safetensors", + 
"model.layers.7.mlp.experts.50.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.7.mlp.experts.50.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.7.mlp.experts.50.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.7.mlp.experts.51.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.7.mlp.experts.51.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.7.mlp.experts.51.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.7.mlp.experts.52.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.7.mlp.experts.52.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.7.mlp.experts.52.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.7.mlp.experts.53.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.7.mlp.experts.53.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.7.mlp.experts.53.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.7.mlp.experts.54.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.7.mlp.experts.54.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.7.mlp.experts.54.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.7.mlp.experts.55.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.7.mlp.experts.55.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.7.mlp.experts.55.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.7.mlp.experts.56.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.7.mlp.experts.56.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.7.mlp.experts.56.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.7.mlp.experts.57.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.7.mlp.experts.57.up_proj.weight": 
"model-00001-of-000001.safetensors", + "model.layers.7.mlp.experts.57.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.7.mlp.experts.58.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.7.mlp.experts.58.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.7.mlp.experts.58.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.7.mlp.experts.59.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.7.mlp.experts.59.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.7.mlp.experts.59.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.7.mlp.experts.60.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.7.mlp.experts.60.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.7.mlp.experts.60.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.7.mlp.experts.61.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.7.mlp.experts.61.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.7.mlp.experts.61.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.7.mlp.experts.62.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.7.mlp.experts.62.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.7.mlp.experts.62.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.7.mlp.experts.63.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.7.mlp.experts.63.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.7.mlp.experts.63.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.7.input_layernorm.weight": "model-00001-of-000001.safetensors", + "model.layers.7.post_attention_layernorm.weight": "model-00001-of-000001.safetensors", + "model.layers.8.self_attn.q_proj.weight": "model-00001-of-000001.safetensors", + 
"model.layers.8.self_attn.k_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.8.self_attn.v_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.8.self_attn.o_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.8.mlp.gate.weight": "model-00001-of-000001.safetensors", + "model.layers.8.mlp.shared_experts.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.8.mlp.shared_experts.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.8.mlp.shared_experts.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.8.mlp.experts.0.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.8.mlp.experts.0.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.8.mlp.experts.0.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.8.mlp.experts.1.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.8.mlp.experts.1.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.8.mlp.experts.1.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.8.mlp.experts.2.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.8.mlp.experts.2.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.8.mlp.experts.2.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.8.mlp.experts.3.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.8.mlp.experts.3.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.8.mlp.experts.3.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.8.mlp.experts.4.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.8.mlp.experts.4.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.8.mlp.experts.4.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.8.mlp.experts.5.gate_proj.weight": "model-00001-of-000001.safetensors", + 
"model.layers.8.mlp.experts.5.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.8.mlp.experts.5.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.8.mlp.experts.6.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.8.mlp.experts.6.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.8.mlp.experts.6.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.8.mlp.experts.7.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.8.mlp.experts.7.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.8.mlp.experts.7.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.8.mlp.experts.8.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.8.mlp.experts.8.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.8.mlp.experts.8.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.8.mlp.experts.9.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.8.mlp.experts.9.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.8.mlp.experts.9.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.8.mlp.experts.10.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.8.mlp.experts.10.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.8.mlp.experts.10.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.8.mlp.experts.11.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.8.mlp.experts.11.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.8.mlp.experts.11.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.8.mlp.experts.12.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.8.mlp.experts.12.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.8.mlp.experts.12.down_proj.weight": 
"model-00001-of-000001.safetensors", + "model.layers.8.mlp.experts.13.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.8.mlp.experts.13.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.8.mlp.experts.13.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.8.mlp.experts.14.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.8.mlp.experts.14.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.8.mlp.experts.14.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.8.mlp.experts.15.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.8.mlp.experts.15.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.8.mlp.experts.15.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.8.mlp.experts.16.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.8.mlp.experts.16.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.8.mlp.experts.16.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.8.mlp.experts.17.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.8.mlp.experts.17.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.8.mlp.experts.17.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.8.mlp.experts.18.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.8.mlp.experts.18.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.8.mlp.experts.18.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.8.mlp.experts.19.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.8.mlp.experts.19.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.8.mlp.experts.19.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.8.mlp.experts.20.gate_proj.weight": "model-00001-of-000001.safetensors", + 
"model.layers.8.mlp.experts.20.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.8.mlp.experts.20.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.8.mlp.experts.21.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.8.mlp.experts.21.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.8.mlp.experts.21.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.8.mlp.experts.22.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.8.mlp.experts.22.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.8.mlp.experts.22.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.8.mlp.experts.23.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.8.mlp.experts.23.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.8.mlp.experts.23.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.8.mlp.experts.24.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.8.mlp.experts.24.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.8.mlp.experts.24.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.8.mlp.experts.25.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.8.mlp.experts.25.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.8.mlp.experts.25.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.8.mlp.experts.26.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.8.mlp.experts.26.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.8.mlp.experts.26.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.8.mlp.experts.27.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.8.mlp.experts.27.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.8.mlp.experts.27.down_proj.weight": 
"model-00001-of-000001.safetensors", + "model.layers.8.mlp.experts.28.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.8.mlp.experts.28.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.8.mlp.experts.28.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.8.mlp.experts.29.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.8.mlp.experts.29.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.8.mlp.experts.29.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.8.mlp.experts.30.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.8.mlp.experts.30.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.8.mlp.experts.30.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.8.mlp.experts.31.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.8.mlp.experts.31.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.8.mlp.experts.31.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.8.mlp.experts.32.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.8.mlp.experts.32.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.8.mlp.experts.32.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.8.mlp.experts.33.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.8.mlp.experts.33.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.8.mlp.experts.33.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.8.mlp.experts.34.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.8.mlp.experts.34.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.8.mlp.experts.34.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.8.mlp.experts.35.gate_proj.weight": "model-00001-of-000001.safetensors", + 
"model.layers.8.mlp.experts.35.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.8.mlp.experts.35.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.8.mlp.experts.36.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.8.mlp.experts.36.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.8.mlp.experts.36.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.8.mlp.experts.37.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.8.mlp.experts.37.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.8.mlp.experts.37.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.8.mlp.experts.38.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.8.mlp.experts.38.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.8.mlp.experts.38.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.8.mlp.experts.39.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.8.mlp.experts.39.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.8.mlp.experts.39.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.8.mlp.experts.40.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.8.mlp.experts.40.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.8.mlp.experts.40.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.8.mlp.experts.41.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.8.mlp.experts.41.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.8.mlp.experts.41.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.8.mlp.experts.42.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.8.mlp.experts.42.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.8.mlp.experts.42.down_proj.weight": 
"model-00001-of-000001.safetensors", + "model.layers.8.mlp.experts.43.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.8.mlp.experts.43.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.8.mlp.experts.43.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.8.mlp.experts.44.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.8.mlp.experts.44.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.8.mlp.experts.44.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.8.mlp.experts.45.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.8.mlp.experts.45.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.8.mlp.experts.45.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.8.mlp.experts.46.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.8.mlp.experts.46.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.8.mlp.experts.46.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.8.mlp.experts.47.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.8.mlp.experts.47.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.8.mlp.experts.47.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.8.mlp.experts.48.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.8.mlp.experts.48.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.8.mlp.experts.48.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.8.mlp.experts.49.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.8.mlp.experts.49.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.8.mlp.experts.49.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.8.mlp.experts.50.gate_proj.weight": "model-00001-of-000001.safetensors", + 
"model.layers.8.mlp.experts.50.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.8.mlp.experts.50.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.8.mlp.experts.51.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.8.mlp.experts.51.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.8.mlp.experts.51.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.8.mlp.experts.52.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.8.mlp.experts.52.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.8.mlp.experts.52.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.8.mlp.experts.53.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.8.mlp.experts.53.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.8.mlp.experts.53.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.8.mlp.experts.54.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.8.mlp.experts.54.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.8.mlp.experts.54.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.8.mlp.experts.55.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.8.mlp.experts.55.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.8.mlp.experts.55.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.8.mlp.experts.56.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.8.mlp.experts.56.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.8.mlp.experts.56.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.8.mlp.experts.57.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.8.mlp.experts.57.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.8.mlp.experts.57.down_proj.weight": 
"model-00001-of-000001.safetensors", + "model.layers.8.mlp.experts.58.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.8.mlp.experts.58.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.8.mlp.experts.58.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.8.mlp.experts.59.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.8.mlp.experts.59.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.8.mlp.experts.59.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.8.mlp.experts.60.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.8.mlp.experts.60.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.8.mlp.experts.60.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.8.mlp.experts.61.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.8.mlp.experts.61.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.8.mlp.experts.61.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.8.mlp.experts.62.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.8.mlp.experts.62.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.8.mlp.experts.62.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.8.mlp.experts.63.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.8.mlp.experts.63.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.8.mlp.experts.63.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.8.input_layernorm.weight": "model-00001-of-000001.safetensors", + "model.layers.8.post_attention_layernorm.weight": "model-00001-of-000001.safetensors", + "model.layers.9.self_attn.q_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.9.self_attn.k_proj.weight": "model-00001-of-000001.safetensors", + 
"model.layers.9.self_attn.v_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.9.self_attn.o_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.9.mlp.gate.weight": "model-00001-of-000001.safetensors", + "model.layers.9.mlp.shared_experts.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.9.mlp.shared_experts.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.9.mlp.shared_experts.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.9.mlp.experts.0.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.9.mlp.experts.0.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.9.mlp.experts.0.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.9.mlp.experts.1.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.9.mlp.experts.1.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.9.mlp.experts.1.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.9.mlp.experts.2.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.9.mlp.experts.2.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.9.mlp.experts.2.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.9.mlp.experts.3.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.9.mlp.experts.3.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.9.mlp.experts.3.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.9.mlp.experts.4.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.9.mlp.experts.4.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.9.mlp.experts.4.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.9.mlp.experts.5.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.9.mlp.experts.5.up_proj.weight": "model-00001-of-000001.safetensors", + 
"model.layers.9.mlp.experts.5.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.9.mlp.experts.6.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.9.mlp.experts.6.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.9.mlp.experts.6.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.9.mlp.experts.7.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.9.mlp.experts.7.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.9.mlp.experts.7.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.9.mlp.experts.8.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.9.mlp.experts.8.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.9.mlp.experts.8.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.9.mlp.experts.9.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.9.mlp.experts.9.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.9.mlp.experts.9.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.9.mlp.experts.10.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.9.mlp.experts.10.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.9.mlp.experts.10.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.9.mlp.experts.11.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.9.mlp.experts.11.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.9.mlp.experts.11.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.9.mlp.experts.12.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.9.mlp.experts.12.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.9.mlp.experts.12.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.9.mlp.experts.13.gate_proj.weight": 
"model-00001-of-000001.safetensors", + "model.layers.9.mlp.experts.13.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.9.mlp.experts.13.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.9.mlp.experts.14.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.9.mlp.experts.14.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.9.mlp.experts.14.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.9.mlp.experts.15.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.9.mlp.experts.15.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.9.mlp.experts.15.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.9.mlp.experts.16.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.9.mlp.experts.16.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.9.mlp.experts.16.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.9.mlp.experts.17.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.9.mlp.experts.17.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.9.mlp.experts.17.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.9.mlp.experts.18.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.9.mlp.experts.18.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.9.mlp.experts.18.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.9.mlp.experts.19.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.9.mlp.experts.19.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.9.mlp.experts.19.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.9.mlp.experts.20.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.9.mlp.experts.20.up_proj.weight": "model-00001-of-000001.safetensors", + 
"model.layers.9.mlp.experts.20.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.9.mlp.experts.21.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.9.mlp.experts.21.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.9.mlp.experts.21.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.9.mlp.experts.22.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.9.mlp.experts.22.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.9.mlp.experts.22.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.9.mlp.experts.23.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.9.mlp.experts.23.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.9.mlp.experts.23.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.9.mlp.experts.24.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.9.mlp.experts.24.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.9.mlp.experts.24.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.9.mlp.experts.25.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.9.mlp.experts.25.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.9.mlp.experts.25.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.9.mlp.experts.26.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.9.mlp.experts.26.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.9.mlp.experts.26.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.9.mlp.experts.27.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.9.mlp.experts.27.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.9.mlp.experts.27.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.9.mlp.experts.28.gate_proj.weight": 
"model-00001-of-000001.safetensors", + "model.layers.9.mlp.experts.28.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.9.mlp.experts.28.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.9.mlp.experts.29.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.9.mlp.experts.29.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.9.mlp.experts.29.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.9.mlp.experts.30.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.9.mlp.experts.30.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.9.mlp.experts.30.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.9.mlp.experts.31.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.9.mlp.experts.31.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.9.mlp.experts.31.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.9.mlp.experts.32.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.9.mlp.experts.32.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.9.mlp.experts.32.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.9.mlp.experts.33.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.9.mlp.experts.33.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.9.mlp.experts.33.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.9.mlp.experts.34.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.9.mlp.experts.34.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.9.mlp.experts.34.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.9.mlp.experts.35.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.9.mlp.experts.35.up_proj.weight": "model-00001-of-000001.safetensors", + 
"model.layers.9.mlp.experts.35.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.9.mlp.experts.36.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.9.mlp.experts.36.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.9.mlp.experts.36.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.9.mlp.experts.37.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.9.mlp.experts.37.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.9.mlp.experts.37.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.9.mlp.experts.38.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.9.mlp.experts.38.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.9.mlp.experts.38.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.9.mlp.experts.39.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.9.mlp.experts.39.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.9.mlp.experts.39.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.9.mlp.experts.40.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.9.mlp.experts.40.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.9.mlp.experts.40.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.9.mlp.experts.41.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.9.mlp.experts.41.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.9.mlp.experts.41.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.9.mlp.experts.42.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.9.mlp.experts.42.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.9.mlp.experts.42.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.9.mlp.experts.43.gate_proj.weight": 
"model-00001-of-000001.safetensors", + "model.layers.9.mlp.experts.43.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.9.mlp.experts.43.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.9.mlp.experts.44.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.9.mlp.experts.44.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.9.mlp.experts.44.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.9.mlp.experts.45.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.9.mlp.experts.45.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.9.mlp.experts.45.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.9.mlp.experts.46.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.9.mlp.experts.46.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.9.mlp.experts.46.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.9.mlp.experts.47.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.9.mlp.experts.47.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.9.mlp.experts.47.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.9.mlp.experts.48.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.9.mlp.experts.48.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.9.mlp.experts.48.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.9.mlp.experts.49.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.9.mlp.experts.49.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.9.mlp.experts.49.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.9.mlp.experts.50.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.9.mlp.experts.50.up_proj.weight": "model-00001-of-000001.safetensors", + 
"model.layers.9.mlp.experts.50.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.9.mlp.experts.51.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.9.mlp.experts.51.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.9.mlp.experts.51.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.9.mlp.experts.52.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.9.mlp.experts.52.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.9.mlp.experts.52.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.9.mlp.experts.53.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.9.mlp.experts.53.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.9.mlp.experts.53.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.9.mlp.experts.54.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.9.mlp.experts.54.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.9.mlp.experts.54.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.9.mlp.experts.55.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.9.mlp.experts.55.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.9.mlp.experts.55.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.9.mlp.experts.56.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.9.mlp.experts.56.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.9.mlp.experts.56.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.9.mlp.experts.57.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.9.mlp.experts.57.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.9.mlp.experts.57.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.9.mlp.experts.58.gate_proj.weight": 
"model-00001-of-000001.safetensors", + "model.layers.9.mlp.experts.58.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.9.mlp.experts.58.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.9.mlp.experts.59.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.9.mlp.experts.59.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.9.mlp.experts.59.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.9.mlp.experts.60.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.9.mlp.experts.60.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.9.mlp.experts.60.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.9.mlp.experts.61.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.9.mlp.experts.61.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.9.mlp.experts.61.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.9.mlp.experts.62.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.9.mlp.experts.62.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.9.mlp.experts.62.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.9.mlp.experts.63.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.9.mlp.experts.63.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.9.mlp.experts.63.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.9.input_layernorm.weight": "model-00001-of-000001.safetensors", + "model.layers.9.post_attention_layernorm.weight": "model-00001-of-000001.safetensors", + "model.layers.10.self_attn.q_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.10.self_attn.k_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.10.self_attn.v_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.10.self_attn.o_proj.weight": 
"model-00001-of-000001.safetensors", + "model.layers.10.mlp.gate.weight": "model-00001-of-000001.safetensors", + "model.layers.10.mlp.shared_experts.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.10.mlp.shared_experts.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.10.mlp.shared_experts.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.10.mlp.experts.0.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.10.mlp.experts.0.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.10.mlp.experts.0.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.10.mlp.experts.1.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.10.mlp.experts.1.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.10.mlp.experts.1.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.10.mlp.experts.2.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.10.mlp.experts.2.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.10.mlp.experts.2.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.10.mlp.experts.3.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.10.mlp.experts.3.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.10.mlp.experts.3.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.10.mlp.experts.4.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.10.mlp.experts.4.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.10.mlp.experts.4.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.10.mlp.experts.5.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.10.mlp.experts.5.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.10.mlp.experts.5.down_proj.weight": "model-00001-of-000001.safetensors", + 
"model.layers.10.mlp.experts.6.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.10.mlp.experts.6.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.10.mlp.experts.6.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.10.mlp.experts.7.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.10.mlp.experts.7.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.10.mlp.experts.7.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.10.mlp.experts.8.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.10.mlp.experts.8.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.10.mlp.experts.8.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.10.mlp.experts.9.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.10.mlp.experts.9.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.10.mlp.experts.9.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.10.mlp.experts.10.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.10.mlp.experts.10.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.10.mlp.experts.10.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.10.mlp.experts.11.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.10.mlp.experts.11.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.10.mlp.experts.11.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.10.mlp.experts.12.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.10.mlp.experts.12.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.10.mlp.experts.12.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.10.mlp.experts.13.gate_proj.weight": "model-00001-of-000001.safetensors", + 
"model.layers.10.mlp.experts.13.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.10.mlp.experts.13.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.10.mlp.experts.14.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.10.mlp.experts.14.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.10.mlp.experts.14.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.10.mlp.experts.15.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.10.mlp.experts.15.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.10.mlp.experts.15.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.10.mlp.experts.16.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.10.mlp.experts.16.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.10.mlp.experts.16.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.10.mlp.experts.17.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.10.mlp.experts.17.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.10.mlp.experts.17.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.10.mlp.experts.18.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.10.mlp.experts.18.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.10.mlp.experts.18.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.10.mlp.experts.19.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.10.mlp.experts.19.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.10.mlp.experts.19.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.10.mlp.experts.20.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.10.mlp.experts.20.up_proj.weight": "model-00001-of-000001.safetensors", + 
"model.layers.10.mlp.experts.20.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.10.mlp.experts.21.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.10.mlp.experts.21.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.10.mlp.experts.21.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.10.mlp.experts.22.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.10.mlp.experts.22.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.10.mlp.experts.22.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.10.mlp.experts.23.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.10.mlp.experts.23.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.10.mlp.experts.23.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.10.mlp.experts.24.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.10.mlp.experts.24.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.10.mlp.experts.24.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.10.mlp.experts.25.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.10.mlp.experts.25.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.10.mlp.experts.25.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.10.mlp.experts.26.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.10.mlp.experts.26.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.10.mlp.experts.26.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.10.mlp.experts.27.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.10.mlp.experts.27.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.10.mlp.experts.27.down_proj.weight": "model-00001-of-000001.safetensors", + 
"model.layers.10.mlp.experts.28.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.10.mlp.experts.28.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.10.mlp.experts.28.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.10.mlp.experts.29.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.10.mlp.experts.29.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.10.mlp.experts.29.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.10.mlp.experts.30.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.10.mlp.experts.30.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.10.mlp.experts.30.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.10.mlp.experts.31.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.10.mlp.experts.31.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.10.mlp.experts.31.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.10.mlp.experts.32.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.10.mlp.experts.32.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.10.mlp.experts.32.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.10.mlp.experts.33.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.10.mlp.experts.33.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.10.mlp.experts.33.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.10.mlp.experts.34.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.10.mlp.experts.34.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.10.mlp.experts.34.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.10.mlp.experts.35.gate_proj.weight": "model-00001-of-000001.safetensors", + 
"model.layers.10.mlp.experts.35.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.10.mlp.experts.35.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.10.mlp.experts.36.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.10.mlp.experts.36.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.10.mlp.experts.36.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.10.mlp.experts.37.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.10.mlp.experts.37.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.10.mlp.experts.37.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.10.mlp.experts.38.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.10.mlp.experts.38.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.10.mlp.experts.38.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.10.mlp.experts.39.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.10.mlp.experts.39.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.10.mlp.experts.39.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.10.mlp.experts.40.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.10.mlp.experts.40.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.10.mlp.experts.40.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.10.mlp.experts.41.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.10.mlp.experts.41.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.10.mlp.experts.41.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.10.mlp.experts.42.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.10.mlp.experts.42.up_proj.weight": "model-00001-of-000001.safetensors", + 
"model.layers.10.mlp.experts.42.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.10.mlp.experts.43.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.10.mlp.experts.43.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.10.mlp.experts.43.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.10.mlp.experts.44.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.10.mlp.experts.44.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.10.mlp.experts.44.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.10.mlp.experts.45.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.10.mlp.experts.45.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.10.mlp.experts.45.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.10.mlp.experts.46.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.10.mlp.experts.46.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.10.mlp.experts.46.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.10.mlp.experts.47.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.10.mlp.experts.47.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.10.mlp.experts.47.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.10.mlp.experts.48.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.10.mlp.experts.48.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.10.mlp.experts.48.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.10.mlp.experts.49.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.10.mlp.experts.49.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.10.mlp.experts.49.down_proj.weight": "model-00001-of-000001.safetensors", + 
"model.layers.10.mlp.experts.50.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.10.mlp.experts.50.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.10.mlp.experts.50.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.10.mlp.experts.51.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.10.mlp.experts.51.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.10.mlp.experts.51.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.10.mlp.experts.52.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.10.mlp.experts.52.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.10.mlp.experts.52.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.10.mlp.experts.53.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.10.mlp.experts.53.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.10.mlp.experts.53.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.10.mlp.experts.54.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.10.mlp.experts.54.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.10.mlp.experts.54.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.10.mlp.experts.55.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.10.mlp.experts.55.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.10.mlp.experts.55.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.10.mlp.experts.56.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.10.mlp.experts.56.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.10.mlp.experts.56.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.10.mlp.experts.57.gate_proj.weight": "model-00001-of-000001.safetensors", + 
"model.layers.10.mlp.experts.57.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.10.mlp.experts.57.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.10.mlp.experts.58.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.10.mlp.experts.58.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.10.mlp.experts.58.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.10.mlp.experts.59.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.10.mlp.experts.59.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.10.mlp.experts.59.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.10.mlp.experts.60.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.10.mlp.experts.60.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.10.mlp.experts.60.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.10.mlp.experts.61.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.10.mlp.experts.61.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.10.mlp.experts.61.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.10.mlp.experts.62.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.10.mlp.experts.62.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.10.mlp.experts.62.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.10.mlp.experts.63.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.10.mlp.experts.63.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.10.mlp.experts.63.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.10.input_layernorm.weight": "model-00001-of-000001.safetensors", + "model.layers.10.post_attention_layernorm.weight": "model-00001-of-000001.safetensors", + "model.layers.11.self_attn.q_proj.weight": 
"model-00001-of-000001.safetensors", + "model.layers.11.self_attn.k_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.11.self_attn.v_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.11.self_attn.o_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.11.mlp.gate.weight": "model-00001-of-000001.safetensors", + "model.layers.11.mlp.shared_experts.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.11.mlp.shared_experts.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.11.mlp.shared_experts.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.11.mlp.experts.0.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.11.mlp.experts.0.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.11.mlp.experts.0.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.11.mlp.experts.1.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.11.mlp.experts.1.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.11.mlp.experts.1.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.11.mlp.experts.2.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.11.mlp.experts.2.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.11.mlp.experts.2.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.11.mlp.experts.3.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.11.mlp.experts.3.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.11.mlp.experts.3.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.11.mlp.experts.4.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.11.mlp.experts.4.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.11.mlp.experts.4.down_proj.weight": "model-00001-of-000001.safetensors", + 
"model.layers.11.mlp.experts.5.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.11.mlp.experts.5.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.11.mlp.experts.5.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.11.mlp.experts.6.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.11.mlp.experts.6.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.11.mlp.experts.6.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.11.mlp.experts.7.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.11.mlp.experts.7.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.11.mlp.experts.7.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.11.mlp.experts.8.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.11.mlp.experts.8.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.11.mlp.experts.8.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.11.mlp.experts.9.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.11.mlp.experts.9.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.11.mlp.experts.9.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.11.mlp.experts.10.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.11.mlp.experts.10.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.11.mlp.experts.10.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.11.mlp.experts.11.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.11.mlp.experts.11.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.11.mlp.experts.11.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.11.mlp.experts.12.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.11.mlp.experts.12.up_proj.weight": 
"model-00001-of-000001.safetensors", + "model.layers.11.mlp.experts.12.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.11.mlp.experts.13.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.11.mlp.experts.13.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.11.mlp.experts.13.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.11.mlp.experts.14.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.11.mlp.experts.14.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.11.mlp.experts.14.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.11.mlp.experts.15.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.11.mlp.experts.15.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.11.mlp.experts.15.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.11.mlp.experts.16.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.11.mlp.experts.16.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.11.mlp.experts.16.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.11.mlp.experts.17.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.11.mlp.experts.17.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.11.mlp.experts.17.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.11.mlp.experts.18.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.11.mlp.experts.18.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.11.mlp.experts.18.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.11.mlp.experts.19.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.11.mlp.experts.19.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.11.mlp.experts.19.down_proj.weight": 
"model-00001-of-000001.safetensors", + "model.layers.11.mlp.experts.20.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.11.mlp.experts.20.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.11.mlp.experts.20.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.11.mlp.experts.21.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.11.mlp.experts.21.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.11.mlp.experts.21.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.11.mlp.experts.22.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.11.mlp.experts.22.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.11.mlp.experts.22.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.11.mlp.experts.23.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.11.mlp.experts.23.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.11.mlp.experts.23.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.11.mlp.experts.24.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.11.mlp.experts.24.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.11.mlp.experts.24.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.11.mlp.experts.25.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.11.mlp.experts.25.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.11.mlp.experts.25.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.11.mlp.experts.26.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.11.mlp.experts.26.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.11.mlp.experts.26.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.11.mlp.experts.27.gate_proj.weight": 
"model-00001-of-000001.safetensors", + "model.layers.11.mlp.experts.27.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.11.mlp.experts.27.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.11.mlp.experts.28.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.11.mlp.experts.28.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.11.mlp.experts.28.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.11.mlp.experts.29.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.11.mlp.experts.29.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.11.mlp.experts.29.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.11.mlp.experts.30.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.11.mlp.experts.30.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.11.mlp.experts.30.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.11.mlp.experts.31.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.11.mlp.experts.31.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.11.mlp.experts.31.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.11.mlp.experts.32.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.11.mlp.experts.32.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.11.mlp.experts.32.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.11.mlp.experts.33.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.11.mlp.experts.33.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.11.mlp.experts.33.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.11.mlp.experts.34.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.11.mlp.experts.34.up_proj.weight": 
"model-00001-of-000001.safetensors", + "model.layers.11.mlp.experts.34.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.11.mlp.experts.35.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.11.mlp.experts.35.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.11.mlp.experts.35.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.11.mlp.experts.36.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.11.mlp.experts.36.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.11.mlp.experts.36.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.11.mlp.experts.37.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.11.mlp.experts.37.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.11.mlp.experts.37.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.11.mlp.experts.38.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.11.mlp.experts.38.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.11.mlp.experts.38.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.11.mlp.experts.39.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.11.mlp.experts.39.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.11.mlp.experts.39.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.11.mlp.experts.40.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.11.mlp.experts.40.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.11.mlp.experts.40.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.11.mlp.experts.41.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.11.mlp.experts.41.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.11.mlp.experts.41.down_proj.weight": 
"model-00001-of-000001.safetensors", + "model.layers.11.mlp.experts.42.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.11.mlp.experts.42.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.11.mlp.experts.42.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.11.mlp.experts.43.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.11.mlp.experts.43.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.11.mlp.experts.43.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.11.mlp.experts.44.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.11.mlp.experts.44.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.11.mlp.experts.44.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.11.mlp.experts.45.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.11.mlp.experts.45.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.11.mlp.experts.45.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.11.mlp.experts.46.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.11.mlp.experts.46.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.11.mlp.experts.46.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.11.mlp.experts.47.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.11.mlp.experts.47.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.11.mlp.experts.47.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.11.mlp.experts.48.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.11.mlp.experts.48.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.11.mlp.experts.48.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.11.mlp.experts.49.gate_proj.weight": 
"model-00001-of-000001.safetensors", + "model.layers.11.mlp.experts.49.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.11.mlp.experts.49.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.11.mlp.experts.50.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.11.mlp.experts.50.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.11.mlp.experts.50.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.11.mlp.experts.51.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.11.mlp.experts.51.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.11.mlp.experts.51.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.11.mlp.experts.52.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.11.mlp.experts.52.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.11.mlp.experts.52.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.11.mlp.experts.53.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.11.mlp.experts.53.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.11.mlp.experts.53.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.11.mlp.experts.54.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.11.mlp.experts.54.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.11.mlp.experts.54.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.11.mlp.experts.55.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.11.mlp.experts.55.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.11.mlp.experts.55.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.11.mlp.experts.56.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.11.mlp.experts.56.up_proj.weight": 
"model-00001-of-000001.safetensors", + "model.layers.11.mlp.experts.56.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.11.mlp.experts.57.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.11.mlp.experts.57.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.11.mlp.experts.57.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.11.mlp.experts.58.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.11.mlp.experts.58.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.11.mlp.experts.58.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.11.mlp.experts.59.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.11.mlp.experts.59.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.11.mlp.experts.59.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.11.mlp.experts.60.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.11.mlp.experts.60.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.11.mlp.experts.60.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.11.mlp.experts.61.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.11.mlp.experts.61.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.11.mlp.experts.61.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.11.mlp.experts.62.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.11.mlp.experts.62.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.11.mlp.experts.62.down_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.11.mlp.experts.63.gate_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.11.mlp.experts.63.up_proj.weight": "model-00001-of-000001.safetensors", + "model.layers.11.mlp.experts.63.down_proj.weight": 
"model-00001-of-000001.safetensors", + "model.layers.11.input_layernorm.weight": "model-00001-of-000001.safetensors", + "model.layers.11.post_attention_layernorm.weight": "model-00001-of-000001.safetensors" + } +} \ No newline at end of file diff --git a/modeling_deepseekocr.py b/modeling_deepseekocr.py new file mode 100644 index 0000000..05ebf94 --- /dev/null +++ b/modeling_deepseekocr.py @@ -0,0 +1,1037 @@ +from .modeling_deepseekv2 import DeepseekV2Model, DeepseekV2ForCausalLM +from .configuration_deepseek_v2 import DeepseekV2Config +from transformers.modeling_outputs import BaseModelOutputWithPast, CausalLMOutputWithPast +from typing import List, Optional, Tuple, Union +from transformers.cache_utils import Cache +import requests +from PIL import Image, ImageOps, ImageDraw, ImageFont +from io import BytesIO +import torch +import torch.nn as nn +from torch.nn import CrossEntropyLoss +from torchvision import transforms +from torchvision.transforms.functional import InterpolationMode +import os +from .deepencoder import build_sam_vit_b, build_clip_l, MlpProjector +from addict import Dict +from transformers import TextStreamer +from .conversation import get_conv_template +from abc import ABC +import math +import re +from tqdm import tqdm +import numpy as np +import time + + +def load_image(image_path): + + try: + image = Image.open(image_path) + + corrected_image = ImageOps.exif_transpose(image) + + return corrected_image + + except Exception as e: + print(f"error: {e}") + try: + return Image.open(image_path) + except: + return None + + +def re_match(text): + pattern = r'(<\|ref\|>(.*?)<\|/ref\|><\|det\|>(.*?)<\|/det\|>)' + matches = re.findall(pattern, text, re.DOTALL) + + # pattern1 = r'<\|ref\|>.*?<\|/ref\|>\n' + # new_text1 = re.sub(pattern1, '', text, flags=re.DOTALL) + + mathes_image = [] + mathes_other = [] + for a_match in matches: + if '<|ref|>image<|/ref|>' in a_match[0]: + mathes_image.append(a_match[0]) + else: + mathes_other.append(a_match[0]) + return 
matches, mathes_image, mathes_other + + +def extract_coordinates_and_label(ref_text, image_width, image_height): + + try: + label_type = ref_text[1] + cor_list = eval(ref_text[2]) + except Exception as e: + print(e) + return None + + return (label_type, cor_list) + + +def draw_bounding_boxes(image, refs, ouput_path): + + image_width, image_height = image.size + + img_draw = image.copy() + draw = ImageDraw.Draw(img_draw) + + overlay = Image.new('RGBA', img_draw.size, (0, 0, 0, 0)) + draw2 = ImageDraw.Draw(overlay) + + # try: + # except IOError: + # try: + # font = ImageFont.truetype("DejaVuSans.ttf", 20) + # except IOError: + font = ImageFont.load_default() + + img_idx = 0 + + for i, ref in enumerate(refs): + try: + result = extract_coordinates_and_label(ref, image_width, image_height) + if result: + label_type, points_list = result + + color = (np.random.randint(0, 200), np.random.randint(0, 200), np.random.randint(0, 255)) + + color_a = color + (20, ) + for points in points_list: + x1, y1, x2, y2 = points + + x1 = int(x1 / 999 * image_width) + y1 = int(y1 / 999 * image_height) + + x2 = int(x2 / 999 * image_width) + y2 = int(y2 / 999 * image_height) + + if label_type == 'image': + try: + cropped = image.crop((x1, y1, x2, y2)) + cropped.save(f"{ouput_path}/images/{img_idx}.jpg") + except Exception as e: + print(e) + pass + img_idx += 1 + + try: + if label_type == 'title': + draw.rectangle([x1, y1, x2, y2], outline=color, width=4) + draw2.rectangle([x1, y1, x2, y2], fill=color_a, outline=(0, 0, 0, 0), width=1) + else: + draw.rectangle([x1, y1, x2, y2], outline=color, width=2) + draw2.rectangle([x1, y1, x2, y2], fill=color_a, outline=(0, 0, 0, 0), width=1) + text_x = x1 + text_y = max(0, y1 - 15) + + + text_bbox = draw.textbbox((0, 0), label_type, font=font) + text_width = text_bbox[2] - text_bbox[0] + text_height = text_bbox[3] - text_bbox[1] + draw.rectangle([text_x, text_y, text_x + text_width, text_y + text_height], + fill=(255, 255, 255, 30)) + + 
draw.text((text_x, text_y), label_type, font=font, fill=color) + except: + pass + except: + continue + img_draw.paste(overlay, (0, 0), overlay) + return img_draw + + +def process_image_with_refs(image, ref_texts, output_path): + + result_image = draw_bounding_boxes(image, ref_texts, output_path) + + return result_image + + + + + +def find_closest_aspect_ratio(aspect_ratio, target_ratios, width, height, image_size): + best_ratio_diff = float('inf') + best_ratio = (1, 1) + area = width * height + for ratio in target_ratios: + target_aspect_ratio = ratio[0] / ratio[1] + ratio_diff = abs(aspect_ratio - target_aspect_ratio) + if ratio_diff < best_ratio_diff: + best_ratio_diff = ratio_diff + best_ratio = ratio + elif ratio_diff == best_ratio_diff: + if area > 0.5 * image_size * image_size * ratio[0] * ratio[1]: + best_ratio = ratio + # print(f'width: {width}, height: {height}, best_ratio: {best_ratio}') + return best_ratio + + +def dynamic_preprocess(image, min_num=2, max_num=9, image_size=640, use_thumbnail=False): + orig_width, orig_height = image.size + aspect_ratio = orig_width / orig_height + + # calculate the existing image aspect ratio + target_ratios = set( + (i, j) for n in range(min_num, max_num + 1) for i in range(1, n + 1) for j in range(1, n + 1) if + i * j <= max_num and i * j >= min_num) + # print(target_ratios) + target_ratios = sorted(target_ratios, key=lambda x: x[0] * x[1]) + + # find the closest aspect ratio to the target + target_aspect_ratio = find_closest_aspect_ratio( + aspect_ratio, target_ratios, orig_width, orig_height, image_size) + + # print(target_aspect_ratio) + # calculate the target width and height + target_width = image_size * target_aspect_ratio[0] + target_height = image_size * target_aspect_ratio[1] + blocks = target_aspect_ratio[0] * target_aspect_ratio[1] + + # resize the image + resized_img = image.resize((target_width, target_height)) + processed_images = [] + for i in range(blocks): + box = ( + (i % (target_width // image_size)) 
* image_size, + (i // (target_width // image_size)) * image_size, + ((i % (target_width // image_size)) + 1) * image_size, + ((i // (target_width // image_size)) + 1) * image_size + ) + # split the image + split_img = resized_img.crop(box) + processed_images.append(split_img) + assert len(processed_images) == blocks + if use_thumbnail and len(processed_images) != 1: + thumbnail_img = image.resize((image_size, image_size)) + processed_images.append(thumbnail_img) + return processed_images, target_aspect_ratio + + + +def normalize_transform(mean, std): + if mean is None and std is None: + transform = None + elif mean is None and std is not None: + mean = [0.] * len(std) + transform = transforms.Normalize(mean=mean, std=std) + elif mean is not None and std is None: + std = [1.] * len(mean) + transform = transforms.Normalize(mean=mean, std=std) + else: + transform = transforms.Normalize(mean=mean, std=std) + + return transform + + + +def format_messages( + conversations: List[Dict[str, str]], + sft_format: str = "deepseek", + system_prompt: str = "", +): + """ + Applies the SFT template to conversation. + + Args: + conversations (List[Dict]): A List of messages. + sft_format (str, optional): The format of the SFT template to use. Defaults to "deepseek". + system_prompt (str, optional): The system prompt to use in the SFT template. Defaults to "". + + Returns: + sft_prompt (str): The formatted text. 
+ """ + + conv = get_conv_template(sft_format) + conv.set_system_message(system_prompt) + for message in conversations: + conv.append_message(message["role"], message["content"].strip()) + sft_prompt = conv.get_prompt().strip() + + return sft_prompt + + +def text_encode(tokenizer, text: str, bos: bool = True, eos: bool = False): + t = tokenizer.encode(text, add_special_tokens=False) + bos_id = 0 + eos_id = 1 + if bos: + t = [bos_id] + t + if eos: + t = t + [eos_id] + + return t + +def load_pil_images(conversations: List[Dict[str, str]]) -> List[Image.Image]: + """ + + Args: + conversations (List[Dict[str, str]]): the conversations with a list of messages. An example is : + [ + { + "role": "User", + "content": "\nExtract all information from this image and convert them into markdown format.", + "images": ["./examples/table_datasets.png"] + }, + {"role": "Assistant", "content": ""}, + ] + + Returns: + pil_images (List[PIL.Image.Image]): the list of PIL images. + + """ + + pil_images = [] + + for message in conversations: + if "images" not in message: + continue + + for image_path in message["images"]: + # print('----------------') + # print(image_path) + # print('----------------') + # exit() + + # pil_img = Image.open(image_path) + pil_img = load_image(image_path) + pil_img = pil_img.convert("RGB") + pil_images.append(pil_img) + + return pil_images + + +class BaseTransform(ABC): + + def set_rng(self, *args, **kwargs): + pass + + def __call__(self, *args, **kwargs) -> torch.Tensor: + pass + + @property + def default_shape(self): + raise NotImplementedError + + +class BasicImageTransform(BaseTransform): + def __init__( + self, + mean: Optional[Tuple[float, float, float]] = (0.5, 0.5, 0.5), + std: Optional[Tuple[float, float, float]] = (0.5, 0.5, 0.5), + normalize: bool = True + ): + self.mean = mean + self.std = std + + transform_pipelines = [ + transforms.ToTensor() + ] + + normalize = normalize_transform(mean, std) if normalize else nn.Identity() + if normalize is 
not None: + transform_pipelines.append(normalize) + + self.transform = transforms.Compose(transform_pipelines) + + def __call__(self, x): + x = self.transform(x) + return x + +class NoEOSTextStreamer(TextStreamer): + def on_finalized_text(self, text: str, stream_end: bool = False): + + eos_text = self.tokenizer.decode([self.tokenizer.eos_token_id], skip_special_tokens=False) + text = text.replace(eos_text, "\n") + print(text, flush=True, end="") + + +class DeepseekOCRConfig(DeepseekV2Config): + model_type = "DeepseekOCR" + +class DeepseekOCRModel(DeepseekV2Model): + config_class = DeepseekOCRConfig + + def __init__(self, config: DeepseekV2Config): + super(DeepseekOCRModel, self).__init__(config) + + self.sam_model = build_sam_vit_b() + self.vision_model = build_clip_l() + # self.conv_2 = nn.Conv2d(in_channels=1024, out_channels=2048, kernel_size=2, stride=2) + n_embed = 1280 + self.projector = MlpProjector(Dict(projector_type="linear", input_dim=2048, n_embed=n_embed)) + embed_std = 1 / torch.sqrt(torch.tensor(n_embed, dtype=torch.float32)) + self.image_newline = nn.Parameter(torch.randn(n_embed) * embed_std) + self.view_seperator = nn.Parameter(torch.randn(n_embed) * embed_std) + + + + + def forward( + self, + input_ids: torch.LongTensor = None, + attention_mask: Optional[torch.Tensor] = None, + position_ids: Optional[torch.LongTensor] = None, + past_key_values: Optional[List[torch.FloatTensor]] = None, + inputs_embeds: Optional[torch.FloatTensor] = None, + use_cache: Optional[bool] = None, + output_attentions: Optional[bool] = None, + output_hidden_states: Optional[bool] = None, + images: Optional[torch.FloatTensor] = None, + images_seq_mask: Optional[torch.FloatTensor] = None, + images_spatial_crop: Optional[torch.FloatTensor] = None, + return_dict: Optional[bool] = None, + ) -> Union[Tuple, BaseModelOutputWithPast]: + + + + + if inputs_embeds is None: + # inputs_embeds = self.embed_tokens(input_ids) + inputs_embeds = self.get_input_embeddings()(input_ids) + + + 
+ sam_model = getattr(self, 'sam_model', None) + # sam_model = self.sam_model + vision_model = getattr(self, 'vision_model', None) + + + + if sam_model is not None and (input_ids.shape[1] != 1 or self.training) and torch.sum(images[0][1]).item() != 0: + + idx = 0 + + # sam_model = torch.jit.script(sam_model) + + # start_time = time.time() + for image, crop_shape in zip(images, images_spatial_crop): + images_in_this_batch = [] + + patches = image[0] + image_ori = image[1] + + with torch.no_grad(): + # with torch.inference_mode(): + + if torch.sum(patches).item() != 0: + # P, C, H, W = patches.shape + crop_flag = 1 + local_features_1 = sam_model(patches) + + local_features_2 = vision_model(patches, local_features_1) + # vit_time = time.time() + local_features = torch.cat((local_features_2[:, 1:], local_features_1.flatten(2).permute(0, 2, 1)), dim=-1) + local_features = self.projector(local_features) + + + global_features_1 = sam_model(image_ori) + global_features_2 = vision_model(image_ori, global_features_1) + global_features = torch.cat((global_features_2[:, 1:], global_features_1.flatten(2).permute(0, 2, 1)), dim=-1) + global_features = self.projector(global_features) + + print('=====================') + print('BASE: ', global_features.shape) + print('PATCHES: ', local_features.shape) + print('=====================') + + _, hw, n_dim = global_features.shape + h = w = int(hw ** 0.5) + + _2, hw2, n_dim2 = local_features.shape + h2 = w2 = int(hw2 ** 0.5) + + width_crop_num, height_crop_num = crop_shape[0], crop_shape[1] + + global_features = global_features.view(h, w, n_dim) + + global_features = torch.cat( + [global_features, self.image_newline[None, None, :].expand(h, 1, n_dim)], dim=1 + ) + + global_features = global_features.view(-1, n_dim) + + + local_features = local_features.view(height_crop_num, width_crop_num, h2, w2, n_dim2).permute(0, 2, 1, 3, 4).reshape(height_crop_num*h2, width_crop_num*w2, n_dim2) + local_features = torch.cat( + [local_features, 
self.image_newline[None, None, :].expand(height_crop_num * h2, 1, n_dim2)], dim=1 + ) + local_features = local_features.view(-1, n_dim2) + + global_local_features = torch.cat([local_features, global_features, self.view_seperator[None, :]], dim=0) + + # end_time = time.time() + + # print('sam: ', sam_time - start_time) + # print('vit: ', vit_time - sam_time) + # print('all: ', end_time - start_time) + + # exit() + + else: + global_features_1 = sam_model(image_ori) + global_features_2 = vision_model(image_ori, global_features_1) + global_features = torch.cat((global_features_2[:, 1:], global_features_1.flatten(2).permute(0, 2, 1)), dim=-1) + global_features = self.projector(global_features) + print('=====================') + print('BASE: ', global_features.shape) + print('NO PATCHES') + print('=====================') + _, hw, n_dim = global_features.shape + h = w = int(hw ** 0.5) + + + global_features = global_features.view(h, w, n_dim) + + global_features = torch.cat( + [global_features, self.image_newline[None, None, :].expand(h, 1, n_dim)], dim=1 + ) + + global_features = global_features.view(-1, n_dim) + + global_local_features = torch.cat([global_features, self.view_seperator[None, :]], dim=0) + + images_in_this_batch.append(global_local_features) + + + # print(inputs_embeds.shape) + + if images_in_this_batch: + images_in_this_batch = torch.cat(images_in_this_batch, dim=0) + # exit() + + inputs_embeds[idx].masked_scatter_(images_seq_mask[idx].unsqueeze(-1).cuda(), images_in_this_batch) + + idx += 1 + + + return super(DeepseekOCRModel, self).forward( + input_ids=None, attention_mask=attention_mask, past_key_values=past_key_values, + inputs_embeds=inputs_embeds, use_cache=use_cache, position_ids = position_ids, + output_attentions=output_attentions, output_hidden_states=output_hidden_states, + return_dict=return_dict + ) + + +class DeepseekOCRForCausalLM(DeepseekV2ForCausalLM): + + config_class = DeepseekOCRConfig + # supports_gradient_checkpointing = True + + def 
__init__(self, config): + super(DeepseekV2ForCausalLM, self).__init__(config) + self.model = DeepseekOCRModel(config) + + self.vocab_size = config.vocab_size + + self.lm_head = nn.Linear(config.hidden_size, config.vocab_size, bias=False) + + # self.lm_head = nn.Linear(config.hidden_size, config.vocab_size, bias=False) + + # Initialize weights and apply final processing + self.post_init() + + def get_model(self): + return self.model + + + def forward( + self, + input_ids: torch.LongTensor = None, + attention_mask: Optional[torch.Tensor] = None, + position_ids: Optional[torch.LongTensor] = None, + past_key_values: Optional[List[torch.FloatTensor]] = None, + inputs_embeds: Optional[torch.FloatTensor] = None, + labels: Optional[torch.LongTensor] = None, + use_cache: Optional[bool] = None, + output_attentions: Optional[bool] = None, + output_hidden_states: Optional[bool] = None, + images: Optional[torch.FloatTensor] = None, + images_seq_mask: Optional[torch.FloatTensor] = None, + images_spatial_crop: Optional[torch.FloatTensor] = None, + return_dict: Optional[bool] = None, + + ) -> Union[Tuple, CausalLMOutputWithPast]: + output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions + output_hidden_states = ( + output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states + ) + return_dict = return_dict if return_dict is not None else self.config.use_return_dict + + + + outputs = self.model( + input_ids=input_ids, + past_key_values=past_key_values, + attention_mask=attention_mask, + position_ids=position_ids, + inputs_embeds=inputs_embeds, + use_cache=use_cache, + output_attentions=output_attentions, + output_hidden_states=output_hidden_states, + images=images, + images_seq_mask = images_seq_mask, + images_spatial_crop = images_spatial_crop, + return_dict=return_dict + + ) + + + + # print(transformer_outputs) + + hidden_states = outputs[0] + logits = self.lm_head(hidden_states) + logits = 
logits.float() + + # logits + + loss = None + if labels is not None: + # Shift so that tokens < n predict n + shift_logits = logits[..., :-1, :].contiguous() + shift_labels = labels[..., 1:].contiguous() + # Flatten the tokens + loss_fct = CrossEntropyLoss() + shift_logits = shift_logits.view(-1, self.config.vocab_size) + shift_labels = shift_labels.view(-1) + # Enable model parallelism + shift_labels = shift_labels.to(shift_logits.device) + loss = loss_fct(shift_logits, shift_labels) + + if not return_dict: + output = (logits,) + outputs[1:] + return (loss,) + output if loss is not None else output + + return CausalLMOutputWithPast( + loss=loss, + logits=logits, + past_key_values=outputs.past_key_values, + hidden_states=outputs.hidden_states, + attentions=outputs.attentions, + ) + + + def prepare_inputs_for_generation( + self, input_ids, past_key_values=None, attention_mask=None, inputs_embeds=None, **kwargs + ): + # Omit tokens covered by past_key_values + past_length = 0 + if past_key_values is not None: + if isinstance(past_key_values, Cache): + cache_length = past_key_values.get_seq_length() + past_length = past_key_values.seen_tokens + max_cache_length = past_key_values.get_max_length() + else: + cache_length = past_length = past_key_values[0][0].shape[2] + max_cache_length = None + + # Keep only the unprocessed tokens: + # 1 - If the length of the attention_mask exceeds the length of input_ids, then we are in a setting where + # some of the inputs are exclusively passed as part of the cache (e.g. when passing input_embeds as + # input) + if attention_mask is not None and attention_mask.shape[1] > input_ids.shape[1]: + input_ids = input_ids[:, -(attention_mask.shape[1] - past_length) :] + # 2 - If the past_length is smaller than input_ids', then input_ids holds all input tokens. We can discard + # input_ids based on the past_length. 
+ elif past_length < input_ids.shape[1]: + input_ids = input_ids[:, past_length:] + # 3 - Otherwise (past_length >= input_ids.shape[1]), let's assume input_ids only has unprocessed tokens. + + # If we are about to go beyond the maximum cache length, we need to crop the input attention mask. + if ( + max_cache_length is not None + and attention_mask is not None + and cache_length + input_ids.shape[1] > max_cache_length + ): + attention_mask = attention_mask[:, -max_cache_length:] + + position_ids = kwargs.get("position_ids", None) + if attention_mask is not None and position_ids is None: + # create position_ids on the fly for batch generation + position_ids = attention_mask.long().cumsum(-1) - 1 + position_ids.masked_fill_(attention_mask == 0, 1) + if past_key_values: + position_ids = position_ids[:, -input_ids.shape[1] :] + + # if self.generation_config.cache_implementation == "static": + # # generation with static cache + # cache_position = kwargs.get("cache_position", None) + # if cache_position is None: + # past_length = 0 + # else: + # past_length = cache_position[-1] + 1 + # input_ids = input_ids[:, past_length:] + # position_ids = position_ids[:, past_length:] + + # TODO @gante we should only keep a `cache_position` in generate, and do +=1. + # same goes for position ids. Could also help with continued generation. 
+ cache_position = torch.arange(past_length, past_length + position_ids.shape[-1], device=position_ids.device) + + # if `inputs_embeds` are passed, we only want to use them in the 1st generation step + if inputs_embeds is not None and past_key_values is None: + model_inputs = {"inputs_embeds": inputs_embeds} + else: + model_inputs = {"input_ids": input_ids} + + model_inputs.update( + { + "position_ids": position_ids, + "past_key_values": past_key_values, + "use_cache": kwargs.get("use_cache"), + "attention_mask": attention_mask, + "images": kwargs.get("images", None), + "images_seq_mask": kwargs.get("images_seq_mask", None), + "images_spatial_crop": kwargs.get("images_spatial_crop", None), + } + ) + return model_inputs + + + def disable_torch_init(self): + """ + Disable the redundant torch default initialization to accelerate model creation. + """ + import torch + setattr(torch.nn.Linear, "reset_parameters", lambda self: None) + setattr(torch.nn.LayerNorm, "reset_parameters", lambda self: None) + + + + def infer(self, tokenizer, prompt='', image_file='', output_path = '', base_size=1024, image_size=640, crop_mode=True, test_compress=False, save_results=False, eval_mode=False): + self.disable_torch_init() + + os.makedirs(output_path, exist_ok=True) + os.makedirs(f'{output_path}/images', exist_ok=True) + + if prompt and image_file: + conversation = [ + { + "role": "<|User|>", + # "content": "\n<|grounding|>Given the layout of the image. ", + "content": f'{prompt}', + # "content": "å›äøč§é»„ę²³ä¹‹ę°“å¤©äøŠę„ēš„äø‹äø€å„ę˜Æä»€ä¹ˆļ¼Ÿ", + # "content": "\nFree OCR. ", + # "content": "\nParse the figure. ", + # "content": "\nExtract the text in the image. ", + "images": [f'{image_file}'], + }, + {"role": "<|Assistant|>", "content": ""}, + ] + + elif prompt: + conversation = [ + { + "role": "<|User|>", + # "content": "\n<|grounding|>Given the layout of the image. 
", + "content": f'{prompt}', + # "content": "å›äøč§é»„ę²³ä¹‹ę°“å¤©äøŠę„ēš„äø‹äø€å„ę˜Æä»€ä¹ˆļ¼Ÿ", + # "content": "\nFree OCR. ", + # "content": "\nParse the figure. ", + # "content": "\nExtract the text in the image. ", + # "images": [f'{image_file}'], + }, + {"role": "<|Assistant|>", "content": ""}, + ] + else: + assert False, f'prompt is none!' + + prompt = format_messages(conversations=conversation, sft_format='plain', system_prompt='') + + patch_size = 16 + downsample_ratio = 4 + images = load_pil_images(conversation) + + valid_img_tokens = 0 + ratio = 1 + + image_draw = images[0].copy() + + w,h = image_draw.size + # print(w, h) + ratio = 1 - ((max(w, h) - min(w, h)) / (max(w, h))) + + + image_transform=BasicImageTransform(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5), normalize=True) + images_seq_mask = [] + + image_token = '' + image_token_id = 128815 + text_splits = prompt.split(image_token) + + images_list, images_crop_list, images_seq_mask = [], [], [] + tokenized_str = [] + images_spatial_crop = [] + for text_sep, image in zip(text_splits, images): + + tokenized_sep = text_encode(tokenizer, text_sep, bos=False, eos=False) + tokenized_str += tokenized_sep + images_seq_mask += [False] * len(tokenized_sep) + + if crop_mode: + + if image.size[0] <= 640 and image.size[1] <= 640: + crop_ratio = [1, 1] + + else: + if crop_mode: + # best_width, best_height = select_best_resolution(image.size, self.candidate_resolutions) + images_crop_raw, crop_ratio = dynamic_preprocess(image) + else: + # best_width, best_height = self.image_size, self.image_size + crop_ratio = [1, 1] + + """process the global view""" + # image = image.resize((base_size, base_size)) + global_view = ImageOps.pad(image, (base_size, base_size), + color=tuple(int(x * 255) for x in image_transform.mean)) + + if base_size == 1024: + valid_img_tokens += int(256 * ratio) + elif base_size == 1280: + valid_img_tokens += int(400 * ratio) + # elif base_size == 640: + # valid_img_tokens += int(100 * ratio) + 
+ + + + + images_list.append(image_transform(global_view).to(torch.bfloat16)) + + # global_view_tensor = image_transform(global_view).to(torch.bfloat16) + + width_crop_num, height_crop_num = crop_ratio + + images_spatial_crop.append([width_crop_num, height_crop_num]) + + + if width_crop_num > 1 or height_crop_num > 1: + """process the local views""" + + for i in range(len(images_crop_raw)): + images_crop_list.append(image_transform(images_crop_raw[i]).to(torch.bfloat16)) + + if image_size == 640: + valid_img_tokens += len(images_crop_list) * 100 + + num_queries = math.ceil((image_size // patch_size) / downsample_ratio) + num_queries_base = math.ceil((base_size // patch_size) / downsample_ratio) + + + + """add image tokens""" + + + + tokenized_image = ([image_token_id] * num_queries_base + [image_token_id]) * num_queries_base + tokenized_image += [image_token_id] + if width_crop_num > 1 or height_crop_num > 1: + tokenized_image += ([image_token_id] * (num_queries * width_crop_num) + [image_token_id]) * ( + num_queries * height_crop_num) + tokenized_str += tokenized_image + images_seq_mask += [True] * len(tokenized_image) + # num_image_tokens.append(len(tokenized_image)) + + else: + # best_width, best_height = self.image_size, self.image_size + # print(image.size, (best_width, best_height)) # check the select_best_resolutions func + + """process the global view""" + if image_size <= 640: + print('directly resize') + image = image.resize((image_size, image_size)) + # else: + global_view = ImageOps.pad(image, (image_size, image_size), + color=tuple(int(x * 255) for x in image_transform.mean)) + images_list.append(image_transform(global_view).to(torch.bfloat16)) + + if base_size == 1024: + valid_img_tokens += int(256 * ratio) + elif base_size == 1280: + valid_img_tokens += int(400 * ratio) + elif base_size == 640: + valid_img_tokens += int(100 * 1) + elif base_size == 512: + valid_img_tokens += int(64 * 1) + + width_crop_num, height_crop_num = 1, 1 + + 
images_spatial_crop.append([width_crop_num, height_crop_num]) + + + """add image tokens""" + num_queries = math.ceil((image_size // patch_size) / downsample_ratio) + + tokenized_image = ([image_token_id] * num_queries + [image_token_id]) * num_queries + tokenized_image += [image_token_id] + # tokenized_image += ([self.image_token_id] * (num_queries * width_crop_num) + [self.image_token_id]) * ( + # num_queries * height_crop_num) + tokenized_str += tokenized_image + images_seq_mask += [True] * len(tokenized_image) + # num_image_tokens.append(len(tokenized_image)) + + + """process the last text split""" + tokenized_sep = text_encode(tokenizer, text_splits[-1], bos=False, eos=False) + tokenized_str += tokenized_sep + images_seq_mask += [False] * len(tokenized_sep) + + """add the bos tokens""" + bos_id = 0 + tokenized_str = [bos_id] + tokenized_str + images_seq_mask = [False] + images_seq_mask + + + + input_ids = torch.LongTensor(tokenized_str) + + + + + images_seq_mask = torch.tensor(images_seq_mask, dtype=torch.bool) + + + if len(images_list) == 0: + images_ori = torch.zeros((1, 3, image_size, image_size)) + images_spatial_crop = torch.zeros((1, 2), dtype=torch.long) + images_crop = torch.zeros((1, 3, base_size, base_size)) + + else: + images_ori = torch.stack(images_list, dim=0) + images_spatial_crop = torch.tensor(images_spatial_crop, dtype=torch.long) + if images_crop_list: + images_crop = torch.stack(images_crop_list, dim=0) + else: + images_crop = torch.zeros((1, 3, base_size, base_size)) + + + + if not eval_mode: + streamer = NoEOSTextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=False) + with torch.autocast("cuda", dtype=torch.bfloat16): + with torch.no_grad(): + output_ids = self.generate( + input_ids.unsqueeze(0).cuda(), + images=[(images_crop.cuda(), images_ori.cuda())], + images_seq_mask = images_seq_mask.unsqueeze(0).cuda(), + images_spatial_crop = images_spatial_crop, + # do_sample=False, + # num_beams = 1, + temperature=0.0, + 
eos_token_id=tokenizer.eos_token_id, + streamer=streamer, + max_new_tokens=8192, + no_repeat_ngram_size = 20, + use_cache = True + ) + + else: + with torch.autocast("cuda", dtype=torch.bfloat16): + with torch.no_grad(): + output_ids = self.generate( + input_ids.unsqueeze(0).cuda(), + images=[(images_crop.cuda(), images_ori.cuda())], + images_seq_mask = images_seq_mask.unsqueeze(0).cuda(), + images_spatial_crop = images_spatial_crop, + # do_sample=False, + # num_beams = 1, + temperature=0.0, + eos_token_id=tokenizer.eos_token_id, + max_new_tokens=8192, + no_repeat_ngram_size = 35, + use_cache = True + ) + + + if '' in conversation[0]['content'] and eval_mode: + outputs = tokenizer.decode(output_ids[0, input_ids.unsqueeze(0).cuda().shape[1]:]) + stop_str = '<|end▁of▁sentence|>' + if outputs.endswith(stop_str): + outputs = outputs[:-len(stop_str)] + # re_match + outputs = outputs.strip() + + return outputs + + if '' in conversation[0]['content'] and test_compress: + outputs = tokenizer.decode(output_ids[0, input_ids.unsqueeze(0).cuda().shape[1]:]) + pure_texts_outputs_token_length = len(text_encode(tokenizer, outputs, bos=False, eos=False)) + print('='*50) + print('image size: ', (w, h)) + print('valid image tokens: ', int(valid_img_tokens)) + print('output texts tokens (valid): ', pure_texts_outputs_token_length) + print('compression ratio: ', round(pure_texts_outputs_token_length/valid_img_tokens, 2)) + print('='*50) + + + if '' in conversation[0]['content'] and save_results: + outputs = tokenizer.decode(output_ids[0, input_ids.unsqueeze(0).cuda().shape[1]:]) + stop_str = '<|end▁of▁sentence|>' + + print('='*15 + 'save results:' + '='*15) + + # # # # conv.messages[-1][-1] = outputs + if outputs.endswith(stop_str): + outputs = outputs[:-len(stop_str)] + outputs = outputs.strip() + + matches_ref, matches_images, mathes_other = re_match(outputs) + # print(matches_ref) + result = process_image_with_refs(image_draw, matches_ref, output_path) + + + for idx, a_match_image 
in enumerate(tqdm(matches_images, desc="image")): + outputs = outputs.replace(a_match_image, '![](images/' + str(idx) + '.jpg)\n') + + for idx, a_match_other in enumerate(tqdm(mathes_other, desc="other")): + outputs = outputs.replace(a_match_other, '').replace('\\coloneqq', ':=').replace('\\eqqcolon', '=:') + + + # if 'structural formula' in conversation[0]['content']: + # outputs = '' + outputs + '' + with open(f'{output_path}/result.mmd', 'w', encoding = 'utf-8') as afile: + afile.write(outputs) + + if 'line_type' in outputs: + import matplotlib.pyplot as plt + lines = eval(outputs)['Line']['line'] + + line_type = eval(outputs)['Line']['line_type'] + # print(lines) + + endpoints = eval(outputs)['Line']['line_endpoint'] + + fig, ax = plt.subplots(figsize=(3,3), dpi=200) + ax.set_xlim(-15, 15) + ax.set_ylim(-15, 15) + + for idx, line in enumerate(lines): + try: + p0 = eval(line.split(' -- ')[0]) + p1 = eval(line.split(' -- ')[-1]) + + if line_type[idx] == '--': + ax.plot([p0[0], p1[0]], [p0[1], p1[1]], linewidth=0.8, color='k') + else: + ax.plot([p0[0], p1[0]], [p0[1], p1[1]], linewidth = 0.8, color = 'k') + + ax.scatter(p0[0], p0[1], s=5, color = 'k') + ax.scatter(p1[0], p1[1], s=5, color = 'k') + except: + pass + + for endpoint in endpoints: + + label = endpoint.split(': ')[0] + (x, y) = eval(endpoint.split(': ')[1]) + ax.annotate(label, (x, y), xytext=(1, 1), textcoords='offset points', + fontsize=5, fontweight='light') + + + plt.savefig(f'{output_path}/geo.jpg') + plt.close() + + result.save(f"{output_path}/result_with_boxes.jpg") diff --git a/modeling_deepseekv2.py b/modeling_deepseekv2.py new file mode 100644 index 0000000..ff00847 --- /dev/null +++ b/modeling_deepseekv2.py @@ -0,0 +1,1992 @@ +# coding=utf-8 +# Copyright 2023 DeepSeek-AI and The HuggingFace Inc. team. All rights reserved. +# +# This code is based on EleutherAI's GPT-NeoX library and the GPT-NeoX +# and OPT implementations in this library. 
It has been modified from its +# original forms to accommodate minor architectural differences compared +# to GPT-NeoX and OPT used by the Meta AI team that trained the model. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" PyTorch DeepSeek model and compatible with both DeepSeekV2 and DeepSeekV3""" +import math +import warnings +from typing import List, Optional, Tuple, Union +import numpy as np + +import torch +import torch.nn.functional as F +import torch.utils.checkpoint +import torch.distributed as dist +from einops import repeat +from torch import nn +from torch.nn import BCEWithLogitsLoss, CrossEntropyLoss, MSELoss + +from transformers.activations import ACT2FN +from transformers.cache_utils import Cache, DynamicCache +from transformers.modeling_attn_mask_utils import _prepare_4d_causal_attention_mask +from transformers.models.llama.modeling_llama import ( + LlamaAttention, + LlamaFlashAttention2 +) +from transformers.modeling_outputs import ( + BaseModelOutputWithPast, + CausalLMOutputWithPast, + SequenceClassifierOutputWithPast, +) +from transformers.modeling_utils import PreTrainedModel +from transformers.pytorch_utils import ( + ALL_LAYERNORM_LAYERS, + is_torch_greater_or_equal_than_1_13, +) +from transformers.utils import ( + add_start_docstrings, + add_start_docstrings_to_model_forward, + is_flash_attn_2_available, + is_flash_attn_greater_or_equal_2_10, + logging, + replace_return_docstrings, +) +from transformers.utils.import_utils import 
is_torch_fx_available + +from .configuration_deepseek_v2 import DeepseekV2Config + +if is_flash_attn_2_available(): + from flash_attn import flash_attn_func, flash_attn_varlen_func + from flash_attn.bert_padding import index_first_axis, pad_input, unpad_input # noqa + +# This makes `_prepare_4d_causal_attention_mask` a leaf function in the FX graph. +# It means that the function will not be traced through and simply appear as a node in the graph. +if is_torch_fx_available(): + if not is_torch_greater_or_equal_than_1_13: + import torch.fx + + _prepare_4d_causal_attention_mask = torch.fx.wrap(_prepare_4d_causal_attention_mask) + +logger = logging.get_logger(__name__) + +_CONFIG_FOR_DOC = "DeepseekV2Config" + + +def _get_unpad_data(attention_mask): + seqlens_in_batch = attention_mask.sum(dim=-1, dtype=torch.int32) + indices = torch.nonzero(attention_mask.flatten(), as_tuple=False).flatten() + max_seqlen_in_batch = seqlens_in_batch.max().item() + cu_seqlens = F.pad( + torch.cumsum(seqlens_in_batch, dim=0, dtype=torch.torch.int32), (1, 0) + ) + return ( + indices, + cu_seqlens, + max_seqlen_in_batch, + ) + + +class DeepseekV2RMSNorm(nn.Module): + def __init__(self, hidden_size, eps=1e-6): + """ + DeepseekV2RMSNorm is equivalent to T5LayerNorm + """ + super().__init__() + self.weight = nn.Parameter(torch.ones(hidden_size)) + self.variance_epsilon = eps + + def forward(self, hidden_states): + input_dtype = hidden_states.dtype + hidden_states = hidden_states.to(torch.float32) + variance = hidden_states.pow(2).mean(-1, keepdim=True) + hidden_states = hidden_states * torch.rsqrt(variance + self.variance_epsilon) + return self.weight * hidden_states.to(input_dtype) + + +ALL_LAYERNORM_LAYERS.append(DeepseekV2RMSNorm) + + + + +class DeepseekV2RotaryEmbedding(nn.Module): + def __init__(self, dim, max_position_embeddings=2048, base=10000, device=None): + super().__init__() + + self.dim = dim + self.max_position_embeddings = max_position_embeddings + self.base = base + inv_freq = 
# Copied from transformers.models.llama.modeling_llama.LlamaLinearScalingRotaryEmbedding with Llama->DeepseekV2
class DeepseekV2LinearScalingRotaryEmbedding(DeepseekV2RotaryEmbedding):
    """DeepseekV2RotaryEmbedding extended with linear scaling. Credits to the Reddit user /u/kaiokendev

    Position indices are divided by ``scaling_factor`` before the cos/sin
    tables are built; everything else is inherited from the base class.
    """

    def __init__(
        self,
        dim,
        max_position_embeddings=2048,
        base=10000,
        device=None,
        scaling_factor=1.0,
    ):
        # Must be assigned before the base constructor runs, because that
        # constructor calls `_set_cos_sin_cache`, which reads it.
        self.scaling_factor = scaling_factor
        super().__init__(dim, max_position_embeddings, base, device)

    def _set_cos_sin_cache(self, seq_len, device, dtype):
        """Rebuild the ``cos_cached`` / ``sin_cached`` buffers for ``seq_len`` positions.

        Args:
            seq_len: Number of positions to tabulate; also stored as
                ``max_seq_len_cached``.
            device: Device on which the position indices are created.
            dtype: Dtype the cached tables are cast to.
        """
        self.max_seq_len_cached = seq_len
        t = torch.arange(
            self.max_seq_len_cached, device=device, dtype=self.inv_freq.dtype
        )
        t = t / self.scaling_factor

        # Move `inv_freq` next to `t`, as the base class does, so the outer
        # product cannot fail on a device mismatch.
        freqs = torch.outer(t, self.inv_freq.to(t.device))
        # Different from paper, but it uses a different permutation in order to obtain the same calculation
        emb = torch.cat((freqs, freqs), dim=-1)
        self.register_buffer("cos_cached", emb.cos().to(dtype), persistent=False)
        self.register_buffer("sin_cached", emb.sin().to(dtype), persistent=False)
def yarn_find_correction_dim(
    num_rotations, dim, base=10000, max_position_embeddings=2048
):
    """Inverse dim formula: find the dimension index for a number of rotations.

    Returns ``dim * ln(max_position_embeddings / (num_rotations * 2 * pi))
    / (2 * ln(base))`` as a float.
    """
    ratio = max_position_embeddings / (num_rotations * 2 * math.pi)
    numerator = dim * math.log(ratio)
    denominator = 2 * math.log(base)
    return numerator / denominator


def yarn_find_correction_range(
    low_rot, high_rot, dim, base=10000, max_position_embeddings=2048
):
    """Find the dimension range bounds for a pair of rotation counts.

    The lower bound is the floored correction dim of ``low_rot`` and the
    upper bound the ceiled correction dim of ``high_rot``.

    Returns:
        ``(low, high)`` integers clamped to ``[0, dim - 1]``.
    """
    lower = yarn_find_correction_dim(low_rot, dim, base, max_position_embeddings)
    upper = yarn_find_correction_dim(high_rot, dim, base, max_position_embeddings)
    low = math.floor(lower)
    high = math.ceil(upper)
    # Clamp values just in case
    return max(low, 0), min(high, dim - 1)


def yarn_get_mscale(scale=1, mscale=1):
    """Return ``0.1 * mscale * ln(scale) + 1.0``, or ``1.0`` when ``scale <= 1``."""
    return 1.0 if scale <= 1 else 0.1 * mscale * math.log(scale) + 1.0
class DeepseekV2YarnRotaryEmbedding(DeepseekV2RotaryEmbedding):
    """Rotary embedding whose inverse frequencies blend a scaled and an unscaled set.

    The blend weights come from `yarn_linear_ramp_mask` over the range
    returned by `yarn_find_correction_range`, and the cached cos/sin tables
    are multiplied by a ratio of two `yarn_get_mscale` values.
    """

    def __init__(
        self,
        dim,
        max_position_embeddings=2048,
        base=10000,
        device=None,
        scaling_factor=1.0,
        original_max_position_embeddings=4096,
        beta_fast=32,
        beta_slow=1,
        mscale=1,
        mscale_all_dim=0,
    ):
        # All settings are stored before the base constructor runs, because
        # that constructor calls `_set_cos_sin_cache`, which reads them.
        self.scaling_factor = scaling_factor
        self.original_max_position_embeddings = original_max_position_embeddings
        self.beta_fast = beta_fast
        self.beta_slow = beta_slow
        self.mscale = mscale
        self.mscale_all_dim = mscale_all_dim
        super().__init__(dim, max_position_embeddings, base, device)

    def _set_cos_sin_cache(self, seq_len, device, dtype):
        """Recompute ``inv_freq`` and the ``cos_cached`` / ``sin_cached`` buffers.

        Args:
            seq_len: Number of positions to tabulate; also stored as
                ``max_seq_len_cached``.
            device: Device used for every intermediate tensor.
            dtype: Dtype the cached tables are cast to.
        """
        self.max_seq_len_cached = seq_len
        rot_dim = self.dim

        # Shared exponent for both frequency sets: 0, 2/dim, 4/dim, ...
        exponents = (
            torch.arange(0, rot_dim, 2, dtype=torch.float32, device=device) / rot_dim
        )
        freq_extra = 1.0 / (self.base ** exponents)
        freq_inter = 1.0 / (self.scaling_factor * self.base ** exponents)

        low, high = yarn_find_correction_range(
            self.beta_fast,
            self.beta_slow,
            rot_dim,
            self.base,
            self.original_max_position_embeddings,
        )
        ramp = yarn_linear_ramp_mask(low, high, rot_dim // 2).to(
            device=device, dtype=torch.float32
        )
        inv_freq_mask = 1.0 - ramp
        # Mask value 1 keeps the unscaled frequency, 0 the scaled one.
        inv_freq = freq_inter * (1 - inv_freq_mask) + freq_extra * inv_freq_mask
        self.register_buffer("inv_freq", inv_freq, persistent=False)

        positions = torch.arange(seq_len, device=device, dtype=torch.float32)
        freqs = torch.outer(positions, inv_freq)

        scale_num = yarn_get_mscale(self.scaling_factor, self.mscale)
        scale_den = yarn_get_mscale(self.scaling_factor, self.mscale_all_dim)
        _mscale = float(scale_num / scale_den)

        emb = torch.cat((freqs, freqs), dim=-1)
        for buffer_name, table in (("cos_cached", emb.cos()), ("sin_cached", emb.sin())):
            self.register_buffer(
                buffer_name, (table * _mscale).to(dtype), persistent=False
            )
class DeepseekV2MLP(nn.Module):
    """Gated feed-forward block: ``down_proj(act_fn(gate_proj(x)) * up_proj(x))``.

    Args:
        config: Supplies ``hidden_act`` and, as fallbacks, ``hidden_size``
            and ``intermediate_size``.
        hidden_size: Input/output width; defaults to ``config.hidden_size``.
        intermediate_size: Inner width; defaults to
            ``config.intermediate_size``.
    """

    def __init__(self, config, hidden_size=None, intermediate_size=None):
        super().__init__()
        self.config = config

        # Explicit sizes win; otherwise fall back to the values in `config`.
        if hidden_size is None:
            hidden_size = config.hidden_size
        if intermediate_size is None:
            intermediate_size = config.intermediate_size
        self.hidden_size = hidden_size
        self.intermediate_size = intermediate_size

        # All three projections are bias-free.
        self.gate_proj = nn.Linear(hidden_size, intermediate_size, bias=False)
        self.up_proj = nn.Linear(hidden_size, intermediate_size, bias=False)
        self.down_proj = nn.Linear(intermediate_size, hidden_size, bias=False)
        self.act_fn = ACT2FN[config.hidden_act]

    def forward(self, x):
        """Apply the gated projection to ``x`` and return the result."""
        gate = self.act_fn(self.gate_proj(x))
        up = self.up_proj(x)
        return self.down_proj(gate * up)
torch.empty((self.n_routed_experts)) + ) + self.reset_parameters() + + def reset_parameters(self) -> None: + import torch.nn.init as init + + init.kaiming_uniform_(self.weight, a=math.sqrt(5)) + + def forward(self, hidden_states): + bsz, seq_len, h = hidden_states.shape + ### compute gating score + hidden_states = hidden_states.view(-1, h) + logits = F.linear( + hidden_states.type(torch.float32), self.weight.type(torch.float32), None + ) + if self.scoring_func == "softmax": + scores = logits.softmax(dim=-1, dtype=torch.float32) + elif self.scoring_func == "sigmoid": + scores = logits.sigmoid() + else: + raise NotImplementedError( + f"insupportable scoring function for MoE gating: {self.scoring_func}" + ) + + ### select top-k experts + if self.topk_method == "greedy": + topk_weight, topk_idx = torch.topk( + scores, k=self.top_k, dim=-1, sorted=False + ) + elif self.topk_method == "group_limited_greedy": + group_scores = ( + scores.view(bsz * seq_len, self.n_group, -1).max(dim=-1).values + ) # [n, n_group] + group_idx = torch.topk( + group_scores, k=self.topk_group, dim=-1, sorted=False + )[ + 1 + ] # [n, top_k_group] + group_mask = torch.zeros_like(group_scores) # [n, n_group] + group_mask.scatter_(1, group_idx, 1) # [n, n_group] + score_mask = ( + group_mask.unsqueeze(-1) + .expand( + bsz * seq_len, self.n_group, self.n_routed_experts // self.n_group + ) + .reshape(bsz * seq_len, -1) + ) # [n, e] + tmp_scores = scores.masked_fill(~score_mask.bool(), 0.0) # [n, e] + topk_weight, topk_idx = torch.topk( + tmp_scores, k=self.top_k, dim=-1, sorted=False + ) + elif self.topk_method == "noaux_tc": + assert not self.training + scores_for_choice = scores.view(bsz * seq_len, -1) + self.e_score_correction_bias.unsqueeze(0) + group_scores = ( + scores_for_choice.view(bsz * seq_len, self.n_group, -1).topk(2, dim=-1)[0].sum(dim = -1) + ) # [n, n_group] + group_idx = torch.topk( + group_scores, k=self.topk_group, dim=-1, sorted=False + )[ + 1 + ] # [n, top_k_group] + group_mask = 
class AddAuxiliaryLoss(torch.autograd.Function):
    """
    The trick function of adding auxiliary (aux) loss,
    which includes the gradient of the aux loss during backpropagation.

    The forward pass returns ``x`` unchanged. The backward pass forwards the
    incoming gradient to ``x`` and, when the aux loss requires grad, feeds a
    gradient of ones to it.
    """

    @staticmethod
    def forward(ctx, x, loss):
        """Return ``x`` unchanged while remembering what backward needs.

        Args:
            ctx: Autograd context.
            x: Tensor passed through untouched.
            loss: Single-element aux-loss tensor, or ``None`` when no aux
                loss was computed.
        """
        # The MoE gate in this file returns `None` when the aux loss is
        # disabled, while the MoE layer still calls `apply` in training.
        # Treat that as a pass-through instead of failing on `None.numel()`.
        if loss is None:
            ctx.required_aux_loss = False
            return x
        assert loss.numel() == 1
        ctx.dtype = loss.dtype
        ctx.required_aux_loss = loss.requires_grad
        return x

    @staticmethod
    def backward(ctx, grad_output):
        """Return ``(grad_output, grad_loss)``; ``grad_loss`` is ones or ``None``."""
        grad_loss = None
        if ctx.required_aux_loss:
            grad_loss = torch.ones(1, dtype=ctx.dtype, device=grad_output.device)
        return grad_output, grad_loss
y = torch.empty_like(hidden_states) + for i, expert in enumerate(self.experts): + y[flat_topk_idx == i] = expert(hidden_states[flat_topk_idx == i]) + y = (y.view(*topk_weight.shape, -1) * topk_weight.unsqueeze(-1)).sum(dim=1) + y = y.to(hidden_states.dtype).view(*orig_shape) + y = AddAuxiliaryLoss.apply(y, aux_loss) + else: + y = self.moe_infer(hidden_states, topk_idx, topk_weight).view(*orig_shape) + if self.config.n_shared_experts is not None: + y = y + self.shared_experts(identity) + return y + + @torch.no_grad() + def moe_infer(self, x, topk_ids, topk_weight): + cnts = topk_ids.new_zeros((topk_ids.shape[0], len(self.experts))) + cnts.scatter_(1, topk_ids, 1) + tokens_per_expert = cnts.sum(dim=0) + idxs = topk_ids.view(-1).argsort() + sorted_tokens = x[idxs // topk_ids.shape[1]] + sorted_tokens_shape = sorted_tokens.shape + if self.ep_size > 1: + tokens_per_ep_rank = tokens_per_expert.view(self.ep_size, -1).sum(dim=1) + tokens_per_expert_group = tokens_per_expert.new_empty( + tokens_per_expert.shape[0] + ) + dist.all_to_all_single(tokens_per_expert_group, tokens_per_expert) + output_splits = ( + tokens_per_expert_group.view(self.ep_size, -1) + .sum(1) + .cpu() + .numpy() + .tolist() + ) + gathered_tokens = sorted_tokens.new_empty( + tokens_per_expert_group.sum(dim=0).cpu().item(), sorted_tokens.shape[1] + ) + input_split_sizes = tokens_per_ep_rank.cpu().numpy().tolist() + dist.all_to_all( + list(gathered_tokens.split(output_splits)), + list(sorted_tokens.split(input_split_sizes)), + ) + tokens_per_expert_post_gather = tokens_per_expert_group.view( + self.ep_size, self.experts_per_rank + ).sum(dim=0) + gatherd_idxs = np.zeros(shape=(gathered_tokens.shape[0],), dtype=np.int32) + s = 0 + for i, k in enumerate(tokens_per_expert_group.cpu().numpy()): + gatherd_idxs[s : s + k] = i % self.experts_per_rank + s += k + gatherd_idxs = gatherd_idxs.argsort() + sorted_tokens = gathered_tokens[gatherd_idxs] + tokens_per_expert = tokens_per_expert_post_gather + 
# Copied from transformers.models.llama.modeling_llama.repeat_kv
def repeat_kv(hidden_states: torch.Tensor, n_rep: int) -> torch.Tensor:
    """
    Repeat every key/value head ``n_rep`` times along the head dimension.

    Equivalent to ``torch.repeat_interleave(x, dim=1, repeats=n_rep)``: the
    input goes from (batch, num_key_value_heads, seqlen, head_dim) to
    (batch, num_attention_heads, seqlen, head_dim). With ``n_rep == 1`` the
    input tensor itself is returned.
    """
    batch, kv_heads, seq_len, head_dim = hidden_states.shape
    if n_rep != 1:
        # Insert a repeat axis after the head axis, broadcast it, then fold
        # it into the head axis.
        expanded = hidden_states.unsqueeze(2).expand(
            batch, kv_heads, n_rep, seq_len, head_dim
        )
        hidden_states = expanded.reshape(batch, kv_heads * n_rep, seq_len, head_dim)
    return hidden_states
+ ) + + self.attention_dropout = config.attention_dropout + self.hidden_size = config.hidden_size + self.num_heads = config.num_attention_heads + + self.max_position_embeddings = config.max_position_embeddings + self.rope_theta = config.rope_theta + self.q_lora_rank = config.q_lora_rank + self.qk_rope_head_dim = config.qk_rope_head_dim + self.kv_lora_rank = config.kv_lora_rank + self.v_head_dim = config.v_head_dim + self.qk_nope_head_dim = config.qk_nope_head_dim + self.q_head_dim = config.qk_nope_head_dim + config.qk_rope_head_dim + + self.is_causal = True + + if self.q_lora_rank is None: + self.q_proj = nn.Linear( + self.hidden_size, self.num_heads * self.q_head_dim, bias=False + ) + else: + self.q_a_proj = nn.Linear( + self.hidden_size, config.q_lora_rank, bias=config.attention_bias + ) + self.q_a_layernorm = DeepseekV2RMSNorm(config.q_lora_rank) + self.q_b_proj = nn.Linear( + config.q_lora_rank, self.num_heads * self.q_head_dim, bias=False + ) + # config.kv_lora_rank + config.qk_rope_head_dim, + self.kv_a_proj_with_mqa = nn.Linear( + self.hidden_size, + config.kv_lora_rank + config.qk_rope_head_dim, + bias=config.attention_bias, + ) + self.kv_a_layernorm = DeepseekV2RMSNorm(config.kv_lora_rank) + self.kv_b_proj = nn.Linear( + config.kv_lora_rank, + self.num_heads + * (self.q_head_dim - self.qk_rope_head_dim + self.v_head_dim), + bias=False, + ) + + self.o_proj = nn.Linear( + self.num_heads * self.v_head_dim, + self.hidden_size, + bias=config.attention_bias, + ) + self._init_rope() + + self.softmax_scale = self.q_head_dim ** (-0.5) + if self.config.rope_scaling is not None: + mscale_all_dim = self.config.rope_scaling.get("mscale_all_dim", 0) + scaling_factor = self.config.rope_scaling["factor"] + if mscale_all_dim: + mscale = yarn_get_mscale(scaling_factor, mscale_all_dim) + self.softmax_scale = self.softmax_scale * mscale * mscale + + def _init_rope(self): + if self.config.rope_scaling is None: + self.rotary_emb = DeepseekV2RotaryEmbedding( + 
self.qk_rope_head_dim, + max_position_embeddings=self.max_position_embeddings, + base=self.rope_theta, + ) + # self.rotary_emb = DeepseekV2LinearScalingRotaryEmbedding( + # self.qk_rope_head_dim, + # max_position_embeddings=self.max_position_embeddings, + # scaling_factor=scaling_factor, + # base=self.rope_theta, + # ) + else: + scaling_type = self.config.rope_scaling["type"] + scaling_factor = self.config.rope_scaling["factor"] + if scaling_type == "linear": + self.rotary_emb = DeepseekV2LinearScalingRotaryEmbedding( + self.qk_rope_head_dim, + max_position_embeddings=self.max_position_embeddings, + scaling_factor=scaling_factor, + base=self.rope_theta, + ) + elif scaling_type == "dynamic": + self.rotary_emb = DeepseekV2DynamicNTKScalingRotaryEmbedding( + self.qk_rope_head_dim, + max_position_embeddings=self.max_position_embeddings, + scaling_factor=scaling_factor, + base=self.rope_theta, + ) + elif scaling_type == "yarn": + kwargs = { + key: self.config.rope_scaling[key] + for key in [ + "original_max_position_embeddings", + "beta_fast", + "beta_slow", + "mscale", + "mscale_all_dim", + ] + if key in self.config.rope_scaling + } + self.rotary_emb = DeepseekV2YarnRotaryEmbedding( + self.qk_rope_head_dim, + max_position_embeddings=self.max_position_embeddings, + scaling_factor=scaling_factor, + base=self.rope_theta, + **kwargs, + ) + else: + raise ValueError(f"Unknown RoPE scaling type {scaling_type}") + + def _shape(self, tensor: torch.Tensor, seq_len: int, bsz: int): + return ( + tensor.view(bsz, seq_len, self.num_heads, self.v_head_dim) + .transpose(1, 2) + .contiguous() + ) + + def forward( + self, + hidden_states: torch.Tensor, + attention_mask: Optional[torch.Tensor] = None, + position_ids: Optional[torch.LongTensor] = None, + past_key_value: Optional[Cache] = None, + output_attentions: bool = False, + use_cache: bool = False, + **kwargs, + ) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[Tuple[torch.Tensor]]]: + if "padding_mask" in kwargs: + 
warnings.warn( + "Passing `padding_mask` is deprecated and will be removed in v4.37. Please make sure use `attention_mask` instead.`" + ) + bsz, q_len, _ = hidden_states.size() + + if self.q_lora_rank is None: + q = self.q_proj(hidden_states) + else: + q = self.q_b_proj(self.q_a_layernorm(self.q_a_proj(hidden_states))) + q = q.view(bsz, q_len, self.num_heads, self.q_head_dim).transpose(1, 2) + + + q_nope, q_pe = torch.split( + q, [self.qk_nope_head_dim, self.qk_rope_head_dim], dim=-1 + ) + + compressed_kv = self.kv_a_proj_with_mqa(hidden_states) + compressed_kv, k_pe = torch.split( + compressed_kv, [self.kv_lora_rank, self.qk_rope_head_dim], dim=-1 + ) + compressed_kv = self.kv_a_layernorm(compressed_kv) + k_pe = k_pe.view(bsz, q_len, 1, self.qk_rope_head_dim).transpose(1, 2) + + kv_seq_len = k_pe.shape[-2] + if past_key_value is not None: + if self.layer_idx is None: + raise ValueError( + f"The cache structure has changed since version v4.36. If you are using {self.__class__.__name__} " + "for auto-regressive decoding with k/v caching, please make sure to initialize the attention class " + "with a layer index." 
+ ) + kv_seq_len += past_key_value.get_usable_length(kv_seq_len, self.layer_idx) + + cos, sin = self.rotary_emb(q_pe, seq_len=kv_seq_len) + q_pe, k_pe = apply_rotary_pos_emb(q_pe, k_pe, cos, sin, position_ids) + + if past_key_value is not None: + cache_kwargs = {"sin": sin, "cos": cos} # Specific to RoPE models + compressed_kv = compressed_kv.unsqueeze(1) + k_pe, compressed_kv = past_key_value.update(k_pe, compressed_kv, self.layer_idx, cache_kwargs) + compressed_kv = compressed_kv.squeeze(1) + + kv_b_proj = self.kv_b_proj.weight.view(self.num_heads, -1, self.kv_lora_rank) + q_absorb = kv_b_proj[:, :self.qk_nope_head_dim, :] + out_absorb = kv_b_proj[:, self.qk_nope_head_dim:, :] + + q_nope = torch.matmul(q_nope, q_absorb) + attn_weights = (torch.matmul(q_pe, k_pe.mT) + + torch.matmul(q_nope, compressed_kv.unsqueeze(-3).mT)) * self.softmax_scale + if attn_weights.size() != (bsz, self.num_heads, q_len, kv_seq_len): + raise ValueError( + f"Attention weights should be of size {(bsz, self.num_heads, q_len, kv_seq_len)}, but is" + f" {attn_weights.size()}" + ) + assert attention_mask is not None + if attention_mask is not None: + if attention_mask.size() != (bsz, 1, q_len, kv_seq_len): + raise ValueError( + f"Attention mask should be of size {(bsz, 1, q_len, kv_seq_len)}, but is {attention_mask.size()}" + ) + attn_weights = attn_weights + attention_mask + + # upcast attention to fp32 + attn_weights = nn.functional.softmax( + attn_weights, dim=-1, dtype=torch.float32 + ).to(q_pe.dtype) + attn_weights = nn.functional.dropout( + attn_weights, p=self.attention_dropout, training=self.training + ) + attn_output = torch.einsum('bhql,blc->bhqc', attn_weights, compressed_kv) + + attn_output = torch.matmul(attn_output, out_absorb.mT) + + if attn_output.size() != (bsz, self.num_heads, q_len, self.v_head_dim): + raise ValueError( + f"`attn_output` should be of size {(bsz, self.num_heads, q_len, self.v_head_dim)}, but is" + f" {attn_output.size()}" + ) + + attn_output = 
attn_output.transpose(1, 2).contiguous() + + attn_output = attn_output.reshape(bsz, q_len, self.num_heads * self.v_head_dim) + + attn_output = self.o_proj(attn_output) + + if not output_attentions: + attn_weights = None + + return attn_output, attn_weights, past_key_value + + +# Copied from transformers.models.llama.modeling_llama.LlamaFlashAttention2 with Llama->DeepseekV2 +class DeepseekV2FlashAttention2(DeepseekV2Attention): + """ + DeepseekV2 flash attention module. This module inherits from `DeepseekV2Attention` as the weights of the module stays + untouched. The only required change would be on the forward pass where it needs to correctly call the public API of + flash attention and deal with padding tokens in case the input contains any of them. + """ + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + + # TODO: Should be removed once Flash Attention for RoCm is bumped to 2.1. + # flash_attn<2.1 generates top-left aligned causal mask, while what is needed here is bottom-right alignement, that was made default for flash_attn>=2.1. This attribute is used to handle this difference. Reference: https://github.com/Dao-AILab/flash-attention/releases/tag/v2.1.0. + # Beware that with flash_attn<2.1, using q_seqlen != k_seqlen (except for the case q_seqlen == 1) produces a wrong mask (top-left). + self._flash_attn_uses_top_left_mask = not is_flash_attn_greater_or_equal_2_10() + + def forward( + self, + hidden_states: torch.Tensor, + attention_mask: Optional[torch.LongTensor] = None, + position_ids: Optional[torch.LongTensor] = None, + past_key_value: Optional[Cache] = None, + output_attentions: bool = False, + use_cache: bool = False, + **kwargs, + ) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[Tuple[torch.Tensor]]]: + # DeepseekV2FlashAttention2 attention does not support output_attentions + if "padding_mask" in kwargs: + warnings.warn( + "Passing `padding_mask` is deprecated and will be removed in v4.37. 
Please make sure use `attention_mask` instead.`" + ) + + # overwrite attention_mask with padding_mask + attention_mask = kwargs.pop("padding_mask") + + output_attentions = False + + bsz, q_len, _ = hidden_states.size() + + if self.q_lora_rank is None: + q = self.q_proj(hidden_states) + else: + q = self.q_b_proj(self.q_a_layernorm(self.q_a_proj(hidden_states))) + q = q.view(bsz, q_len, self.num_heads, self.q_head_dim).transpose(1, 2) + q_nope, q_pe = torch.split( + q, [self.qk_nope_head_dim, self.qk_rope_head_dim], dim=-1 + ) + + # Flash attention requires the input to have the shape + # batch_size x seq_length x head_dim x hidden_dim + # therefore we just need to keep the original shape + compressed_kv = self.kv_a_proj_with_mqa(hidden_states) + compressed_kv, k_pe = torch.split( + compressed_kv, [self.kv_lora_rank, self.qk_rope_head_dim], dim=-1 + ) + k_pe = k_pe.view(bsz, q_len, 1, self.qk_rope_head_dim).transpose(1, 2) + kv = ( + self.kv_b_proj(self.kv_a_layernorm(compressed_kv)) + .view(bsz, q_len, self.num_heads, self.qk_nope_head_dim + self.v_head_dim) + .transpose(1, 2) + ) + + k_nope, value_states = torch.split( + kv, [self.qk_nope_head_dim, self.v_head_dim], dim=-1 + ) + kv_seq_len = value_states.shape[-2] + + kv_seq_len = value_states.shape[-2] + if past_key_value is not None: + kv_seq_len += past_key_value.get_usable_length(kv_seq_len, self.layer_idx) + + cos, sin = self.rotary_emb(value_states, seq_len=kv_seq_len) + q_pe, k_pe = apply_rotary_pos_emb(q_pe, k_pe, cos, sin, position_ids) + + query_states = k_pe.new_empty(bsz, self.num_heads, q_len, self.q_head_dim) + query_states[:, :, :, : self.qk_nope_head_dim] = q_nope + query_states[:, :, :, self.qk_nope_head_dim :] = q_pe + + key_states = k_pe.new_empty(bsz, self.num_heads, q_len, self.q_head_dim) + key_states[:, :, :, : self.qk_nope_head_dim] = k_nope + key_states[:, :, :, self.qk_nope_head_dim :] = k_pe + + if self.q_head_dim != self.v_head_dim: + value_states = F.pad(value_states, [0, 
self.q_head_dim - self.v_head_dim]) + + # TODO: support compressed_kv for kv_cache (instead of key_states, value_states) in flash_attention version + if past_key_value is not None: + cache_kwargs = {"sin": sin, "cos": cos} # Specific to RoPE models + key_states, value_states = past_key_value.update( + key_states, value_states, self.layer_idx, cache_kwargs + ) + + # TODO: These transpose are quite inefficient but Flash Attention requires the layout [batch_size, sequence_length, num_heads, head_dim]. We would need to refactor the KV cache + # to be able to avoid many of these transpose/reshape/view. + query_states = query_states.transpose(1, 2) + key_states = key_states.transpose(1, 2) + value_states = value_states.transpose(1, 2) + + dropout_rate = self.attention_dropout if self.training else 0.0 + + # In PEFT, usually we cast the layer norms in float32 for training stability reasons + # therefore the input hidden states gets silently casted in float32. Hence, we need + # cast them back in the correct dtype just to be sure everything works as expected. + # This might slowdown training & inference so it is recommended to not cast the LayerNorms + # in fp32. (DeepseekV2RMSNorm handles it correctly) + + input_dtype = query_states.dtype + if input_dtype == torch.float32: + # Handle the case where the model is quantized + if hasattr(self.config, "_pre_quantization_dtype"): + target_dtype = self.config._pre_quantization_dtype + elif torch.is_autocast_enabled(): + target_dtype = torch.get_autocast_gpu_dtype() + else: + target_dtype = ( + self.q_proj.weight.dtype + if self.q_lora_rank is None + else self.q_a_proj.weight.dtype + ) + + logger.warning_once( + f"The input hidden states seems to be silently casted in float32, this might be related to" + f" the fact you have upcasted embedding or layer norm layers in float32. We will cast back the input in" + f" {target_dtype}." 
+ ) + + query_states = query_states.to(target_dtype) + key_states = key_states.to(target_dtype) + value_states = value_states.to(target_dtype) + + attn_output = self._flash_attention_forward( + query_states, + key_states, + value_states, + attention_mask, + q_len, + dropout=dropout_rate, + softmax_scale=self.softmax_scale, + ) + if self.q_head_dim != self.v_head_dim: + attn_output = attn_output[:, :, :, : self.v_head_dim] + + attn_output = attn_output.reshape( + bsz, q_len, self.num_heads * self.v_head_dim + ).contiguous() + attn_output = self.o_proj(attn_output) + + if not output_attentions: + attn_weights = None + + return attn_output, attn_weights, past_key_value + + def _flash_attention_forward( + self, + query_states, + key_states, + value_states, + attention_mask, + query_length, + dropout=0.0, + softmax_scale=None, + ): + """ + Calls the forward method of Flash Attention - if the input hidden states contain at least one padding token + first unpad the input, then computes the attention scores and pad the final attention scores. + + Args: + query_states (`torch.Tensor`): + Input query states to be passed to Flash Attention API + key_states (`torch.Tensor`): + Input key states to be passed to Flash Attention API + value_states (`torch.Tensor`): + Input value states to be passed to Flash Attention API + attention_mask (`torch.Tensor`): + The padding mask - corresponds to a tensor of size `(batch_size, seq_len)` where 0 stands for the + position of padding tokens and 1 for the position of non-padding tokens. + dropout (`int`, *optional*): + Attention dropout + softmax_scale (`float`, *optional*): + The scaling of QK^T before applying softmax. Default to 1 / sqrt(head_dim) + """ + if not self._flash_attn_uses_top_left_mask: + causal = self.is_causal + else: + # TODO: Remove the `query_length != 1` check once Flash Attention for RoCm is bumped to 2.1. For details, please see the comment in DeepseekV2FlashAttention2 __init__. 
+ causal = self.is_causal and query_length != 1 + + # Contains at least one padding token in the sequence + if attention_mask is not None: + batch_size = query_states.shape[0] + ( + query_states, + key_states, + value_states, + indices_q, + cu_seq_lens, + max_seq_lens, + ) = self._upad_input( + query_states, key_states, value_states, attention_mask, query_length + ) + + cu_seqlens_q, cu_seqlens_k = cu_seq_lens + max_seqlen_in_batch_q, max_seqlen_in_batch_k = max_seq_lens + + attn_output_unpad = flash_attn_varlen_func( + query_states, + key_states, + value_states, + cu_seqlens_q=cu_seqlens_q, + cu_seqlens_k=cu_seqlens_k, + max_seqlen_q=max_seqlen_in_batch_q, + max_seqlen_k=max_seqlen_in_batch_k, + dropout_p=dropout, + softmax_scale=softmax_scale, + causal=causal, + ) + + attn_output = pad_input( + attn_output_unpad, indices_q, batch_size, query_length + ) + else: + attn_output = flash_attn_func( + query_states, + key_states, + value_states, + dropout, + softmax_scale=softmax_scale, + causal=causal, + ) + + return attn_output + + def _upad_input( + self, query_layer, key_layer, value_layer, attention_mask, query_length + ): + indices_k, cu_seqlens_k, max_seqlen_in_batch_k = _get_unpad_data(attention_mask) + batch_size, kv_seq_len, num_key_value_heads, head_dim = key_layer.shape + + key_layer = index_first_axis( + key_layer.reshape(batch_size * kv_seq_len, num_key_value_heads, head_dim), + indices_k, + ) + value_layer = index_first_axis( + value_layer.reshape(batch_size * kv_seq_len, num_key_value_heads, head_dim), + indices_k, + ) + if query_length == kv_seq_len: + query_layer = index_first_axis( + query_layer.reshape(batch_size * kv_seq_len, self.num_heads, head_dim), + indices_k, + ) + cu_seqlens_q = cu_seqlens_k + max_seqlen_in_batch_q = max_seqlen_in_batch_k + indices_q = indices_k + elif query_length == 1: + max_seqlen_in_batch_q = 1 + cu_seqlens_q = torch.arange( + batch_size + 1, dtype=torch.int32, device=query_layer.device + ) # There is a memcpy here, that 
is very bad. + indices_q = cu_seqlens_q[:-1] + query_layer = query_layer.squeeze(1) + else: + # The -q_len: slice assumes left padding. + attention_mask = attention_mask[:, -query_length:] + query_layer, indices_q, cu_seqlens_q, max_seqlen_in_batch_q = unpad_input( + query_layer, attention_mask + ) + + return ( + query_layer, + key_layer, + value_layer, + indices_q, + (cu_seqlens_q, cu_seqlens_k), + (max_seqlen_in_batch_q, max_seqlen_in_batch_k), + ) + + +ATTENTION_CLASSES = { + "eager": DeepseekV2Attention, + "flash_attention_2": DeepseekV2FlashAttention2, + + "mla_eager": DeepseekV2Attention, + "mla_flash_attention_2": DeepseekV2FlashAttention2, + + "mha_eager": LlamaAttention, + "mha_flash_attention_2": LlamaFlashAttention2 +} + + +class DeepseekV2DecoderLayer(nn.Module): + def __init__(self, config: DeepseekV2Config, layer_idx: int): + super().__init__() + self.hidden_size = config.hidden_size + + + if config.use_mla: + attn_implementation = "mla_" + config._attn_implementation + else: + attn_implementation = "mha_" + config._attn_implementation + + self.self_attn = ATTENTION_CLASSES[attn_implementation]( + config=config, layer_idx=layer_idx + ) + + self.mlp = ( + DeepseekV2MoE(config) + if ( + config.n_routed_experts is not None + and layer_idx >= config.first_k_dense_replace + and layer_idx % config.moe_layer_freq == 0 + ) + else DeepseekV2MLP(config) + ) + self.input_layernorm = DeepseekV2RMSNorm( + config.hidden_size, eps=config.rms_norm_eps + ) + self.post_attention_layernorm = DeepseekV2RMSNorm( + config.hidden_size, eps=config.rms_norm_eps + ) + + def forward( + self, + hidden_states: torch.Tensor, + attention_mask: Optional[torch.Tensor] = None, + position_ids: Optional[torch.LongTensor] = None, + past_key_value: Optional[Tuple[torch.Tensor]] = None, + output_attentions: Optional[bool] = False, + use_cache: Optional[bool] = False, + **kwargs, + ) -> Tuple[ + torch.FloatTensor, Optional[Tuple[torch.FloatTensor, torch.FloatTensor]] + ]: + """ + Args: + 
hidden_states (`torch.FloatTensor`): input to the layer of shape `(batch, seq_len, embed_dim)` + attention_mask (`torch.FloatTensor`, *optional*): + attention mask of size `(batch_size, sequence_length)` if flash attention is used or `(batch_size, 1, + query_sequence_length, key_sequence_length)` if default attention is used. + output_attentions (`bool`, *optional*): + Whether or not to return the attentions tensors of all attention layers. See `attentions` under + returned tensors for more detail. + use_cache (`bool`, *optional*): + If set to `True`, `past_key_values` key value states are returned and can be used to speed up decoding + (see `past_key_values`). + past_key_value (`Tuple(torch.FloatTensor)`, *optional*): cached past key and value projection states + """ + if "padding_mask" in kwargs: + warnings.warn( + "Passing `padding_mask` is deprecated and will be removed in v4.37. Please make sure use `attention_mask` instead.`" + ) + residual = hidden_states + + hidden_states = self.input_layernorm(hidden_states) + + # Self Attention + hidden_states, self_attn_weights, present_key_value = self.self_attn( + hidden_states=hidden_states, + attention_mask=attention_mask, + position_ids=position_ids, + past_key_value=past_key_value, + output_attentions=output_attentions, + use_cache=use_cache, + **kwargs, + ) + hidden_states = residual + hidden_states + + # Fully Connected + residual = hidden_states + hidden_states = self.post_attention_layernorm(hidden_states) + hidden_states = self.mlp(hidden_states) + hidden_states = residual + hidden_states + + outputs = (hidden_states,) + + if output_attentions: + outputs += (self_attn_weights,) + + if use_cache: + outputs += (present_key_value,) + + return outputs + + +DeepseekV2_START_DOCSTRING = r""" + This model inherits from [`PreTrainedModel`]. 
Check the superclass documentation for the generic methods the + library implements for all its model (such as downloading or saving, resizing the input embeddings, pruning heads + etc.) + + This model is also a PyTorch [torch.nn.Module](https://pytorch.org/docs/stable/nn.html#torch.nn.Module) subclass. + Use it as a regular PyTorch Module and refer to the PyTorch documentation for all matter related to general usage + and behavior. + + Parameters: + config ([`DeepseekV2Config`]): + Model configuration class with all the parameters of the model. Initializing with a config file does not + load the weights associated with the model, only the configuration. Check out the + [`~PreTrainedModel.from_pretrained`] method to load the model weights. +""" + + +@add_start_docstrings( + "The bare DeepseekV2 Model outputting raw hidden-states without any specific head on top.", + DeepseekV2_START_DOCSTRING, +) +class DeepseekV2PreTrainedModel(PreTrainedModel): + config_class = DeepseekV2Config + base_model_prefix = "model" + supports_gradient_checkpointing = True + _no_split_modules = ["DeepseekV2DecoderLayer"] + _skip_keys_device_placement = "past_key_values" + _supports_flash_attn_2 = True + _supports_cache_class = True + + def _init_weights(self, module): + std = self.config.initializer_range + if isinstance(module, nn.Linear): + module.weight.data.normal_(mean=0.0, std=std) + if module.bias is not None: + module.bias.data.zero_() + elif isinstance(module, nn.Embedding): + module.weight.data.normal_(mean=0.0, std=std) + if module.padding_idx is not None: + module.weight.data[module.padding_idx].zero_() + + +DeepseekV2_INPUTS_DOCSTRING = r""" + Args: + input_ids (`torch.LongTensor` of shape `(batch_size, sequence_length)`): + Indices of input sequence tokens in the vocabulary. Padding will be ignored by default should you provide + it. + + Indices can be obtained using [`AutoTokenizer`]. See [`PreTrainedTokenizer.encode`] and + [`PreTrainedTokenizer.__call__`] for details. 
+ + [What are input IDs?](../glossary#input-ids) + attention_mask (`torch.Tensor` of shape `(batch_size, sequence_length)`, *optional*): + Mask to avoid performing attention on padding token indices. Mask values selected in `[0, 1]`: + + - 1 for tokens that are **not masked**, + - 0 for tokens that are **masked**. + + [What are attention masks?](../glossary#attention-mask) + + Indices can be obtained using [`AutoTokenizer`]. See [`PreTrainedTokenizer.encode`] and + [`PreTrainedTokenizer.__call__`] for details. + + If `past_key_values` is used, optionally only the last `input_ids` have to be input (see + `past_key_values`). + + If you want to change padding behavior, you should read [`modeling_opt._prepare_decoder_attention_mask`] + and modify to your needs. See diagram 1 in [the paper](https://arxiv.org/abs/1910.13461) for more + information on the default strategy. + + - 1 indicates the head is **not masked**, + - 0 indicates the head is **masked**. + position_ids (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*): + Indices of positions of each input sequence tokens in the position embeddings. Selected in the range `[0, + config.n_positions - 1]`. + + [What are position IDs?](../glossary#position-ids) + past_key_values (`Cache` or `tuple(tuple(torch.FloatTensor))`, *optional*): + Pre-computed hidden-states (key and values in the self-attention blocks and in the cross-attention + blocks) that can be used to speed up sequential decoding. This typically consists in the `past_key_values` + returned by the model at a previous stage of decoding, when `use_cache=True` or `config.use_cache=True`. + + Two formats are allowed: + - a [`~cache_utils.Cache`] instance; + - Tuple of `tuple(torch.FloatTensor)` of length `config.n_layers`, with each tuple having 2 tensors of + shape `(batch_size, num_heads, sequence_length, embed_size_per_head)`). This is also known as the legacy + cache format. 
+ + The model will output the same cache format that is fed as input. If no `past_key_values` are passed, the + legacy cache format will be returned. + + If `past_key_values` are used, the user can optionally input only the last `input_ids` (those that don't + have their past key value states given to this model) of shape `(batch_size, 1)` instead of all `input_ids` + of shape `(batch_size, sequence_length)`. + inputs_embeds (`torch.FloatTensor` of shape `(batch_size, sequence_length, hidden_size)`, *optional*): + Optionally, instead of passing `input_ids` you can choose to directly pass an embedded representation. This + is useful if you want more control over how to convert `input_ids` indices into associated vectors than the + model's internal embedding lookup matrix. + use_cache (`bool`, *optional*): + If set to `True`, `past_key_values` key value states are returned and can be used to speed up decoding (see + `past_key_values`). + output_attentions (`bool`, *optional*): + Whether or not to return the attentions tensors of all attention layers. See `attentions` under returned + tensors for more detail. + output_hidden_states (`bool`, *optional*): + Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for + more detail. + return_dict (`bool`, *optional*): + Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. +""" + + +@add_start_docstrings( + "The bare DeepseekV2 Model outputting raw hidden-states without any specific head on top.", + DeepseekV2_START_DOCSTRING, +) +class DeepseekV2Model(DeepseekV2PreTrainedModel): + """ + Transformer decoder consisting of *config.num_hidden_layers* layers. 
Each layer is a [`DeepseekV2DecoderLayer`] + + Args: + config: DeepseekV2Config + """ + + def __init__(self, config: DeepseekV2Config): + super().__init__(config) + self.padding_idx = config.pad_token_id + self.vocab_size = config.vocab_size + + self.embed_tokens = nn.Embedding( + config.vocab_size, config.hidden_size, self.padding_idx + ) + self.layers = nn.ModuleList( + [ + DeepseekV2DecoderLayer(config, layer_idx) + for layer_idx in range(config.num_hidden_layers) + ] + ) + # print(config._attn_implementation) + self._use_flash_attention_2 = config._attn_implementation == "flash_attention_2" + self.norm = DeepseekV2RMSNorm(config.hidden_size, eps=config.rms_norm_eps) + + self.gradient_checkpointing = False + # Initialize weights and apply final processing + self.post_init() + + def get_input_embeddings(self): + return self.embed_tokens + + def set_input_embeddings(self, value): + self.embed_tokens = value + + @add_start_docstrings_to_model_forward(DeepseekV2_INPUTS_DOCSTRING) + def forward( + self, + input_ids: torch.LongTensor = None, + attention_mask: Optional[torch.Tensor] = None, + position_ids: Optional[torch.LongTensor] = None, + past_key_values: Optional[List[torch.FloatTensor]] = None, + inputs_embeds: Optional[torch.FloatTensor] = None, + use_cache: Optional[bool] = None, + output_attentions: Optional[bool] = None, + output_hidden_states: Optional[bool] = None, + return_dict: Optional[bool] = None, + cache_position: Optional[torch.LongTensor] = None + ) -> Union[Tuple, BaseModelOutputWithPast]: + output_attentions = ( + output_attentions + if output_attentions is not None + else self.config.output_attentions + ) + output_hidden_states = ( + output_hidden_states + if output_hidden_states is not None + else self.config.output_hidden_states + ) + use_cache = use_cache if use_cache is not None else self.config.use_cache + + return_dict = ( + return_dict if return_dict is not None else self.config.use_return_dict + ) + + # retrieve input_ids and 
inputs_embeds + if input_ids is not None and inputs_embeds is not None: + raise ValueError( + "You cannot specify both input_ids and inputs_embeds at the same time" + ) + elif input_ids is not None: + batch_size, seq_length = input_ids.shape[:2] + elif inputs_embeds is not None: + batch_size, seq_length = inputs_embeds.shape[:2] + else: + raise ValueError("You have to specify either input_ids or inputs_embeds") + + if self.gradient_checkpointing and self.training: + if use_cache: + logger.warning_once( + "`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`transformers." + ) + use_cache = False + + past_key_values_length = 0 + if use_cache: + use_legacy_cache = not isinstance(past_key_values, Cache) + if use_legacy_cache: + past_key_values = DynamicCache.from_legacy_cache(past_key_values) + past_key_values_length = past_key_values.get_usable_length(seq_length) + + if position_ids is None: + device = input_ids.device if input_ids is not None else inputs_embeds.device + position_ids = torch.arange( + past_key_values_length, + seq_length + past_key_values_length, + dtype=torch.long, + device=device, + ) + position_ids = position_ids.unsqueeze(0) + + if inputs_embeds is None: + inputs_embeds = self.embed_tokens(input_ids) + + if self._use_flash_attention_2: + # 2d mask is passed through the layers + attention_mask = ( + attention_mask + if (attention_mask is not None and 0 in attention_mask) + else None + ) + else: + # 4d mask is passed through the layers + attention_mask = _prepare_4d_causal_attention_mask( + attention_mask, + (batch_size, seq_length), + inputs_embeds, + past_key_values_length, + ) + + # embed positions + hidden_states = inputs_embeds + + # decoder layers + all_hidden_states = () if output_hidden_states else None + all_self_attns = () if output_attentions else None + next_decoder_cache = None + + for decoder_layer in self.layers: + if output_hidden_states: + all_hidden_states += (hidden_states,) + + if 
self.gradient_checkpointing and self.training: + layer_outputs = self._gradient_checkpointing_func( + decoder_layer.__call__, + hidden_states, + attention_mask, + position_ids, + past_key_values, + output_attentions, + use_cache, + ) + else: + layer_outputs = decoder_layer( + hidden_states, + attention_mask=attention_mask, + position_ids=position_ids, + past_key_value=past_key_values, + output_attentions=output_attentions, + use_cache=use_cache, + ) + + hidden_states = layer_outputs[0] + + if use_cache: + next_decoder_cache = layer_outputs[2 if output_attentions else 1] + + if output_attentions: + all_self_attns += (layer_outputs[1],) + + hidden_states = self.norm(hidden_states) + + # add hidden states from the last decoder layer + if output_hidden_states: + all_hidden_states += (hidden_states,) + + next_cache = None + if use_cache: + next_cache = ( + next_decoder_cache.to_legacy_cache() + if use_legacy_cache + else next_decoder_cache + ) + if not return_dict: + return tuple( + v + for v in [hidden_states, next_cache, all_hidden_states, all_self_attns] + if v is not None + ) + return BaseModelOutputWithPast( + last_hidden_state=hidden_states, + past_key_values=next_cache, + hidden_states=all_hidden_states, + attentions=all_self_attns, + ) + + +class DeepseekV2ForCausalLM(DeepseekV2PreTrainedModel): + _tied_weights_keys = ["lm_head.weight"] + + def __init__(self, config): + super().__init__(config) + self.model = DeepseekV2Model(config) + self.vocab_size = config.vocab_size + self.lm_head = nn.Linear(config.hidden_size, config.vocab_size, bias=False) + + # Initialize weights and apply final processing + self.post_init() + + def get_input_embeddings(self): + return self.model.embed_tokens + + def set_input_embeddings(self, value): + self.model.embed_tokens = value + + def get_output_embeddings(self): + return self.lm_head + + def set_output_embeddings(self, new_embeddings): + self.lm_head = new_embeddings + + def set_decoder(self, decoder): + self.model = decoder + + 
def get_decoder(self): + return self.model + + @add_start_docstrings_to_model_forward(DeepseekV2_INPUTS_DOCSTRING) + @replace_return_docstrings( + output_type=CausalLMOutputWithPast, config_class=_CONFIG_FOR_DOC + ) + def forward( + self, + input_ids: torch.LongTensor = None, + attention_mask: Optional[torch.Tensor] = None, + position_ids: Optional[torch.LongTensor] = None, + past_key_values: Optional[List[torch.FloatTensor]] = None, + inputs_embeds: Optional[torch.FloatTensor] = None, + labels: Optional[torch.LongTensor] = None, + use_cache: Optional[bool] = None, + output_attentions: Optional[bool] = None, + output_hidden_states: Optional[bool] = None, + return_dict: Optional[bool] = None, + cache_position: Optional[torch.LongTensor] = None + ) -> Union[Tuple, CausalLMOutputWithPast]: + r""" + Args: + labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*): + Labels for computing the masked language modeling loss. Indices should either be in `[0, transformers., + config.vocab_size]` or -100 (see `input_ids` docstring). Tokens with indices set to `-100` are ignored + (masked), the loss is only computed for the tokens with labels in `[0, transformers., config.vocab_size]`. + + Returns: + + Example: + + ```python + >>> from transformers import AutoTokenizer, DeepseekV2ForCausalLM + + >>> model = DeepseekV2ForCausalLM.from_pretrained(PATH_TO_CONVERTED_WEIGHTS) + >>> tokenizer = AutoTokenizer.from_pretrained(PATH_TO_CONVERTED_TOKENIZER) + + >>> prompt = "Hey, are you conscious? Can you talk to me?" + >>> inputs = tokenizer(prompt, return_tensors="pt") + + >>> # Generate + >>> generate_ids = model.generate(inputs.input_ids, max_length=30) + >>> tokenizer.batch_decode(generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0] + "Hey, are you conscious? Can you talk to me?\nI'm not conscious, but I can talk to you." 
+ ```""" + output_attentions = ( + output_attentions + if output_attentions is not None + else self.config.output_attentions + ) + output_hidden_states = ( + output_hidden_states + if output_hidden_states is not None + else self.config.output_hidden_states + ) + return_dict = ( + return_dict if return_dict is not None else self.config.use_return_dict + ) + + # decoder outputs consists of (dec_features, layer_state, dec_hidden, dec_attn) + outputs = self.model( + input_ids=input_ids, + attention_mask=attention_mask, + position_ids=position_ids, + past_key_values=past_key_values, + inputs_embeds=inputs_embeds, + use_cache=use_cache, + output_attentions=output_attentions, + output_hidden_states=output_hidden_states, + return_dict=return_dict, + cache_position=cache_position + ) + + hidden_states = outputs[0] + logits = self.lm_head(hidden_states) + logits = logits.float() + + loss = None + if labels is not None: + # Shift so that tokens < n predict n + shift_logits = logits[..., :-1, :].contiguous() + shift_labels = labels[..., 1:].contiguous() + # Flatten the tokens + loss_fct = CrossEntropyLoss() + shift_logits = shift_logits.view(-1, self.config.vocab_size) + shift_labels = shift_labels.view(-1) + # Enable model parallelism + shift_labels = shift_labels.to(shift_logits.device) + loss = loss_fct(shift_logits, shift_labels) + + if not return_dict: + output = (logits,) + outputs[1:] + return (loss,) + output if loss is not None else output + + return CausalLMOutputWithPast( + loss=loss, + logits=logits, + past_key_values=outputs.past_key_values, + hidden_states=outputs.hidden_states, + attentions=outputs.attentions, + ) + + def prepare_inputs_for_generation( + self, + input_ids, + past_key_values=None, + attention_mask=None, + inputs_embeds=None, + **kwargs, + ): + past_length = 0 + if past_key_values is not None: + if isinstance(past_key_values, Cache): + cache_length = past_key_values.get_seq_length() + past_length = past_key_values.seen_tokens + max_cache_length = 
past_key_values.get_max_length() + else: + cache_length = past_length = past_key_values[0][0].shape[2] + max_cache_length = None + + # Keep only the unprocessed tokens: + # 1 - If the length of the attention_mask exceeds the length of input_ids, then we are in a setting where + # some of the inputs are exclusively passed as part of the cache (e.g. when passing input_embeds as + # input) + if attention_mask is not None and attention_mask.shape[1] > input_ids.shape[1]: + input_ids = input_ids[:, -(attention_mask.shape[1] - past_length):] + # 2 - If the past_length is smaller than input_ids', then input_ids holds all input tokens. We can discard + # input_ids based on the past_length. + elif past_length < input_ids.shape[1]: + input_ids = input_ids[:, past_length:] + # 3 - Otherwise (past_length >= input_ids.shape[1]), let's assume input_ids only has unprocessed tokens. + + # If we are about to go beyond the maximum cache length, we need to crop the input attention mask. + if ( + max_cache_length is not None + and attention_mask is not None + and cache_length + input_ids.shape[1] > max_cache_length + ): + attention_mask = attention_mask[:, -max_cache_length:] + + position_ids = kwargs.get("position_ids", None) + if attention_mask is not None and position_ids is None: + # create position_ids on the fly for batch generation + position_ids = attention_mask.long().cumsum(-1) - 1 + position_ids.masked_fill_(attention_mask == 0, 1) + if past_key_values: + position_ids = position_ids[:, -input_ids.shape[1]:] + + if self.generation_config.cache_implementation == "static": + # generation with static cache + cache_position = kwargs.get("cache_position", None) + if cache_position is None: + past_length = 0 + else: + past_length = cache_position[-1] + 1 + input_ids = input_ids[:, past_length:] + position_ids = position_ids[:, past_length:] + + # TODO @gante we should only keep a `cache_position` in generate, and do +=1. + # same goes for position ids. 
Could also help with continued generation. + cache_position = torch.arange(past_length, past_length + position_ids.shape[-1], device=position_ids.device) + + # if `inputs_embeds` are passed, we only want to use them in the 1st generation step + if inputs_embeds is not None and past_key_values is None: + model_inputs = {"inputs_embeds": inputs_embeds} + else: + # The `contiguous()` here is necessary to have a static stride during decoding. torchdynamo otherwise + # recompiles graphs as the stride of the inputs is a guard. Ref: https://github.com/huggingface/transformers/pull/29114 + # TODO: use `next_tokens` directly instead. + model_inputs = {"input_ids": input_ids.contiguous()} + + model_inputs.update( + { + "position_ids": position_ids.contiguous(), + "cache_position": cache_position, + "past_key_values": past_key_values, + "use_cache": kwargs.get("use_cache"), + "attention_mask": attention_mask, + } + ) + return model_inputs + + @staticmethod + def _reorder_cache(past_key_values, beam_idx): + reordered_past = () + for layer_past in past_key_values: + reordered_past += ( + tuple( + past_state.index_select(0, beam_idx.to(past_state.device)) + for past_state in layer_past + ), + ) + return reordered_past + + +@add_start_docstrings( + """ + The DeepseekV2 Model transformer with a sequence classification head on top (linear layer). + + [`DeepseekV2ForSequenceClassification`] uses the last token in order to do the classification, as other causal models + (e.g. GPT-2) do. + + Since it does classification on the last token, it requires to know the position of the last token. If a + `pad_token_id` is defined in the configuration, it finds the last token that is not a padding token in each row. If + no `pad_token_id` is defined, it simply takes the last value in each row of the batch. Since it cannot guess the + padding tokens when `inputs_embeds` are passed instead of `input_ids`, it does the same (take the last value in + each row of the batch). 
+ """, + DeepseekV2_START_DOCSTRING, +) +class DeepseekV2ForSequenceClassification(DeepseekV2PreTrainedModel): + def __init__(self, config): + super().__init__(config) + self.num_labels = config.num_labels + self.model = DeepseekV2Model(config) + self.score = nn.Linear(config.hidden_size, self.num_labels, bias=False) + + # Initialize weights and apply final processing + self.post_init() + + def get_input_embeddings(self): + return self.model.embed_tokens + + def set_input_embeddings(self, value): + self.model.embed_tokens = value + + @add_start_docstrings_to_model_forward(DeepseekV2_INPUTS_DOCSTRING) + def forward( + self, + input_ids: torch.LongTensor = None, + attention_mask: Optional[torch.Tensor] = None, + position_ids: Optional[torch.LongTensor] = None, + past_key_values: Optional[List[torch.FloatTensor]] = None, + inputs_embeds: Optional[torch.FloatTensor] = None, + labels: Optional[torch.LongTensor] = None, + use_cache: Optional[bool] = None, + output_attentions: Optional[bool] = None, + output_hidden_states: Optional[bool] = None, + return_dict: Optional[bool] = None, + ) -> Union[Tuple, SequenceClassifierOutputWithPast]: + r""" + labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*): + Labels for computing the sequence classification/regression loss. Indices should be in `[0, ..., + config.num_labels - 1]`. If `config.num_labels == 1` a regression loss is computed (Mean-Square loss), If + `config.num_labels > 1` a classification loss is computed (Cross-Entropy). 
+ """ + return_dict = ( + return_dict if return_dict is not None else self.config.use_return_dict + ) + + transformer_outputs = self.model( + input_ids, + attention_mask=attention_mask, + position_ids=position_ids, + past_key_values=past_key_values, + inputs_embeds=inputs_embeds, + use_cache=use_cache, + output_attentions=output_attentions, + output_hidden_states=output_hidden_states, + return_dict=return_dict, + ) + hidden_states = transformer_outputs[0] + logits = self.score(hidden_states) + + if input_ids is not None: + batch_size = input_ids.shape[0] + else: + batch_size = inputs_embeds.shape[0] + + if self.config.pad_token_id is None and batch_size != 1: + raise ValueError( + "Cannot handle batch sizes > 1 if no padding token is defined." + ) + if self.config.pad_token_id is None: + sequence_lengths = -1 + else: + if input_ids is not None: + sequence_lengths = ( + torch.eq(input_ids, self.config.pad_token_id).int().argmax(-1) - 1 + ).to(logits.device) + else: + sequence_lengths = -1 + + pooled_logits = logits[ + torch.arange(batch_size, device=logits.device), sequence_lengths + ] + + loss = None + if labels is not None: + labels = labels.to(logits.device) + if self.config.problem_type is None: + if self.num_labels == 1: + self.config.problem_type = "regression" + elif self.num_labels > 1 and ( + labels.dtype == torch.long or labels.dtype == torch.int + ): + self.config.problem_type = "single_label_classification" + else: + self.config.problem_type = "multi_label_classification" + + if self.config.problem_type == "regression": + loss_fct = MSELoss() + if self.num_labels == 1: + loss = loss_fct(pooled_logits.squeeze(), labels.squeeze()) + else: + loss = loss_fct(pooled_logits, labels) + elif self.config.problem_type == "single_label_classification": + loss_fct = CrossEntropyLoss() + loss = loss_fct( + pooled_logits.view(-1, self.num_labels), labels.view(-1) + ) + elif self.config.problem_type == "multi_label_classification": + loss_fct = BCEWithLogitsLoss() + 
loss = loss_fct(pooled_logits, labels) + if not return_dict: + output = (pooled_logits,) + transformer_outputs[1:] + return ((loss,) + output) if loss is not None else output + + return SequenceClassifierOutputWithPast( + loss=loss, + logits=pooled_logits, + past_key_values=transformer_outputs.past_key_values, + hidden_states=transformer_outputs.hidden_states, + attentions=transformer_outputs.attentions, + ) diff --git a/processor_config.json b/processor_config.json new file mode 100644 index 0000000..9153af2 --- /dev/null +++ b/processor_config.json @@ -0,0 +1,28 @@ +{ + "add_special_token": false, + "candidate_resolutions": [ + [ + 1024, + 1024 + ] + ], + "downsample_ratio": 4, + "ignore_id": -100, + "image_mean": [ + 0.5, + 0.5, + 0.5 + ], + "image_std": [ + 0.5, + 0.5, + 0.5 + ], + "image_token": "", + "mask_prompt": false, + "normalize": true, + "pad_token": "<\uff5c\u2581pad\u2581\uff5c>", + "patch_size": 16, + "processor_class": "DeepseekVLV2Processor", + "sft_format": "deepseek" +} diff --git a/special_tokens_map.json b/special_tokens_map.json new file mode 100644 index 0000000..d59d312 --- /dev/null +++ b/special_tokens_map.json @@ -0,0 +1,39 @@ +{ + "additional_special_tokens": [ + { + "content": "<|User|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "<|Assistant|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } + ], + "bos_token": { + "content": "<|begin▁of▁sentence|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|end▁of▁sentence|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<ļ½œā–padā–ļ½œ>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/tokenizer.json b/tokenizer.json new file mode 100644 index 0000000..8b21be0 --- /dev/null +++ 
b/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a02f8fd5228c90256bb4f6554c34a579d48f909e5beb232dc4afad870b55a8b4 +size 9979544 diff --git a/tokenizer_config.json b/tokenizer_config.json new file mode 100644 index 0000000..ba9d417 --- /dev/null +++ b/tokenizer_config.json @@ -0,0 +1,6661 @@ +{ + "add_bos_token": true, + "add_eos_token": false, + "add_prefix_space": null, + "added_tokens_decoder": { + "0": { + "content": "<|begin▁of▁sentence|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "<|end▁of▁sentence|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "<ļ½œā–padā–ļ½œ>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128000": { + "content": "<|place▁holder▁no▁0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|place▁holder▁no▁1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|place▁holder▁no▁2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|place▁holder▁no▁3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|place▁holder▁no▁4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|place▁holder▁no▁5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|place▁holder▁no▁6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + 
"special": true + }, + "128007": { + "content": "<|place▁holder▁no▁7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|place▁holder▁no▁8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|place▁holder▁no▁9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|place▁holder▁no▁10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|place▁holder▁no▁11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|place▁holder▁no▁12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|place▁holder▁no▁13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|place▁holder▁no▁14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|place▁holder▁no▁15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|place▁holder▁no▁16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|place▁holder▁no▁17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|place▁holder▁no▁18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": 
"<|place▁holder▁no▁19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|place▁holder▁no▁20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|place▁holder▁no▁21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|place▁holder▁no▁22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|place▁holder▁no▁23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|place▁holder▁no▁24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|place▁holder▁no▁25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|place▁holder▁no▁26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|place▁holder▁no▁27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|place▁holder▁no▁28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|place▁holder▁no▁29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|place▁holder▁no▁30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|place▁holder▁no▁31|>", + "lstrip": false, + "normalized": false, 
+ "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|place▁holder▁no▁32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|place▁holder▁no▁33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|place▁holder▁no▁34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|place▁holder▁no▁35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|place▁holder▁no▁36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|place▁holder▁no▁37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|place▁holder▁no▁38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|place▁holder▁no▁39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|place▁holder▁no▁40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|place▁holder▁no▁41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|place▁holder▁no▁42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|place▁holder▁no▁43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + 
"128044": { + "content": "<|place▁holder▁no▁44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|place▁holder▁no▁45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|place▁holder▁no▁46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|place▁holder▁no▁47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|place▁holder▁no▁48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|place▁holder▁no▁49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|place▁holder▁no▁50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|place▁holder▁no▁51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|place▁holder▁no▁52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|place▁holder▁no▁53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|place▁holder▁no▁54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|place▁holder▁no▁55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|place▁holder▁no▁56|>", + "lstrip": 
false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|place▁holder▁no▁57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|place▁holder▁no▁58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|place▁holder▁no▁59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|place▁holder▁no▁60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|place▁holder▁no▁61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|place▁holder▁no▁62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|place▁holder▁no▁63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|place▁holder▁no▁64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|place▁holder▁no▁65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|place▁holder▁no▁66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|place▁holder▁no▁67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|place▁holder▁no▁68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": 
false, + "special": true + }, + "128069": { + "content": "<|place▁holder▁no▁69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|place▁holder▁no▁70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|place▁holder▁no▁71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|place▁holder▁no▁72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|place▁holder▁no▁73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|place▁holder▁no▁74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|place▁holder▁no▁75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|place▁holder▁no▁76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|place▁holder▁no▁77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|place▁holder▁no▁78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|place▁holder▁no▁79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|place▁holder▁no▁80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": 
"<|place▁holder▁no▁81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|place▁holder▁no▁82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|place▁holder▁no▁83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|place▁holder▁no▁84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|place▁holder▁no▁85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|place▁holder▁no▁86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|place▁holder▁no▁87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|place▁holder▁no▁88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|place▁holder▁no▁89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|place▁holder▁no▁90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|place▁holder▁no▁91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|place▁holder▁no▁92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|place▁holder▁no▁93|>", + "lstrip": false, + "normalized": false, 
+ "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|place▁holder▁no▁94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|place▁holder▁no▁95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|place▁holder▁no▁96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|place▁holder▁no▁97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|place▁holder▁no▁98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|place▁holder▁no▁99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|place▁holder▁no▁100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|place▁holder▁no▁101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|place▁holder▁no▁102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|place▁holder▁no▁103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|place▁holder▁no▁104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|place▁holder▁no▁105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true 
+ }, + "128106": { + "content": "<|place▁holder▁no▁106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|place▁holder▁no▁107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|place▁holder▁no▁108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|place▁holder▁no▁109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|place▁holder▁no▁110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|place▁holder▁no▁111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|place▁holder▁no▁112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|place▁holder▁no▁113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|place▁holder▁no▁114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|place▁holder▁no▁115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|place▁holder▁no▁116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|place▁holder▁no▁117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": 
"<|place▁holder▁no▁118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|place▁holder▁no▁119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|place▁holder▁no▁120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|place▁holder▁no▁121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|place▁holder▁no▁122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|place▁holder▁no▁123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|place▁holder▁no▁124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|place▁holder▁no▁125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|place▁holder▁no▁126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|place▁holder▁no▁127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|place▁holder▁no▁128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|place▁holder▁no▁129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|place▁holder▁no▁130|>", + "lstrip": false, + 
"normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|place▁holder▁no▁131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|place▁holder▁no▁132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|place▁holder▁no▁133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|place▁holder▁no▁134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|place▁holder▁no▁135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|place▁holder▁no▁136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|place▁holder▁no▁137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|place▁holder▁no▁138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|place▁holder▁no▁139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|place▁holder▁no▁140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|place▁holder▁no▁141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|place▁holder▁no▁142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + 
"single_word": false, + "special": true + }, + "128143": { + "content": "<|place▁holder▁no▁143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|place▁holder▁no▁144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|place▁holder▁no▁145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|place▁holder▁no▁146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|place▁holder▁no▁147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|place▁holder▁no▁148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|place▁holder▁no▁149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|place▁holder▁no▁150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|place▁holder▁no▁151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|place▁holder▁no▁152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|place▁holder▁no▁153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|place▁holder▁no▁154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + 
"128155": { + "content": "<|place▁holder▁no▁155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|place▁holder▁no▁156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|place▁holder▁no▁157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|place▁holder▁no▁158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|place▁holder▁no▁159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|place▁holder▁no▁160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|place▁holder▁no▁161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|place▁holder▁no▁162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|place▁holder▁no▁163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|place▁holder▁no▁164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|place▁holder▁no▁165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|place▁holder▁no▁166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|place▁holder▁no▁167|>", + 
"lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|place▁holder▁no▁168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|place▁holder▁no▁169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|place▁holder▁no▁170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|place▁holder▁no▁171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|place▁holder▁no▁172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|place▁holder▁no▁173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|place▁holder▁no▁174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|place▁holder▁no▁175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|place▁holder▁no▁176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|place▁holder▁no▁177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|place▁holder▁no▁178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|place▁holder▁no▁179|>", + "lstrip": false, + "normalized": false, + "rstrip": 
false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|place▁holder▁no▁180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|place▁holder▁no▁181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|place▁holder▁no▁182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|place▁holder▁no▁183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|place▁holder▁no▁184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|place▁holder▁no▁185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|place▁holder▁no▁186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|place▁holder▁no▁187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|place▁holder▁no▁188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|place▁holder▁no▁189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|place▁holder▁no▁190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|place▁holder▁no▁191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + 
"128192": { + "content": "<|place▁holder▁no▁192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|place▁holder▁no▁193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|place▁holder▁no▁194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|place▁holder▁no▁195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|place▁holder▁no▁196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|place▁holder▁no▁197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|place▁holder▁no▁198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|place▁holder▁no▁199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|place▁holder▁no▁200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|place▁holder▁no▁201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|place▁holder▁no▁202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|place▁holder▁no▁203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|place▁holder▁no▁204|>", + 
"lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|place▁holder▁no▁205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|place▁holder▁no▁206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|place▁holder▁no▁207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|place▁holder▁no▁208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|place▁holder▁no▁209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|place▁holder▁no▁210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|place▁holder▁no▁211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|place▁holder▁no▁212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|place▁holder▁no▁213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|place▁holder▁no▁214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|place▁holder▁no▁215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|place▁holder▁no▁216|>", + "lstrip": false, + "normalized": false, + "rstrip": 
false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|place▁holder▁no▁217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|place▁holder▁no▁218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|place▁holder▁no▁219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|place▁holder▁no▁220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|place▁holder▁no▁221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|place▁holder▁no▁222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|place▁holder▁no▁223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|place▁holder▁no▁224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|place▁holder▁no▁225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|place▁holder▁no▁226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|place▁holder▁no▁227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|place▁holder▁no▁228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + 
"128229": { + "content": "<|place▁holder▁no▁229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|place▁holder▁no▁230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|place▁holder▁no▁231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|place▁holder▁no▁232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|place▁holder▁no▁233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|place▁holder▁no▁234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|place▁holder▁no▁235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|place▁holder▁no▁236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|place▁holder▁no▁237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|place▁holder▁no▁238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|place▁holder▁no▁239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|place▁holder▁no▁240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|place▁holder▁no▁241|>", + 
"lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|place▁holder▁no▁242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|place▁holder▁no▁243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|place▁holder▁no▁244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|place▁holder▁no▁245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|place▁holder▁no▁246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|place▁holder▁no▁247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|place▁holder▁no▁248|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|place▁holder▁no▁249|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|place▁holder▁no▁250|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|place▁holder▁no▁251|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|place▁holder▁no▁252|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|place▁holder▁no▁253|>", + "lstrip": false, + "normalized": false, + "rstrip": 
false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|place▁holder▁no▁254|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|place▁holder▁no▁255|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128256": { + "content": "<|place▁holder▁no▁256|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128257": { + "content": "<|place▁holder▁no▁257|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128258": { + "content": "<|place▁holder▁no▁258|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128259": { + "content": "<|place▁holder▁no▁259|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128260": { + "content": "<|place▁holder▁no▁260|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128261": { + "content": "<|place▁holder▁no▁261|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128262": { + "content": "<|place▁holder▁no▁262|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128263": { + "content": "<|place▁holder▁no▁263|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128264": { + "content": "<|place▁holder▁no▁264|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128265": { + "content": "<|place▁holder▁no▁265|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + 
"128266": { + "content": "<|place▁holder▁no▁266|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128267": { + "content": "<|place▁holder▁no▁267|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128268": { + "content": "<|place▁holder▁no▁268|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128269": { + "content": "<|place▁holder▁no▁269|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128270": { + "content": "<|place▁holder▁no▁270|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128271": { + "content": "<|place▁holder▁no▁271|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128272": { + "content": "<|place▁holder▁no▁272|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128273": { + "content": "<|place▁holder▁no▁273|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128274": { + "content": "<|place▁holder▁no▁274|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128275": { + "content": "<|place▁holder▁no▁275|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128276": { + "content": "<|place▁holder▁no▁276|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128277": { + "content": "<|place▁holder▁no▁277|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128278": { + "content": "<|place▁holder▁no▁278|>", + 
"lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128279": { + "content": "<|place▁holder▁no▁279|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128280": { + "content": "<|place▁holder▁no▁280|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128281": { + "content": "<|place▁holder▁no▁281|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128282": { + "content": "<|place▁holder▁no▁282|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128283": { + "content": "<|place▁holder▁no▁283|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128284": { + "content": "<|place▁holder▁no▁284|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128285": { + "content": "<|place▁holder▁no▁285|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128286": { + "content": "<|place▁holder▁no▁286|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128287": { + "content": "<|place▁holder▁no▁287|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128288": { + "content": "<|place▁holder▁no▁288|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128289": { + "content": "<|place▁holder▁no▁289|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128290": { + "content": "<|place▁holder▁no▁290|>", + "lstrip": false, + "normalized": false, + "rstrip": 
false, + "single_word": false, + "special": true + }, + "128291": { + "content": "<|place▁holder▁no▁291|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128292": { + "content": "<|place▁holder▁no▁292|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128293": { + "content": "<|place▁holder▁no▁293|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128294": { + "content": "<|place▁holder▁no▁294|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128295": { + "content": "<|place▁holder▁no▁295|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128296": { + "content": "<|place▁holder▁no▁296|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128297": { + "content": "<|place▁holder▁no▁297|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128298": { + "content": "<|place▁holder▁no▁298|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128299": { + "content": "<|place▁holder▁no▁299|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128300": { + "content": "<|place▁holder▁no▁300|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128301": { + "content": "<|place▁holder▁no▁301|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128302": { + "content": "<|place▁holder▁no▁302|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + 
"128303": { + "content": "<|place▁holder▁no▁303|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128304": { + "content": "<|place▁holder▁no▁304|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128305": { + "content": "<|place▁holder▁no▁305|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128306": { + "content": "<|place▁holder▁no▁306|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128307": { + "content": "<|place▁holder▁no▁307|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128308": { + "content": "<|place▁holder▁no▁308|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128309": { + "content": "<|place▁holder▁no▁309|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128310": { + "content": "<|place▁holder▁no▁310|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128311": { + "content": "<|place▁holder▁no▁311|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128312": { + "content": "<|place▁holder▁no▁312|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128313": { + "content": "<|place▁holder▁no▁313|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128314": { + "content": "<|place▁holder▁no▁314|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128315": { + "content": "<|place▁holder▁no▁315|>", + 
"lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128316": { + "content": "<|place▁holder▁no▁316|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128317": { + "content": "<|place▁holder▁no▁317|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128318": { + "content": "<|place▁holder▁no▁318|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128319": { + "content": "<|place▁holder▁no▁319|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128320": { + "content": "<|place▁holder▁no▁320|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128321": { + "content": "<|place▁holder▁no▁321|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128322": { + "content": "<|place▁holder▁no▁322|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128323": { + "content": "<|place▁holder▁no▁323|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128324": { + "content": "<|place▁holder▁no▁324|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128325": { + "content": "<|place▁holder▁no▁325|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128326": { + "content": "<|place▁holder▁no▁326|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128327": { + "content": "<|place▁holder▁no▁327|>", + "lstrip": false, + "normalized": false, + "rstrip": 
false, + "single_word": false, + "special": true + }, + "128328": { + "content": "<|place▁holder▁no▁328|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128329": { + "content": "<|place▁holder▁no▁329|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128330": { + "content": "<|place▁holder▁no▁330|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128331": { + "content": "<|place▁holder▁no▁331|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128332": { + "content": "<|place▁holder▁no▁332|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128333": { + "content": "<|place▁holder▁no▁333|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128334": { + "content": "<|place▁holder▁no▁334|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128335": { + "content": "<|place▁holder▁no▁335|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128336": { + "content": "<|place▁holder▁no▁336|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128337": { + "content": "<|place▁holder▁no▁337|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128338": { + "content": "<|place▁holder▁no▁338|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128339": { + "content": "<|place▁holder▁no▁339|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + 
"128340": { + "content": "<|place▁holder▁no▁340|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128341": { + "content": "<|place▁holder▁no▁341|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128342": { + "content": "<|place▁holder▁no▁342|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128343": { + "content": "<|place▁holder▁no▁343|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128344": { + "content": "<|place▁holder▁no▁344|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128345": { + "content": "<|place▁holder▁no▁345|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128346": { + "content": "<|place▁holder▁no▁346|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128347": { + "content": "<|place▁holder▁no▁347|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128348": { + "content": "<|place▁holder▁no▁348|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128349": { + "content": "<|place▁holder▁no▁349|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128350": { + "content": "<|place▁holder▁no▁350|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128351": { + "content": "<|place▁holder▁no▁351|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128352": { + "content": "<|place▁holder▁no▁352|>", + 
"lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128353": { + "content": "<|place▁holder▁no▁353|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128354": { + "content": "<|place▁holder▁no▁354|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128355": { + "content": "<|place▁holder▁no▁355|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128356": { + "content": "<|place▁holder▁no▁356|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128357": { + "content": "<|place▁holder▁no▁357|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128358": { + "content": "<|place▁holder▁no▁358|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128359": { + "content": "<|place▁holder▁no▁359|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128360": { + "content": "<|place▁holder▁no▁360|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128361": { + "content": "<|place▁holder▁no▁361|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128362": { + "content": "<|place▁holder▁no▁362|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128363": { + "content": "<|place▁holder▁no▁363|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128364": { + "content": "<|place▁holder▁no▁364|>", + "lstrip": false, + "normalized": false, + "rstrip": 
false, + "single_word": false, + "special": true + }, + "128365": { + "content": "<|place▁holder▁no▁365|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128366": { + "content": "<|place▁holder▁no▁366|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128367": { + "content": "<|place▁holder▁no▁367|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128368": { + "content": "<|place▁holder▁no▁368|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128369": { + "content": "<|place▁holder▁no▁369|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128370": { + "content": "<|place▁holder▁no▁370|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128371": { + "content": "<|place▁holder▁no▁371|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128372": { + "content": "<|place▁holder▁no▁372|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128373": { + "content": "<|place▁holder▁no▁373|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128374": { + "content": "<|place▁holder▁no▁374|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128375": { + "content": "<|place▁holder▁no▁375|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128376": { + "content": "<|place▁holder▁no▁376|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + 
"128377": { + "content": "<|place▁holder▁no▁377|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128378": { + "content": "<|place▁holder▁no▁378|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128379": { + "content": "<|place▁holder▁no▁379|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128380": { + "content": "<|place▁holder▁no▁380|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128381": { + "content": "<|place▁holder▁no▁381|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128382": { + "content": "<|place▁holder▁no▁382|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128383": { + "content": "<|place▁holder▁no▁383|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128384": { + "content": "<|place▁holder▁no▁384|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128385": { + "content": "<|place▁holder▁no▁385|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128386": { + "content": "<|place▁holder▁no▁386|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128387": { + "content": "<|place▁holder▁no▁387|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128388": { + "content": "<|place▁holder▁no▁388|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128389": { + "content": "<|place▁holder▁no▁389|>", + 
"lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128390": { + "content": "<|place▁holder▁no▁390|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128391": { + "content": "<|place▁holder▁no▁391|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128392": { + "content": "<|place▁holder▁no▁392|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128393": { + "content": "<|place▁holder▁no▁393|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128394": { + "content": "<|place▁holder▁no▁394|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128395": { + "content": "<|place▁holder▁no▁395|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128396": { + "content": "<|place▁holder▁no▁396|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128397": { + "content": "<|place▁holder▁no▁397|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128398": { + "content": "<|place▁holder▁no▁398|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128399": { + "content": "<|place▁holder▁no▁399|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128400": { + "content": "<|place▁holder▁no▁400|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128401": { + "content": "<|place▁holder▁no▁401|>", + "lstrip": false, + "normalized": false, + "rstrip": 
false, + "single_word": false, + "special": true + }, + "128402": { + "content": "<|place▁holder▁no▁402|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128403": { + "content": "<|place▁holder▁no▁403|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128404": { + "content": "<|place▁holder▁no▁404|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128405": { + "content": "<|place▁holder▁no▁405|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128406": { + "content": "<|place▁holder▁no▁406|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128407": { + "content": "<|place▁holder▁no▁407|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128408": { + "content": "<|place▁holder▁no▁408|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128409": { + "content": "<|place▁holder▁no▁409|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128410": { + "content": "<|place▁holder▁no▁410|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128411": { + "content": "<|place▁holder▁no▁411|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128412": { + "content": "<|place▁holder▁no▁412|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128413": { + "content": "<|place▁holder▁no▁413|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + 
"128414": { + "content": "<|place▁holder▁no▁414|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128415": { + "content": "<|place▁holder▁no▁415|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128416": { + "content": "<|place▁holder▁no▁416|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128417": { + "content": "<|place▁holder▁no▁417|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128418": { + "content": "<|place▁holder▁no▁418|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128419": { + "content": "<|place▁holder▁no▁419|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128420": { + "content": "<|place▁holder▁no▁420|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128421": { + "content": "<|place▁holder▁no▁421|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128422": { + "content": "<|place▁holder▁no▁422|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128423": { + "content": "<|place▁holder▁no▁423|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128424": { + "content": "<|place▁holder▁no▁424|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128425": { + "content": "<|place▁holder▁no▁425|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128426": { + "content": "<|place▁holder▁no▁426|>", + 
"lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128427": { + "content": "<|place▁holder▁no▁427|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128428": { + "content": "<|place▁holder▁no▁428|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128429": { + "content": "<|place▁holder▁no▁429|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128430": { + "content": "<|place▁holder▁no▁430|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128431": { + "content": "<|place▁holder▁no▁431|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128432": { + "content": "<|place▁holder▁no▁432|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128433": { + "content": "<|place▁holder▁no▁433|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128434": { + "content": "<|place▁holder▁no▁434|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128435": { + "content": "<|place▁holder▁no▁435|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128436": { + "content": "<|place▁holder▁no▁436|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128437": { + "content": "<|place▁holder▁no▁437|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128438": { + "content": "<|place▁holder▁no▁438|>", + "lstrip": false, + "normalized": false, + "rstrip": 
false, + "single_word": false, + "special": true + }, + "128439": { + "content": "<|place▁holder▁no▁439|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128440": { + "content": "<|place▁holder▁no▁440|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128441": { + "content": "<|place▁holder▁no▁441|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128442": { + "content": "<|place▁holder▁no▁442|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128443": { + "content": "<|place▁holder▁no▁443|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128444": { + "content": "<|place▁holder▁no▁444|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128445": { + "content": "<|place▁holder▁no▁445|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128446": { + "content": "<|place▁holder▁no▁446|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128447": { + "content": "<|place▁holder▁no▁447|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128448": { + "content": "<|place▁holder▁no▁448|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128449": { + "content": "<|place▁holder▁no▁449|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128450": { + "content": "<|place▁holder▁no▁450|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + 
"128451": { + "content": "<|place▁holder▁no▁451|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128452": { + "content": "<|place▁holder▁no▁452|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128453": { + "content": "<|place▁holder▁no▁453|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128454": { + "content": "<|place▁holder▁no▁454|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128455": { + "content": "<|place▁holder▁no▁455|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128456": { + "content": "<|place▁holder▁no▁456|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128457": { + "content": "<|place▁holder▁no▁457|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128458": { + "content": "<|place▁holder▁no▁458|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128459": { + "content": "<|place▁holder▁no▁459|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128460": { + "content": "<|place▁holder▁no▁460|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128461": { + "content": "<|place▁holder▁no▁461|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128462": { + "content": "<|place▁holder▁no▁462|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128463": { + "content": "<|place▁holder▁no▁463|>", + 
"lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128464": { + "content": "<|place▁holder▁no▁464|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128465": { + "content": "<|place▁holder▁no▁465|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128466": { + "content": "<|place▁holder▁no▁466|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128467": { + "content": "<|place▁holder▁no▁467|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128468": { + "content": "<|place▁holder▁no▁468|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128469": { + "content": "<|place▁holder▁no▁469|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128470": { + "content": "<|place▁holder▁no▁470|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128471": { + "content": "<|place▁holder▁no▁471|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128472": { + "content": "<|place▁holder▁no▁472|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128473": { + "content": "<|place▁holder▁no▁473|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128474": { + "content": "<|place▁holder▁no▁474|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128475": { + "content": "<|place▁holder▁no▁475|>", + "lstrip": false, + "normalized": false, + "rstrip": 
false, + "single_word": false, + "special": true + }, + "128476": { + "content": "<|place▁holder▁no▁476|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128477": { + "content": "<|place▁holder▁no▁477|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128478": { + "content": "<|place▁holder▁no▁478|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128479": { + "content": "<|place▁holder▁no▁479|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128480": { + "content": "<|place▁holder▁no▁480|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128481": { + "content": "<|place▁holder▁no▁481|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128482": { + "content": "<|place▁holder▁no▁482|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128483": { + "content": "<|place▁holder▁no▁483|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128484": { + "content": "<|place▁holder▁no▁484|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128485": { + "content": "<|place▁holder▁no▁485|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128486": { + "content": "<|place▁holder▁no▁486|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128487": { + "content": "<|place▁holder▁no▁487|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + 
"128488": { + "content": "<|place▁holder▁no▁488|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128489": { + "content": "<|place▁holder▁no▁489|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128490": { + "content": "<|place▁holder▁no▁490|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128491": { + "content": "<|place▁holder▁no▁491|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128492": { + "content": "<|place▁holder▁no▁492|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128493": { + "content": "<|place▁holder▁no▁493|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128494": { + "content": "<|place▁holder▁no▁494|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128495": { + "content": "<|place▁holder▁no▁495|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128496": { + "content": "<|place▁holder▁no▁496|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128497": { + "content": "<|place▁holder▁no▁497|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128498": { + "content": "<|place▁holder▁no▁498|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128499": { + "content": "<|place▁holder▁no▁499|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128500": { + "content": "<|place▁holder▁no▁500|>", + 
"lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128501": { + "content": "<|place▁holder▁no▁501|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128502": { + "content": "<|place▁holder▁no▁502|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128503": { + "content": "<|place▁holder▁no▁503|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128504": { + "content": "<|place▁holder▁no▁504|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128505": { + "content": "<|place▁holder▁no▁505|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128506": { + "content": "<|place▁holder▁no▁506|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128507": { + "content": "<|place▁holder▁no▁507|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128508": { + "content": "<|place▁holder▁no▁508|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128509": { + "content": "<|place▁holder▁no▁509|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128510": { + "content": "<|place▁holder▁no▁510|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128511": { + "content": "<|place▁holder▁no▁511|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128512": { + "content": "<|place▁holder▁no▁512|>", + "lstrip": false, + "normalized": false, + "rstrip": 
false, + "single_word": false, + "special": true + }, + "128513": { + "content": "<|place▁holder▁no▁513|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128514": { + "content": "<|place▁holder▁no▁514|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128515": { + "content": "<|place▁holder▁no▁515|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128516": { + "content": "<|place▁holder▁no▁516|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128517": { + "content": "<|place▁holder▁no▁517|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128518": { + "content": "<|place▁holder▁no▁518|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128519": { + "content": "<|place▁holder▁no▁519|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128520": { + "content": "<|place▁holder▁no▁520|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128521": { + "content": "<|place▁holder▁no▁521|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128522": { + "content": "<|place▁holder▁no▁522|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128523": { + "content": "<|place▁holder▁no▁523|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128524": { + "content": "<|place▁holder▁no▁524|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + 
"128525": { + "content": "<|place▁holder▁no▁525|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128526": { + "content": "<|place▁holder▁no▁526|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128527": { + "content": "<|place▁holder▁no▁527|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128528": { + "content": "<|place▁holder▁no▁528|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128529": { + "content": "<|place▁holder▁no▁529|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128530": { + "content": "<|place▁holder▁no▁530|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128531": { + "content": "<|place▁holder▁no▁531|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128532": { + "content": "<|place▁holder▁no▁532|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128533": { + "content": "<|place▁holder▁no▁533|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128534": { + "content": "<|place▁holder▁no▁534|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128535": { + "content": "<|place▁holder▁no▁535|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128536": { + "content": "<|place▁holder▁no▁536|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128537": { + "content": "<|place▁holder▁no▁537|>", + 
"lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128538": { + "content": "<|place▁holder▁no▁538|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128539": { + "content": "<|place▁holder▁no▁539|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128540": { + "content": "<|place▁holder▁no▁540|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128541": { + "content": "<|place▁holder▁no▁541|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128542": { + "content": "<|place▁holder▁no▁542|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128543": { + "content": "<|place▁holder▁no▁543|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128544": { + "content": "<|place▁holder▁no▁544|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128545": { + "content": "<|place▁holder▁no▁545|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128546": { + "content": "<|place▁holder▁no▁546|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128547": { + "content": "<|place▁holder▁no▁547|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128548": { + "content": "<|place▁holder▁no▁548|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128549": { + "content": "<|place▁holder▁no▁549|>", + "lstrip": false, + "normalized": false, + "rstrip": 
false, + "single_word": false, + "special": true + }, + "128550": { + "content": "<|place▁holder▁no▁550|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128551": { + "content": "<|place▁holder▁no▁551|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128552": { + "content": "<|place▁holder▁no▁552|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128553": { + "content": "<|place▁holder▁no▁553|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128554": { + "content": "<|place▁holder▁no▁554|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128555": { + "content": "<|place▁holder▁no▁555|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128556": { + "content": "<|place▁holder▁no▁556|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128557": { + "content": "<|place▁holder▁no▁557|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128558": { + "content": "<|place▁holder▁no▁558|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128559": { + "content": "<|place▁holder▁no▁559|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128560": { + "content": "<|place▁holder▁no▁560|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128561": { + "content": "<|place▁holder▁no▁561|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + 
"128562": { + "content": "<|place▁holder▁no▁562|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128563": { + "content": "<|place▁holder▁no▁563|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128564": { + "content": "<|place▁holder▁no▁564|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128565": { + "content": "<|place▁holder▁no▁565|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128566": { + "content": "<|place▁holder▁no▁566|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128567": { + "content": "<|place▁holder▁no▁567|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128568": { + "content": "<|place▁holder▁no▁568|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128569": { + "content": "<|place▁holder▁no▁569|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128570": { + "content": "<|place▁holder▁no▁570|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128571": { + "content": "<|place▁holder▁no▁571|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128572": { + "content": "<|place▁holder▁no▁572|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128573": { + "content": "<|place▁holder▁no▁573|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128574": { + "content": "<|place▁holder▁no▁574|>", + 
"lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128575": { + "content": "<|place▁holder▁no▁575|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128576": { + "content": "<|place▁holder▁no▁576|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128577": { + "content": "<|place▁holder▁no▁577|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128578": { + "content": "<|place▁holder▁no▁578|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128579": { + "content": "<|place▁holder▁no▁579|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128580": { + "content": "<|place▁holder▁no▁580|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128581": { + "content": "<|place▁holder▁no▁581|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128582": { + "content": "<|place▁holder▁no▁582|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128583": { + "content": "<|place▁holder▁no▁583|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128584": { + "content": "<|place▁holder▁no▁584|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128585": { + "content": "<|place▁holder▁no▁585|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128586": { + "content": "<|place▁holder▁no▁586|>", + "lstrip": false, + "normalized": false, + "rstrip": 
false, + "single_word": false, + "special": true + }, + "128587": { + "content": "<|place▁holder▁no▁587|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128588": { + "content": "<|place▁holder▁no▁588|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128589": { + "content": "<|place▁holder▁no▁589|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128590": { + "content": "<|place▁holder▁no▁590|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128591": { + "content": "<|place▁holder▁no▁591|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128592": { + "content": "<|place▁holder▁no▁592|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128593": { + "content": "<|place▁holder▁no▁593|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128594": { + "content": "<|place▁holder▁no▁594|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128595": { + "content": "<|place▁holder▁no▁595|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128596": { + "content": "<|place▁holder▁no▁596|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128597": { + "content": "<|place▁holder▁no▁597|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128598": { + "content": "<|place▁holder▁no▁598|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + 
"128599": { + "content": "<|place▁holder▁no▁599|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128600": { + "content": "<|place▁holder▁no▁600|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128601": { + "content": "<|place▁holder▁no▁601|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128602": { + "content": "<|place▁holder▁no▁602|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128603": { + "content": "<|place▁holder▁no▁603|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128604": { + "content": "<|place▁holder▁no▁604|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128605": { + "content": "<|place▁holder▁no▁605|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128606": { + "content": "<|place▁holder▁no▁606|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128607": { + "content": "<|place▁holder▁no▁607|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128608": { + "content": "<|place▁holder▁no▁608|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128609": { + "content": "<|place▁holder▁no▁609|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128610": { + "content": "<|place▁holder▁no▁610|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128611": { + "content": "<|place▁holder▁no▁611|>", + 
"lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128612": { + "content": "<|place▁holder▁no▁612|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128613": { + "content": "<|place▁holder▁no▁613|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128614": { + "content": "<|place▁holder▁no▁614|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128615": { + "content": "<|place▁holder▁no▁615|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128616": { + "content": "<|place▁holder▁no▁616|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128617": { + "content": "<|place▁holder▁no▁617|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128618": { + "content": "<|place▁holder▁no▁618|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128619": { + "content": "<|place▁holder▁no▁619|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128620": { + "content": "<|place▁holder▁no▁620|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128621": { + "content": "<|place▁holder▁no▁621|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128622": { + "content": "<|place▁holder▁no▁622|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128623": { + "content": "<|place▁holder▁no▁623|>", + "lstrip": false, + "normalized": false, + "rstrip": 
false, + "single_word": false, + "special": true + }, + "128624": { + "content": "<|place▁holder▁no▁624|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128625": { + "content": "<|place▁holder▁no▁625|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128626": { + "content": "<|place▁holder▁no▁626|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128627": { + "content": "<|place▁holder▁no▁627|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128628": { + "content": "<|place▁holder▁no▁628|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128629": { + "content": "<|place▁holder▁no▁629|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128630": { + "content": "<|place▁holder▁no▁630|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128631": { + "content": "<|place▁holder▁no▁631|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128632": { + "content": "<|place▁holder▁no▁632|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128633": { + "content": "<|place▁holder▁no▁633|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128634": { + "content": "<|place▁holder▁no▁634|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128635": { + "content": "<|place▁holder▁no▁635|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + 
"128636": { + "content": "<|place▁holder▁no▁636|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128637": { + "content": "<|place▁holder▁no▁637|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128638": { + "content": "<|place▁holder▁no▁638|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128639": { + "content": "<|place▁holder▁no▁639|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128640": { + "content": "<|place▁holder▁no▁640|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128641": { + "content": "<|place▁holder▁no▁641|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128642": { + "content": "<|place▁holder▁no▁642|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128643": { + "content": "<|place▁holder▁no▁643|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128644": { + "content": "<|place▁holder▁no▁644|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128645": { + "content": "<|place▁holder▁no▁645|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128646": { + "content": "<|place▁holder▁no▁646|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128647": { + "content": "<|place▁holder▁no▁647|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128648": { + "content": "<|place▁holder▁no▁648|>", + 
"lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128649": { + "content": "<|place▁holder▁no▁649|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128650": { + "content": "<|place▁holder▁no▁650|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128651": { + "content": "<|place▁holder▁no▁651|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128652": { + "content": "<|place▁holder▁no▁652|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128653": { + "content": "<|place▁holder▁no▁653|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128654": { + "content": "<|place▁holder▁no▁654|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128655": { + "content": "<|place▁holder▁no▁655|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128656": { + "content": "<|place▁holder▁no▁656|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128657": { + "content": "<|place▁holder▁no▁657|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128658": { + "content": "<|place▁holder▁no▁658|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128659": { + "content": "<|place▁holder▁no▁659|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128660": { + "content": "<|place▁holder▁no▁660|>", + "lstrip": false, + "normalized": false, + "rstrip": 
false, + "single_word": false, + "special": true + }, + "128661": { + "content": "<|place▁holder▁no▁661|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128662": { + "content": "<|place▁holder▁no▁662|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128663": { + "content": "<|place▁holder▁no▁663|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128664": { + "content": "<|place▁holder▁no▁664|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128665": { + "content": "<|place▁holder▁no▁665|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128666": { + "content": "<|place▁holder▁no▁666|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128667": { + "content": "<|place▁holder▁no▁667|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128668": { + "content": "<|place▁holder▁no▁668|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128669": { + "content": "<|place▁holder▁no▁669|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128670": { + "content": "<|place▁holder▁no▁670|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128671": { + "content": "<|place▁holder▁no▁671|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128672": { + "content": "<|place▁holder▁no▁672|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + 
"128673": { + "content": "<|place▁holder▁no▁673|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128674": { + "content": "<|place▁holder▁no▁674|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128675": { + "content": "<|place▁holder▁no▁675|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128676": { + "content": "<|place▁holder▁no▁676|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128677": { + "content": "<|place▁holder▁no▁677|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128678": { + "content": "<|place▁holder▁no▁678|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128679": { + "content": "<|place▁holder▁no▁679|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128680": { + "content": "<|place▁holder▁no▁680|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128681": { + "content": "<|place▁holder▁no▁681|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128682": { + "content": "<|place▁holder▁no▁682|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128683": { + "content": "<|place▁holder▁no▁683|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128684": { + "content": "<|place▁holder▁no▁684|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128685": { + "content": "<|place▁holder▁no▁685|>", + 
"lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128686": { + "content": "<|place▁holder▁no▁686|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128687": { + "content": "<|place▁holder▁no▁687|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128688": { + "content": "<|place▁holder▁no▁688|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128689": { + "content": "<|place▁holder▁no▁689|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128690": { + "content": "<|place▁holder▁no▁690|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128691": { + "content": "<|place▁holder▁no▁691|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128692": { + "content": "<|place▁holder▁no▁692|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128693": { + "content": "<|place▁holder▁no▁693|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128694": { + "content": "<|place▁holder▁no▁694|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128695": { + "content": "<|place▁holder▁no▁695|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128696": { + "content": "<|place▁holder▁no▁696|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128697": { + "content": "<|place▁holder▁no▁697|>", + "lstrip": false, + "normalized": false, + "rstrip": 
false, + "single_word": false, + "special": true + }, + "128698": { + "content": "<|place▁holder▁no▁698|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128699": { + "content": "<|place▁holder▁no▁699|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128700": { + "content": "<|place▁holder▁no▁700|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128701": { + "content": "<|place▁holder▁no▁701|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128702": { + "content": "<|place▁holder▁no▁702|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128703": { + "content": "<|place▁holder▁no▁703|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128704": { + "content": "<|place▁holder▁no▁704|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128705": { + "content": "<|place▁holder▁no▁705|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128706": { + "content": "<|place▁holder▁no▁706|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128707": { + "content": "<|place▁holder▁no▁707|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128708": { + "content": "<|place▁holder▁no▁708|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128709": { + "content": "<|place▁holder▁no▁709|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + 
"128710": { + "content": "<|place▁holder▁no▁710|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128711": { + "content": "<|place▁holder▁no▁711|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128712": { + "content": "<|place▁holder▁no▁712|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128713": { + "content": "<|place▁holder▁no▁713|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128714": { + "content": "<|place▁holder▁no▁714|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128715": { + "content": "<|place▁holder▁no▁715|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128716": { + "content": "<|place▁holder▁no▁716|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128717": { + "content": "<|place▁holder▁no▁717|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128718": { + "content": "<|place▁holder▁no▁718|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128719": { + "content": "<|place▁holder▁no▁719|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128720": { + "content": "<|place▁holder▁no▁720|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128721": { + "content": "<|place▁holder▁no▁721|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128722": { + "content": "<|place▁holder▁no▁722|>", + 
"lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128723": { + "content": "<|place▁holder▁no▁723|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128724": { + "content": "<|place▁holder▁no▁724|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128725": { + "content": "<|place▁holder▁no▁725|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128726": { + "content": "<|place▁holder▁no▁726|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128727": { + "content": "<|place▁holder▁no▁727|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128728": { + "content": "<|place▁holder▁no▁728|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128729": { + "content": "<|place▁holder▁no▁729|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128730": { + "content": "<|place▁holder▁no▁730|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128731": { + "content": "<|place▁holder▁no▁731|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128732": { + "content": "<|place▁holder▁no▁732|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128733": { + "content": "<|place▁holder▁no▁733|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128734": { + "content": "<|place▁holder▁no▁734|>", + "lstrip": false, + "normalized": false, + "rstrip": 
false, + "single_word": false, + "special": true + }, + "128735": { + "content": "<|place▁holder▁no▁735|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128736": { + "content": "<|place▁holder▁no▁736|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128737": { + "content": "<|place▁holder▁no▁737|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128738": { + "content": "<|place▁holder▁no▁738|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128739": { + "content": "<|place▁holder▁no▁739|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128740": { + "content": "<|place▁holder▁no▁740|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128741": { + "content": "<|place▁holder▁no▁741|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128742": { + "content": "<|place▁holder▁no▁742|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128743": { + "content": "<|place▁holder▁no▁743|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128744": { + "content": "<|place▁holder▁no▁744|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128745": { + "content": "<|place▁holder▁no▁745|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128746": { + "content": "<|place▁holder▁no▁746|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + 
"128747": { + "content": "<|place▁holder▁no▁747|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128748": { + "content": "<|place▁holder▁no▁748|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128749": { + "content": "<|place▁holder▁no▁749|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128750": { + "content": "<|place▁holder▁no▁750|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128751": { + "content": "<|place▁holder▁no▁751|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128752": { + "content": "<|place▁holder▁no▁752|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128753": { + "content": "<|place▁holder▁no▁753|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128754": { + "content": "<|place▁holder▁no▁754|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128755": { + "content": "<|place▁holder▁no▁755|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128756": { + "content": "<|place▁holder▁no▁756|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128757": { + "content": "<|place▁holder▁no▁757|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128758": { + "content": "<|place▁holder▁no▁758|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128759": { + "content": "<|place▁holder▁no▁759|>", + 
"lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128760": { + "content": "<|place▁holder▁no▁760|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128761": { + "content": "<|place▁holder▁no▁761|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128762": { + "content": "<|place▁holder▁no▁762|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128763": { + "content": "<|place▁holder▁no▁763|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128764": { + "content": "<|place▁holder▁no▁764|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128765": { + "content": "<|place▁holder▁no▁765|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128766": { + "content": "<|place▁holder▁no▁766|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128767": { + "content": "<|place▁holder▁no▁767|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128768": { + "content": "<|place▁holder▁no▁768|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128769": { + "content": "<|place▁holder▁no▁769|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128770": { + "content": "<|place▁holder▁no▁770|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128771": { + "content": "<|place▁holder▁no▁771|>", + "lstrip": false, + "normalized": false, + "rstrip": 
false, + "single_word": false, + "special": true + }, + "128772": { + "content": "<|place▁holder▁no▁772|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128773": { + "content": "<|place▁holder▁no▁773|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128774": { + "content": "<|place▁holder▁no▁774|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128775": { + "content": "<|place▁holder▁no▁775|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128776": { + "content": "<|place▁holder▁no▁776|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128777": { + "content": "<|place▁holder▁no▁777|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128778": { + "content": "<|place▁holder▁no▁778|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128779": { + "content": "<|place▁holder▁no▁779|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128780": { + "content": "<|place▁holder▁no▁780|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128781": { + "content": "<|place▁holder▁no▁781|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128782": { + "content": "<|place▁holder▁no▁782|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128783": { + "content": "<|place▁holder▁no▁783|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + 
"128784": { + "content": "<|place▁holder▁no▁784|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128785": { + "content": "<|place▁holder▁no▁785|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128786": { + "content": "<|place▁holder▁no▁786|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128787": { + "content": "<|place▁holder▁no▁787|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128788": { + "content": "<|place▁holder▁no▁788|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128789": { + "content": "<|place▁holder▁no▁789|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128790": { + "content": "<|place▁holder▁no▁790|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128791": { + "content": "<|place▁holder▁no▁791|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128792": { + "content": "<|place▁holder▁no▁792|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128793": { + "content": "<|place▁holder▁no▁793|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128794": { + "content": "<|place▁holder▁no▁794|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128795": { + "content": "<|place▁holder▁no▁795|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128796": { + "content": "<|place▁holder▁no▁796|>", + 
"lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128797": { + "content": "<|place▁holder▁no▁797|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128798": { + "content": "<|place▁holder▁no▁798|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128799": { + "content": "<|place▁holder▁no▁799|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128800": { + "content": "<|fim▁hole|>", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "128801": { + "content": "<|fim▁begin|>", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "128802": { + "content": "<|fim▁end|>", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "128803": { + "content": "<|User|>", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "128804": { + "content": "<|Assistant|>", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "128805": { + "content": "<|EOT|>", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": true + }, + "128806": { + "content": "<|tool▁calls▁begin|>", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "128807": { + "content": "<|tool▁calls▁end|>", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "128808": { + "content": "<|tool▁call▁begin|>", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "128809": { + "content": 
"<|tool▁call▁end|>", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "128810": { + "content": "<|tool▁outputs▁begin|>", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "128811": { + "content": "<|tool▁outputs▁end|>", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "128812": { + "content": "<|tool▁output▁begin|>", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "128813": { + "content": "<|tool▁output▁end|>", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "128814": { + "content": "<|tool▁sep|>", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "128815": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128816": { + "content": "<|ref|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128817": { + "content": "<|/ref|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128818": { + "content": "<|det|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128819": { + "content": "<|/det|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128820": { + "content": "<|grounding|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128821": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128822": { + "content": "", + "lstrip": false, + "normalized": 
false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128823": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128824": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128825": { + "content": "<|User|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128826": { + "content": "<|Assistant|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "additional_special_tokens": [ + "<|User|>", + "<|Assistant|>" + ], + "bos_token": "<|begin▁of▁sentence|>", + "clean_up_tokenization_spaces": false, + "eos_token": "<|end▁of▁sentence|>", + "extra_special_tokens": {}, + "legacy": true, + "model_max_length": 1000000000000000019884624838656, + "pad_token": "<ļ½œā–padā–ļ½œ>", + "tokenizer_class": "LlamaTokenizerFast", + "unk_token": null, + "use_default_system_prompt": false +}