From 6f6682068584ce7a2cf4e4c0fd002bbd96a68ec2 Mon Sep 17 00:00:00 2001 From: systemd Date: Thu, 23 Apr 2026 12:53:15 +0000 Subject: [PATCH] Update multilingual tokenizer rupee placeholder (batch 1/1) --- .gitattributes | 4 +++- grapheme_mtl_merged_expanded_v1.json | 8 ++++---- t3_mtl23ls_v3.safetensors | 3 +++ 3 files changed, 10 insertions(+), 5 deletions(-) create mode 100644 t3_mtl23ls_v3.safetensors diff --git a/.gitattributes b/.gitattributes index 7a0b41a..da054e5 100644 --- a/.gitattributes +++ b/.gitattributes @@ -50,4 +50,6 @@ t3_cfg.safetensors filter=lfs diff=lfs merge=lfs -text Cangjie5_TC.json filter=lfs diff=lfs merge=lfs -text -t3_mtl23ls_v2.safetensors filter=lfs diff=lfs merge=lfs -text \ No newline at end of file +t3_mtl23ls_v2.safetensors filter=lfs diff=lfs merge=lfs -text + +t3_mtl23ls_v3.safetensors filter=lfs diff=lfs merge=lfs -text \ No newline at end of file diff --git a/grapheme_mtl_merged_expanded_v1.json b/grapheme_mtl_merged_expanded_v1.json index 4b0e465..d27fb3f 100644 --- a/grapheme_mtl_merged_expanded_v1.json +++ b/grapheme_mtl_merged_expanded_v1.json @@ -1022,12 +1022,12 @@ }, { "id": 2065, - "content": "[PLACEHOLDER45]", + "content": "₹", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, - "special": true + "special": false }, { "id": 2107, @@ -3145,7 +3145,7 @@ "[cj_8]": 2062, "[cj_9]": 2063, "[cj_.]": 2064, - "[PLACEHOLDER45]": 2065, + "₹": 2065, "‰": 2066, "‱": 2067, "′": 2068, @@ -3803,4 +3803,4 @@ "ˌ ɐ" ] } -} \ No newline at end of file +} diff --git a/t3_mtl23ls_v3.safetensors b/t3_mtl23ls_v3.safetensors new file mode 100644 index 0000000..1c209b8 --- /dev/null +++ b/t3_mtl23ls_v3.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5abca8321ede76f8e61f1cc0d19aea6c946b28871017ce8726f8a69203f05953 +size 2143989928