tokenizer_config.json
4.7 KB · 216 lines · json Raw
1 {
2 "added_tokens_decoder": {
3 "163584": {
4 "content": "[BOS]",
5 "lstrip": false,
6 "normalized": false,
7 "rstrip": false,
8 "single_word": false,
9 "special": true
10 },
11 "163585": {
12 "content": "[EOS]",
13 "lstrip": false,
14 "normalized": false,
15 "rstrip": false,
16 "single_word": false,
17 "special": true
18 },
19 "163586": {
20 "content": "<|im_end|>",
21 "lstrip": false,
22 "normalized": false,
23 "rstrip": false,
24 "single_word": false,
25 "special": true
26 },
27 "163587": {
28 "content": "<|im_user|>",
29 "lstrip": false,
30 "normalized": false,
31 "rstrip": false,
32 "single_word": false,
33 "special": true
34 },
35 "163588": {
36 "content": "<|im_assistant|>",
37 "lstrip": false,
38 "normalized": false,
39 "rstrip": false,
40 "single_word": false,
41 "special": true
42 },
43 "163590": {
44 "content": "<|start_header_id|>",
45 "lstrip": false,
46 "normalized": false,
47 "rstrip": false,
48 "single_word": false,
49 "special": true
50 },
51 "163591": {
52 "content": "<|end_header_id|>",
53 "lstrip": false,
54 "normalized": false,
55 "rstrip": false,
56 "single_word": false,
57 "special": true
58 },
59 "163593": {
60 "content": "[EOT]",
61 "lstrip": false,
62 "normalized": false,
63 "rstrip": false,
64 "single_word": false,
65 "special": true
66 },
67 "163594": {
68 "content": "<|im_system|>",
69 "lstrip": false,
70 "normalized": false,
71 "rstrip": false,
72 "single_word": false,
73 "special": true
74 },
75 "163595": {
76 "content": "<|tool_calls_section_begin|>",
77 "lstrip": false,
78 "normalized": false,
79 "rstrip": false,
80 "single_word": false,
81 "special": false
82 },
83 "163596": {
84 "content": "<|tool_calls_section_end|>",
85 "lstrip": false,
86 "normalized": false,
87 "rstrip": false,
88 "single_word": false,
89 "special": false
90 },
91 "163597": {
92 "content": "<|tool_call_begin|>",
93 "lstrip": false,
94 "normalized": false,
95 "rstrip": false,
96 "single_word": false,
97 "special": false
98 },
99 "163598": {
100 "content": "<|tool_call_argument_begin|>",
101 "lstrip": false,
102 "normalized": false,
103 "rstrip": false,
104 "single_word": false,
105 "special": false
106 },
107 "163599": {
108 "content": "<|tool_call_end|>",
109 "lstrip": false,
110 "normalized": false,
111 "rstrip": false,
112 "single_word": false,
113 "special": false
114 },
115 "163601": {
116 "content": "<|im_middle|>",
117 "lstrip": false,
118 "normalized": false,
119 "rstrip": false,
120 "single_word": false,
121 "special": true
122 },
123 "163602": {
124 "content": "<|media_begin|>",
125 "lstrip": false,
126 "normalized": false,
127 "rstrip": false,
128 "single_word": false,
129 "special": true
130 },
131 "163603": {
132 "content": "<|media_content|>",
133 "lstrip": false,
134 "normalized": false,
135 "rstrip": false,
136 "single_word": false,
137 "special": true
138 },
139 "163604": {
140 "content": "<|media_end|>",
141 "lstrip": false,
142 "normalized": false,
143 "rstrip": false,
144 "single_word": false,
145 "special": true
146 },
147 "163605": {
148 "content": "<|media_pad|>",
149 "lstrip": false,
150 "normalized": false,
151 "rstrip": false,
152 "single_word": false,
153 "special": true
154 },
155 "163606": {
156 "content": "<think>",
157 "lstrip": false,
158 "normalized": false,
159 "rstrip": false,
160 "single_word": false,
161 "special": false
162 },
163 "163607": {
164 "content": "</think>",
165 "lstrip": false,
166 "normalized": false,
167 "rstrip": false,
168 "single_word": false,
169 "special": false
170 },
171 "163838": {
172 "content": "[UNK]",
173 "lstrip": false,
174 "normalized": false,
175 "rstrip": false,
176 "single_word": false,
177 "special": true
178 },
179 "163839": {
180 "content": "[PAD]",
181 "lstrip": false,
182 "normalized": false,
183 "rstrip": false,
184 "single_word": false,
185 "special": true
186 }
187 },
188 "additional_special_tokens": [
189 "<|im_end|>",
190 "<|im_user|>",
191 "<|im_assistant|>",
192 "<|start_header_id|>",
193 "<|end_header_id|>",
194 "[EOT]",
195 "<|im_system|>",
196 "<|im_middle|>",
197 "<|media_begin|>",
198 "<|media_content|>",
199 "<|media_end|>",
200 "<|media_pad|>"
201 ],
202 "bos_token": "[BOS]",
203 "clean_up_tokenization_spaces": false,
204 "eos_token": "[EOS]",
205 "extra_special_tokens": {},
206 "model_max_length": 1000000000000000019884624838656,
207 "pad_token": "[PAD]",
208 "tokenizer_class": "TikTokenTokenizer",
209 "unk_token": "[UNK]",
210 "auto_map": {
211 "AutoTokenizer": [
212 "tokenization_kimi.TikTokenTokenizer",
213 null
214 ]
215 }
216 }