tokenizer_config.json
7.5 KB · 342 lines · json Raw
1 {
2 "added_tokens_decoder": {
3 "151329": {
4 "content": "<|endoftext|>",
5 "lstrip": false,
6 "normalized": false,
7 "rstrip": false,
8 "single_word": false,
9 "special": true
10 },
11 "151330": {
12 "content": "[MASK]",
13 "lstrip": false,
14 "normalized": false,
15 "rstrip": false,
16 "single_word": false,
17 "special": true
18 },
19 "151331": {
20 "content": "[gMASK]",
21 "lstrip": false,
22 "normalized": false,
23 "rstrip": false,
24 "single_word": false,
25 "special": true
26 },
27 "151332": {
28 "content": "[sMASK]",
29 "lstrip": false,
30 "normalized": false,
31 "rstrip": false,
32 "single_word": false,
33 "special": true
34 },
35 "151333": {
36 "content": "<|im_start|>",
37 "lstrip": false,
38 "normalized": false,
39 "rstrip": false,
40 "single_word": false,
41 "special": true
42 },
43 "151334": {
44 "content": "<|im_end|>",
45 "lstrip": false,
46 "normalized": false,
47 "rstrip": false,
48 "single_word": false,
49 "special": true
50 },
51 "151335": {
52 "content": "<|system|>",
53 "lstrip": false,
54 "normalized": false,
55 "rstrip": false,
56 "single_word": false,
57 "special": true
58 },
59 "151336": {
60 "content": "<|user|>",
61 "lstrip": false,
62 "normalized": false,
63 "rstrip": false,
64 "single_word": false,
65 "special": true
66 },
67 "151337": {
68 "content": "<|assistant|>",
69 "lstrip": false,
70 "normalized": false,
71 "rstrip": false,
72 "single_word": false,
73 "special": true
74 },
75 "151338": {
76 "content": "<|observation|>",
77 "lstrip": false,
78 "normalized": false,
79 "rstrip": false,
80 "single_word": false,
81 "special": true
82 },
83 "151339": {
84 "content": "<|begin_of_image|>",
85 "lstrip": false,
86 "normalized": false,
87 "rstrip": false,
88 "single_word": false,
89 "special": true
90 },
91 "151340": {
92 "content": "<|end_of_image|>",
93 "lstrip": false,
94 "normalized": false,
95 "rstrip": false,
96 "single_word": false,
97 "special": true
98 },
99 "151341": {
100 "content": "<|begin_of_video|>",
101 "lstrip": false,
102 "normalized": false,
103 "rstrip": false,
104 "single_word": false,
105 "special": true
106 },
107 "151342": {
108 "content": "<|end_of_video|>",
109 "lstrip": false,
110 "normalized": false,
111 "rstrip": false,
112 "single_word": false,
113 "special": true
114 },
115 "151343": {
116 "content": "<|begin_of_audio|>",
117 "lstrip": false,
118 "normalized": false,
119 "rstrip": false,
120 "single_word": false,
121 "special": true
122 },
123 "151344": {
124 "content": "<|end_of_audio|>",
125 "lstrip": false,
126 "normalized": false,
127 "rstrip": false,
128 "single_word": false,
129 "special": true
130 },
131 "151345": {
132 "content": "<|begin_of_transcription|>",
133 "lstrip": false,
134 "normalized": false,
135 "rstrip": false,
136 "single_word": false,
137 "special": true
138 },
139 "151346": {
140 "content": "<|end_of_transcription|>",
141 "lstrip": false,
142 "normalized": false,
143 "rstrip": false,
144 "single_word": false,
145 "special": true
146 },
147 "151347": {
148 "content": "<|code_prefix|>",
149 "lstrip": false,
150 "normalized": false,
151 "rstrip": false,
152 "single_word": false,
153 "special": true
154 },
155 "151348": {
156 "content": "<|code_middle|>",
157 "lstrip": false,
158 "normalized": false,
159 "rstrip": false,
160 "single_word": false,
161 "special": true
162 },
163 "151349": {
164 "content": "<|code_suffix|>",
165 "lstrip": false,
166 "normalized": false,
167 "rstrip": false,
168 "single_word": false,
169 "special": true
170 },
171 "151350": {
172 "content": "<think>",
173 "lstrip": false,
174 "normalized": false,
175 "rstrip": false,
176 "single_word": false,
177 "special": false
178 },
179 "151351": {
180 "content": "</think>",
181 "lstrip": false,
182 "normalized": false,
183 "rstrip": false,
184 "single_word": false,
185 "special": false
186 },
187 "151352": {
188 "content": "<tool_call>",
189 "lstrip": false,
190 "normalized": false,
191 "rstrip": false,
192 "single_word": false,
193 "special": false
194 },
195 "151353": {
196 "content": "</tool_call>",
197 "lstrip": false,
198 "normalized": false,
199 "rstrip": false,
200 "single_word": false,
201 "special": false
202 },
203 "151354": {
204 "content": "<tool_response>",
205 "lstrip": false,
206 "normalized": false,
207 "rstrip": false,
208 "single_word": false,
209 "special": false
210 },
211 "151355": {
212 "content": "</tool_response>",
213 "lstrip": false,
214 "normalized": false,
215 "rstrip": false,
216 "single_word": false,
217 "special": false
218 },
219 "151356": {
220 "content": "<arg_key>",
221 "lstrip": false,
222 "normalized": false,
223 "rstrip": false,
224 "single_word": false,
225 "special": false
226 },
227 "151357": {
228 "content": "</arg_key>",
229 "lstrip": false,
230 "normalized": false,
231 "rstrip": false,
232 "single_word": false,
233 "special": false
234 },
235 "151358": {
236 "content": "<arg_value>",
237 "lstrip": false,
238 "normalized": false,
239 "rstrip": false,
240 "single_word": false,
241 "special": false
242 },
243 "151359": {
244 "content": "</arg_value>",
245 "lstrip": false,
246 "normalized": false,
247 "rstrip": false,
248 "single_word": false,
249 "special": false
250 },
251 "151360": {
252 "content": "/nothink",
253 "lstrip": false,
254 "normalized": false,
255 "rstrip": false,
256 "single_word": false,
257 "special": true
258 },
259 "151361": {
260 "content": "<|begin_of_box|>",
261 "lstrip": false,
262 "normalized": false,
263 "rstrip": false,
264 "single_word": false,
265 "special": false
266 },
267 "151362": {
268 "content": "<|end_of_box|>",
269 "lstrip": false,
270 "normalized": false,
271 "rstrip": false,
272 "single_word": false,
273 "special": false
274 },
275 "151363": {
276 "content": "<|image|>",
277 "lstrip": false,
278 "normalized": false,
279 "rstrip": false,
280 "single_word": false,
281 "special": false
282 },
283 "151364": {
284 "content": "<|video|>",
285 "lstrip": false,
286 "normalized": false,
287 "rstrip": false,
288 "single_word": false,
289 "special": false
290 },
291 "151365": {
292 "content": "<sop>",
293 "lstrip": false,
294 "normalized": false,
295 "rstrip": false,
296 "single_word": false,
297 "special": true
298 },
299 "151366": {
300 "content": "<eop>",
301 "lstrip": false,
302 "normalized": false,
303 "rstrip": false,
304 "single_word": false,
305 "special": true
306 }
307 },
308 "additional_special_tokens": [
309 "<|endoftext|>",
310 "[MASK]",
311 "[gMASK]",
312 "[sMASK]",
313 "<sop>",
314 "<eop>",
315 "<|system|>",
316 "<|user|>",
317 "<|assistant|>",
318 "<|observation|>",
319 "<|begin_of_image|>",
320 "<|end_of_image|>",
321 "<|begin_of_video|>",
322 "<|end_of_video|>",
323 "<|begin_of_audio|>",
324 "<|end_of_audio|>",
325 "<|begin_of_transcription|>",
326 "<|end_of_transcription|>",
327 "<|code_prefix|>",
328 "<|code_middle|>",
329 "<|code_suffix|>",
330 "/nothink"
331 ],
332 "clean_up_tokenization_spaces": false,
333 "do_lower_case": false,
334 "eos_token": "<|im_end|>",
335 "extra_special_tokens": {},
336 "model_max_length": 128000,
337 "pad_token": "<|endoftext|>",
338 "padding_side": "left",
339 "remove_space": false,
340 "tokenizer_class": "PreTrainedTokenizerFast"
341 }
342