tokenizer_config.json
9.3 KB · 413 lines · json Raw
1 {
2 "add_bos_token": true,
3 "add_eos_token": false,
4 "added_tokens_decoder": {
5 "0": {
6 "content": "<unk>",
7 "lstrip": false,
8 "normalized": false,
9 "rstrip": false,
10 "single_word": false,
11 "special": true
12 },
13 "1": {
14 "content": "<s>",
15 "lstrip": false,
16 "normalized": false,
17 "rstrip": false,
18 "single_word": false,
19 "special": true
20 },
21 "2": {
22 "content": "</s>",
23 "lstrip": false,
24 "normalized": false,
25 "rstrip": true,
26 "single_word": false,
27 "special": false
28 },
29 "32000": {
30 "content": "<|endoftext|>",
31 "lstrip": false,
32 "normalized": false,
33 "rstrip": false,
34 "single_word": false,
35 "special": true
36 },
37 "32001": {
38 "content": "<|assistant|>",
39 "lstrip": false,
40 "normalized": false,
41 "rstrip": true,
42 "single_word": false,
43 "special": true
44 },
45 "32002": {
46 "content": "<|placeholder1|>",
47 "lstrip": false,
48 "normalized": false,
49 "rstrip": true,
50 "single_word": false,
51 "special": true
52 },
53 "32003": {
54 "content": "<|placeholder2|>",
55 "lstrip": false,
56 "normalized": false,
57 "rstrip": true,
58 "single_word": false,
59 "special": true
60 },
61 "32004": {
62 "content": "<|placeholder3|>",
63 "lstrip": false,
64 "normalized": false,
65 "rstrip": true,
66 "single_word": false,
67 "special": true
68 },
69 "32005": {
70 "content": "<|placeholder4|>",
71 "lstrip": false,
72 "normalized": false,
73 "rstrip": true,
74 "single_word": false,
75 "special": true
76 },
77 "32006": {
78 "content": "<|system|>",
79 "lstrip": false,
80 "normalized": false,
81 "rstrip": false,
82 "single_word": false,
83 "special": true
84 },
85 "32007": {
86 "content": "<|end|>",
87 "lstrip": false,
88 "normalized": false,
89 "rstrip": false,
90 "single_word": false,
91 "special": true
92 },
93 "32008": {
94 "content": "<|placeholder5|>",
95 "lstrip": false,
96 "normalized": false,
97 "rstrip": true,
98 "single_word": false,
99 "special": true
100 },
101 "32009": {
102 "content": "<|placeholder6|>",
103 "lstrip": false,
104 "normalized": false,
105 "rstrip": true,
106 "single_word": false,
107 "special": true
108 },
109 "32010": {
110 "content": "<|user|>",
111 "lstrip": false,
112 "normalized": false,
113 "rstrip": false,
114 "single_word": false,
115 "special": true
116 },
117 "32011": {
118 "content": "<|placeholder7|>",
119 "lstrip": false,
120 "normalized": false,
121 "rstrip": true,
122 "single_word": false,
123 "special": true
124 },
125 "32012": {
126 "content": "<|placeholder8|>",
127 "lstrip": false,
128 "normalized": false,
129 "rstrip": true,
130 "single_word": false,
131 "special": true
132 },
133 "32013": {
134 "content": "<|placeholder9|>",
135 "lstrip": false,
136 "normalized": false,
137 "rstrip": true,
138 "single_word": false,
139 "special": true
140 },
141 "32014": {
142 "content": "<|placeholder10|>",
143 "lstrip": false,
144 "normalized": false,
145 "rstrip": true,
146 "single_word": false,
147 "special": true
148 },
149 "32015": {
150 "content": "<|placeholder11|>",
151 "lstrip": false,
152 "normalized": false,
153 "rstrip": true,
154 "single_word": false,
155 "special": true
156 },
157 "32016": {
158 "content": "<|placeholder12|>",
159 "lstrip": false,
160 "normalized": false,
161 "rstrip": true,
162 "single_word": false,
163 "special": true
164 },
165 "32017": {
166 "content": "<|placeholder13|>",
167 "lstrip": false,
168 "normalized": false,
169 "rstrip": true,
170 "single_word": false,
171 "special": true
172 },
173 "32018": {
174 "content": "<|placeholder14|>",
175 "lstrip": false,
176 "normalized": false,
177 "rstrip": true,
178 "single_word": false,
179 "special": true
180 },
181 "32019": {
182 "content": "<|placeholder15|>",
183 "lstrip": false,
184 "normalized": false,
185 "rstrip": true,
186 "single_word": false,
187 "special": true
188 },
189 "32020": {
190 "content": "<|placeholder16|>",
191 "lstrip": false,
192 "normalized": false,
193 "rstrip": true,
194 "single_word": false,
195 "special": true
196 },
197 "32021": {
198 "content": "<|placeholder17|>",
199 "lstrip": false,
200 "normalized": false,
201 "rstrip": true,
202 "single_word": false,
203 "special": true
204 },
205 "32022": {
206 "content": "<|placeholder18|>",
207 "lstrip": false,
208 "normalized": false,
209 "rstrip": true,
210 "single_word": false,
211 "special": true
212 },
213 "32023": {
214 "content": "<|placeholder19|>",
215 "lstrip": false,
216 "normalized": false,
217 "rstrip": true,
218 "single_word": false,
219 "special": true
220 },
221 "32024": {
222 "content": "<|placeholder20|>",
223 "lstrip": false,
224 "normalized": false,
225 "rstrip": true,
226 "single_word": false,
227 "special": true
228 },
229 "32025": {
230 "content": "<|placeholder21|>",
231 "lstrip": false,
232 "normalized": false,
233 "rstrip": true,
234 "single_word": false,
235 "special": true
236 },
237 "32026": {
238 "content": "<|placeholder22|>",
239 "lstrip": false,
240 "normalized": false,
241 "rstrip": true,
242 "single_word": false,
243 "special": true
244 },
245 "32027": {
246 "content": "<|placeholder23|>",
247 "lstrip": false,
248 "normalized": false,
249 "rstrip": true,
250 "single_word": false,
251 "special": true
252 },
253 "32028": {
254 "content": "<|placeholder24|>",
255 "lstrip": false,
256 "normalized": false,
257 "rstrip": true,
258 "single_word": false,
259 "special": true
260 },
261 "32029": {
262 "content": "<|placeholder25|>",
263 "lstrip": false,
264 "normalized": false,
265 "rstrip": true,
266 "single_word": false,
267 "special": true
268 },
269 "32030": {
270 "content": "<|placeholder26|>",
271 "lstrip": false,
272 "normalized": false,
273 "rstrip": true,
274 "single_word": false,
275 "special": true
276 },
277 "32031": {
278 "content": "<|placeholder27|>",
279 "lstrip": false,
280 "normalized": false,
281 "rstrip": true,
282 "single_word": false,
283 "special": true
284 },
285 "32032": {
286 "content": "<|placeholder28|>",
287 "lstrip": false,
288 "normalized": false,
289 "rstrip": true,
290 "single_word": false,
291 "special": true
292 },
293 "32033": {
294 "content": "<|placeholder29|>",
295 "lstrip": false,
296 "normalized": false,
297 "rstrip": true,
298 "single_word": false,
299 "special": true
300 },
301 "32034": {
302 "content": "<|placeholder30|>",
303 "lstrip": false,
304 "normalized": false,
305 "rstrip": true,
306 "single_word": false,
307 "special": true
308 },
309 "32035": {
310 "content": "<|placeholder31|>",
311 "lstrip": false,
312 "normalized": false,
313 "rstrip": true,
314 "single_word": false,
315 "special": true
316 },
317 "32036": {
318 "content": "<|placeholder32|>",
319 "lstrip": false,
320 "normalized": false,
321 "rstrip": true,
322 "single_word": false,
323 "special": true
324 },
325 "32037": {
326 "content": "<|placeholder33|>",
327 "lstrip": false,
328 "normalized": false,
329 "rstrip": true,
330 "single_word": false,
331 "special": true
332 },
333 "32038": {
334 "content": "<|placeholder34|>",
335 "lstrip": false,
336 "normalized": false,
337 "rstrip": true,
338 "single_word": false,
339 "special": true
340 },
341 "32039": {
342 "content": "<|placeholder35|>",
343 "lstrip": false,
344 "normalized": false,
345 "rstrip": true,
346 "single_word": false,
347 "special": true
348 },
349 "32040": {
350 "content": "<|placeholder36|>",
351 "lstrip": false,
352 "normalized": false,
353 "rstrip": true,
354 "single_word": false,
355 "special": true
356 },
357 "32041": {
358 "content": "<|placeholder37|>",
359 "lstrip": false,
360 "normalized": false,
361 "rstrip": true,
362 "single_word": false,
363 "special": true
364 },
365 "32042": {
366 "content": "<|placeholder38|>",
367 "lstrip": false,
368 "normalized": false,
369 "rstrip": true,
370 "single_word": false,
371 "special": true
372 },
373 "32043": {
374 "content": "<|placeholder39|>",
375 "lstrip": false,
376 "normalized": false,
377 "rstrip": true,
378 "single_word": false,
379 "special": true
380 },
381 "32044": {
382 "content": "<|image|>",
383 "lstrip": false,
384 "normalized": false,
385 "rstrip": true,
386 "single_word": false,
387 "special": true
388 }
389 },
390 "additional_special_tokens": [
391 "<|system|>",
392 "<|end|>",
393 "<|user|>",
394 "<|end|>"
395 ],
396 "auto_map": {
397 "AutoProcessor": "processing_phi3_v.Phi3VProcessor"
398 },
399 "bos_token": "<s>",
400 "chat_template": "{% for message in messages %}{{'<|' + message['role'] + '|>' + '\n' + message['content'] + '<|end|>\n' }}{% endfor %}{% if add_generation_prompt and messages[-1]['role'] != 'assistant' %}{{- '<|assistant|>\n' -}}{% endif %}",
401 "clean_up_tokenization_spaces": false,
402 "eos_token": "<|endoftext|>",
403 "legacy": false,
404 "model_max_length": 131072,
405 "pad_token": "<|endoftext|>",
406 "padding_side": "right",
407 "processor_class": "Phi3VProcessor",
408 "sp_model_kwargs": {},
409 "tokenizer_class": "LlamaTokenizer",
410 "unk_token": "<unk>",
411 "use_default_system_prompt": false
412 }
413