tokenizer_config.json
5.6 KB · 208 lines · json Raw
1 {
2 "add_prefix_space": false,
3 "added_tokens_decoder": {
4 "151643": {
5 "content": "<|endoftext|>",
6 "lstrip": false,
7 "normalized": false,
8 "rstrip": false,
9 "single_word": false,
10 "special": true
11 },
12 "151644": {
13 "content": "<|im_start|>",
14 "lstrip": false,
15 "normalized": false,
16 "rstrip": false,
17 "single_word": false,
18 "special": true
19 },
20 "151645": {
21 "content": "<|im_end|>",
22 "lstrip": false,
23 "normalized": false,
24 "rstrip": false,
25 "single_word": false,
26 "special": true
27 },
28 "151646": {
29 "content": "<|object_ref_start|>",
30 "lstrip": false,
31 "normalized": false,
32 "rstrip": false,
33 "single_word": false,
34 "special": true
35 },
36 "151647": {
37 "content": "<|object_ref_end|>",
38 "lstrip": false,
39 "normalized": false,
40 "rstrip": false,
41 "single_word": false,
42 "special": true
43 },
44 "151648": {
45 "content": "<|box_start|>",
46 "lstrip": false,
47 "normalized": false,
48 "rstrip": false,
49 "single_word": false,
50 "special": true
51 },
52 "151649": {
53 "content": "<|box_end|>",
54 "lstrip": false,
55 "normalized": false,
56 "rstrip": false,
57 "single_word": false,
58 "special": true
59 },
60 "151650": {
61 "content": "<|quad_start|>",
62 "lstrip": false,
63 "normalized": false,
64 "rstrip": false,
65 "single_word": false,
66 "special": true
67 },
68 "151651": {
69 "content": "<|quad_end|>",
70 "lstrip": false,
71 "normalized": false,
72 "rstrip": false,
73 "single_word": false,
74 "special": true
75 },
76 "151652": {
77 "content": "<|vision_start|>",
78 "lstrip": false,
79 "normalized": false,
80 "rstrip": false,
81 "single_word": false,
82 "special": true
83 },
84 "151653": {
85 "content": "<|vision_end|>",
86 "lstrip": false,
87 "normalized": false,
88 "rstrip": false,
89 "single_word": false,
90 "special": true
91 },
92 "151654": {
93 "content": "<|vision_pad|>",
94 "lstrip": false,
95 "normalized": false,
96 "rstrip": false,
97 "single_word": false,
98 "special": true
99 },
100 "151655": {
101 "content": "<|image_pad|>",
102 "lstrip": false,
103 "normalized": false,
104 "rstrip": false,
105 "single_word": false,
106 "special": true
107 },
108 "151656": {
109 "content": "<|video_pad|>",
110 "lstrip": false,
111 "normalized": false,
112 "rstrip": false,
113 "single_word": false,
114 "special": true
115 },
116 "151657": {
117 "content": "<tool_call>",
118 "lstrip": false,
119 "normalized": false,
120 "rstrip": false,
121 "single_word": false,
122 "special": false
123 },
124 "151658": {
125 "content": "</tool_call>",
126 "lstrip": false,
127 "normalized": false,
128 "rstrip": false,
129 "single_word": false,
130 "special": false
131 },
132 "151659": {
133 "content": "<|fim_prefix|>",
134 "lstrip": false,
135 "normalized": false,
136 "rstrip": false,
137 "single_word": false,
138 "special": false
139 },
140 "151660": {
141 "content": "<|fim_middle|>",
142 "lstrip": false,
143 "normalized": false,
144 "rstrip": false,
145 "single_word": false,
146 "special": false
147 },
148 "151661": {
149 "content": "<|fim_suffix|>",
150 "lstrip": false,
151 "normalized": false,
152 "rstrip": false,
153 "single_word": false,
154 "special": false
155 },
156 "151662": {
157 "content": "<|fim_pad|>",
158 "lstrip": false,
159 "normalized": false,
160 "rstrip": false,
161 "single_word": false,
162 "special": false
163 },
164 "151663": {
165 "content": "<|repo_name|>",
166 "lstrip": false,
167 "normalized": false,
168 "rstrip": false,
169 "single_word": false,
170 "special": false
171 },
172 "151664": {
173 "content": "<|file_sep|>",
174 "lstrip": false,
175 "normalized": false,
176 "rstrip": false,
177 "single_word": false,
178 "special": false
179 }
180 },
181 "additional_special_tokens": [
182 "<|im_start|>",
183 "<|im_end|>",
184 "<|object_ref_start|>",
185 "<|object_ref_end|>",
186 "<|box_start|>",
187 "<|box_end|>",
188 "<|quad_start|>",
189 "<|quad_end|>",
190 "<|vision_start|>",
191 "<|vision_end|>",
192 "<|vision_pad|>",
193 "<|image_pad|>",
194 "<|video_pad|>"
195 ],
196 "bos_token": null,
197 "chat_template": "{% set image_count = namespace(value=0) %}{% set video_count = namespace(value=0) %}{% for message in messages %}{% if loop.first and message['role'] != 'system' %}<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n{% endif %}<|im_start|>{{ message['role'] }}\n{% if message['content'] is string %}{{ message['content'] }}<|im_end|>\n{% else %}{% for content in message['content'] %}{% if content['type'] == 'image' or 'image' in content or 'image_url' in content %}{% set image_count.value = image_count.value + 1 %}{% if add_vision_id %}Picture {{ image_count.value }}: {% endif %}<|vision_start|><|image_pad|><|vision_end|>{% elif content['type'] == 'video' or 'video' in content %}{% set video_count.value = video_count.value + 1 %}{% if add_vision_id %}Video {{ video_count.value }}: {% endif %}<|vision_start|><|video_pad|><|vision_end|>{% elif 'text' in content %}{{ content['text'] }}{% endif %}{% endfor %}<|im_end|>\n{% endif %}{% endfor %}{% if add_generation_prompt %}<|im_start|>assistant\n{% endif %}",
198 "clean_up_tokenization_spaces": false,
199 "eos_token": "<|im_end|>",
200 "errors": "replace",
201 "model_max_length": 131072,
202 "pad_token": "<|endoftext|>",
203 "split_special_tokens": false,
204 "tokenizer_class": "Qwen2Tokenizer",
205 "unk_token": null,
206 "add_bos_token": false
207 }
208