tokenizer_config.json
12.7 KB · 239 lines · json Raw
1 {
2 "add_prefix_space": false,
3 "added_tokens_decoder": {
4 "151643": {
5 "content": "<|endoftext|>",
6 "lstrip": false,
7 "normalized": false,
8 "rstrip": false,
9 "single_word": false,
10 "special": true
11 },
12 "151644": {
13 "content": "<|im_start|>",
14 "lstrip": false,
15 "normalized": false,
16 "rstrip": false,
17 "single_word": false,
18 "special": true
19 },
20 "151645": {
21 "content": "<|im_end|>",
22 "lstrip": false,
23 "normalized": false,
24 "rstrip": false,
25 "single_word": false,
26 "special": true
27 },
28 "151646": {
29 "content": "<|object_ref_start|>",
30 "lstrip": false,
31 "normalized": false,
32 "rstrip": false,
33 "single_word": false,
34 "special": true
35 },
36 "151647": {
37 "content": "<|object_ref_end|>",
38 "lstrip": false,
39 "normalized": false,
40 "rstrip": false,
41 "single_word": false,
42 "special": true
43 },
44 "151648": {
45 "content": "<|box_start|>",
46 "lstrip": false,
47 "normalized": false,
48 "rstrip": false,
49 "single_word": false,
50 "special": true
51 },
52 "151649": {
53 "content": "<|box_end|>",
54 "lstrip": false,
55 "normalized": false,
56 "rstrip": false,
57 "single_word": false,
58 "special": true
59 },
60 "151650": {
61 "content": "<|quad_start|>",
62 "lstrip": false,
63 "normalized": false,
64 "rstrip": false,
65 "single_word": false,
66 "special": true
67 },
68 "151651": {
69 "content": "<|quad_end|>",
70 "lstrip": false,
71 "normalized": false,
72 "rstrip": false,
73 "single_word": false,
74 "special": true
75 },
76 "151652": {
77 "content": "<|vision_start|>",
78 "lstrip": false,
79 "normalized": false,
80 "rstrip": false,
81 "single_word": false,
82 "special": true
83 },
84 "151653": {
85 "content": "<|vision_end|>",
86 "lstrip": false,
87 "normalized": false,
88 "rstrip": false,
89 "single_word": false,
90 "special": true
91 },
92 "151654": {
93 "content": "<|vision_pad|>",
94 "lstrip": false,
95 "normalized": false,
96 "rstrip": false,
97 "single_word": false,
98 "special": true
99 },
100 "151655": {
101 "content": "<|image_pad|>",
102 "lstrip": false,
103 "normalized": false,
104 "rstrip": false,
105 "single_word": false,
106 "special": true
107 },
108 "151656": {
109 "content": "<|video_pad|>",
110 "lstrip": false,
111 "normalized": false,
112 "rstrip": false,
113 "single_word": false,
114 "special": true
115 },
116 "151657": {
117 "content": "<tool_call>",
118 "lstrip": false,
119 "normalized": false,
120 "rstrip": false,
121 "single_word": false,
122 "special": false
123 },
124 "151658": {
125 "content": "</tool_call>",
126 "lstrip": false,
127 "normalized": false,
128 "rstrip": false,
129 "single_word": false,
130 "special": false
131 },
132 "151659": {
133 "content": "<|fim_prefix|>",
134 "lstrip": false,
135 "normalized": false,
136 "rstrip": false,
137 "single_word": false,
138 "special": false
139 },
140 "151660": {
141 "content": "<|fim_middle|>",
142 "lstrip": false,
143 "normalized": false,
144 "rstrip": false,
145 "single_word": false,
146 "special": false
147 },
148 "151661": {
149 "content": "<|fim_suffix|>",
150 "lstrip": false,
151 "normalized": false,
152 "rstrip": false,
153 "single_word": false,
154 "special": false
155 },
156 "151662": {
157 "content": "<|fim_pad|>",
158 "lstrip": false,
159 "normalized": false,
160 "rstrip": false,
161 "single_word": false,
162 "special": false
163 },
164 "151663": {
165 "content": "<|repo_name|>",
166 "lstrip": false,
167 "normalized": false,
168 "rstrip": false,
169 "single_word": false,
170 "special": false
171 },
172 "151664": {
173 "content": "<|file_sep|>",
174 "lstrip": false,
175 "normalized": false,
176 "rstrip": false,
177 "single_word": false,
178 "special": false
179 },
180 "151665": {
181 "content": "<tool_response>",
182 "lstrip": false,
183 "normalized": false,
184 "rstrip": false,
185 "single_word": false,
186 "special": false
187 },
188 "151666": {
189 "content": "</tool_response>",
190 "lstrip": false,
191 "normalized": false,
192 "rstrip": false,
193 "single_word": false,
194 "special": false
195 },
196 "151667": {
197 "content": "<think>",
198 "lstrip": false,
199 "normalized": false,
200 "rstrip": false,
201 "single_word": false,
202 "special": false
203 },
204 "151668": {
205 "content": "</think>",
206 "lstrip": false,
207 "normalized": false,
208 "rstrip": false,
209 "single_word": false,
210 "special": false
211 }
212 },
213 "additional_special_tokens": [
214 "<|im_start|>",
215 "<|im_end|>",
216 "<|object_ref_start|>",
217 "<|object_ref_end|>",
218 "<|box_start|>",
219 "<|box_end|>",
220 "<|quad_start|>",
221 "<|quad_end|>",
222 "<|vision_start|>",
223 "<|vision_end|>",
224 "<|vision_pad|>",
225 "<|image_pad|>",
226 "<|video_pad|>"
227 ],
228 "bos_token": null,
229 "chat_template": "{% macro render_extra_keys(json_dict, handled_keys) %}\n {%- if json_dict is mapping %}\n {%- for json_key in json_dict if json_key not in handled_keys %}\n {%- if json_dict[json_key] is mapping or (json_dict[json_key] is sequence and json_dict[json_key] is not string) %}\n {{- '\\n<' ~ json_key ~ '>' ~ (json_dict[json_key] | tojson | safe) ~ '</' ~ json_key ~ '>' }}\n {%- else %}\n {{-'\\n<' ~ json_key ~ '>' ~ (json_dict[json_key] | string) ~ '</' ~ json_key ~ '>' }}\n {%- endif %}\n {%- endfor %}\n {%- endif %}\n{% endmacro %}\n\n{%- if messages[0][\"role\"] == \"system\" %}\n {%- set system_message = messages[0][\"content\"] %}\n {%- set loop_messages = messages[1:] %}\n{%- else %}\n {%- set loop_messages = messages %}\n{%- endif %}\n\n{%- if not tools is defined %}\n {%- set tools = [] %}\n{%- endif %}\n\n{%- if system_message is defined %}\n {{- \"<|im_start|>system\\n\" + system_message }}\n{%- else %}\n {%- if tools is iterable and tools | length > 0 %}\n {{- \"<|im_start|>system\\nYou are Qwen, a helpful AI assistant that can interact with a computer to solve tasks.\" }}\n {%- endif %}\n{%- endif %}\n{%- if tools is iterable and tools | length > 0 %}\n {{- \"\\n\\n# Tools\\n\\nYou have access to the following functions:\\n\\n\" }}\n {{- \"<tools>\" }}\n {%- for tool in tools %}\n {%- if tool.function is defined %}\n {%- set tool = tool.function %}\n {%- endif %}\n {{- \"\\n<function>\\n<name>\" ~ tool.name ~ \"</name>\" }}\n {%- if tool.description is defined %}\n {{- '\\n<description>' ~ (tool.description | trim) ~ '</description>' }}\n {%- endif %}\n {{- '\\n<parameters>' }}\n {%- if tool.parameters is defined and tool.parameters is mapping and tool.parameters.properties is defined and tool.parameters.properties is mapping %}\n {%- for param_name, param_fields in tool.parameters.properties|items %}\n {{- '\\n<parameter>' }}\n {{- '\\n<name>' ~ param_name ~ '</name>' }}\n {%- if param_fields.type is defined %}\n {{- '\\n<type>' ~ (param_fields.type | string) ~ '</type>' }}\n {%- endif %}\n {%- if param_fields.description is defined %}\n {{- '\\n<description>' ~ (param_fields.description | trim) ~ '</description>' }}\n {%- endif %}\n {%- set handled_keys = ['name', 'type', 'description'] %}\n {{- render_extra_keys(param_fields, handled_keys) }}\n {{- '\\n</parameter>' }}\n {%- endfor %}\n {%- endif %}\n {% set handled_keys = ['type', 'properties'] %}\n {{- render_extra_keys(tool.parameters, handled_keys) }}\n {{- '\\n</parameters>' }}\n {%- set handled_keys = ['type', 'name', 'description', 'parameters'] %}\n {{- render_extra_keys(tool, handled_keys) }}\n {{- '\\n</function>' }}\n {%- endfor %}\n {{- \"\\n</tools>\" }}\n {{- '\\n\\nIf you choose to call a function ONLY reply in the following format with NO suffix:\\n\\n<tool_call>\\n<function=example_function_name>\\n<parameter=example_parameter_1>\\nvalue_1\\n</parameter>\\n<parameter=example_parameter_2>\\nThis is the value for the second parameter\\nthat can span\\nmultiple lines\\n</parameter>\\n</function>\\n</tool_call>\\n\\n<IMPORTANT>\\nReminder:\\n- Function calls MUST follow the specified format: an inner <function=...></function> block must be nested within <tool_call></tool_call> XML tags\\n- Required parameters MUST be specified\\n- You may provide optional reasoning for your function call in natural language BEFORE the function call, but NOT after\\n- If there is no function call available, answer the question like normal with your current knowledge and do not tell the user about function calls\\n</IMPORTANT>' }}\n{%- endif %}\n{%- if system_message is defined %}\n {{- '<|im_end|>\\n' }}\n{%- else %}\n {%- if tools is iterable and tools | length > 0 %}\n {{- '<|im_end|>\\n' }}\n {%- endif %}\n{%- endif %}\n{%- for message in loop_messages %}\n {%- if message.role == \"assistant\" and message.tool_calls is defined and message.tool_calls is iterable and message.tool_calls | length > 0 %}\n {{- '<|im_start|>' + message.role }}\n {%- if message.content is defined and message.content is string and message.content | trim | length > 0 %}\n {{- '\\n' + message.content | trim + '\\n' }}\n {%- endif %}\n {%- for tool_call in message.tool_calls %}\n {%- if tool_call.function is defined %}\n {%- set tool_call = tool_call.function %}\n {%- endif %}\n {{- '\\n<tool_call>\\n<function=' + tool_call.name + '>\\n' }}\n {%- if tool_call.arguments is defined %}\n {%- for args_name, args_value in tool_call.arguments|items %}\n {{- '<parameter=' + args_name + '>\\n' }}\n {%- set args_value = args_value | tojson | safe if args_value is mapping or (args_value is sequence and args_value is not string) else args_value | string %}\n {{- args_value }}\n {{- '\\n</parameter>\\n' }}\n {%- endfor %}\n {%- endif %}\n {{- '</function>\\n</tool_call>' }}\n {%- endfor %}\n {{- '<|im_end|>\\n' }}\n {%- elif message.role == \"user\" or message.role == \"system\" or message.role == \"assistant\" %}\n {{- '<|im_start|>' + message.role + '\\n' + message.content + '<|im_end|>' + '\\n' }}\n {%- elif message.role == \"tool\" %}\n {%- if loop.previtem and loop.previtem.role != \"tool\" %}\n {{- '<|im_start|>user\\n' }}\n {%- endif %}\n {{- '<tool_response>\\n' }}\n {{- message.content }}\n {{- '\\n</tool_response>\\n' }}\n {%- if not loop.last and loop.nextitem.role != \"tool\" %}\n {{- '<|im_end|>\\n' }}\n {%- elif loop.last %}\n {{- '<|im_end|>\\n' }}\n {%- endif %}\n {%- else %}\n {{- '<|im_start|>' + message.role + '\\n' + message.content + '<|im_end|>\\n' }}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|im_start|>assistant\\n' }}\n{%- endif %}\n",
230 "clean_up_tokenization_spaces": false,
231 "eos_token": "<|im_end|>",
232 "errors": "replace",
233 "model_max_length": 1048576,
234 "pad_token": "<|endoftext|>",
235 "split_special_tokens": false,
236 "tokenizer_class": "Qwen2Tokenizer",
237 "unk_token": null,
238 "add_bos_token": false
239 }