tokenizer_config.json
| 1 | { |
| 2 | "add_prefix_space": false, |
| 3 | "added_tokens_decoder": { |
| 4 | "248044": { |
| 5 | "content": "<|endoftext|>", |
| 6 | "lstrip": false, |
| 7 | "normalized": false, |
| 8 | "rstrip": false, |
| 9 | "single_word": false, |
| 10 | "special": true |
| 11 | }, |
| 12 | "248045": { |
| 13 | "content": "<|im_start|>", |
| 14 | "lstrip": false, |
| 15 | "normalized": false, |
| 16 | "rstrip": false, |
| 17 | "single_word": false, |
| 18 | "special": true |
| 19 | }, |
| 20 | "248046": { |
| 21 | "content": "<|im_end|>", |
| 22 | "lstrip": false, |
| 23 | "normalized": false, |
| 24 | "rstrip": false, |
| 25 | "single_word": false, |
| 26 | "special": true |
| 27 | }, |
| 28 | "248047": { |
| 29 | "content": "<|object_ref_start|>", |
| 30 | "lstrip": false, |
| 31 | "normalized": false, |
| 32 | "rstrip": false, |
| 33 | "single_word": false, |
| 34 | "special": true |
| 35 | }, |
| 36 | "248048": { |
| 37 | "content": "<|object_ref_end|>", |
| 38 | "lstrip": false, |
| 39 | "normalized": false, |
| 40 | "rstrip": false, |
| 41 | "single_word": false, |
| 42 | "special": true |
| 43 | }, |
| 44 | "248049": { |
| 45 | "content": "<|box_start|>", |
| 46 | "lstrip": false, |
| 47 | "normalized": false, |
| 48 | "rstrip": false, |
| 49 | "single_word": false, |
| 50 | "special": true |
| 51 | }, |
| 52 | "248050": { |
| 53 | "content": "<|box_end|>", |
| 54 | "lstrip": false, |
| 55 | "normalized": false, |
| 56 | "rstrip": false, |
| 57 | "single_word": false, |
| 58 | "special": true |
| 59 | }, |
| 60 | "248051": { |
| 61 | "content": "<|quad_start|>", |
| 62 | "lstrip": false, |
| 63 | "normalized": false, |
| 64 | "rstrip": false, |
| 65 | "single_word": false, |
| 66 | "special": true |
| 67 | }, |
| 68 | "248052": { |
| 69 | "content": "<|quad_end|>", |
| 70 | "lstrip": false, |
| 71 | "normalized": false, |
| 72 | "rstrip": false, |
| 73 | "single_word": false, |
| 74 | "special": true |
| 75 | }, |
| 76 | "248053": { |
| 77 | "content": "<|vision_start|>", |
| 78 | "lstrip": false, |
| 79 | "normalized": false, |
| 80 | "rstrip": false, |
| 81 | "single_word": false, |
| 82 | "special": true |
| 83 | }, |
| 84 | "248054": { |
| 85 | "content": "<|vision_end|>", |
| 86 | "lstrip": false, |
| 87 | "normalized": false, |
| 88 | "rstrip": false, |
| 89 | "single_word": false, |
| 90 | "special": true |
| 91 | }, |
| 92 | "248055": { |
| 93 | "content": "<|vision_pad|>", |
| 94 | "lstrip": false, |
| 95 | "normalized": false, |
| 96 | "rstrip": false, |
| 97 | "single_word": false, |
| 98 | "special": true |
| 99 | }, |
| 100 | "248056": { |
| 101 | "content": "<|image_pad|>", |
| 102 | "lstrip": false, |
| 103 | "normalized": false, |
| 104 | "rstrip": false, |
| 105 | "single_word": false, |
| 106 | "special": true |
| 107 | }, |
| 108 | "248057": { |
| 109 | "content": "<|video_pad|>", |
| 110 | "lstrip": false, |
| 111 | "normalized": false, |
| 112 | "rstrip": false, |
| 113 | "single_word": false, |
| 114 | "special": true |
| 115 | }, |
| 116 | "248058": { |
| 117 | "content": "<tool_call>", |
| 118 | "lstrip": false, |
| 119 | "normalized": false, |
| 120 | "rstrip": false, |
| 121 | "single_word": false, |
| 122 | "special": false |
| 123 | }, |
| 124 | "248059": { |
| 125 | "content": "</tool_call>", |
| 126 | "lstrip": false, |
| 127 | "normalized": false, |
| 128 | "rstrip": false, |
| 129 | "single_word": false, |
| 130 | "special": false |
| 131 | }, |
| 132 | "248060": { |
| 133 | "content": "<|fim_prefix|>", |
| 134 | "lstrip": false, |
| 135 | "normalized": false, |
| 136 | "rstrip": false, |
| 137 | "single_word": false, |
| 138 | "special": false |
| 139 | }, |
| 140 | "248061": { |
| 141 | "content": "<|fim_middle|>", |
| 142 | "lstrip": false, |
| 143 | "normalized": false, |
| 144 | "rstrip": false, |
| 145 | "single_word": false, |
| 146 | "special": false |
| 147 | }, |
| 148 | "248062": { |
| 149 | "content": "<|fim_suffix|>", |
| 150 | "lstrip": false, |
| 151 | "normalized": false, |
| 152 | "rstrip": false, |
| 153 | "single_word": false, |
| 154 | "special": false |
| 155 | }, |
| 156 | "248063": { |
| 157 | "content": "<|fim_pad|>", |
| 158 | "lstrip": false, |
| 159 | "normalized": false, |
| 160 | "rstrip": false, |
| 161 | "single_word": false, |
| 162 | "special": false |
| 163 | }, |
| 164 | "248064": { |
| 165 | "content": "<|repo_name|>", |
| 166 | "lstrip": false, |
| 167 | "normalized": false, |
| 168 | "rstrip": false, |
| 169 | "single_word": false, |
| 170 | "special": false |
| 171 | }, |
| 172 | "248065": { |
| 173 | "content": "<|file_sep|>", |
| 174 | "lstrip": false, |
| 175 | "normalized": false, |
| 176 | "rstrip": false, |
| 177 | "single_word": false, |
| 178 | "special": false |
| 179 | }, |
| 180 | "248066": { |
| 181 | "content": "<tool_response>", |
| 182 | "lstrip": false, |
| 183 | "normalized": false, |
| 184 | "rstrip": false, |
| 185 | "single_word": false, |
| 186 | "special": false |
| 187 | }, |
| 188 | "248067": { |
| 189 | "content": "</tool_response>", |
| 190 | "lstrip": false, |
| 191 | "normalized": false, |
| 192 | "rstrip": false, |
| 193 | "single_word": false, |
| 194 | "special": false |
| 195 | }, |
| 196 | "248068": { |
| 197 | "content": "<think>", |
| 198 | "lstrip": false, |
| 199 | "normalized": false, |
| 200 | "rstrip": false, |
| 201 | "single_word": false, |
| 202 | "special": false |
| 203 | }, |
| 204 | "248069": { |
| 205 | "content": "</think>", |
| 206 | "lstrip": false, |
| 207 | "normalized": false, |
| 208 | "rstrip": false, |
| 209 | "single_word": false, |
| 210 | "special": false |
| 211 | }, |
| 212 | "248070": { |
| 213 | "content": "<|audio_start|>", |
| 214 | "lstrip": false, |
| 215 | "normalized": false, |
| 216 | "rstrip": false, |
| 217 | "single_word": false, |
| 218 | "special": true |
| 219 | }, |
| 220 | "248071": { |
| 221 | "content": "<|audio_end|>", |
| 222 | "lstrip": false, |
| 223 | "normalized": false, |
| 224 | "rstrip": false, |
| 225 | "single_word": false, |
| 226 | "special": true |
| 227 | }, |
| 228 | "248072": { |
| 229 | "content": "<tts_pad>", |
| 230 | "lstrip": false, |
| 231 | "normalized": false, |
| 232 | "rstrip": false, |
| 233 | "single_word": false, |
| 234 | "special": true |
| 235 | }, |
| 236 | "248073": { |
| 237 | "content": "<tts_text_bos>", |
| 238 | "lstrip": false, |
| 239 | "normalized": false, |
| 240 | "rstrip": false, |
| 241 | "single_word": false, |
| 242 | "special": true |
| 243 | }, |
| 244 | "248074": { |
| 245 | "content": "<tts_text_eod>", |
| 246 | "lstrip": false, |
| 247 | "normalized": false, |
| 248 | "rstrip": false, |
| 249 | "single_word": false, |
| 250 | "special": true |
| 251 | }, |
| 252 | "248075": { |
| 253 | "content": "<tts_text_bos_single>", |
| 254 | "lstrip": false, |
| 255 | "normalized": false, |
| 256 | "rstrip": false, |
| 257 | "single_word": false, |
| 258 | "special": true |
| 259 | }, |
| 260 | "248076": { |
| 261 | "content": "<|audio_pad|>", |
| 262 | "lstrip": false, |
| 263 | "normalized": false, |
| 264 | "rstrip": false, |
| 265 | "single_word": false, |
| 266 | "special": true |
| 267 | } |
| 268 | }, |
| 269 | "additional_special_tokens": [ |
| 270 | "<|im_start|>", |
| 271 | "<|im_end|>", |
| 272 | "<|object_ref_start|>", |
| 273 | "<|object_ref_end|>", |
| 274 | "<|box_start|>", |
| 275 | "<|box_end|>", |
| 276 | "<|quad_start|>", |
| 277 | "<|quad_end|>", |
| 278 | "<|vision_start|>", |
| 279 | "<|vision_end|>", |
| 280 | "<|vision_pad|>", |
| 281 | "<|image_pad|>", |
| 282 | "<|video_pad|>" |
| 283 | ], |
| 284 | "bos_token": null, |
| 285 | "chat_template": "{%- set image_count = namespace(value=0) %}\n{%- set video_count = namespace(value=0) %}\n{%- macro render_content(content, do_vision_count, is_system_content=false) %}\n {%- if content is string %}\n {{- content }}\n {%- elif content is iterable and content is not mapping %}\n {%- for item in content %}\n {%- if 'image' in item or 'image_url' in item or item.type == 'image' %}\n {%- if is_system_content %}\n {{- raise_exception('System message cannot contain images.') }}\n {%- endif %}\n {%- if do_vision_count %}\n {%- set image_count.value = image_count.value + 1 %}\n {%- endif %}\n {%- if add_vision_id %}\n {{- 'Picture ' ~ image_count.value ~ ': ' }}\n {%- endif %}\n {{- '<|vision_start|><|image_pad|><|vision_end|>' }}\n {%- elif 'video' in item or item.type == 'video' %}\n {%- if is_system_content %}\n {{- raise_exception('System message cannot contain videos.') }}\n {%- endif %}\n {%- if do_vision_count %}\n {%- set video_count.value = video_count.value + 1 %}\n {%- endif %}\n {%- if add_vision_id %}\n {{- 'Video ' ~ video_count.value ~ ': ' }}\n {%- endif %}\n {{- '<|vision_start|><|video_pad|><|vision_end|>' }}\n {%- elif 'text' in item %}\n {{- item.text }}\n {%- else %}\n {{- raise_exception('Unexpected item type in content.') }}\n {%- endif %}\n {%- endfor %}\n {%- elif content is none or content is undefined %}\n {{- '' }}\n {%- else %}\n {{- raise_exception('Unexpected content type.') }}\n {%- endif %}\n{%- endmacro %}\n{%- if not messages %}\n {{- raise_exception('No messages provided.') }}\n{%- endif %}\n{%- if tools and tools is iterable and tools is not mapping %}\n {{- '<|im_start|>system\\n' }}\n {{- \"# Tools\\n\\nYou have access to the following functions:\\n\\n<tools>\" }}\n {%- for tool in tools %}\n {{- \"\\n\" }}\n {{- tool | tojson }}\n {%- endfor %}\n {{- \"\\n</tools>\" }}\n {{- '\\n\\nIf you choose to call a function ONLY reply in the following format with NO suffix:\\n\\n<tool_call>\\n<function=example_function_name>\\n<parameter=example_parameter_1>\\nvalue_1\\n</parameter>\\n<parameter=example_parameter_2>\\nThis is the value for the second parameter\\nthat can span\\nmultiple lines\\n</parameter>\\n</function>\\n</tool_call>\\n\\n<IMPORTANT>\\nReminder:\\n- Function calls MUST follow the specified format: an inner <function=...></function> block must be nested within <tool_call></tool_call> XML tags\\n- Required parameters MUST be specified\\n- You may provide optional reasoning for your function call in natural language BEFORE the function call, but NOT after\\n- If there is no function call available, answer the question like normal with your current knowledge and do not tell the user about function calls\\n</IMPORTANT>' }}\n {%- if messages[0].role == 'system' %}\n {%- set content = render_content(messages[0].content, false, true)|trim %}\n {%- if content %}\n {{- '\\n\\n' + content }}\n {%- endif %}\n {%- endif %}\n {{- '<|im_end|>\\n' }}\n{%- else %}\n {%- if messages[0].role == 'system' %}\n {%- set content = render_content(messages[0].content, false, true)|trim %}\n {{- '<|im_start|>system\\n' + content + '<|im_end|>\\n' }}\n {%- endif %}\n{%- endif %}\n{%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}\n{%- for message in messages[::-1] %}\n {%- set index = (messages|length - 1) - loop.index0 %}\n {%- if ns.multi_step_tool and message.role == \"user\" %}\n {%- set content = render_content(message.content, false)|trim %}\n {%- if not(content.startswith('<tool_response>') and content.endswith('</tool_response>')) %}\n {%- set ns.multi_step_tool = false %}\n {%- set ns.last_query_index = index %}\n {%- endif %}\n {%- endif %}\n{%- endfor %}\n{%- if ns.multi_step_tool %}\n {{- raise_exception('No user query found in messages.') }}\n{%- endif %}\n{%- for message in messages %}\n {%- set content = render_content(message.content, true)|trim %}\n {%- if message.role == \"system\" %}\n {%- if not loop.first %}\n {{- raise_exception('System message must be at the beginning.') }}\n {%- endif %}\n {%- elif message.role == \"user\" %}\n {{- '<|im_start|>' + message.role + '\\n' + content + '<|im_end|>' + '\\n' }}\n {%- elif message.role == \"assistant\" %}\n {%- set reasoning_content = '' %}\n {%- if message.reasoning_content is string %}\n {%- set reasoning_content = message.reasoning_content %}\n {%- else %}\n {%- if '</think>' in content %}\n {%- set reasoning_content = content.split('</think>')[0].rstrip('\\n').split('<think>')[-1].lstrip('\\n') %}\n {%- set content = content.split('</think>')[-1].lstrip('\\n') %}\n {%- endif %}\n {%- endif %}\n {%- set reasoning_content = reasoning_content|trim %}\n {%- if (preserve_thinking is defined and preserve_thinking is true) or (loop.index0 > ns.last_query_index) %}\n {{- '<|im_start|>' + message.role + '\\n<think>\\n' + reasoning_content + '\\n</think>\\n\\n' + content }}\n {%- else %}\n {{- '<|im_start|>' + message.role + '\\n' + content }}\n {%- endif %}\n {%- if message.tool_calls and message.tool_calls is iterable and message.tool_calls is not mapping %}\n {%- for tool_call in message.tool_calls %}\n {%- if tool_call.function is defined %}\n {%- set tool_call = tool_call.function %}\n {%- endif %}\n {%- if loop.first %}\n {%- if content|trim %}\n {{- '\\n\\n<tool_call>\\n<function=' + tool_call.name + '>\\n' }}\n {%- else %}\n {{- '<tool_call>\\n<function=' + tool_call.name + '>\\n' }}\n {%- endif %}\n {%- else %}\n {{- '\\n<tool_call>\\n<function=' + tool_call.name + '>\\n' }}\n {%- endif %}\n {%- if tool_call.arguments is defined %}\n {%- for args_name, args_value in tool_call.arguments|items %}\n {{- '<parameter=' + args_name + '>\\n' }}\n {%- set args_value = args_value | string if args_value is string else args_value | tojson | safe %}\n {{- args_value }}\n {{- '\\n</parameter>\\n' }}\n {%- endfor %}\n {%- endif %}\n {{- '</function>\\n</tool_call>' }}\n {%- endfor %}\n {%- endif %}\n {{- '<|im_end|>\\n' }}\n {%- elif message.role == \"tool\" %}\n {%- if loop.previtem and loop.previtem.role != \"tool\" %}\n {{- '<|im_start|>user' }}\n {%- endif %}\n {{- '\\n<tool_response>\\n' }}\n {{- content }}\n {{- '\\n</tool_response>' }}\n {%- if not loop.last and loop.nextitem.role != \"tool\" %}\n {{- '<|im_end|>\\n' }}\n {%- elif loop.last %}\n {{- '<|im_end|>\\n' }}\n {%- endif %}\n {%- else %}\n {{- raise_exception('Unexpected message role.') }}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|im_start|>assistant\\n' }}\n {%- if enable_thinking is defined and enable_thinking is false %}\n {{- '<think>\\n\\n</think>\\n\\n' }}\n {%- else %}\n {{- '<think>\\n' }}\n {%- endif %}\n{%- endif %}", |
| 286 | "clean_up_tokenization_spaces": false, |
| 287 | "eos_token": "<|im_end|>", |
| 288 | "errors": "replace", |
| 289 | "model_max_length": 262144, |
| 290 | "pad_token": "<|endoftext|>", |
| 291 | "split_special_tokens": false, |
| 292 | "tokenizer_class": "Qwen2Tokenizer", |
| 293 | "unk_token": null, |
| 294 | "add_bos_token": false, |
| 295 | "pretokenize_regex": "(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\\r\\n\\p{L}\\p{N}]?[\\p{L}\\p{M}]+|\\p{N}| ?[^\\s\\p{L}\\p{M}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+", |
| 296 | "extra_special_tokens": { |
| 297 | "audio_bos_token": "<|audio_start|>", |
| 298 | "audio_eos_token": "<|audio_end|>", |
| 299 | "audio_token": "<|audio_pad|>", |
| 300 | "image_token": "<|image_pad|>", |
| 301 | "video_token": "<|video_pad|>", |
| 302 | "vision_bos_token": "<|vision_start|>", |
| 303 | "vision_eos_token": "<|vision_end|>" |
| 304 | } |
| 305 | } |