{
  "add_prefix_space": false,
  "added_tokens_decoder": {
    "151643": {
      "content": "<|endoftext|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "151644": {
      "content": "<|im_start|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "151645": {
      "content": "<|im_end|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    }
  },
  "additional_special_tokens": ["<|im_start|>", "<|im_end|>"],
  "bos_token": null,
  "chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
  "clean_up_tokenization_spaces": false,
  "eos_token": "<|im_end|>",
  "errors": "replace",
  "model_max_length": 32768,
  "pad_token": "<|endoftext|>",
  "split_special_tokens": false,
  "tokenizer_class": "Qwen2Tokenizer",
  "unk_token": null
}