tokenizer_config.json
1.3 KB · 52 lines · json Raw
{
  "add_eos_token": true,
  "add_prefix_space": false,
  "added_tokens_decoder": {
    "151643": {
      "content": "<|endoftext|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "151644": {
      "content": "<|im_start|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "151645": {
      "content": "<|im_end|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    }
  },
  "additional_special_tokens": [
    "<|im_start|>",
    "<|im_end|>"
  ],
  "auto_map": {
    "AutoTokenizer": [
      "tokenization_qwen.Qwen2Tokenizer",
      "tokenization_qwen.Qwen2TokenizerFast"
    ]
  },
  "bos_token": null,
  "chat_template": "{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
  "clean_up_tokenization_spaces": false,
  "eos_token": "<|endoftext|>",
  "errors": "replace",
  "extra_special_tokens": {},
  "model_max_length": 32768,
  "pad_token": "<|endoftext|>",
  "split_special_tokens": false,
  "tokenizer_class": "Qwen2Tokenizer",
  "unk_token": null
}