tokenizer_config.json
4.1 KB · 130 lines · json Raw
1 {
2 "add_prefix_space": false,
3 "added_tokens_decoder": {
4 "151643": {
5 "content": "<|endoftext|>",
6 "lstrip": false,
7 "normalized": false,
8 "rstrip": false,
9 "single_word": false,
10 "special": true
11 },
12 "151644": {
13 "content": "<|im_start|>",
14 "lstrip": false,
15 "normalized": false,
16 "rstrip": false,
17 "single_word": false,
18 "special": true
19 },
20 "151645": {
21 "content": "<|im_end|>",
22 "lstrip": false,
23 "normalized": false,
24 "rstrip": false,
25 "single_word": false,
26 "special": true
27 },
28 "151646": {
29 "content": "<|object_ref_start|>",
30 "lstrip": false,
31 "normalized": false,
32 "rstrip": false,
33 "single_word": false,
34 "special": true
35 },
36 "151647": {
37 "content": "<|object_ref_end|>",
38 "lstrip": false,
39 "normalized": false,
40 "rstrip": false,
41 "single_word": false,
42 "special": true
43 },
44 "151648": {
45 "content": "<|box_start|>",
46 "lstrip": false,
47 "normalized": false,
48 "rstrip": false,
49 "single_word": false,
50 "special": true
51 },
52 "151649": {
53 "content": "<|box_end|>",
54 "lstrip": false,
55 "normalized": false,
56 "rstrip": false,
57 "single_word": false,
58 "special": true
59 },
60 "151650": {
61 "content": "<|quad_start|>",
62 "lstrip": false,
63 "normalized": false,
64 "rstrip": false,
65 "single_word": false,
66 "special": true
67 },
68 "151651": {
69 "content": "<|quad_end|>",
70 "lstrip": false,
71 "normalized": false,
72 "rstrip": false,
73 "single_word": false,
74 "special": true
75 },
76 "151652": {
77 "content": "<|vision_start|>",
78 "lstrip": false,
79 "normalized": false,
80 "rstrip": false,
81 "single_word": false,
82 "special": true
83 },
84 "151653": {
85 "content": "<|vision_end|>",
86 "lstrip": false,
87 "normalized": false,
88 "rstrip": false,
89 "single_word": false,
90 "special": true
91 },
92 "151654": {
93 "content": "<|vision_pad|>",
94 "lstrip": false,
95 "normalized": false,
96 "rstrip": false,
97 "single_word": false,
98 "special": true
99 },
100 "151655": {
101 "content": "<|image_pad|>",
102 "lstrip": false,
103 "normalized": false,
104 "rstrip": false,
105 "single_word": false,
106 "special": true
107 },
108 "151656": {
109 "content": "<|video_pad|>",
110 "lstrip": false,
111 "normalized": false,
112 "rstrip": false,
113 "single_word": false,
114 "special": true
115 }
116 },
117 "additional_special_tokens": ["<|im_start|>", "<|im_end|>", "<|object_ref_start|>","<|object_ref_end|>","<|box_start|>","<|box_end|>","<|quad_start|>","<|quad_end|>","<|vision_start|>","<|vision_end|>","<|vision_pad|>","<|image_pad|>","<|video_pad|>"],
118 "bos_token": null,
119 "chat_template": "{% set image_count = namespace(value=0) %}{% set video_count = namespace(value=0) %}{% for message in messages %}{% if loop.first and message['role'] != 'system' %}<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n{% endif %}<|im_start|>{{ message['role'] }}\n{% if message['content'] is string %}{{ message['content'] }}<|im_end|>\n{% else %}{% for content in message['content'] %}{% if content['type'] == 'image' or 'image' in content or 'image_url' in content %}{% set image_count.value = image_count.value + 1 %}{% if add_vision_id %}Picture {{ image_count.value }}: {% endif %}<|vision_start|><|image_pad|><|vision_end|>{% elif content['type'] == 'video' or 'video' in content %}{% set video_count.value = video_count.value + 1 %}{% if add_vision_id %}Video {{ video_count.value }}: {% endif %}<|vision_start|><|video_pad|><|vision_end|>{% elif 'text' in content %}{{ content['text'] }}{% endif %}{% endfor %}<|im_end|>\n{% endif %}{% endfor %}{% if add_generation_prompt %}<|im_start|>assistant\n{% endif %}",
120 "clean_up_tokenization_spaces": false,
121 "eos_token": "<|im_end|>",
122 "padding_side": "left",
123 "errors": "replace",
124 "model_max_length": 32768,
125 "pad_token": "<|endoftext|>",
126 "split_special_tokens": false,
127 "tokenizer_class": "Qwen2Tokenizer",
128 "unk_token": null
129 }
130