{
  "backend": "tokenizers",
  "clean_up_tokenization_spaces": false,
  "eos_token": "<|endoftext|>",
  "extra_special_tokens": [
    "<|endoftext|>",
    "[MASK]",
    "[gMASK]",
    "[sMASK]",
    "<sop>",
    "<eop>",
    "<|system|>",
    "<|user|>",
    "<|assistant|>",
    "<|observation|>",
    "<|begin_of_image|>",
    "<|end_of_image|>",
    "<|begin_of_video|>",
    "<|end_of_video|>",
    "<|begin_of_audio|>",
    "<|end_of_audio|>",
    "<|begin_of_transcription|>",
    "<|end_of_transcription|>",
    "<|code_prefix|>",
    "<|code_middle|>",
    "<|code_suffix|>",
    "<think>",
    "</think>",
    "<tool_call>",
    "</tool_call>",
    "<tool_response>",
    "</tool_response>",
    "<arg_key>",
    "</arg_key>",
    "<arg_value>",
    "</arg_value>",
    "/nothink",
    "<|begin_of_box|>",
    "<|end_of_box|>",
    "<|image|>",
    "<|video|>"
  ],
  "is_local": true,
  "model_max_length": 655380,
  "pad_token": "<|endoftext|>",
  "padding_side": "left",
  "processor_class": "Glm46VProcessor",
  "tokenizer_class": "TokenizersBackend"
}