tokenizer_config.json
| 1 | { |
| 2 | "backend": "tokenizers", |
| 3 | "clean_up_tokenization_spaces": false, |
| 4 | "eos_token": "<|endoftext|>", |
| 5 | "extra_special_tokens": [ |
| 6 | "<|endoftext|>", |
| 7 | "[MASK]", |
| 8 | "[gMASK]", |
| 9 | "[sMASK]", |
| 10 | "<sop>", |
| 11 | "<eop>", |
| 12 | "<|system|>", |
| 13 | "<|user|>", |
| 14 | "<|assistant|>", |
| 15 | "<|observation|>", |
| 16 | "<|begin_of_image|>", |
| 17 | "<|end_of_image|>", |
| 18 | "<|begin_of_video|>", |
| 19 | "<|end_of_video|>", |
| 20 | "<|begin_of_audio|>", |
| 21 | "<|end_of_audio|>", |
| 22 | "<|begin_of_transcription|>", |
| 23 | "<|end_of_transcription|>", |
| 24 | "<|code_prefix|>", |
| 25 | "<|code_middle|>", |
| 26 | "<|code_suffix|>", |
| 27 | "<think>", |
| 28 | "</think>", |
| 29 | "<tool_call>", |
| 30 | "</tool_call>", |
| 31 | "<tool_response>", |
| 32 | "</tool_response>", |
| 33 | "<arg_key>", |
| 34 | "</arg_key>", |
| 35 | "<arg_value>", |
| 36 | "</arg_value>", |
| 37 | "/nothink", |
| 38 | "<|begin_of_box|>", |
| 39 | "<|end_of_box|>", |
| 40 | "<|image|>", |
| 41 | "<|video|>" |
| 42 | ], |
| 43 | "is_local": true, |
| 44 | "model_max_length": 655380, |
| 45 | "pad_token": "<|endoftext|>", |
| 46 | "padding_side": "left", |
| 47 | "processor_class": "Glm46VProcessor", |
| 48 | "tokenizer_class": "TokenizersBackend" |
| 49 | } |
| 50 | |