{
  "add_prefix_space": false,
  "backend": "tokenizers",
  "bos_token": null,
  "clean_up_tokenization_spaces": false,
  "eos_token": "<|im_end|>",
  "errors": "replace",
  "extra_special_tokens": [
    "<|denoise|>",
    "<|lang_start|>",
    "<|lang_end|>",
    "<|instruct_start|>",
    "<|instruct_end|>",
    "<|text_start|>",
    "<|text_end|>"
  ],
  "is_local": true,
  "model_max_length": 131072,
  "pad_token": "<|endoftext|>",
  "split_special_tokens": false,
  "tokenizer_class": "Qwen2Tokenizer",
  "unk_token": null
}