tokenizer_config.json
| 1 | { |
| 2 | "backend": "tokenizers", |
| 3 | "clean_up_tokenization_spaces": false, |
| 4 | "do_lower_case": false, |
| 5 | "eos_token": "<|endoftext|>", |
| 6 | "extra_special_tokens": [ |
| 7 | "<|endoftext|>", |
| 8 | "[MASK]", |
| 9 | "[gMASK]", |
| 10 | "[sMASK]", |
| 11 | "<sop>", |
| 12 | "<eop>", |
| 13 | "<|system|>", |
| 14 | "<|user|>", |
| 15 | "<|assistant|>", |
| 16 | "<|observation|>", |
| 17 | "<|begin_of_image|>", |
| 18 | "<|end_of_image|>", |
| 19 | "<|begin_of_video|>", |
| 20 | "<|end_of_video|>", |
| 21 | "<|begin_of_audio|>", |
| 22 | "<|end_of_audio|>", |
| 23 | "<|begin_of_transcription|>", |
| 24 | "<|end_of_transcription|>" |
| 25 | ], |
| 26 | "is_local": true, |
| 27 | "model_max_length": 202752, |
| 28 | "model_specific_special_tokens": {}, |
| 29 | "pad_token": "<|endoftext|>", |
| 30 | "padding_side": "left", |
| 31 | "remove_space": false, |
| 32 | "tokenizer_class": "TokenizersBackend" |
| 33 | } |
| 34 | |