onnx/tokenizer_config.json
443 B · 21 lines · json Raw
1 {
2 "bos_token": "<s>",
3 "clean_up_tokenization_spaces": true,
4 "cls_token": "<s>",
5 "eos_token": "</s>",
6 "mask_token": {
7 "__type": "AddedToken",
8 "content": "<mask>",
9 "lstrip": true,
10 "normalized": true,
11 "rstrip": false,
12 "single_word": false
13 },
14 "model_max_length": 512,
15 "pad_token": "<pad>",
16 "sep_token": "</s>",
17 "sp_model_kwargs": {},
18 "tokenizer_class": "XLMRobertaTokenizer",
19 "unk_token": "<unk>"
20 }
21