onnx/tokenizer_config.json
496 B · 23 lines · json Raw
1 {
2 "add_prefix_space": true,
3 "bos_token": "<s>",
4 "clean_up_tokenization_spaces": true,
5 "cls_token": "<s>",
6 "eos_token": "</s>",
7 "mask_token": {
8 "__type": "AddedToken",
9 "content": "<mask>",
10 "lstrip": true,
11 "normalized": true,
12 "rstrip": false,
13 "single_word": false
14 },
15 "model_max_length": 512,
16 "pad_token": "<pad>",
17 "sep_token": "</s>",
18 "sp_model_kwargs": {},
19 "strip_accent": false,
20 "tokenizer_class": "XLMRobertaTokenizer",
21 "unk_token": "<unk>"
22 }
23