tokenizer_config.json
{
  "add_prefix_space": true,
  "added_tokens_decoder": {
    "0": {
      "content": "<s>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "1": {
      "content": "<pad>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "2": {
      "content": "</s>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "3": {
      "content": "<unk>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "4": {
      "content": "<mask>",
      "lstrip": true,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    }
  },
  "bos_token": "<s>",
  "clean_up_tokenization_spaces": true,
  "cls_token": "<s>",
  "eos_token": "</s>",
  "errors": "replace",
  "mask_token": "<mask>",
  "model_max_length": 1000000000000000019884624838656,
  "pad_token": "<pad>",
  "sep_token": "</s>",
  "tokenizer_class": "RobertaTokenizer",
  "trim_offsets": true,
  "unk_token": "<unk>"
}
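
This is a standard RobertaTokenizer configuration: the added_tokens_decoder entries map IDs 0-4 to the five special tokens, and the very large model_max_length value is int(1e30), the sentinel transformers writes when no maximum sequence length is recorded for the tokenizer. Below is a minimal loading sketch, assuming the file sits in a local model directory; the directory path is a placeholder, not something taken from this file.

# A minimal sketch of loading this config with transformers.
# "./my-roberta-model" is a hypothetical local directory containing
# this tokenizer_config.json alongside the vocab/merges files.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("./my-roberta-model")

# Per this config's added_tokens_decoder, the specials map to IDs 0-4.
print(tokenizer.bos_token, tokenizer.bos_token_id)    # <s> 0
print(tokenizer.pad_token, tokenizer.pad_token_id)    # <pad> 1
print(tokenizer.eos_token, tokenizer.eos_token_id)    # </s> 2
print(tokenizer.unk_token, tokenizer.unk_token_id)    # <unk> 3
print(tokenizer.mask_token, tokenizer.mask_token_id)  # <mask> 4

# "add_prefix_space": true prepends a space before tokenizing, so the
# first word gets the same BPE treatment as words later in the text.
# "lstrip": true on <mask> lets the mask token absorb the space to
# its left, which matters for masked-LM fill-in prompts like this one:
enc = tokenizer("Fill in the <mask>.")
print(enc.input_ids)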