tokenizer_config.json
{
  "added_tokens_decoder": {
    "0": {
      "content": "[PAD]",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "100": {
      "content": "[UNK]",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "101": {
      "content": "[CLS]",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "102": {
      "content": "[SEP]",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "103": {
      "content": "[MASK]",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    }
  },
  "clean_up_tokenization_spaces": true,
  "cls_token": "[CLS]",
  "do_basic_tokenize": true,
  "do_lower_case": true,
  "extra_special_tokens": {},
  "mask_token": "[MASK]",
  "model_max_length": 512,
  "never_split": null,
  "pad_token": "[PAD]",
  "sep_token": "[SEP]",
  "strip_accents": null,
  "tokenize_chinese_chars": true,
  "tokenizer_class": "BertTokenizer",
  "unk_token": "[UNK]"
}
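
A minimal sketch of how this config is consumed, assuming it sits next to its vocab.txt in a local directory (the path "./bert-tokenizer" below is a hypothetical placeholder). AutoTokenizer reads "tokenizer_class": "BertTokenizer" to pick the class, and the special-token IDs it exposes should match "added_tokens_decoder" above.

# Hypothetical usage sketch; requires the Hugging Face transformers package.
from transformers import AutoTokenizer

# "./bert-tokenizer" is a placeholder for the directory holding
# tokenizer_config.json and vocab.txt.
tokenizer = AutoTokenizer.from_pretrained("./bert-tokenizer")

# The special-token IDs come from added_tokens_decoder in the config.
print(tokenizer.pad_token_id)   # 0
print(tokenizer.unk_token_id)   # 100
print(tokenizer.cls_token_id)   # 101
print(tokenizer.sep_token_id)   # 102
print(tokenizer.mask_token_id)  # 103

# do_lower_case=true: text is lowercased before WordPiece splitting,
# and single sequences are wrapped as [CLS] ... [SEP].
encoded = tokenizer("Hello, World!")
print(encoded.input_ids)  # e.g. [101, ..., 102]

# model_max_length=512: truncation caps inputs at this length, and
# padding uses the [PAD] token (ID 0) defined above.
batch = tokenizer(["short", "a longer sentence"], padding=True, truncation=True)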