tokenizer_config.json
1.3 KB · 60 lines · json Raw
{
  "added_tokens_decoder": {
    "0": {
      "content": "[PAD]",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "1": {
      "content": "[CLS]",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "2": {
      "content": "[SEP]",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "3": {
      "content": "[UNK]",
      "lstrip": false,
      "normalized": true,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128000": {
      "content": "[MASK]",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    }
  },
  "bos_token": "[CLS]",
  "clean_up_tokenization_spaces": false,
  "cls_token": "[CLS]",
  "do_lower_case": false,
  "eos_token": "[SEP]",
  "extra_special_tokens": {},
  "mask_token": "[MASK]",
  "model_max_length": 512,
  "pad_token": "[PAD]",
  "sep_token": "[SEP]",
  "sp_model_kwargs": {},
  "split_by_punct": false,
  "tokenizer_class": "DebertaV2Tokenizer",
  "unk_token": "[UNK]",
  "vocab_type": "spm"
}