{
  "add_prefix_space": false,
  "bos_token": {
    "__type": "AddedToken",
    "content": "<|startoftext|>",
    "lstrip": false,
    "normalized": true,
    "rstrip": false,
    "single_word": false
  },
  "clean_up_tokenization_spaces": true,
  "do_lower_case": true,
  "eos_token": {
    "__type": "AddedToken",
    "content": "<|endoftext|>",
    "lstrip": false,
    "normalized": true,
    "rstrip": false,
    "single_word": false
  },
  "errors": "replace",
  "model_max_length": 77,
  "pad_token": "!",
  "tokenizer_class": "CLIPTokenizer",
  "unk_token": {
    "__type": "AddedToken",
    "content": "<|endoftext|>",
    "lstrip": false,
    "normalized": true,
    "rstrip": false,
    "single_word": false
  }
}