tokenizer/tokenizer_config.json
855 B · 39 lines · json Raw
{
  "add_prefix_space": false,
  "added_tokens_decoder": {
    "0": {
      "content": "!",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "49406": {
      "content": "<|startoftext|>",
      "lstrip": false,
      "normalized": true,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "49407": {
      "content": "<|endoftext|>",
      "lstrip": false,
      "normalized": true,
      "rstrip": false,
      "single_word": false,
      "special": true
    }
  },
  "bos_token": "<|startoftext|>",
  "clean_up_tokenization_spaces": true,
  "do_lower_case": true,
  "eos_token": "<|endoftext|>",
  "errors": "replace",
  "model_max_length": 77,
  "pad_token": "!",
  "tokenizer_class": "CLIPTokenizer",
  "unk_token": "<|endoftext|>"
}