{
  "add_prefix_space": false,
  "added_tokens_decoder": {
    "49406": {
      "content": "<|startoftext|>",
      "lstrip": false,
      "normalized": true,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "49407": {
      "content": "<|endoftext|>",
      "lstrip": false,
      "normalized": true,
      "rstrip": false,
      "single_word": false,
      "special": true
    }
  },
  "bos_token": "<|startoftext|>",
  "clean_up_tokenization_spaces": true,
  "do_lower_case": true,
  "eos_token": "<|endoftext|>",
  "errors": "replace",
  "extra_special_tokens": {},
  "model_max_length": 77,
  "pad_token": "<|endoftext|>",
  "tokenizer_class": "CLIPTokenizer",
  "unk_token": "<|endoftext|>"
}