{
  "add_prefix_space": false,
  "added_tokens_decoder": {
    "49406": {
      "content": "<|startoftext|>",
      "lstrip": false,
      "normalized": true,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "49407": {
      "content": "<|endoftext|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    }
  },
  "bos_token": "<|startoftext|>",
  "clean_up_tokenization_spaces": true,
  "do_lower_case": true,
  "eos_token": "<|endoftext|>",
  "errors": "replace",
  "model_max_length": 77,
  "pad_token": "<|endoftext|>",
  "tokenizer_class": "CLIPTokenizer",
  "unk_token": "<|endoftext|>"
}