{
  "added_tokens_decoder": {
    "1": {
      "content": "</s>",
      "lstrip": true,
      "normalized": false,
      "rstrip": true,
      "single_word": false,
      "special": true
    },
    "2": {
      "content": "<unk>",
      "lstrip": true,
      "normalized": false,
      "rstrip": true,
      "single_word": false,
      "special": true
    }
  },
  "additional_special_tokens": [],
  "clean_up_tokenization_spaces": true,
  "do_lower_case": true,
  "eos_token": "</s>",
  "model_input_names": [
    "input_ids"
  ],
  "model_max_length": 64,
  "pad_token": "</s>",
  "processor_class": "SiglipProcessor",
  "sp_model_kwargs": {},
  "tokenizer_class": "SiglipTokenizer",
  "unk_token": "<unk>"
}