tokenizer_config.json

711 B · 34 lines · json Raw

1	`{`
2	`"added_tokens_decoder": {`
3	`"1": {`
4	`"content": "</s>",`
5	`"lstrip": true,`
6	`"normalized": false,`
7	`"rstrip": true,`
8	`"single_word": false,`
9	`"special": true`
10	`},`
11	`"2": {`
12	`"content": "<unk>",`
13	`"lstrip": true,`
14	`"normalized": false,`
15	`"rstrip": true,`
16	`"single_word": false,`
17	`"special": true`
18	`}`
19	`},`
20	`"additional_special_tokens": [],`
21	`"clean_up_tokenization_spaces": true,`
22	`"do_lower_case": true,`
23	`"eos_token": "</s>",`
24	`"model_input_names": [`
25	`"input_ids"`
26	`],`
27	`"model_max_length": 64,`
28	`"pad_token": "</s>",`
29	`"processor_class": "SiglipProcessor",`
30	`"sp_model_kwargs": {},`
31	`"tokenizer_class": "SiglipTokenizer",`
32	`"unk_token": "<unk>"`
33	`}`
34