tokenizer_config.json
{
  "added_tokens_decoder": {
    "0": {
      "content": "<blank>",
      "lstrip": true,
      "normalized": false,
      "rstrip": true,
      "single_word": false,
      "special": true
    },
    "1": {
      "content": "<pad>",
      "lstrip": true,
      "normalized": false,
      "rstrip": true,
      "single_word": false,
      "special": true
    },
    "2": {
      "content": "</s>",
      "lstrip": true,
      "normalized": false,
      "rstrip": true,
      "single_word": false,
      "special": true
    },
    "3": {
      "content": "<unk>",
      "lstrip": true,
      "normalized": false,
      "rstrip": true,
      "single_word": false,
      "special": true
    }
  },
  "bos_token": null,
  "clean_up_tokenization_spaces": true,
  "do_lower_case": false,
  "eos_token": "</s>",
  "model_max_length": 1000000000000000019884624838656,
  "pad_token": "<pad>",
  "replace_word_delimiter_char": " ",
  "target_lang": null,
  "tokenizer_class": "Wav2Vec2CTCTokenizer",
  "unk_token": "<unk>",
  "word_delimiter_token": null
}
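
For context, a minimal sketch of how a config like this is consumed: the transformers library reads it via Wav2Vec2CTCTokenizer.from_pretrained, which also requires the companion vocab.json that maps these token IDs. The local directory path "./asr-model" below is a hypothetical placeholder, not part of this repository.

from transformers import Wav2Vec2CTCTokenizer

# Hypothetical local directory containing this tokenizer_config.json
# alongside its companion vocab.json.
tokenizer = Wav2Vec2CTCTokenizer.from_pretrained("./asr-model")

# The special tokens declared in added_tokens_decoder are exposed as attributes:
print(tokenizer.pad_token)  # "<pad>" (id 1)
print(tokenizer.eos_token)  # "</s>"  (id 2)
print(tokenizer.unk_token)  # "<unk>" (id 3)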