tokenizer_config.json
{
  "added_tokens_decoder": {
    "0": {
      "content": "<blank>",
      "lstrip": true,
      "normalized": false,
      "rstrip": true,
      "single_word": false,
      "special": true
    },
    "1": {
      "content": "<pad>",
      "lstrip": true,
      "normalized": false,
      "rstrip": true,
      "single_word": false,
      "special": true
    },
    "2": {
      "content": "</s>",
      "lstrip": true,
      "normalized": false,
      "rstrip": true,
      "single_word": false,
      "special": true
    },
    "3": {
      "content": "<unk>",
      "lstrip": true,
      "normalized": false,
      "rstrip": true,
      "single_word": false,
      "special": true
    }
  },
  "bos_token": null,
  "clean_up_tokenization_spaces": true,
  "do_lower_case": false,
  "eos_token": "</s>",
  "model_max_length": 1000000000000000019884624838656,
  "pad_token": "<pad>",
  "replace_word_delimiter_char": " ",
  "target_lang": null,
  "tokenizer_class": "Wav2Vec2CTCTokenizer",
  "unk_token": "<unk>",
  "word_delimiter_token": null
}
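
For context, a minimal sketch of how a config like this is consumed: the transformers library reads it via Wav2Vec2CTCTokenizer.from_pretrained, which also requires the companion vocab.json that maps these token IDs. The local directory path "./asr-model" below is a hypothetical placeholder, not part of this repository.

from transformers import Wav2Vec2CTCTokenizer

# Hypothetical local directory containing this tokenizer_config.json
# alongside its companion vocab.json.
tokenizer = Wav2Vec2CTCTokenizer.from_pretrained("./asr-model")

# The special tokens declared in added_tokens_decoder are exposed as attributes:
print(tokenizer.pad_token)  # "<pad>" (id 1)
print(tokenizer.eos_token)  # "</s>"  (id 2)
print(tokenizer.unk_token)  # "<unk>" (id 3)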