tokenizer_config.json
397 B · 15 lines · json Raw
1 {
2 "bos_token": "<s>",
3 "clean_up_tokenization_spaces": true,
4 "do_lower_case": false,
5 "eos_token": "</s>",
6 "model_max_length": 1000000000000000019884624838656,
7 "pad_token": "<pad>",
8 "processor_class": "Wav2Vec2Processor",
9 "replace_word_delimiter_char": " ",
10 "target_lang": "eng",
11 "tokenizer_class": "Wav2Vec2CTCTokenizer",
12 "unk_token": "<unk>",
13 "word_delimiter_token": "|"
14 }
15