tokenizer_config.json
{
  "clean_up_tokenization_spaces": true,
  "cls_token": "[CLS]",
  "do_basic_tokenize": true,
  "do_lower_case": true,
  "mask_token": "[MASK]",
  "model_max_length": 512,
  "never_split": null,
  "pad_token": "[PAD]",
  "sep_token": "[SEP]",
  "strip_accents": null,
  "tokenize_chinese_chars": true,
  "tokenizer_class": "BertTokenizer",
  "unk_token": "[UNK]"
}
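This file is read by the Hugging Face transformers library when a tokenizer is instantiated: the "tokenizer_class" key tells AutoTokenizer to build a BertTokenizer, and the remaining keys are passed through as its configuration. A minimal sketch of loading and using it, assuming the file sits in a local model directory (the path "./my-bert-model" is a placeholder) alongside the vocab.txt that BertTokenizer also requires:

```python
from transformers import AutoTokenizer

# "./my-bert-model" is a placeholder: a local directory holding this
# tokenizer_config.json plus the vocab.txt the tokenizer needs.
tokenizer = AutoTokenizer.from_pretrained("./my-bert-model")

# The settings above take effect here: do_lower_case folds the input to
# lowercase, [CLS]/[SEP] are added as special tokens, and truncation is
# capped at model_max_length (512 tokens).
encoded = tokenizer("Hello, World!", truncation=True)

print(encoded["input_ids"])
print(tokenizer.convert_ids_to_tokens(encoded["input_ids"]))
# With the standard bert-base-uncased vocabulary this prints something like:
# [101, 7592, 1010, 2088, 999, 102]
# ['[CLS]', 'hello', ',', 'world', '!', '[SEP]']
```

Note that because "do_lower_case" is true, the input "Hello, World!" and "hello, world!" produce identical token IDs; the exact IDs depend on the vocabulary file shipped with the model.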