{
  "add_bos_token": false,
  "add_eos_token": false,
  "add_prefix_space": true,
  "added_tokens_decoder": {
    "0": {
      "content": "<unk>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "1": {
      "content": "<|startoftext|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "2": {
      "content": "<|endoftext|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "6": {
      "content": "<|im_start|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "7": {
      "content": "<|im_end|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    }
  },
  "bos_token": "<|startoftext|>",
  "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
  "clean_up_tokenization_spaces": false,
  "eos_token": "<|im_end|>",
  "legacy": true,
  "model_max_length": 4096,
  "pad_token": "<unk>",
  "sp_model_kwargs": {},
  "spaces_between_special_tokens": false,
  "tokenizer_class": "LlamaTokenizer",
  "unk_token": "<unk>",
  "use_default_system_prompt": false,
  "use_fast": true
}