tokenizer_config.json
1.9 KB · 43 lines · json Raw
{
  "add_bos_token": true,
  "add_eos_token": false,
  "add_prefix_space": null,
  "added_tokens_decoder": {
    "0": {
      "content": "<unk>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "1": {
      "content": "<s>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "2": {
      "content": "</s>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    }
  },
  "bos_token": "<s>",
  "chat_template": "{% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{% set system_message = messages[0]['content'] %}{% else %}{% set loop_messages = messages %}{% set system_message = false %}{% endif %}{% for message in loop_messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if loop.index0 == 0 and system_message != false %}{% set content = '<<SYS>>\n' + system_message + '\n<</SYS>>\n\n' + message['content'] %}{% else %}{% set content = message['content'] %}{% endif %}{% if message['role'] == 'user' %}{{ bos_token + '[INST] ' + content.strip() + ' [/INST]' }}{% elif message['role'] == 'system' %}{{ '<<SYS>>\n' + content.strip() + '\n<</SYS>>\n\n' }}{% elif message['role'] == 'assistant' %}{{ ' ' + content.strip() + ' ' + eos_token }}{% endif %}{% endfor %}",
  "clean_up_tokenization_spaces": false,
  "eos_token": "</s>",
  "extra_special_tokens": {},
  "legacy": true,
  "model_max_length": 2048,
  "pad_token": "<unk>",
  "tokenizer_class": "LlamaTokenizer",
  "unk_token": "<unk>",
  "use_default_system_prompt": true
}
43