tokenizer_config.json
4.1 KB · 184 lines · json Raw
1 {
2 "added_tokens_decoder": {
3 "199998": {
4 "content": "<|startoftext|>",
5 "lstrip": false,
6 "normalized": false,
7 "rstrip": false,
8 "single_word": false,
9 "special": true
10 },
11 "199999": {
12 "content": "<|endoftext|>",
13 "lstrip": false,
14 "normalized": false,
15 "rstrip": false,
16 "single_word": false,
17 "special": true
18 },
19 "200000": {
20 "content": "<|reserved_200000|>",
21 "lstrip": false,
22 "normalized": false,
23 "rstrip": false,
24 "single_word": false,
25 "special": true
26 },
27 "200001": {
28 "content": "<|reserved_200001|>",
29 "lstrip": false,
30 "normalized": false,
31 "rstrip": false,
32 "single_word": false,
33 "special": true
34 },
35 "200002": {
36 "content": "<|return|>",
37 "lstrip": false,
38 "normalized": false,
39 "rstrip": false,
40 "single_word": false,
41 "special": true
42 },
43 "200003": {
44 "content": "<|constrain|>",
45 "lstrip": false,
46 "normalized": false,
47 "rstrip": false,
48 "single_word": false,
49 "special": true
50 },
51 "200004": {
52 "content": "<|reserved_200004|>",
53 "lstrip": false,
54 "normalized": false,
55 "rstrip": false,
56 "single_word": false,
57 "special": true
58 },
59 "200005": {
60 "content": "<|channel|>",
61 "lstrip": false,
62 "normalized": false,
63 "rstrip": false,
64 "single_word": false,
65 "special": true
66 },
67 "200006": {
68 "content": "<|start|>",
69 "lstrip": false,
70 "normalized": false,
71 "rstrip": false,
72 "single_word": false,
73 "special": true
74 },
75 "200007": {
76 "content": "<|end|>",
77 "lstrip": false,
78 "normalized": false,
79 "rstrip": false,
80 "single_word": false,
81 "special": true
82 },
83 "200008": {
84 "content": "<|message|>",
85 "lstrip": false,
86 "normalized": false,
87 "rstrip": false,
88 "single_word": false,
89 "special": true
90 },
91 "200009": {
92 "content": "<|reserved_200009|>",
93 "lstrip": false,
94 "normalized": false,
95 "rstrip": false,
96 "single_word": false,
97 "special": true
98 },
99 "200010": {
100 "content": "<|reserved_200010|>",
101 "lstrip": false,
102 "normalized": false,
103 "rstrip": false,
104 "single_word": false,
105 "special": true
106 },
107 "200011": {
108 "content": "<|reserved_200011|>",
109 "lstrip": false,
110 "normalized": false,
111 "rstrip": false,
112 "single_word": false,
113 "special": true
114 },
115 "200012": {
116 "content": "<|call|>",
117 "lstrip": false,
118 "normalized": false,
119 "rstrip": false,
120 "single_word": false,
121 "special": true
122 },
123 "200013": {
124 "content": "<|reserved_200013|>",
125 "lstrip": false,
126 "normalized": false,
127 "rstrip": false,
128 "single_word": false,
129 "special": true
130 },
131 "200014": {
132 "content": "<|reserved_200014|>",
133 "lstrip": false,
134 "normalized": false,
135 "rstrip": false,
136 "single_word": false,
137 "special": true
138 },
139 "200015": {
140 "content": "<|reserved_200015|>",
141 "lstrip": false,
142 "normalized": false,
143 "rstrip": false,
144 "single_word": false,
145 "special": true
146 },
147 "200016": {
148 "content": "<|reserved_200016|>",
149 "lstrip": false,
150 "normalized": false,
151 "rstrip": false,
152 "single_word": false,
153 "special": true
154 },
155 "200017": {
156 "content": "<|reserved_200017|>",
157 "lstrip": false,
158 "normalized": false,
159 "rstrip": false,
160 "single_word": false,
161 "special": true
162 },
163 "200018": {
164 "content": "<|endofprompt|>",
165 "lstrip": false,
166 "normalized": false,
167 "rstrip": false,
168 "single_word": false,
169 "special": true
170 }
171 },
172 "bos_token": "<|startoftext|>",
173 "clean_up_tokenization_spaces": false,
174 "eos_token": "<|return|>",
175 "extra_special_tokens": {},
176 "model_input_names": [
177 "input_ids",
178 "attention_mask"
179 ],
180 "model_max_length": 1000000000000000019884624838656,
181 "pad_token": "<|endoftext|>",
182 "tokenizer_class": "PreTrainedTokenizerFast"
183 }
184