tokenizer.json
3.4 KB · 175 lines · json Raw
1 {
2 "version": "1.0",
3 "truncation": null,
4 "padding": null,
5 "added_tokens": [],
6 "normalizer": {
7 "type": "Replace",
8 "pattern": {
9 "Regex": "[^$;:,.!?\u2014\u2026\"()\u201c\u201d \u0303\u02a3\u02a5\u02a6\u02a8\u1d5d\uab67AIOQSTWY\u1d4aabcdefhijklmnopqrstuvwxyz\u0251\u0250\u0252\u00e6\u03b2\u0254\u0255\u00e7\u0256\u00f0\u02a4\u0259\u025a\u025b\u025c\u025f\u0261\u0265\u0268\u026a\u029d\u026f\u0270\u014b\u0273\u0272\u0274\u00f8\u0278\u03b8\u0153\u0279\u027e\u027b\u0281\u027d\u0282\u0283\u0288\u02a7\u028a\u028b\u028c\u0263\u0264\u03c7\u028e\u0292\u0294\u02c8\u02cc\u02d0\u02b0\u02b2\u2193\u2192\u2197\u2198\u1d7b]"
10 },
11 "content": ""
12 },
13 "pre_tokenizer": {
14 "type": "Split",
15 "pattern": {
16 "Regex": ""
17 },
18 "behavior": "Isolated",
19 "invert": false
20 },
21 "post_processor": {
22 "type": "TemplateProcessing",
23 "single": [
24 {
25 "SpecialToken": {
26 "id": "$",
27 "type_id": 0
28 }
29 },
30 {
31 "Sequence": {
32 "id": "A",
33 "type_id": 0
34 }
35 },
36 {
37 "SpecialToken": {
38 "id": "$",
39 "type_id": 0
40 }
41 }
42 ],
43 "special_tokens": {
44 "$": {
45 "id": "$",
46 "ids": [
47 0
48 ],
49 "tokens": [
50 "$"
51 ]
52 }
53 }
54 },
55 "decoder": null,
56 "model": {
57 "vocab": {
58 "$": 0,
59 ";": 1,
60 ":": 2,
61 ",": 3,
62 ".": 4,
63 "!": 5,
64 "?": 6,
65 "\u2014": 9,
66 "\u2026": 10,
67 "\"": 11,
68 "(": 12,
69 ")": 13,
70 "\u201c": 14,
71 "\u201d": 15,
72 " ": 16,
73 "\u0303": 17,
74 "\u02a3": 18,
75 "\u02a5": 19,
76 "\u02a6": 20,
77 "\u02a8": 21,
78 "\u1d5d": 22,
79 "\uab67": 23,
80 "A": 24,
81 "I": 25,
82 "O": 31,
83 "Q": 33,
84 "S": 35,
85 "T": 36,
86 "W": 39,
87 "Y": 41,
88 "\u1d4a": 42,
89 "a": 43,
90 "b": 44,
91 "c": 45,
92 "d": 46,
93 "e": 47,
94 "f": 48,
95 "h": 50,
96 "i": 51,
97 "j": 52,
98 "k": 53,
99 "l": 54,
100 "m": 55,
101 "n": 56,
102 "o": 57,
103 "p": 58,
104 "q": 59,
105 "r": 60,
106 "s": 61,
107 "t": 62,
108 "u": 63,
109 "v": 64,
110 "w": 65,
111 "x": 66,
112 "y": 67,
113 "z": 68,
114 "\u0251": 69,
115 "\u0250": 70,
116 "\u0252": 71,
117 "\u00e6": 72,
118 "\u03b2": 75,
119 "\u0254": 76,
120 "\u0255": 77,
121 "\u00e7": 78,
122 "\u0256": 80,
123 "\u00f0": 81,
124 "\u02a4": 82,
125 "\u0259": 83,
126 "\u025a": 85,
127 "\u025b": 86,
128 "\u025c": 87,
129 "\u025f": 90,
130 "\u0261": 92,
131 "\u0265": 99,
132 "\u0268": 101,
133 "\u026a": 102,
134 "\u029d": 103,
135 "\u026f": 110,
136 "\u0270": 111,
137 "\u014b": 112,
138 "\u0273": 113,
139 "\u0272": 114,
140 "\u0274": 115,
141 "\u00f8": 116,
142 "\u0278": 118,
143 "\u03b8": 119,
144 "\u0153": 120,
145 "\u0279": 123,
146 "\u027e": 125,
147 "\u027b": 126,
148 "\u0281": 128,
149 "\u027d": 129,
150 "\u0282": 130,
151 "\u0283": 131,
152 "\u0288": 132,
153 "\u02a7": 133,
154 "\u028a": 135,
155 "\u028b": 136,
156 "\u028c": 138,
157 "\u0263": 139,
158 "\u0264": 140,
159 "\u03c7": 142,
160 "\u028e": 143,
161 "\u0292": 147,
162 "\u0294": 148,
163 "\u02c8": 156,
164 "\u02cc": 157,
165 "\u02d0": 158,
166 "\u02b0": 162,
167 "\u02b2": 164,
168 "\u2193": 169,
169 "\u2192": 171,
170 "\u2197": 172,
171 "\u2198": 173,
172 "\u1d7b": 177
173 }
174 }
175 }