special_tokens_map.json
2.3 KB · 145 lines · json Raw
1 {
2 "additional_special_tokens": [
3 "__afr__",
4 "__amh__",
5 "__arb__",
6 "__ary__",
7 "__arz__",
8 "__asm__",
9 "__azj__",
10 "__bel__",
11 "__ben__",
12 "__bos__",
13 "__bul__",
14 "__cat__",
15 "__ceb__",
16 "__ces__",
17 "__ckb__",
18 "__cmn__",
19 "__cmn_Hant__",
20 "__cym__",
21 "__dan__",
22 "__deu__",
23 "__ell__",
24 "__eng__",
25 "__est__",
26 "__eus__",
27 "__fin__",
28 "__fra__",
29 "__fuv__",
30 "__gaz__",
31 "__gle__",
32 "__glg__",
33 "__guj__",
34 "__heb__",
35 "__hin__",
36 "__hrv__",
37 "__hun__",
38 "__hye__",
39 "__ibo__",
40 "__ind__",
41 "__isl__",
42 "__ita__",
43 "__jav__",
44 "__jpn__",
45 "__kan__",
46 "__kat__",
47 "__kaz__",
48 "__khk__",
49 "__khm__",
50 "__kir__",
51 "__kor__",
52 "__lao__",
53 "__lit__",
54 "__lug__",
55 "__luo__",
56 "__lvs__",
57 "__mai__",
58 "__mal__",
59 "__mar__",
60 "__mkd__",
61 "__mlt__",
62 "__mni__",
63 "__mya__",
64 "__nld__",
65 "__nno__",
66 "__nob__",
67 "__npi__",
68 "__nya__",
69 "__ory__",
70 "__pan__",
71 "__pbt__",
72 "__pes__",
73 "__pol__",
74 "__por__",
75 "__ron__",
76 "__rus__",
77 "__sat__",
78 "__slk__",
79 "__slv__",
80 "__sna__",
81 "__snd__",
82 "__som__",
83 "__spa__",
84 "__srp__",
85 "__swe__",
86 "__swh__",
87 "__tam__",
88 "__tel__",
89 "__tgk__",
90 "__tgl__",
91 "__tha__",
92 "__tur__",
93 "__ukr__",
94 "__urd__",
95 "__uzn__",
96 "__vie__",
97 "__yor__",
98 "__yue__",
99 "__zlm__",
100 "__zul__"
101 ],
102 "bos_token": {
103 "content": "<s>",
104 "lstrip": false,
105 "normalized": false,
106 "rstrip": false,
107 "single_word": false
108 },
109 "cls_token": {
110 "content": "<s>",
111 "lstrip": false,
112 "normalized": false,
113 "rstrip": false,
114 "single_word": false
115 },
116 "eos_token": {
117 "content": "</s>",
118 "lstrip": false,
119 "normalized": false,
120 "rstrip": false,
121 "single_word": false
122 },
123 "pad_token": {
124 "content": "<pad>",
125 "lstrip": false,
126 "normalized": false,
127 "rstrip": false,
128 "single_word": false
129 },
130 "sep_token": {
131 "content": "</s>",
132 "lstrip": false,
133 "normalized": false,
134 "rstrip": false,
135 "single_word": false
136 },
137 "unk_token": {
138 "content": "<unk>",
139 "lstrip": false,
140 "normalized": false,
141 "rstrip": false,
142 "single_word": false
143 }
144 }
145