config.json
6.5 KB · 368 lines · json Raw
1 {
2 "_name_or_path": "mms-lid-256",
3 "activation_dropout": 0.05,
4 "adapter_attn_dim": 16,
5 "adapter_kernel_size": 3,
6 "adapter_stride": 2,
7 "add_adapter": false,
8 "apply_spec_augment": true,
9 "architectures": [
10 "Wav2Vec2ForSequenceClassification"
11 ],
12 "attention_dropout": 0.05,
13 "bos_token_id": 1,
14 "classifier_proj_size": 1024,
15 "codevector_dim": 1024,
16 "contrastive_logits_temperature": 0.1,
17 "conv_bias": true,
18 "conv_dim": [
19 512,
20 512,
21 512,
22 512,
23 512,
24 512,
25 512
26 ],
27 "conv_kernel": [
28 10,
29 3,
30 3,
31 3,
32 3,
33 2,
34 2
35 ],
36 "conv_stride": [
37 5,
38 2,
39 2,
40 2,
41 2,
42 2,
43 2
44 ],
45 "ctc_loss_reduction": "mean",
46 "ctc_zero_infinity": false,
47 "diversity_loss_weight": 0.1,
48 "do_stable_layer_norm": true,
49 "eos_token_id": 2,
50 "feat_extract_activation": "gelu",
51 "feat_extract_dropout": 0.0,
52 "feat_extract_norm": "layer",
53 "feat_proj_dropout": 0.05,
54 "feat_quantizer_dropout": 0.0,
55 "final_dropout": 0.05,
56 "hidden_act": "gelu",
57 "hidden_dropout": 0.05,
58 "hidden_size": 1280,
59 "id2label": {
60 "0": "ara",
61 "1": "cmn",
62 "2": "eng",
63 "3": "spa",
64 "4": "fra",
65 "5": "mlg",
66 "6": "swe",
67 "7": "por",
68 "8": "vie",
69 "9": "ful",
70 "10": "sun",
71 "11": "asm",
72 "12": "ben",
73 "13": "zlm",
74 "14": "kor",
75 "15": "ind",
76 "16": "hin",
77 "17": "tuk",
78 "18": "urd",
79 "19": "aze",
80 "20": "slv",
81 "21": "mon",
82 "22": "hau",
83 "23": "tel",
84 "24": "swh",
85 "25": "bod",
86 "26": "rus",
87 "27": "tur",
88 "28": "heb",
89 "29": "mar",
90 "30": "som",
91 "31": "tgl",
92 "32": "tat",
93 "33": "tha",
94 "34": "cat",
95 "35": "ron",
96 "36": "mal",
97 "37": "bel",
98 "38": "pol",
99 "39": "yor",
100 "40": "nld",
101 "41": "bul",
102 "42": "hat",
103 "43": "afr",
104 "44": "isl",
105 "45": "amh",
106 "46": "tam",
107 "47": "hun",
108 "48": "hrv",
109 "49": "lit",
110 "50": "cym",
111 "51": "fas",
112 "52": "mkd",
113 "53": "ell",
114 "54": "bos",
115 "55": "deu",
116 "56": "sqi",
117 "57": "jav",
118 "58": "kmr",
119 "59": "nob",
120 "60": "uzb",
121 "61": "snd",
122 "62": "lat",
123 "63": "nya",
124 "64": "grn",
125 "65": "mya",
126 "66": "orm",
127 "67": "lin",
128 "68": "hye",
129 "69": "yue",
130 "70": "pan",
131 "71": "jpn",
132 "72": "kaz",
133 "73": "npi",
134 "74": "kik",
135 "75": "kat",
136 "76": "guj",
137 "77": "kan",
138 "78": "tgk",
139 "79": "ukr",
140 "80": "ces",
141 "81": "lav",
142 "82": "bak",
143 "83": "khm",
144 "84": "fao",
145 "85": "glg",
146 "86": "ltz",
147 "87": "xog",
148 "88": "lao",
149 "89": "mlt",
150 "90": "sin",
151 "91": "aka",
152 "92": "sna",
153 "93": "ita",
154 "94": "srp",
155 "95": "mri",
156 "96": "nno",
157 "97": "pus",
158 "98": "eus",
159 "99": "ory",
160 "100": "lug",
161 "101": "bre",
162 "102": "luo",
163 "103": "slk",
164 "104": "ewe",
165 "105": "fin",
166 "106": "rif",
167 "107": "dan",
168 "108": "yid",
169 "109": "yao",
170 "110": "mos",
171 "111": "hne",
172 "112": "est",
173 "113": "dyu",
174 "114": "bam",
175 "115": "uig",
176 "116": "sck",
177 "117": "tso",
178 "118": "mup",
179 "119": "ctg",
180 "120": "ceb",
181 "121": "war",
182 "122": "bbc",
183 "123": "vmw",
184 "124": "sid",
185 "125": "tpi",
186 "126": "mag",
187 "127": "san",
188 "128": "kri",
189 "129": "lon",
190 "130": "kir",
191 "131": "run",
192 "132": "ubl",
193 "133": "kin",
194 "134": "rkt",
195 "135": "xmm",
196 "136": "tir",
197 "137": "mai",
198 "138": "nan",
199 "139": "nyn",
200 "140": "bcc",
201 "141": "hak",
202 "142": "suk",
203 "143": "bem",
204 "144": "rmy",
205 "145": "awa",
206 "146": "pcm",
207 "147": "bgc",
208 "148": "shn",
209 "149": "oci",
210 "150": "wol",
211 "151": "bci",
212 "152": "kab",
213 "153": "ilo",
214 "154": "bcl",
215 "155": "haw",
216 "156": "mad",
217 "157": "nod",
218 "158": "sag",
219 "159": "sas",
220 "160": "jam",
221 "161": "mey",
222 "162": "shi",
223 "163": "hil",
224 "164": "ace",
225 "165": "kam",
226 "166": "min",
227 "167": "umb",
228 "168": "hno",
229 "169": "ban",
230 "170": "syl",
231 "171": "bxg",
232 "172": "xho",
233 "173": "mww",
234 "174": "epo",
235 "175": "tzm",
236 "176": "zul",
237 "177": "ibo",
238 "178": "abk",
239 "179": "guz",
240 "180": "ckb",
241 "181": "knc",
242 "182": "nso",
243 "183": "bho",
244 "184": "dje",
245 "185": "tiv",
246 "186": "gle",
247 "187": "lua",
248 "188": "skr",
249 "189": "bto",
250 "190": "kea",
251 "191": "glk",
252 "192": "ast",
253 "193": "sat",
254 "194": "ktu",
255 "195": "bhb",
256 "196": "emk",
257 "197": "kng",
258 "198": "kmb",
259 "199": "tsn",
260 "200": "gom",
261 "201": "ven",
262 "202": "sco",
263 "203": "glv",
264 "204": "sot",
265 "205": "sou",
266 "206": "gno",
267 "207": "nde",
268 "208": "bjn",
269 "209": "ina",
270 "210": "fmu",
271 "211": "esg",
272 "212": "wes",
273 "213": "pnb",
274 "214": "phr",
275 "215": "mui",
276 "216": "bug",
277 "217": "mrr",
278 "218": "kas",
279 "219": "lir",
280 "220": "vah",
281 "221": "ssw",
282 "222": "rwr",
283 "223": "pcc",
284 "224": "hms",
285 "225": "wbr",
286 "226": "swv",
287 "227": "mtr",
288 "228": "haz",
289 "229": "aii",
290 "230": "bns",
291 "231": "msi",
292 "232": "wuu",
293 "233": "hsn",
294 "234": "bgp",
295 "235": "tts",
296 "236": "lmn",
297 "237": "dcc",
298 "238": "bew",
299 "239": "bjj",
300 "240": "ibb",
301 "241": "tji",
302 "242": "hoj",
303 "243": "cpx",
304 "244": "cdo",
305 "245": "daq",
306 "246": "mut",
307 "247": "nap",
308 "248": "czh",
309 "249": "gdx",
310 "250": "sdh",
311 "251": "scn",
312 "252": "mnp",
313 "253": "bar",
314 "254": "mzn",
315 "255": "gsw"
316 },
317 "initializer_range": 0.02,
318 "intermediate_size": 5120,
319 "label2id": null,
320 "layer_norm_eps": 1e-05,
321 "layerdrop": 0.05,
322 "mask_feature_length": 10,
323 "mask_feature_min_masks": 0,
324 "mask_feature_prob": 0.0,
325 "mask_time_length": 10,
326 "mask_time_min_masks": 2,
327 "mask_time_prob": 0.05,
328 "model_type": "wav2vec2",
329 "num_adapter_layers": 3,
330 "num_attention_heads": 16,
331 "num_codevector_groups": 2,
332 "num_codevectors_per_group": 320,
333 "num_conv_pos_embedding_groups": 16,
334 "num_conv_pos_embeddings": 128,
335 "num_feat_extract_layers": 7,
336 "num_hidden_layers": 48,
337 "num_negatives": 100,
338 "output_hidden_size": 1280,
339 "pad_token_id": 0,
340 "proj_codevector_dim": 1024,
341 "tdnn_dilation": [
342 1,
343 2,
344 3,
345 1,
346 1
347 ],
348 "tdnn_dim": [
349 512,
350 512,
351 512,
352 512,
353 1500
354 ],
355 "tdnn_kernel": [
356 5,
357 3,
358 3,
359 1,
360 1
361 ],
362 "torch_dtype": "float32",
363 "transformers_version": "4.31.0.dev0",
364 "use_weighted_layer_sum": false,
365 "vocab_size": 154,
366 "xvector_output_dim": 512
367 }
368