hf_quant_config.json
34.3 KB · 1051 lines · json Raw
1 {
2 "producer": {
3 "name": "modelopt",
4 "version": "0.44.0"
5 },
6 "quantization": {
7 "quant_algo": "MIXED_PRECISION",
8 "kv_cache_quant_algo": "FP8",
9 "quantized_layers": {
10 "model.language_model.layers.0.linear_attn.out_proj": {
11 "quant_algo": "FP8"
12 },
13 "model.language_model.layers.0.linear_attn.in_proj_qkv": {
14 "quant_algo": "FP8"
15 },
16 "model.language_model.layers.0.linear_attn.in_proj_z": {
17 "quant_algo": "FP8"
18 },
19 "model.language_model.layers.0.mlp.experts": {
20 "quant_algo": "W4A16_NVFP4",
21 "group_size": 16
22 },
23 "model.language_model.layers.0.mlp.shared_expert.gate_proj": {
24 "quant_algo": "W4A16_NVFP4",
25 "group_size": 16
26 },
27 "model.language_model.layers.0.mlp.shared_expert.up_proj": {
28 "quant_algo": "W4A16_NVFP4",
29 "group_size": 16
30 },
31 "model.language_model.layers.0.mlp.shared_expert.down_proj": {
32 "quant_algo": "W4A16_NVFP4",
33 "group_size": 16
34 },
35 "model.language_model.layers.1.linear_attn.out_proj": {
36 "quant_algo": "FP8"
37 },
38 "model.language_model.layers.1.linear_attn.in_proj_qkv": {
39 "quant_algo": "FP8"
40 },
41 "model.language_model.layers.1.linear_attn.in_proj_z": {
42 "quant_algo": "FP8"
43 },
44 "model.language_model.layers.1.mlp.experts": {
45 "quant_algo": "W4A16_NVFP4",
46 "group_size": 16
47 },
48 "model.language_model.layers.1.mlp.shared_expert.gate_proj": {
49 "quant_algo": "W4A16_NVFP4",
50 "group_size": 16
51 },
52 "model.language_model.layers.1.mlp.shared_expert.up_proj": {
53 "quant_algo": "W4A16_NVFP4",
54 "group_size": 16
55 },
56 "model.language_model.layers.1.mlp.shared_expert.down_proj": {
57 "quant_algo": "W4A16_NVFP4",
58 "group_size": 16
59 },
60 "model.language_model.layers.2.linear_attn.out_proj": {
61 "quant_algo": "FP8"
62 },
63 "model.language_model.layers.2.linear_attn.in_proj_qkv": {
64 "quant_algo": "FP8"
65 },
66 "model.language_model.layers.2.linear_attn.in_proj_z": {
67 "quant_algo": "FP8"
68 },
69 "model.language_model.layers.2.mlp.experts": {
70 "quant_algo": "W4A16_NVFP4",
71 "group_size": 16
72 },
73 "model.language_model.layers.2.mlp.shared_expert.gate_proj": {
74 "quant_algo": "W4A16_NVFP4",
75 "group_size": 16
76 },
77 "model.language_model.layers.2.mlp.shared_expert.up_proj": {
78 "quant_algo": "W4A16_NVFP4",
79 "group_size": 16
80 },
81 "model.language_model.layers.2.mlp.shared_expert.down_proj": {
82 "quant_algo": "W4A16_NVFP4",
83 "group_size": 16
84 },
85 "model.language_model.layers.3.self_attn.q_proj": {
86 "quant_algo": "FP8"
87 },
88 "model.language_model.layers.3.self_attn.k_proj": {
89 "quant_algo": "FP8"
90 },
91 "model.language_model.layers.3.self_attn.v_proj": {
92 "quant_algo": "FP8"
93 },
94 "model.language_model.layers.3.self_attn.o_proj": {
95 "quant_algo": "FP8"
96 },
97 "model.language_model.layers.3.mlp.experts": {
98 "quant_algo": "W4A16_NVFP4",
99 "group_size": 16
100 },
101 "model.language_model.layers.3.mlp.shared_expert.gate_proj": {
102 "quant_algo": "W4A16_NVFP4",
103 "group_size": 16
104 },
105 "model.language_model.layers.3.mlp.shared_expert.up_proj": {
106 "quant_algo": "W4A16_NVFP4",
107 "group_size": 16
108 },
109 "model.language_model.layers.3.mlp.shared_expert.down_proj": {
110 "quant_algo": "W4A16_NVFP4",
111 "group_size": 16
112 },
113 "model.language_model.layers.4.linear_attn.out_proj": {
114 "quant_algo": "FP8"
115 },
116 "model.language_model.layers.4.linear_attn.in_proj_qkv": {
117 "quant_algo": "FP8"
118 },
119 "model.language_model.layers.4.linear_attn.in_proj_z": {
120 "quant_algo": "FP8"
121 },
122 "model.language_model.layers.4.mlp.experts": {
123 "quant_algo": "W4A16_NVFP4",
124 "group_size": 16
125 },
126 "model.language_model.layers.4.mlp.shared_expert.gate_proj": {
127 "quant_algo": "W4A16_NVFP4",
128 "group_size": 16
129 },
130 "model.language_model.layers.4.mlp.shared_expert.up_proj": {
131 "quant_algo": "W4A16_NVFP4",
132 "group_size": 16
133 },
134 "model.language_model.layers.4.mlp.shared_expert.down_proj": {
135 "quant_algo": "W4A16_NVFP4",
136 "group_size": 16
137 },
138 "model.language_model.layers.5.linear_attn.out_proj": {
139 "quant_algo": "FP8"
140 },
141 "model.language_model.layers.5.linear_attn.in_proj_qkv": {
142 "quant_algo": "FP8"
143 },
144 "model.language_model.layers.5.linear_attn.in_proj_z": {
145 "quant_algo": "FP8"
146 },
147 "model.language_model.layers.5.mlp.experts": {
148 "quant_algo": "W4A16_NVFP4",
149 "group_size": 16
150 },
151 "model.language_model.layers.5.mlp.shared_expert.gate_proj": {
152 "quant_algo": "W4A16_NVFP4",
153 "group_size": 16
154 },
155 "model.language_model.layers.5.mlp.shared_expert.up_proj": {
156 "quant_algo": "W4A16_NVFP4",
157 "group_size": 16
158 },
159 "model.language_model.layers.5.mlp.shared_expert.down_proj": {
160 "quant_algo": "W4A16_NVFP4",
161 "group_size": 16
162 },
163 "model.language_model.layers.6.linear_attn.out_proj": {
164 "quant_algo": "FP8"
165 },
166 "model.language_model.layers.6.linear_attn.in_proj_qkv": {
167 "quant_algo": "FP8"
168 },
169 "model.language_model.layers.6.linear_attn.in_proj_z": {
170 "quant_algo": "FP8"
171 },
172 "model.language_model.layers.6.mlp.experts": {
173 "quant_algo": "W4A16_NVFP4",
174 "group_size": 16
175 },
176 "model.language_model.layers.6.mlp.shared_expert.gate_proj": {
177 "quant_algo": "W4A16_NVFP4",
178 "group_size": 16
179 },
180 "model.language_model.layers.6.mlp.shared_expert.up_proj": {
181 "quant_algo": "W4A16_NVFP4",
182 "group_size": 16
183 },
184 "model.language_model.layers.6.mlp.shared_expert.down_proj": {
185 "quant_algo": "W4A16_NVFP4",
186 "group_size": 16
187 },
188 "model.language_model.layers.7.self_attn.q_proj": {
189 "quant_algo": "FP8"
190 },
191 "model.language_model.layers.7.self_attn.k_proj": {
192 "quant_algo": "FP8"
193 },
194 "model.language_model.layers.7.self_attn.v_proj": {
195 "quant_algo": "FP8"
196 },
197 "model.language_model.layers.7.self_attn.o_proj": {
198 "quant_algo": "FP8"
199 },
200 "model.language_model.layers.7.mlp.experts": {
201 "quant_algo": "W4A16_NVFP4",
202 "group_size": 16
203 },
204 "model.language_model.layers.7.mlp.shared_expert.gate_proj": {
205 "quant_algo": "W4A16_NVFP4",
206 "group_size": 16
207 },
208 "model.language_model.layers.7.mlp.shared_expert.up_proj": {
209 "quant_algo": "W4A16_NVFP4",
210 "group_size": 16
211 },
212 "model.language_model.layers.7.mlp.shared_expert.down_proj": {
213 "quant_algo": "W4A16_NVFP4",
214 "group_size": 16
215 },
216 "model.language_model.layers.8.linear_attn.out_proj": {
217 "quant_algo": "FP8"
218 },
219 "model.language_model.layers.8.linear_attn.in_proj_qkv": {
220 "quant_algo": "FP8"
221 },
222 "model.language_model.layers.8.linear_attn.in_proj_z": {
223 "quant_algo": "FP8"
224 },
225 "model.language_model.layers.8.mlp.experts": {
226 "quant_algo": "W4A16_NVFP4",
227 "group_size": 16
228 },
229 "model.language_model.layers.8.mlp.shared_expert.gate_proj": {
230 "quant_algo": "W4A16_NVFP4",
231 "group_size": 16
232 },
233 "model.language_model.layers.8.mlp.shared_expert.up_proj": {
234 "quant_algo": "W4A16_NVFP4",
235 "group_size": 16
236 },
237 "model.language_model.layers.8.mlp.shared_expert.down_proj": {
238 "quant_algo": "W4A16_NVFP4",
239 "group_size": 16
240 },
241 "model.language_model.layers.9.linear_attn.out_proj": {
242 "quant_algo": "FP8"
243 },
244 "model.language_model.layers.9.linear_attn.in_proj_qkv": {
245 "quant_algo": "FP8"
246 },
247 "model.language_model.layers.9.linear_attn.in_proj_z": {
248 "quant_algo": "FP8"
249 },
250 "model.language_model.layers.9.mlp.experts": {
251 "quant_algo": "W4A16_NVFP4",
252 "group_size": 16
253 },
254 "model.language_model.layers.9.mlp.shared_expert.gate_proj": {
255 "quant_algo": "W4A16_NVFP4",
256 "group_size": 16
257 },
258 "model.language_model.layers.9.mlp.shared_expert.up_proj": {
259 "quant_algo": "W4A16_NVFP4",
260 "group_size": 16
261 },
262 "model.language_model.layers.9.mlp.shared_expert.down_proj": {
263 "quant_algo": "W4A16_NVFP4",
264 "group_size": 16
265 },
266 "model.language_model.layers.10.linear_attn.out_proj": {
267 "quant_algo": "FP8"
268 },
269 "model.language_model.layers.10.linear_attn.in_proj_qkv": {
270 "quant_algo": "FP8"
271 },
272 "model.language_model.layers.10.linear_attn.in_proj_z": {
273 "quant_algo": "FP8"
274 },
275 "model.language_model.layers.10.mlp.experts": {
276 "quant_algo": "W4A16_NVFP4",
277 "group_size": 16
278 },
279 "model.language_model.layers.10.mlp.shared_expert.gate_proj": {
280 "quant_algo": "W4A16_NVFP4",
281 "group_size": 16
282 },
283 "model.language_model.layers.10.mlp.shared_expert.up_proj": {
284 "quant_algo": "W4A16_NVFP4",
285 "group_size": 16
286 },
287 "model.language_model.layers.10.mlp.shared_expert.down_proj": {
288 "quant_algo": "W4A16_NVFP4",
289 "group_size": 16
290 },
291 "model.language_model.layers.11.self_attn.q_proj": {
292 "quant_algo": "FP8"
293 },
294 "model.language_model.layers.11.self_attn.k_proj": {
295 "quant_algo": "FP8"
296 },
297 "model.language_model.layers.11.self_attn.v_proj": {
298 "quant_algo": "FP8"
299 },
300 "model.language_model.layers.11.self_attn.o_proj": {
301 "quant_algo": "FP8"
302 },
303 "model.language_model.layers.11.mlp.experts": {
304 "quant_algo": "W4A16_NVFP4",
305 "group_size": 16
306 },
307 "model.language_model.layers.11.mlp.shared_expert.gate_proj": {
308 "quant_algo": "W4A16_NVFP4",
309 "group_size": 16
310 },
311 "model.language_model.layers.11.mlp.shared_expert.up_proj": {
312 "quant_algo": "W4A16_NVFP4",
313 "group_size": 16
314 },
315 "model.language_model.layers.11.mlp.shared_expert.down_proj": {
316 "quant_algo": "W4A16_NVFP4",
317 "group_size": 16
318 },
319 "model.language_model.layers.12.linear_attn.out_proj": {
320 "quant_algo": "FP8"
321 },
322 "model.language_model.layers.12.linear_attn.in_proj_qkv": {
323 "quant_algo": "FP8"
324 },
325 "model.language_model.layers.12.linear_attn.in_proj_z": {
326 "quant_algo": "FP8"
327 },
328 "model.language_model.layers.12.mlp.experts": {
329 "quant_algo": "W4A16_NVFP4",
330 "group_size": 16
331 },
332 "model.language_model.layers.12.mlp.shared_expert.gate_proj": {
333 "quant_algo": "W4A16_NVFP4",
334 "group_size": 16
335 },
336 "model.language_model.layers.12.mlp.shared_expert.up_proj": {
337 "quant_algo": "W4A16_NVFP4",
338 "group_size": 16
339 },
340 "model.language_model.layers.12.mlp.shared_expert.down_proj": {
341 "quant_algo": "W4A16_NVFP4",
342 "group_size": 16
343 },
344 "model.language_model.layers.13.linear_attn.out_proj": {
345 "quant_algo": "FP8"
346 },
347 "model.language_model.layers.13.linear_attn.in_proj_qkv": {
348 "quant_algo": "FP8"
349 },
350 "model.language_model.layers.13.linear_attn.in_proj_z": {
351 "quant_algo": "FP8"
352 },
353 "model.language_model.layers.13.mlp.experts": {
354 "quant_algo": "W4A16_NVFP4",
355 "group_size": 16
356 },
357 "model.language_model.layers.13.mlp.shared_expert.gate_proj": {
358 "quant_algo": "W4A16_NVFP4",
359 "group_size": 16
360 },
361 "model.language_model.layers.13.mlp.shared_expert.up_proj": {
362 "quant_algo": "W4A16_NVFP4",
363 "group_size": 16
364 },
365 "model.language_model.layers.13.mlp.shared_expert.down_proj": {
366 "quant_algo": "W4A16_NVFP4",
367 "group_size": 16
368 },
369 "model.language_model.layers.14.linear_attn.out_proj": {
370 "quant_algo": "FP8"
371 },
372 "model.language_model.layers.14.linear_attn.in_proj_qkv": {
373 "quant_algo": "FP8"
374 },
375 "model.language_model.layers.14.linear_attn.in_proj_z": {
376 "quant_algo": "FP8"
377 },
378 "model.language_model.layers.14.mlp.experts": {
379 "quant_algo": "W4A16_NVFP4",
380 "group_size": 16
381 },
382 "model.language_model.layers.14.mlp.shared_expert.gate_proj": {
383 "quant_algo": "W4A16_NVFP4",
384 "group_size": 16
385 },
386 "model.language_model.layers.14.mlp.shared_expert.up_proj": {
387 "quant_algo": "W4A16_NVFP4",
388 "group_size": 16
389 },
390 "model.language_model.layers.14.mlp.shared_expert.down_proj": {
391 "quant_algo": "W4A16_NVFP4",
392 "group_size": 16
393 },
394 "model.language_model.layers.15.self_attn.q_proj": {
395 "quant_algo": "FP8"
396 },
397 "model.language_model.layers.15.self_attn.k_proj": {
398 "quant_algo": "FP8"
399 },
400 "model.language_model.layers.15.self_attn.v_proj": {
401 "quant_algo": "FP8"
402 },
403 "model.language_model.layers.15.self_attn.o_proj": {
404 "quant_algo": "FP8"
405 },
406 "model.language_model.layers.15.mlp.experts": {
407 "quant_algo": "W4A16_NVFP4",
408 "group_size": 16
409 },
410 "model.language_model.layers.15.mlp.shared_expert.gate_proj": {
411 "quant_algo": "W4A16_NVFP4",
412 "group_size": 16
413 },
414 "model.language_model.layers.15.mlp.shared_expert.up_proj": {
415 "quant_algo": "W4A16_NVFP4",
416 "group_size": 16
417 },
418 "model.language_model.layers.15.mlp.shared_expert.down_proj": {
419 "quant_algo": "W4A16_NVFP4",
420 "group_size": 16
421 },
422 "model.language_model.layers.16.linear_attn.out_proj": {
423 "quant_algo": "FP8"
424 },
425 "model.language_model.layers.16.linear_attn.in_proj_qkv": {
426 "quant_algo": "FP8"
427 },
428 "model.language_model.layers.16.linear_attn.in_proj_z": {
429 "quant_algo": "FP8"
430 },
431 "model.language_model.layers.16.mlp.experts": {
432 "quant_algo": "W4A16_NVFP4",
433 "group_size": 16
434 },
435 "model.language_model.layers.16.mlp.shared_expert.gate_proj": {
436 "quant_algo": "W4A16_NVFP4",
437 "group_size": 16
438 },
439 "model.language_model.layers.16.mlp.shared_expert.up_proj": {
440 "quant_algo": "W4A16_NVFP4",
441 "group_size": 16
442 },
443 "model.language_model.layers.16.mlp.shared_expert.down_proj": {
444 "quant_algo": "W4A16_NVFP4",
445 "group_size": 16
446 },
447 "model.language_model.layers.17.linear_attn.out_proj": {
448 "quant_algo": "FP8"
449 },
450 "model.language_model.layers.17.linear_attn.in_proj_qkv": {
451 "quant_algo": "FP8"
452 },
453 "model.language_model.layers.17.linear_attn.in_proj_z": {
454 "quant_algo": "FP8"
455 },
456 "model.language_model.layers.17.mlp.experts": {
457 "quant_algo": "W4A16_NVFP4",
458 "group_size": 16
459 },
460 "model.language_model.layers.17.mlp.shared_expert.gate_proj": {
461 "quant_algo": "W4A16_NVFP4",
462 "group_size": 16
463 },
464 "model.language_model.layers.17.mlp.shared_expert.up_proj": {
465 "quant_algo": "W4A16_NVFP4",
466 "group_size": 16
467 },
468 "model.language_model.layers.17.mlp.shared_expert.down_proj": {
469 "quant_algo": "W4A16_NVFP4",
470 "group_size": 16
471 },
472 "model.language_model.layers.18.linear_attn.out_proj": {
473 "quant_algo": "FP8"
474 },
475 "model.language_model.layers.18.linear_attn.in_proj_qkv": {
476 "quant_algo": "FP8"
477 },
478 "model.language_model.layers.18.linear_attn.in_proj_z": {
479 "quant_algo": "FP8"
480 },
481 "model.language_model.layers.18.mlp.experts": {
482 "quant_algo": "W4A16_NVFP4",
483 "group_size": 16
484 },
485 "model.language_model.layers.18.mlp.shared_expert.gate_proj": {
486 "quant_algo": "W4A16_NVFP4",
487 "group_size": 16
488 },
489 "model.language_model.layers.18.mlp.shared_expert.up_proj": {
490 "quant_algo": "W4A16_NVFP4",
491 "group_size": 16
492 },
493 "model.language_model.layers.18.mlp.shared_expert.down_proj": {
494 "quant_algo": "W4A16_NVFP4",
495 "group_size": 16
496 },
497 "model.language_model.layers.19.self_attn.q_proj": {
498 "quant_algo": "FP8"
499 },
500 "model.language_model.layers.19.self_attn.k_proj": {
501 "quant_algo": "FP8"
502 },
503 "model.language_model.layers.19.self_attn.v_proj": {
504 "quant_algo": "FP8"
505 },
506 "model.language_model.layers.19.self_attn.o_proj": {
507 "quant_algo": "FP8"
508 },
509 "model.language_model.layers.19.mlp.experts": {
510 "quant_algo": "W4A16_NVFP4",
511 "group_size": 16
512 },
513 "model.language_model.layers.19.mlp.shared_expert.gate_proj": {
514 "quant_algo": "W4A16_NVFP4",
515 "group_size": 16
516 },
517 "model.language_model.layers.19.mlp.shared_expert.up_proj": {
518 "quant_algo": "W4A16_NVFP4",
519 "group_size": 16
520 },
521 "model.language_model.layers.19.mlp.shared_expert.down_proj": {
522 "quant_algo": "W4A16_NVFP4",
523 "group_size": 16
524 },
525 "model.language_model.layers.20.linear_attn.out_proj": {
526 "quant_algo": "FP8"
527 },
528 "model.language_model.layers.20.linear_attn.in_proj_qkv": {
529 "quant_algo": "FP8"
530 },
531 "model.language_model.layers.20.linear_attn.in_proj_z": {
532 "quant_algo": "FP8"
533 },
534 "model.language_model.layers.20.mlp.experts": {
535 "quant_algo": "W4A16_NVFP4",
536 "group_size": 16
537 },
538 "model.language_model.layers.20.mlp.shared_expert.gate_proj": {
539 "quant_algo": "W4A16_NVFP4",
540 "group_size": 16
541 },
542 "model.language_model.layers.20.mlp.shared_expert.up_proj": {
543 "quant_algo": "W4A16_NVFP4",
544 "group_size": 16
545 },
546 "model.language_model.layers.20.mlp.shared_expert.down_proj": {
547 "quant_algo": "W4A16_NVFP4",
548 "group_size": 16
549 },
550 "model.language_model.layers.21.linear_attn.out_proj": {
551 "quant_algo": "FP8"
552 },
553 "model.language_model.layers.21.linear_attn.in_proj_qkv": {
554 "quant_algo": "FP8"
555 },
556 "model.language_model.layers.21.linear_attn.in_proj_z": {
557 "quant_algo": "FP8"
558 },
559 "model.language_model.layers.21.mlp.experts": {
560 "quant_algo": "W4A16_NVFP4",
561 "group_size": 16
562 },
563 "model.language_model.layers.21.mlp.shared_expert.gate_proj": {
564 "quant_algo": "W4A16_NVFP4",
565 "group_size": 16
566 },
567 "model.language_model.layers.21.mlp.shared_expert.up_proj": {
568 "quant_algo": "W4A16_NVFP4",
569 "group_size": 16
570 },
571 "model.language_model.layers.21.mlp.shared_expert.down_proj": {
572 "quant_algo": "W4A16_NVFP4",
573 "group_size": 16
574 },
575 "model.language_model.layers.22.linear_attn.out_proj": {
576 "quant_algo": "FP8"
577 },
578 "model.language_model.layers.22.linear_attn.in_proj_qkv": {
579 "quant_algo": "FP8"
580 },
581 "model.language_model.layers.22.linear_attn.in_proj_z": {
582 "quant_algo": "FP8"
583 },
584 "model.language_model.layers.22.mlp.experts": {
585 "quant_algo": "W4A16_NVFP4",
586 "group_size": 16
587 },
588 "model.language_model.layers.22.mlp.shared_expert.gate_proj": {
589 "quant_algo": "W4A16_NVFP4",
590 "group_size": 16
591 },
592 "model.language_model.layers.22.mlp.shared_expert.up_proj": {
593 "quant_algo": "W4A16_NVFP4",
594 "group_size": 16
595 },
596 "model.language_model.layers.22.mlp.shared_expert.down_proj": {
597 "quant_algo": "W4A16_NVFP4",
598 "group_size": 16
599 },
600 "model.language_model.layers.23.self_attn.q_proj": {
601 "quant_algo": "FP8"
602 },
603 "model.language_model.layers.23.self_attn.k_proj": {
604 "quant_algo": "FP8"
605 },
606 "model.language_model.layers.23.self_attn.v_proj": {
607 "quant_algo": "FP8"
608 },
609 "model.language_model.layers.23.self_attn.o_proj": {
610 "quant_algo": "FP8"
611 },
612 "model.language_model.layers.23.mlp.experts": {
613 "quant_algo": "W4A16_NVFP4",
614 "group_size": 16
615 },
616 "model.language_model.layers.23.mlp.shared_expert.gate_proj": {
617 "quant_algo": "W4A16_NVFP4",
618 "group_size": 16
619 },
620 "model.language_model.layers.23.mlp.shared_expert.up_proj": {
621 "quant_algo": "W4A16_NVFP4",
622 "group_size": 16
623 },
624 "model.language_model.layers.23.mlp.shared_expert.down_proj": {
625 "quant_algo": "W4A16_NVFP4",
626 "group_size": 16
627 },
628 "model.language_model.layers.24.linear_attn.out_proj": {
629 "quant_algo": "FP8"
630 },
631 "model.language_model.layers.24.linear_attn.in_proj_qkv": {
632 "quant_algo": "FP8"
633 },
634 "model.language_model.layers.24.linear_attn.in_proj_z": {
635 "quant_algo": "FP8"
636 },
637 "model.language_model.layers.24.mlp.experts": {
638 "quant_algo": "W4A16_NVFP4",
639 "group_size": 16
640 },
641 "model.language_model.layers.24.mlp.shared_expert.gate_proj": {
642 "quant_algo": "W4A16_NVFP4",
643 "group_size": 16
644 },
645 "model.language_model.layers.24.mlp.shared_expert.up_proj": {
646 "quant_algo": "W4A16_NVFP4",
647 "group_size": 16
648 },
649 "model.language_model.layers.24.mlp.shared_expert.down_proj": {
650 "quant_algo": "W4A16_NVFP4",
651 "group_size": 16
652 },
653 "model.language_model.layers.25.linear_attn.out_proj": {
654 "quant_algo": "FP8"
655 },
656 "model.language_model.layers.25.linear_attn.in_proj_qkv": {
657 "quant_algo": "FP8"
658 },
659 "model.language_model.layers.25.linear_attn.in_proj_z": {
660 "quant_algo": "FP8"
661 },
662 "model.language_model.layers.25.mlp.experts": {
663 "quant_algo": "W4A16_NVFP4",
664 "group_size": 16
665 },
666 "model.language_model.layers.25.mlp.shared_expert.gate_proj": {
667 "quant_algo": "W4A16_NVFP4",
668 "group_size": 16
669 },
670 "model.language_model.layers.25.mlp.shared_expert.up_proj": {
671 "quant_algo": "W4A16_NVFP4",
672 "group_size": 16
673 },
674 "model.language_model.layers.25.mlp.shared_expert.down_proj": {
675 "quant_algo": "W4A16_NVFP4",
676 "group_size": 16
677 },
678 "model.language_model.layers.26.linear_attn.out_proj": {
679 "quant_algo": "FP8"
680 },
681 "model.language_model.layers.26.linear_attn.in_proj_qkv": {
682 "quant_algo": "FP8"
683 },
684 "model.language_model.layers.26.linear_attn.in_proj_z": {
685 "quant_algo": "FP8"
686 },
687 "model.language_model.layers.26.mlp.experts": {
688 "quant_algo": "W4A16_NVFP4",
689 "group_size": 16
690 },
691 "model.language_model.layers.26.mlp.shared_expert.gate_proj": {
692 "quant_algo": "W4A16_NVFP4",
693 "group_size": 16
694 },
695 "model.language_model.layers.26.mlp.shared_expert.up_proj": {
696 "quant_algo": "W4A16_NVFP4",
697 "group_size": 16
698 },
699 "model.language_model.layers.26.mlp.shared_expert.down_proj": {
700 "quant_algo": "W4A16_NVFP4",
701 "group_size": 16
702 },
703 "model.language_model.layers.27.self_attn.q_proj": {
704 "quant_algo": "FP8"
705 },
706 "model.language_model.layers.27.self_attn.k_proj": {
707 "quant_algo": "FP8"
708 },
709 "model.language_model.layers.27.self_attn.v_proj": {
710 "quant_algo": "FP8"
711 },
712 "model.language_model.layers.27.self_attn.o_proj": {
713 "quant_algo": "FP8"
714 },
715 "model.language_model.layers.27.mlp.experts": {
716 "quant_algo": "W4A16_NVFP4",
717 "group_size": 16
718 },
719 "model.language_model.layers.27.mlp.shared_expert.gate_proj": {
720 "quant_algo": "W4A16_NVFP4",
721 "group_size": 16
722 },
723 "model.language_model.layers.27.mlp.shared_expert.up_proj": {
724 "quant_algo": "W4A16_NVFP4",
725 "group_size": 16
726 },
727 "model.language_model.layers.27.mlp.shared_expert.down_proj": {
728 "quant_algo": "W4A16_NVFP4",
729 "group_size": 16
730 },
731 "model.language_model.layers.28.linear_attn.out_proj": {
732 "quant_algo": "FP8"
733 },
734 "model.language_model.layers.28.linear_attn.in_proj_qkv": {
735 "quant_algo": "FP8"
736 },
737 "model.language_model.layers.28.linear_attn.in_proj_z": {
738 "quant_algo": "FP8"
739 },
740 "model.language_model.layers.28.mlp.experts": {
741 "quant_algo": "W4A16_NVFP4",
742 "group_size": 16
743 },
744 "model.language_model.layers.28.mlp.shared_expert.gate_proj": {
745 "quant_algo": "W4A16_NVFP4",
746 "group_size": 16
747 },
748 "model.language_model.layers.28.mlp.shared_expert.up_proj": {
749 "quant_algo": "W4A16_NVFP4",
750 "group_size": 16
751 },
752 "model.language_model.layers.28.mlp.shared_expert.down_proj": {
753 "quant_algo": "W4A16_NVFP4",
754 "group_size": 16
755 },
756 "model.language_model.layers.29.linear_attn.out_proj": {
757 "quant_algo": "FP8"
758 },
759 "model.language_model.layers.29.linear_attn.in_proj_qkv": {
760 "quant_algo": "FP8"
761 },
762 "model.language_model.layers.29.linear_attn.in_proj_z": {
763 "quant_algo": "FP8"
764 },
765 "model.language_model.layers.29.mlp.experts": {
766 "quant_algo": "W4A16_NVFP4",
767 "group_size": 16
768 },
769 "model.language_model.layers.29.mlp.shared_expert.gate_proj": {
770 "quant_algo": "W4A16_NVFP4",
771 "group_size": 16
772 },
773 "model.language_model.layers.29.mlp.shared_expert.up_proj": {
774 "quant_algo": "W4A16_NVFP4",
775 "group_size": 16
776 },
777 "model.language_model.layers.29.mlp.shared_expert.down_proj": {
778 "quant_algo": "W4A16_NVFP4",
779 "group_size": 16
780 },
781 "model.language_model.layers.30.linear_attn.out_proj": {
782 "quant_algo": "FP8"
783 },
784 "model.language_model.layers.30.linear_attn.in_proj_qkv": {
785 "quant_algo": "FP8"
786 },
787 "model.language_model.layers.30.linear_attn.in_proj_z": {
788 "quant_algo": "FP8"
789 },
790 "model.language_model.layers.30.mlp.experts": {
791 "quant_algo": "W4A16_NVFP4",
792 "group_size": 16
793 },
794 "model.language_model.layers.30.mlp.shared_expert.gate_proj": {
795 "quant_algo": "W4A16_NVFP4",
796 "group_size": 16
797 },
798 "model.language_model.layers.30.mlp.shared_expert.up_proj": {
799 "quant_algo": "W4A16_NVFP4",
800 "group_size": 16
801 },
802 "model.language_model.layers.30.mlp.shared_expert.down_proj": {
803 "quant_algo": "W4A16_NVFP4",
804 "group_size": 16
805 },
806 "model.language_model.layers.31.self_attn.q_proj": {
807 "quant_algo": "FP8"
808 },
809 "model.language_model.layers.31.self_attn.k_proj": {
810 "quant_algo": "FP8"
811 },
812 "model.language_model.layers.31.self_attn.v_proj": {
813 "quant_algo": "FP8"
814 },
815 "model.language_model.layers.31.self_attn.o_proj": {
816 "quant_algo": "FP8"
817 },
818 "model.language_model.layers.31.mlp.experts": {
819 "quant_algo": "W4A16_NVFP4",
820 "group_size": 16
821 },
822 "model.language_model.layers.31.mlp.shared_expert.gate_proj": {
823 "quant_algo": "W4A16_NVFP4",
824 "group_size": 16
825 },
826 "model.language_model.layers.31.mlp.shared_expert.up_proj": {
827 "quant_algo": "W4A16_NVFP4",
828 "group_size": 16
829 },
830 "model.language_model.layers.31.mlp.shared_expert.down_proj": {
831 "quant_algo": "W4A16_NVFP4",
832 "group_size": 16
833 },
834 "model.language_model.layers.32.linear_attn.out_proj": {
835 "quant_algo": "FP8"
836 },
837 "model.language_model.layers.32.linear_attn.in_proj_qkv": {
838 "quant_algo": "FP8"
839 },
840 "model.language_model.layers.32.linear_attn.in_proj_z": {
841 "quant_algo": "FP8"
842 },
843 "model.language_model.layers.32.mlp.experts": {
844 "quant_algo": "W4A16_NVFP4",
845 "group_size": 16
846 },
847 "model.language_model.layers.32.mlp.shared_expert.gate_proj": {
848 "quant_algo": "W4A16_NVFP4",
849 "group_size": 16
850 },
851 "model.language_model.layers.32.mlp.shared_expert.up_proj": {
852 "quant_algo": "W4A16_NVFP4",
853 "group_size": 16
854 },
855 "model.language_model.layers.32.mlp.shared_expert.down_proj": {
856 "quant_algo": "W4A16_NVFP4",
857 "group_size": 16
858 },
859 "model.language_model.layers.33.linear_attn.out_proj": {
860 "quant_algo": "FP8"
861 },
862 "model.language_model.layers.33.linear_attn.in_proj_qkv": {
863 "quant_algo": "FP8"
864 },
865 "model.language_model.layers.33.linear_attn.in_proj_z": {
866 "quant_algo": "FP8"
867 },
868 "model.language_model.layers.33.mlp.experts": {
869 "quant_algo": "W4A16_NVFP4",
870 "group_size": 16
871 },
872 "model.language_model.layers.33.mlp.shared_expert.gate_proj": {
873 "quant_algo": "W4A16_NVFP4",
874 "group_size": 16
875 },
876 "model.language_model.layers.33.mlp.shared_expert.up_proj": {
877 "quant_algo": "W4A16_NVFP4",
878 "group_size": 16
879 },
880 "model.language_model.layers.33.mlp.shared_expert.down_proj": {
881 "quant_algo": "W4A16_NVFP4",
882 "group_size": 16
883 },
884 "model.language_model.layers.34.linear_attn.out_proj": {
885 "quant_algo": "FP8"
886 },
887 "model.language_model.layers.34.linear_attn.in_proj_qkv": {
888 "quant_algo": "FP8"
889 },
890 "model.language_model.layers.34.linear_attn.in_proj_z": {
891 "quant_algo": "FP8"
892 },
893 "model.language_model.layers.34.mlp.experts": {
894 "quant_algo": "W4A16_NVFP4",
895 "group_size": 16
896 },
897 "model.language_model.layers.34.mlp.shared_expert.gate_proj": {
898 "quant_algo": "W4A16_NVFP4",
899 "group_size": 16
900 },
901 "model.language_model.layers.34.mlp.shared_expert.up_proj": {
902 "quant_algo": "W4A16_NVFP4",
903 "group_size": 16
904 },
905 "model.language_model.layers.34.mlp.shared_expert.down_proj": {
906 "quant_algo": "W4A16_NVFP4",
907 "group_size": 16
908 },
909 "model.language_model.layers.35.self_attn.q_proj": {
910 "quant_algo": "FP8"
911 },
912 "model.language_model.layers.35.self_attn.k_proj": {
913 "quant_algo": "FP8"
914 },
915 "model.language_model.layers.35.self_attn.v_proj": {
916 "quant_algo": "FP8"
917 },
918 "model.language_model.layers.35.self_attn.o_proj": {
919 "quant_algo": "FP8"
920 },
921 "model.language_model.layers.35.mlp.experts": {
922 "quant_algo": "W4A16_NVFP4",
923 "group_size": 16
924 },
925 "model.language_model.layers.35.mlp.shared_expert.gate_proj": {
926 "quant_algo": "W4A16_NVFP4",
927 "group_size": 16
928 },
929 "model.language_model.layers.35.mlp.shared_expert.up_proj": {
930 "quant_algo": "W4A16_NVFP4",
931 "group_size": 16
932 },
933 "model.language_model.layers.35.mlp.shared_expert.down_proj": {
934 "quant_algo": "W4A16_NVFP4",
935 "group_size": 16
936 },
937 "model.language_model.layers.36.linear_attn.out_proj": {
938 "quant_algo": "FP8"
939 },
940 "model.language_model.layers.36.linear_attn.in_proj_qkv": {
941 "quant_algo": "FP8"
942 },
943 "model.language_model.layers.36.linear_attn.in_proj_z": {
944 "quant_algo": "FP8"
945 },
946 "model.language_model.layers.36.mlp.experts": {
947 "quant_algo": "W4A16_NVFP4",
948 "group_size": 16
949 },
950 "model.language_model.layers.36.mlp.shared_expert.gate_proj": {
951 "quant_algo": "W4A16_NVFP4",
952 "group_size": 16
953 },
954 "model.language_model.layers.36.mlp.shared_expert.up_proj": {
955 "quant_algo": "W4A16_NVFP4",
956 "group_size": 16
957 },
958 "model.language_model.layers.36.mlp.shared_expert.down_proj": {
959 "quant_algo": "W4A16_NVFP4",
960 "group_size": 16
961 },
962 "model.language_model.layers.37.linear_attn.out_proj": {
963 "quant_algo": "FP8"
964 },
965 "model.language_model.layers.37.linear_attn.in_proj_qkv": {
966 "quant_algo": "FP8"
967 },
968 "model.language_model.layers.37.linear_attn.in_proj_z": {
969 "quant_algo": "FP8"
970 },
971 "model.language_model.layers.37.mlp.experts": {
972 "quant_algo": "W4A16_NVFP4",
973 "group_size": 16
974 },
975 "model.language_model.layers.37.mlp.shared_expert.gate_proj": {
976 "quant_algo": "W4A16_NVFP4",
977 "group_size": 16
978 },
979 "model.language_model.layers.37.mlp.shared_expert.up_proj": {
980 "quant_algo": "W4A16_NVFP4",
981 "group_size": 16
982 },
983 "model.language_model.layers.37.mlp.shared_expert.down_proj": {
984 "quant_algo": "W4A16_NVFP4",
985 "group_size": 16
986 },
987 "model.language_model.layers.38.linear_attn.out_proj": {
988 "quant_algo": "FP8"
989 },
990 "model.language_model.layers.38.linear_attn.in_proj_qkv": {
991 "quant_algo": "FP8"
992 },
993 "model.language_model.layers.38.linear_attn.in_proj_z": {
994 "quant_algo": "FP8"
995 },
996 "model.language_model.layers.38.mlp.experts": {
997 "quant_algo": "W4A16_NVFP4",
998 "group_size": 16
999 },
1000 "model.language_model.layers.38.mlp.shared_expert.gate_proj": {
1001 "quant_algo": "W4A16_NVFP4",
1002 "group_size": 16
1003 },
1004 "model.language_model.layers.38.mlp.shared_expert.up_proj": {
1005 "quant_algo": "W4A16_NVFP4",
1006 "group_size": 16
1007 },
1008 "model.language_model.layers.38.mlp.shared_expert.down_proj": {
1009 "quant_algo": "W4A16_NVFP4",
1010 "group_size": 16
1011 },
1012 "model.language_model.layers.39.self_attn.q_proj": {
1013 "quant_algo": "FP8"
1014 },
1015 "model.language_model.layers.39.self_attn.k_proj": {
1016 "quant_algo": "FP8"
1017 },
1018 "model.language_model.layers.39.self_attn.v_proj": {
1019 "quant_algo": "FP8"
1020 },
1021 "model.language_model.layers.39.self_attn.o_proj": {
1022 "quant_algo": "FP8"
1023 },
1024 "model.language_model.layers.39.mlp.experts": {
1025 "quant_algo": "W4A16_NVFP4",
1026 "group_size": 16
1027 },
1028 "model.language_model.layers.39.mlp.shared_expert.gate_proj": {
1029 "quant_algo": "W4A16_NVFP4",
1030 "group_size": 16
1031 },
1032 "model.language_model.layers.39.mlp.shared_expert.up_proj": {
1033 "quant_algo": "W4A16_NVFP4",
1034 "group_size": 16
1035 },
1036 "model.language_model.layers.39.mlp.shared_expert.down_proj": {
1037 "quant_algo": "W4A16_NVFP4",
1038 "group_size": 16
1039 },
1040 "lm_head": {
1041 "quant_algo": "W4A16_NVFP4",
1042 "group_size": 16
1043 }
1044 },
1045 "exclude_modules": [
1046 "mtp.layers.0*",
1047 "mtp*"
1048 ]
1049 }
1050 }
1051