config.json
56.7 KB · 1493 lines · json Raw
1 {
2 "architectures": [
3 "Qwen3_5MoeForConditionalGeneration"
4 ],
5 "dtype": "bfloat16",
6 "image_token_id": 248056,
7 "model_type": "qwen3_5_moe",
8 "text_config": {
9 "attention_bias": false,
10 "attention_dropout": 0.0,
11 "attn_output_gate": true,
12 "bos_token_id": 248044,
13 "dtype": "bfloat16",
14 "eos_token_id": 248044,
15 "full_attention_interval": 4,
16 "head_dim": 256,
17 "hidden_act": "silu",
18 "hidden_size": 2048,
19 "initializer_range": 0.02,
20 "layer_types": [
21 "linear_attention",
22 "linear_attention",
23 "linear_attention",
24 "full_attention",
25 "linear_attention",
26 "linear_attention",
27 "linear_attention",
28 "full_attention",
29 "linear_attention",
30 "linear_attention",
31 "linear_attention",
32 "full_attention",
33 "linear_attention",
34 "linear_attention",
35 "linear_attention",
36 "full_attention",
37 "linear_attention",
38 "linear_attention",
39 "linear_attention",
40 "full_attention",
41 "linear_attention",
42 "linear_attention",
43 "linear_attention",
44 "full_attention",
45 "linear_attention",
46 "linear_attention",
47 "linear_attention",
48 "full_attention",
49 "linear_attention",
50 "linear_attention",
51 "linear_attention",
52 "full_attention",
53 "linear_attention",
54 "linear_attention",
55 "linear_attention",
56 "full_attention",
57 "linear_attention",
58 "linear_attention",
59 "linear_attention",
60 "full_attention"
61 ],
62 "linear_conv_kernel_dim": 4,
63 "linear_key_head_dim": 128,
64 "linear_num_key_heads": 16,
65 "linear_num_value_heads": 32,
66 "linear_value_head_dim": 128,
67 "mamba_ssm_dtype": "float32",
68 "max_position_embeddings": 262144,
69 "model_type": "qwen3_5_moe_text",
70 "moe_intermediate_size": 512,
71 "mtp_num_hidden_layers": 1,
72 "mtp_use_dedicated_embeddings": false,
73 "num_attention_heads": 16,
74 "num_experts": 256,
75 "num_experts_per_tok": 8,
76 "num_hidden_layers": 40,
77 "num_key_value_heads": 2,
78 "output_router_logits": false,
79 "pad_token_id": null,
80 "partial_rotary_factor": 0.25,
81 "rms_norm_eps": 1e-06,
82 "rope_parameters": {
83 "mrope_interleaved": true,
84 "mrope_section": [
85 11,
86 11,
87 10
88 ],
89 "partial_rotary_factor": 0.25,
90 "rope_theta": 10000000,
91 "rope_type": "default"
92 },
93 "router_aux_loss_coef": 0.001,
94 "shared_expert_intermediate_size": 512,
95 "tie_word_embeddings": false,
96 "use_cache": true,
97 "vocab_size": 248320
98 },
99 "tie_word_embeddings": false,
100 "transformers_version": "5.7.0.dev0",
101 "video_token_id": 248057,
102 "vision_config": {
103 "deepstack_visual_indexes": [],
104 "depth": 27,
105 "dtype": "bfloat16",
106 "hidden_act": "gelu_pytorch_tanh",
107 "hidden_size": 1152,
108 "in_channels": 3,
109 "initializer_range": 0.02,
110 "intermediate_size": 4304,
111 "model_type": "qwen3_5_moe_vision",
112 "num_heads": 16,
113 "num_position_embeddings": 2304,
114 "out_hidden_size": 2048,
115 "patch_size": 16,
116 "spatial_merge_size": 2,
117 "temporal_patch_size": 2
118 },
119 "vision_end_token_id": 248054,
120 "vision_start_token_id": 248053,
121 "quantization_config": {
122 "config_groups": {
123 "group_0": {
124 "input_activations": {
125 "dynamic": false,
126 "num_bits": 8,
127 "type": "float"
128 },
129 "weights": {
130 "dynamic": false,
131 "num_bits": 8,
132 "type": "float"
133 },
134 "targets": [
135 "model.language_model.layers.0.linear_attn.in_proj_qkv",
136 "model.language_model.layers.0.linear_attn.in_proj_z",
137 "model.language_model.layers.0.linear_attn.out_proj",
138 "model.language_model.layers.1.linear_attn.in_proj_qkv",
139 "model.language_model.layers.1.linear_attn.in_proj_z",
140 "model.language_model.layers.1.linear_attn.out_proj",
141 "model.language_model.layers.10.linear_attn.in_proj_qkv",
142 "model.language_model.layers.10.linear_attn.in_proj_z",
143 "model.language_model.layers.10.linear_attn.out_proj",
144 "model.language_model.layers.11.self_attn.k_proj",
145 "model.language_model.layers.11.self_attn.o_proj",
146 "model.language_model.layers.11.self_attn.q_proj",
147 "model.language_model.layers.11.self_attn.v_proj",
148 "model.language_model.layers.12.linear_attn.in_proj_qkv",
149 "model.language_model.layers.12.linear_attn.in_proj_z",
150 "model.language_model.layers.12.linear_attn.out_proj",
151 "model.language_model.layers.13.linear_attn.in_proj_qkv",
152 "model.language_model.layers.13.linear_attn.in_proj_z",
153 "model.language_model.layers.13.linear_attn.out_proj",
154 "model.language_model.layers.14.linear_attn.in_proj_qkv",
155 "model.language_model.layers.14.linear_attn.in_proj_z",
156 "model.language_model.layers.14.linear_attn.out_proj",
157 "model.language_model.layers.15.self_attn.k_proj",
158 "model.language_model.layers.15.self_attn.o_proj",
159 "model.language_model.layers.15.self_attn.q_proj",
160 "model.language_model.layers.15.self_attn.v_proj",
161 "model.language_model.layers.16.linear_attn.in_proj_qkv",
162 "model.language_model.layers.16.linear_attn.in_proj_z",
163 "model.language_model.layers.16.linear_attn.out_proj",
164 "model.language_model.layers.17.linear_attn.in_proj_qkv",
165 "model.language_model.layers.17.linear_attn.in_proj_z",
166 "model.language_model.layers.17.linear_attn.out_proj",
167 "model.language_model.layers.18.linear_attn.in_proj_qkv",
168 "model.language_model.layers.18.linear_attn.in_proj_z",
169 "model.language_model.layers.18.linear_attn.out_proj",
170 "model.language_model.layers.19.self_attn.k_proj",
171 "model.language_model.layers.19.self_attn.o_proj",
172 "model.language_model.layers.19.self_attn.q_proj",
173 "model.language_model.layers.19.self_attn.v_proj",
174 "model.language_model.layers.2.linear_attn.in_proj_qkv",
175 "model.language_model.layers.2.linear_attn.in_proj_z",
176 "model.language_model.layers.2.linear_attn.out_proj",
177 "model.language_model.layers.20.linear_attn.in_proj_qkv",
178 "model.language_model.layers.20.linear_attn.in_proj_z",
179 "model.language_model.layers.20.linear_attn.out_proj",
180 "model.language_model.layers.21.linear_attn.in_proj_qkv",
181 "model.language_model.layers.21.linear_attn.in_proj_z",
182 "model.language_model.layers.21.linear_attn.out_proj",
183 "model.language_model.layers.22.linear_attn.in_proj_qkv",
184 "model.language_model.layers.22.linear_attn.in_proj_z",
185 "model.language_model.layers.22.linear_attn.out_proj",
186 "model.language_model.layers.23.self_attn.k_proj",
187 "model.language_model.layers.23.self_attn.o_proj",
188 "model.language_model.layers.23.self_attn.q_proj",
189 "model.language_model.layers.23.self_attn.v_proj",
190 "model.language_model.layers.24.linear_attn.in_proj_qkv",
191 "model.language_model.layers.24.linear_attn.in_proj_z",
192 "model.language_model.layers.24.linear_attn.out_proj",
193 "model.language_model.layers.25.linear_attn.in_proj_qkv",
194 "model.language_model.layers.25.linear_attn.in_proj_z",
195 "model.language_model.layers.25.linear_attn.out_proj",
196 "model.language_model.layers.26.linear_attn.in_proj_qkv",
197 "model.language_model.layers.26.linear_attn.in_proj_z",
198 "model.language_model.layers.26.linear_attn.out_proj",
199 "model.language_model.layers.27.self_attn.k_proj",
200 "model.language_model.layers.27.self_attn.o_proj",
201 "model.language_model.layers.27.self_attn.q_proj",
202 "model.language_model.layers.27.self_attn.v_proj",
203 "model.language_model.layers.28.linear_attn.in_proj_qkv",
204 "model.language_model.layers.28.linear_attn.in_proj_z",
205 "model.language_model.layers.28.linear_attn.out_proj",
206 "model.language_model.layers.29.linear_attn.in_proj_qkv",
207 "model.language_model.layers.29.linear_attn.in_proj_z",
208 "model.language_model.layers.29.linear_attn.out_proj",
209 "model.language_model.layers.3.self_attn.k_proj",
210 "model.language_model.layers.3.self_attn.o_proj",
211 "model.language_model.layers.3.self_attn.q_proj",
212 "model.language_model.layers.3.self_attn.v_proj",
213 "model.language_model.layers.30.linear_attn.in_proj_qkv",
214 "model.language_model.layers.30.linear_attn.in_proj_z",
215 "model.language_model.layers.30.linear_attn.out_proj",
216 "model.language_model.layers.31.self_attn.k_proj",
217 "model.language_model.layers.31.self_attn.o_proj",
218 "model.language_model.layers.31.self_attn.q_proj",
219 "model.language_model.layers.31.self_attn.v_proj",
220 "model.language_model.layers.32.linear_attn.in_proj_qkv",
221 "model.language_model.layers.32.linear_attn.in_proj_z",
222 "model.language_model.layers.32.linear_attn.out_proj",
223 "model.language_model.layers.33.linear_attn.in_proj_qkv",
224 "model.language_model.layers.33.linear_attn.in_proj_z",
225 "model.language_model.layers.33.linear_attn.out_proj",
226 "model.language_model.layers.34.linear_attn.in_proj_qkv",
227 "model.language_model.layers.34.linear_attn.in_proj_z",
228 "model.language_model.layers.34.linear_attn.out_proj",
229 "model.language_model.layers.35.self_attn.k_proj",
230 "model.language_model.layers.35.self_attn.o_proj",
231 "model.language_model.layers.35.self_attn.q_proj",
232 "model.language_model.layers.35.self_attn.v_proj",
233 "model.language_model.layers.36.linear_attn.in_proj_qkv",
234 "model.language_model.layers.36.linear_attn.in_proj_z",
235 "model.language_model.layers.36.linear_attn.out_proj",
236 "model.language_model.layers.37.linear_attn.in_proj_qkv",
237 "model.language_model.layers.37.linear_attn.in_proj_z",
238 "model.language_model.layers.37.linear_attn.out_proj",
239 "model.language_model.layers.38.linear_attn.in_proj_qkv",
240 "model.language_model.layers.38.linear_attn.in_proj_z",
241 "model.language_model.layers.38.linear_attn.out_proj",
242 "model.language_model.layers.39.self_attn.k_proj",
243 "model.language_model.layers.39.self_attn.o_proj",
244 "model.language_model.layers.39.self_attn.q_proj",
245 "model.language_model.layers.39.self_attn.v_proj",
246 "model.language_model.layers.4.linear_attn.in_proj_qkv",
247 "model.language_model.layers.4.linear_attn.in_proj_z",
248 "model.language_model.layers.4.linear_attn.out_proj",
249 "model.language_model.layers.5.linear_attn.in_proj_qkv",
250 "model.language_model.layers.5.linear_attn.in_proj_z",
251 "model.language_model.layers.5.linear_attn.out_proj",
252 "model.language_model.layers.6.linear_attn.in_proj_qkv",
253 "model.language_model.layers.6.linear_attn.in_proj_z",
254 "model.language_model.layers.6.linear_attn.out_proj",
255 "model.language_model.layers.7.self_attn.k_proj",
256 "model.language_model.layers.7.self_attn.o_proj",
257 "model.language_model.layers.7.self_attn.q_proj",
258 "model.language_model.layers.7.self_attn.v_proj",
259 "model.language_model.layers.8.linear_attn.in_proj_qkv",
260 "model.language_model.layers.8.linear_attn.in_proj_z",
261 "model.language_model.layers.8.linear_attn.out_proj",
262 "model.language_model.layers.9.linear_attn.in_proj_qkv",
263 "model.language_model.layers.9.linear_attn.in_proj_z",
264 "model.language_model.layers.9.linear_attn.out_proj"
265 ]
266 },
267 "group_1": {
268 "input_activations": {
269 "dynamic": false,
270 "num_bits": 4,
271 "type": "float",
272 "group_size": 16
273 },
274 "weights": {
275 "dynamic": false,
276 "num_bits": 4,
277 "type": "float",
278 "group_size": 16
279 },
280 "targets": [
281 "lm_head",
282 "model.language_model.layers.0.mlp.experts",
283 "model.language_model.layers.0.mlp.shared_expert.down_proj",
284 "model.language_model.layers.0.mlp.shared_expert.gate_proj",
285 "model.language_model.layers.0.mlp.shared_expert.up_proj",
286 "model.language_model.layers.1.mlp.experts",
287 "model.language_model.layers.1.mlp.shared_expert.down_proj",
288 "model.language_model.layers.1.mlp.shared_expert.gate_proj",
289 "model.language_model.layers.1.mlp.shared_expert.up_proj",
290 "model.language_model.layers.10.mlp.experts",
291 "model.language_model.layers.10.mlp.shared_expert.down_proj",
292 "model.language_model.layers.10.mlp.shared_expert.gate_proj",
293 "model.language_model.layers.10.mlp.shared_expert.up_proj",
294 "model.language_model.layers.11.mlp.experts",
295 "model.language_model.layers.11.mlp.shared_expert.down_proj",
296 "model.language_model.layers.11.mlp.shared_expert.gate_proj",
297 "model.language_model.layers.11.mlp.shared_expert.up_proj",
298 "model.language_model.layers.12.mlp.experts",
299 "model.language_model.layers.12.mlp.shared_expert.down_proj",
300 "model.language_model.layers.12.mlp.shared_expert.gate_proj",
301 "model.language_model.layers.12.mlp.shared_expert.up_proj",
302 "model.language_model.layers.13.mlp.experts",
303 "model.language_model.layers.13.mlp.shared_expert.down_proj",
304 "model.language_model.layers.13.mlp.shared_expert.gate_proj",
305 "model.language_model.layers.13.mlp.shared_expert.up_proj",
306 "model.language_model.layers.14.mlp.experts",
307 "model.language_model.layers.14.mlp.shared_expert.down_proj",
308 "model.language_model.layers.14.mlp.shared_expert.gate_proj",
309 "model.language_model.layers.14.mlp.shared_expert.up_proj",
310 "model.language_model.layers.15.mlp.experts",
311 "model.language_model.layers.15.mlp.shared_expert.down_proj",
312 "model.language_model.layers.15.mlp.shared_expert.gate_proj",
313 "model.language_model.layers.15.mlp.shared_expert.up_proj",
314 "model.language_model.layers.16.mlp.experts",
315 "model.language_model.layers.16.mlp.shared_expert.down_proj",
316 "model.language_model.layers.16.mlp.shared_expert.gate_proj",
317 "model.language_model.layers.16.mlp.shared_expert.up_proj",
318 "model.language_model.layers.17.mlp.experts",
319 "model.language_model.layers.17.mlp.shared_expert.down_proj",
320 "model.language_model.layers.17.mlp.shared_expert.gate_proj",
321 "model.language_model.layers.17.mlp.shared_expert.up_proj",
322 "model.language_model.layers.18.mlp.experts",
323 "model.language_model.layers.18.mlp.shared_expert.down_proj",
324 "model.language_model.layers.18.mlp.shared_expert.gate_proj",
325 "model.language_model.layers.18.mlp.shared_expert.up_proj",
326 "model.language_model.layers.19.mlp.experts",
327 "model.language_model.layers.19.mlp.shared_expert.down_proj",
328 "model.language_model.layers.19.mlp.shared_expert.gate_proj",
329 "model.language_model.layers.19.mlp.shared_expert.up_proj",
330 "model.language_model.layers.2.mlp.experts",
331 "model.language_model.layers.2.mlp.shared_expert.down_proj",
332 "model.language_model.layers.2.mlp.shared_expert.gate_proj",
333 "model.language_model.layers.2.mlp.shared_expert.up_proj",
334 "model.language_model.layers.20.mlp.experts",
335 "model.language_model.layers.20.mlp.shared_expert.down_proj",
336 "model.language_model.layers.20.mlp.shared_expert.gate_proj",
337 "model.language_model.layers.20.mlp.shared_expert.up_proj",
338 "model.language_model.layers.21.mlp.experts",
339 "model.language_model.layers.21.mlp.shared_expert.down_proj",
340 "model.language_model.layers.21.mlp.shared_expert.gate_proj",
341 "model.language_model.layers.21.mlp.shared_expert.up_proj",
342 "model.language_model.layers.22.mlp.experts",
343 "model.language_model.layers.22.mlp.shared_expert.down_proj",
344 "model.language_model.layers.22.mlp.shared_expert.gate_proj",
345 "model.language_model.layers.22.mlp.shared_expert.up_proj",
346 "model.language_model.layers.23.mlp.experts",
347 "model.language_model.layers.23.mlp.shared_expert.down_proj",
348 "model.language_model.layers.23.mlp.shared_expert.gate_proj",
349 "model.language_model.layers.23.mlp.shared_expert.up_proj",
350 "model.language_model.layers.24.mlp.experts",
351 "model.language_model.layers.24.mlp.shared_expert.down_proj",
352 "model.language_model.layers.24.mlp.shared_expert.gate_proj",
353 "model.language_model.layers.24.mlp.shared_expert.up_proj",
354 "model.language_model.layers.25.mlp.experts",
355 "model.language_model.layers.25.mlp.shared_expert.down_proj",
356 "model.language_model.layers.25.mlp.shared_expert.gate_proj",
357 "model.language_model.layers.25.mlp.shared_expert.up_proj",
358 "model.language_model.layers.26.mlp.experts",
359 "model.language_model.layers.26.mlp.shared_expert.down_proj",
360 "model.language_model.layers.26.mlp.shared_expert.gate_proj",
361 "model.language_model.layers.26.mlp.shared_expert.up_proj",
362 "model.language_model.layers.27.mlp.experts",
363 "model.language_model.layers.27.mlp.shared_expert.down_proj",
364 "model.language_model.layers.27.mlp.shared_expert.gate_proj",
365 "model.language_model.layers.27.mlp.shared_expert.up_proj",
366 "model.language_model.layers.28.mlp.experts",
367 "model.language_model.layers.28.mlp.shared_expert.down_proj",
368 "model.language_model.layers.28.mlp.shared_expert.gate_proj",
369 "model.language_model.layers.28.mlp.shared_expert.up_proj",
370 "model.language_model.layers.29.mlp.experts",
371 "model.language_model.layers.29.mlp.shared_expert.down_proj",
372 "model.language_model.layers.29.mlp.shared_expert.gate_proj",
373 "model.language_model.layers.29.mlp.shared_expert.up_proj",
374 "model.language_model.layers.3.mlp.experts",
375 "model.language_model.layers.3.mlp.shared_expert.down_proj",
376 "model.language_model.layers.3.mlp.shared_expert.gate_proj",
377 "model.language_model.layers.3.mlp.shared_expert.up_proj",
378 "model.language_model.layers.30.mlp.experts",
379 "model.language_model.layers.30.mlp.shared_expert.down_proj",
380 "model.language_model.layers.30.mlp.shared_expert.gate_proj",
381 "model.language_model.layers.30.mlp.shared_expert.up_proj",
382 "model.language_model.layers.31.mlp.experts",
383 "model.language_model.layers.31.mlp.shared_expert.down_proj",
384 "model.language_model.layers.31.mlp.shared_expert.gate_proj",
385 "model.language_model.layers.31.mlp.shared_expert.up_proj",
386 "model.language_model.layers.32.mlp.experts",
387 "model.language_model.layers.32.mlp.shared_expert.down_proj",
388 "model.language_model.layers.32.mlp.shared_expert.gate_proj",
389 "model.language_model.layers.32.mlp.shared_expert.up_proj",
390 "model.language_model.layers.33.mlp.experts",
391 "model.language_model.layers.33.mlp.shared_expert.down_proj",
392 "model.language_model.layers.33.mlp.shared_expert.gate_proj",
393 "model.language_model.layers.33.mlp.shared_expert.up_proj",
394 "model.language_model.layers.34.mlp.experts",
395 "model.language_model.layers.34.mlp.shared_expert.down_proj",
396 "model.language_model.layers.34.mlp.shared_expert.gate_proj",
397 "model.language_model.layers.34.mlp.shared_expert.up_proj",
398 "model.language_model.layers.35.mlp.experts",
399 "model.language_model.layers.35.mlp.shared_expert.down_proj",
400 "model.language_model.layers.35.mlp.shared_expert.gate_proj",
401 "model.language_model.layers.35.mlp.shared_expert.up_proj",
402 "model.language_model.layers.36.mlp.experts",
403 "model.language_model.layers.36.mlp.shared_expert.down_proj",
404 "model.language_model.layers.36.mlp.shared_expert.gate_proj",
405 "model.language_model.layers.36.mlp.shared_expert.up_proj",
406 "model.language_model.layers.37.mlp.experts",
407 "model.language_model.layers.37.mlp.shared_expert.down_proj",
408 "model.language_model.layers.37.mlp.shared_expert.gate_proj",
409 "model.language_model.layers.37.mlp.shared_expert.up_proj",
410 "model.language_model.layers.38.mlp.experts",
411 "model.language_model.layers.38.mlp.shared_expert.down_proj",
412 "model.language_model.layers.38.mlp.shared_expert.gate_proj",
413 "model.language_model.layers.38.mlp.shared_expert.up_proj",
414 "model.language_model.layers.39.mlp.experts",
415 "model.language_model.layers.39.mlp.shared_expert.down_proj",
416 "model.language_model.layers.39.mlp.shared_expert.gate_proj",
417 "model.language_model.layers.39.mlp.shared_expert.up_proj",
418 "model.language_model.layers.4.mlp.experts",
419 "model.language_model.layers.4.mlp.shared_expert.down_proj",
420 "model.language_model.layers.4.mlp.shared_expert.gate_proj",
421 "model.language_model.layers.4.mlp.shared_expert.up_proj",
422 "model.language_model.layers.5.mlp.experts",
423 "model.language_model.layers.5.mlp.shared_expert.down_proj",
424 "model.language_model.layers.5.mlp.shared_expert.gate_proj",
425 "model.language_model.layers.5.mlp.shared_expert.up_proj",
426 "model.language_model.layers.6.mlp.experts",
427 "model.language_model.layers.6.mlp.shared_expert.down_proj",
428 "model.language_model.layers.6.mlp.shared_expert.gate_proj",
429 "model.language_model.layers.6.mlp.shared_expert.up_proj",
430 "model.language_model.layers.7.mlp.experts",
431 "model.language_model.layers.7.mlp.shared_expert.down_proj",
432 "model.language_model.layers.7.mlp.shared_expert.gate_proj",
433 "model.language_model.layers.7.mlp.shared_expert.up_proj",
434 "model.language_model.layers.8.mlp.experts",
435 "model.language_model.layers.8.mlp.shared_expert.down_proj",
436 "model.language_model.layers.8.mlp.shared_expert.gate_proj",
437 "model.language_model.layers.8.mlp.shared_expert.up_proj",
438 "model.language_model.layers.9.mlp.experts",
439 "model.language_model.layers.9.mlp.shared_expert.down_proj",
440 "model.language_model.layers.9.mlp.shared_expert.gate_proj",
441 "model.language_model.layers.9.mlp.shared_expert.up_proj"
442 ]
443 }
444 },
445 "quantized_layers": {
446 "model.language_model.layers.0.linear_attn.out_proj": {
447 "quant_algo": "FP8"
448 },
449 "model.language_model.layers.0.linear_attn.in_proj_qkv": {
450 "quant_algo": "FP8"
451 },
452 "model.language_model.layers.0.linear_attn.in_proj_z": {
453 "quant_algo": "FP8"
454 },
455 "model.language_model.layers.0.mlp.experts": {
456 "quant_algo": "W4A16_NVFP4",
457 "group_size": 16
458 },
459 "model.language_model.layers.0.mlp.shared_expert.gate_proj": {
460 "quant_algo": "W4A16_NVFP4",
461 "group_size": 16
462 },
463 "model.language_model.layers.0.mlp.shared_expert.up_proj": {
464 "quant_algo": "W4A16_NVFP4",
465 "group_size": 16
466 },
467 "model.language_model.layers.0.mlp.shared_expert.down_proj": {
468 "quant_algo": "W4A16_NVFP4",
469 "group_size": 16
470 },
471 "model.language_model.layers.1.linear_attn.out_proj": {
472 "quant_algo": "FP8"
473 },
474 "model.language_model.layers.1.linear_attn.in_proj_qkv": {
475 "quant_algo": "FP8"
476 },
477 "model.language_model.layers.1.linear_attn.in_proj_z": {
478 "quant_algo": "FP8"
479 },
480 "model.language_model.layers.1.mlp.experts": {
481 "quant_algo": "W4A16_NVFP4",
482 "group_size": 16
483 },
484 "model.language_model.layers.1.mlp.shared_expert.gate_proj": {
485 "quant_algo": "W4A16_NVFP4",
486 "group_size": 16
487 },
488 "model.language_model.layers.1.mlp.shared_expert.up_proj": {
489 "quant_algo": "W4A16_NVFP4",
490 "group_size": 16
491 },
492 "model.language_model.layers.1.mlp.shared_expert.down_proj": {
493 "quant_algo": "W4A16_NVFP4",
494 "group_size": 16
495 },
496 "model.language_model.layers.2.linear_attn.out_proj": {
497 "quant_algo": "FP8"
498 },
499 "model.language_model.layers.2.linear_attn.in_proj_qkv": {
500 "quant_algo": "FP8"
501 },
502 "model.language_model.layers.2.linear_attn.in_proj_z": {
503 "quant_algo": "FP8"
504 },
505 "model.language_model.layers.2.mlp.experts": {
506 "quant_algo": "W4A16_NVFP4",
507 "group_size": 16
508 },
509 "model.language_model.layers.2.mlp.shared_expert.gate_proj": {
510 "quant_algo": "W4A16_NVFP4",
511 "group_size": 16
512 },
513 "model.language_model.layers.2.mlp.shared_expert.up_proj": {
514 "quant_algo": "W4A16_NVFP4",
515 "group_size": 16
516 },
517 "model.language_model.layers.2.mlp.shared_expert.down_proj": {
518 "quant_algo": "W4A16_NVFP4",
519 "group_size": 16
520 },
521 "model.language_model.layers.3.self_attn.q_proj": {
522 "quant_algo": "FP8"
523 },
524 "model.language_model.layers.3.self_attn.k_proj": {
525 "quant_algo": "FP8"
526 },
527 "model.language_model.layers.3.self_attn.v_proj": {
528 "quant_algo": "FP8"
529 },
530 "model.language_model.layers.3.self_attn.o_proj": {
531 "quant_algo": "FP8"
532 },
533 "model.language_model.layers.3.mlp.experts": {
534 "quant_algo": "W4A16_NVFP4",
535 "group_size": 16
536 },
537 "model.language_model.layers.3.mlp.shared_expert.gate_proj": {
538 "quant_algo": "W4A16_NVFP4",
539 "group_size": 16
540 },
541 "model.language_model.layers.3.mlp.shared_expert.up_proj": {
542 "quant_algo": "W4A16_NVFP4",
543 "group_size": 16
544 },
545 "model.language_model.layers.3.mlp.shared_expert.down_proj": {
546 "quant_algo": "W4A16_NVFP4",
547 "group_size": 16
548 },
549 "model.language_model.layers.4.linear_attn.out_proj": {
550 "quant_algo": "FP8"
551 },
552 "model.language_model.layers.4.linear_attn.in_proj_qkv": {
553 "quant_algo": "FP8"
554 },
555 "model.language_model.layers.4.linear_attn.in_proj_z": {
556 "quant_algo": "FP8"
557 },
558 "model.language_model.layers.4.mlp.experts": {
559 "quant_algo": "W4A16_NVFP4",
560 "group_size": 16
561 },
562 "model.language_model.layers.4.mlp.shared_expert.gate_proj": {
563 "quant_algo": "W4A16_NVFP4",
564 "group_size": 16
565 },
566 "model.language_model.layers.4.mlp.shared_expert.up_proj": {
567 "quant_algo": "W4A16_NVFP4",
568 "group_size": 16
569 },
570 "model.language_model.layers.4.mlp.shared_expert.down_proj": {
571 "quant_algo": "W4A16_NVFP4",
572 "group_size": 16
573 },
574 "model.language_model.layers.5.linear_attn.out_proj": {
575 "quant_algo": "FP8"
576 },
577 "model.language_model.layers.5.linear_attn.in_proj_qkv": {
578 "quant_algo": "FP8"
579 },
580 "model.language_model.layers.5.linear_attn.in_proj_z": {
581 "quant_algo": "FP8"
582 },
583 "model.language_model.layers.5.mlp.experts": {
584 "quant_algo": "W4A16_NVFP4",
585 "group_size": 16
586 },
587 "model.language_model.layers.5.mlp.shared_expert.gate_proj": {
588 "quant_algo": "W4A16_NVFP4",
589 "group_size": 16
590 },
591 "model.language_model.layers.5.mlp.shared_expert.up_proj": {
592 "quant_algo": "W4A16_NVFP4",
593 "group_size": 16
594 },
595 "model.language_model.layers.5.mlp.shared_expert.down_proj": {
596 "quant_algo": "W4A16_NVFP4",
597 "group_size": 16
598 },
599 "model.language_model.layers.6.linear_attn.out_proj": {
600 "quant_algo": "FP8"
601 },
602 "model.language_model.layers.6.linear_attn.in_proj_qkv": {
603 "quant_algo": "FP8"
604 },
605 "model.language_model.layers.6.linear_attn.in_proj_z": {
606 "quant_algo": "FP8"
607 },
608 "model.language_model.layers.6.mlp.experts": {
609 "quant_algo": "W4A16_NVFP4",
610 "group_size": 16
611 },
612 "model.language_model.layers.6.mlp.shared_expert.gate_proj": {
613 "quant_algo": "W4A16_NVFP4",
614 "group_size": 16
615 },
616 "model.language_model.layers.6.mlp.shared_expert.up_proj": {
617 "quant_algo": "W4A16_NVFP4",
618 "group_size": 16
619 },
620 "model.language_model.layers.6.mlp.shared_expert.down_proj": {
621 "quant_algo": "W4A16_NVFP4",
622 "group_size": 16
623 },
624 "model.language_model.layers.7.self_attn.q_proj": {
625 "quant_algo": "FP8"
626 },
627 "model.language_model.layers.7.self_attn.k_proj": {
628 "quant_algo": "FP8"
629 },
630 "model.language_model.layers.7.self_attn.v_proj": {
631 "quant_algo": "FP8"
632 },
633 "model.language_model.layers.7.self_attn.o_proj": {
634 "quant_algo": "FP8"
635 },
636 "model.language_model.layers.7.mlp.experts": {
637 "quant_algo": "W4A16_NVFP4",
638 "group_size": 16
639 },
640 "model.language_model.layers.7.mlp.shared_expert.gate_proj": {
641 "quant_algo": "W4A16_NVFP4",
642 "group_size": 16
643 },
644 "model.language_model.layers.7.mlp.shared_expert.up_proj": {
645 "quant_algo": "W4A16_NVFP4",
646 "group_size": 16
647 },
648 "model.language_model.layers.7.mlp.shared_expert.down_proj": {
649 "quant_algo": "W4A16_NVFP4",
650 "group_size": 16
651 },
652 "model.language_model.layers.8.linear_attn.out_proj": {
653 "quant_algo": "FP8"
654 },
655 "model.language_model.layers.8.linear_attn.in_proj_qkv": {
656 "quant_algo": "FP8"
657 },
658 "model.language_model.layers.8.linear_attn.in_proj_z": {
659 "quant_algo": "FP8"
660 },
661 "model.language_model.layers.8.mlp.experts": {
662 "quant_algo": "W4A16_NVFP4",
663 "group_size": 16
664 },
665 "model.language_model.layers.8.mlp.shared_expert.gate_proj": {
666 "quant_algo": "W4A16_NVFP4",
667 "group_size": 16
668 },
669 "model.language_model.layers.8.mlp.shared_expert.up_proj": {
670 "quant_algo": "W4A16_NVFP4",
671 "group_size": 16
672 },
673 "model.language_model.layers.8.mlp.shared_expert.down_proj": {
674 "quant_algo": "W4A16_NVFP4",
675 "group_size": 16
676 },
677 "model.language_model.layers.9.linear_attn.out_proj": {
678 "quant_algo": "FP8"
679 },
680 "model.language_model.layers.9.linear_attn.in_proj_qkv": {
681 "quant_algo": "FP8"
682 },
683 "model.language_model.layers.9.linear_attn.in_proj_z": {
684 "quant_algo": "FP8"
685 },
686 "model.language_model.layers.9.mlp.experts": {
687 "quant_algo": "W4A16_NVFP4",
688 "group_size": 16
689 },
690 "model.language_model.layers.9.mlp.shared_expert.gate_proj": {
691 "quant_algo": "W4A16_NVFP4",
692 "group_size": 16
693 },
694 "model.language_model.layers.9.mlp.shared_expert.up_proj": {
695 "quant_algo": "W4A16_NVFP4",
696 "group_size": 16
697 },
698 "model.language_model.layers.9.mlp.shared_expert.down_proj": {
699 "quant_algo": "W4A16_NVFP4",
700 "group_size": 16
701 },
702 "model.language_model.layers.10.linear_attn.out_proj": {
703 "quant_algo": "FP8"
704 },
705 "model.language_model.layers.10.linear_attn.in_proj_qkv": {
706 "quant_algo": "FP8"
707 },
708 "model.language_model.layers.10.linear_attn.in_proj_z": {
709 "quant_algo": "FP8"
710 },
711 "model.language_model.layers.10.mlp.experts": {
712 "quant_algo": "W4A16_NVFP4",
713 "group_size": 16
714 },
715 "model.language_model.layers.10.mlp.shared_expert.gate_proj": {
716 "quant_algo": "W4A16_NVFP4",
717 "group_size": 16
718 },
719 "model.language_model.layers.10.mlp.shared_expert.up_proj": {
720 "quant_algo": "W4A16_NVFP4",
721 "group_size": 16
722 },
723 "model.language_model.layers.10.mlp.shared_expert.down_proj": {
724 "quant_algo": "W4A16_NVFP4",
725 "group_size": 16
726 },
727 "model.language_model.layers.11.self_attn.q_proj": {
728 "quant_algo": "FP8"
729 },
730 "model.language_model.layers.11.self_attn.k_proj": {
731 "quant_algo": "FP8"
732 },
733 "model.language_model.layers.11.self_attn.v_proj": {
734 "quant_algo": "FP8"
735 },
736 "model.language_model.layers.11.self_attn.o_proj": {
737 "quant_algo": "FP8"
738 },
739 "model.language_model.layers.11.mlp.experts": {
740 "quant_algo": "W4A16_NVFP4",
741 "group_size": 16
742 },
743 "model.language_model.layers.11.mlp.shared_expert.gate_proj": {
744 "quant_algo": "W4A16_NVFP4",
745 "group_size": 16
746 },
747 "model.language_model.layers.11.mlp.shared_expert.up_proj": {
748 "quant_algo": "W4A16_NVFP4",
749 "group_size": 16
750 },
751 "model.language_model.layers.11.mlp.shared_expert.down_proj": {
752 "quant_algo": "W4A16_NVFP4",
753 "group_size": 16
754 },
755 "model.language_model.layers.12.linear_attn.out_proj": {
756 "quant_algo": "FP8"
757 },
758 "model.language_model.layers.12.linear_attn.in_proj_qkv": {
759 "quant_algo": "FP8"
760 },
761 "model.language_model.layers.12.linear_attn.in_proj_z": {
762 "quant_algo": "FP8"
763 },
764 "model.language_model.layers.12.mlp.experts": {
765 "quant_algo": "W4A16_NVFP4",
766 "group_size": 16
767 },
768 "model.language_model.layers.12.mlp.shared_expert.gate_proj": {
769 "quant_algo": "W4A16_NVFP4",
770 "group_size": 16
771 },
772 "model.language_model.layers.12.mlp.shared_expert.up_proj": {
773 "quant_algo": "W4A16_NVFP4",
774 "group_size": 16
775 },
776 "model.language_model.layers.12.mlp.shared_expert.down_proj": {
777 "quant_algo": "W4A16_NVFP4",
778 "group_size": 16
779 },
780 "model.language_model.layers.13.linear_attn.out_proj": {
781 "quant_algo": "FP8"
782 },
783 "model.language_model.layers.13.linear_attn.in_proj_qkv": {
784 "quant_algo": "FP8"
785 },
786 "model.language_model.layers.13.linear_attn.in_proj_z": {
787 "quant_algo": "FP8"
788 },
789 "model.language_model.layers.13.mlp.experts": {
790 "quant_algo": "W4A16_NVFP4",
791 "group_size": 16
792 },
793 "model.language_model.layers.13.mlp.shared_expert.gate_proj": {
794 "quant_algo": "W4A16_NVFP4",
795 "group_size": 16
796 },
797 "model.language_model.layers.13.mlp.shared_expert.up_proj": {
798 "quant_algo": "W4A16_NVFP4",
799 "group_size": 16
800 },
801 "model.language_model.layers.13.mlp.shared_expert.down_proj": {
802 "quant_algo": "W4A16_NVFP4",
803 "group_size": 16
804 },
805 "model.language_model.layers.14.linear_attn.out_proj": {
806 "quant_algo": "FP8"
807 },
808 "model.language_model.layers.14.linear_attn.in_proj_qkv": {
809 "quant_algo": "FP8"
810 },
811 "model.language_model.layers.14.linear_attn.in_proj_z": {
812 "quant_algo": "FP8"
813 },
814 "model.language_model.layers.14.mlp.experts": {
815 "quant_algo": "W4A16_NVFP4",
816 "group_size": 16
817 },
818 "model.language_model.layers.14.mlp.shared_expert.gate_proj": {
819 "quant_algo": "W4A16_NVFP4",
820 "group_size": 16
821 },
822 "model.language_model.layers.14.mlp.shared_expert.up_proj": {
823 "quant_algo": "W4A16_NVFP4",
824 "group_size": 16
825 },
826 "model.language_model.layers.14.mlp.shared_expert.down_proj": {
827 "quant_algo": "W4A16_NVFP4",
828 "group_size": 16
829 },
830 "model.language_model.layers.15.self_attn.q_proj": {
831 "quant_algo": "FP8"
832 },
833 "model.language_model.layers.15.self_attn.k_proj": {
834 "quant_algo": "FP8"
835 },
836 "model.language_model.layers.15.self_attn.v_proj": {
837 "quant_algo": "FP8"
838 },
839 "model.language_model.layers.15.self_attn.o_proj": {
840 "quant_algo": "FP8"
841 },
842 "model.language_model.layers.15.mlp.experts": {
843 "quant_algo": "W4A16_NVFP4",
844 "group_size": 16
845 },
846 "model.language_model.layers.15.mlp.shared_expert.gate_proj": {
847 "quant_algo": "W4A16_NVFP4",
848 "group_size": 16
849 },
850 "model.language_model.layers.15.mlp.shared_expert.up_proj": {
851 "quant_algo": "W4A16_NVFP4",
852 "group_size": 16
853 },
854 "model.language_model.layers.15.mlp.shared_expert.down_proj": {
855 "quant_algo": "W4A16_NVFP4",
856 "group_size": 16
857 },
858 "model.language_model.layers.16.linear_attn.out_proj": {
859 "quant_algo": "FP8"
860 },
861 "model.language_model.layers.16.linear_attn.in_proj_qkv": {
862 "quant_algo": "FP8"
863 },
864 "model.language_model.layers.16.linear_attn.in_proj_z": {
865 "quant_algo": "FP8"
866 },
867 "model.language_model.layers.16.mlp.experts": {
868 "quant_algo": "W4A16_NVFP4",
869 "group_size": 16
870 },
871 "model.language_model.layers.16.mlp.shared_expert.gate_proj": {
872 "quant_algo": "W4A16_NVFP4",
873 "group_size": 16
874 },
875 "model.language_model.layers.16.mlp.shared_expert.up_proj": {
876 "quant_algo": "W4A16_NVFP4",
877 "group_size": 16
878 },
879 "model.language_model.layers.16.mlp.shared_expert.down_proj": {
880 "quant_algo": "W4A16_NVFP4",
881 "group_size": 16
882 },
883 "model.language_model.layers.17.linear_attn.out_proj": {
884 "quant_algo": "FP8"
885 },
886 "model.language_model.layers.17.linear_attn.in_proj_qkv": {
887 "quant_algo": "FP8"
888 },
889 "model.language_model.layers.17.linear_attn.in_proj_z": {
890 "quant_algo": "FP8"
891 },
892 "model.language_model.layers.17.mlp.experts": {
893 "quant_algo": "W4A16_NVFP4",
894 "group_size": 16
895 },
896 "model.language_model.layers.17.mlp.shared_expert.gate_proj": {
897 "quant_algo": "W4A16_NVFP4",
898 "group_size": 16
899 },
900 "model.language_model.layers.17.mlp.shared_expert.up_proj": {
901 "quant_algo": "W4A16_NVFP4",
902 "group_size": 16
903 },
904 "model.language_model.layers.17.mlp.shared_expert.down_proj": {
905 "quant_algo": "W4A16_NVFP4",
906 "group_size": 16
907 },
908 "model.language_model.layers.18.linear_attn.out_proj": {
909 "quant_algo": "FP8"
910 },
911 "model.language_model.layers.18.linear_attn.in_proj_qkv": {
912 "quant_algo": "FP8"
913 },
914 "model.language_model.layers.18.linear_attn.in_proj_z": {
915 "quant_algo": "FP8"
916 },
917 "model.language_model.layers.18.mlp.experts": {
918 "quant_algo": "W4A16_NVFP4",
919 "group_size": 16
920 },
921 "model.language_model.layers.18.mlp.shared_expert.gate_proj": {
922 "quant_algo": "W4A16_NVFP4",
923 "group_size": 16
924 },
925 "model.language_model.layers.18.mlp.shared_expert.up_proj": {
926 "quant_algo": "W4A16_NVFP4",
927 "group_size": 16
928 },
929 "model.language_model.layers.18.mlp.shared_expert.down_proj": {
930 "quant_algo": "W4A16_NVFP4",
931 "group_size": 16
932 },
933 "model.language_model.layers.19.self_attn.q_proj": {
934 "quant_algo": "FP8"
935 },
936 "model.language_model.layers.19.self_attn.k_proj": {
937 "quant_algo": "FP8"
938 },
939 "model.language_model.layers.19.self_attn.v_proj": {
940 "quant_algo": "FP8"
941 },
942 "model.language_model.layers.19.self_attn.o_proj": {
943 "quant_algo": "FP8"
944 },
945 "model.language_model.layers.19.mlp.experts": {
946 "quant_algo": "W4A16_NVFP4",
947 "group_size": 16
948 },
949 "model.language_model.layers.19.mlp.shared_expert.gate_proj": {
950 "quant_algo": "W4A16_NVFP4",
951 "group_size": 16
952 },
953 "model.language_model.layers.19.mlp.shared_expert.up_proj": {
954 "quant_algo": "W4A16_NVFP4",
955 "group_size": 16
956 },
957 "model.language_model.layers.19.mlp.shared_expert.down_proj": {
958 "quant_algo": "W4A16_NVFP4",
959 "group_size": 16
960 },
961 "model.language_model.layers.20.linear_attn.out_proj": {
962 "quant_algo": "FP8"
963 },
964 "model.language_model.layers.20.linear_attn.in_proj_qkv": {
965 "quant_algo": "FP8"
966 },
967 "model.language_model.layers.20.linear_attn.in_proj_z": {
968 "quant_algo": "FP8"
969 },
970 "model.language_model.layers.20.mlp.experts": {
971 "quant_algo": "W4A16_NVFP4",
972 "group_size": 16
973 },
974 "model.language_model.layers.20.mlp.shared_expert.gate_proj": {
975 "quant_algo": "W4A16_NVFP4",
976 "group_size": 16
977 },
978 "model.language_model.layers.20.mlp.shared_expert.up_proj": {
979 "quant_algo": "W4A16_NVFP4",
980 "group_size": 16
981 },
982 "model.language_model.layers.20.mlp.shared_expert.down_proj": {
983 "quant_algo": "W4A16_NVFP4",
984 "group_size": 16
985 },
986 "model.language_model.layers.21.linear_attn.out_proj": {
987 "quant_algo": "FP8"
988 },
989 "model.language_model.layers.21.linear_attn.in_proj_qkv": {
990 "quant_algo": "FP8"
991 },
992 "model.language_model.layers.21.linear_attn.in_proj_z": {
993 "quant_algo": "FP8"
994 },
995 "model.language_model.layers.21.mlp.experts": {
996 "quant_algo": "W4A16_NVFP4",
997 "group_size": 16
998 },
999 "model.language_model.layers.21.mlp.shared_expert.gate_proj": {
1000 "quant_algo": "W4A16_NVFP4",
1001 "group_size": 16
1002 },
1003 "model.language_model.layers.21.mlp.shared_expert.up_proj": {
1004 "quant_algo": "W4A16_NVFP4",
1005 "group_size": 16
1006 },
1007 "model.language_model.layers.21.mlp.shared_expert.down_proj": {
1008 "quant_algo": "W4A16_NVFP4",
1009 "group_size": 16
1010 },
1011 "model.language_model.layers.22.linear_attn.out_proj": {
1012 "quant_algo": "FP8"
1013 },
1014 "model.language_model.layers.22.linear_attn.in_proj_qkv": {
1015 "quant_algo": "FP8"
1016 },
1017 "model.language_model.layers.22.linear_attn.in_proj_z": {
1018 "quant_algo": "FP8"
1019 },
1020 "model.language_model.layers.22.mlp.experts": {
1021 "quant_algo": "W4A16_NVFP4",
1022 "group_size": 16
1023 },
1024 "model.language_model.layers.22.mlp.shared_expert.gate_proj": {
1025 "quant_algo": "W4A16_NVFP4",
1026 "group_size": 16
1027 },
1028 "model.language_model.layers.22.mlp.shared_expert.up_proj": {
1029 "quant_algo": "W4A16_NVFP4",
1030 "group_size": 16
1031 },
1032 "model.language_model.layers.22.mlp.shared_expert.down_proj": {
1033 "quant_algo": "W4A16_NVFP4",
1034 "group_size": 16
1035 },
1036 "model.language_model.layers.23.self_attn.q_proj": {
1037 "quant_algo": "FP8"
1038 },
1039 "model.language_model.layers.23.self_attn.k_proj": {
1040 "quant_algo": "FP8"
1041 },
1042 "model.language_model.layers.23.self_attn.v_proj": {
1043 "quant_algo": "FP8"
1044 },
1045 "model.language_model.layers.23.self_attn.o_proj": {
1046 "quant_algo": "FP8"
1047 },
1048 "model.language_model.layers.23.mlp.experts": {
1049 "quant_algo": "W4A16_NVFP4",
1050 "group_size": 16
1051 },
1052 "model.language_model.layers.23.mlp.shared_expert.gate_proj": {
1053 "quant_algo": "W4A16_NVFP4",
1054 "group_size": 16
1055 },
1056 "model.language_model.layers.23.mlp.shared_expert.up_proj": {
1057 "quant_algo": "W4A16_NVFP4",
1058 "group_size": 16
1059 },
1060 "model.language_model.layers.23.mlp.shared_expert.down_proj": {
1061 "quant_algo": "W4A16_NVFP4",
1062 "group_size": 16
1063 },
1064 "model.language_model.layers.24.linear_attn.out_proj": {
1065 "quant_algo": "FP8"
1066 },
1067 "model.language_model.layers.24.linear_attn.in_proj_qkv": {
1068 "quant_algo": "FP8"
1069 },
1070 "model.language_model.layers.24.linear_attn.in_proj_z": {
1071 "quant_algo": "FP8"
1072 },
1073 "model.language_model.layers.24.mlp.experts": {
1074 "quant_algo": "W4A16_NVFP4",
1075 "group_size": 16
1076 },
1077 "model.language_model.layers.24.mlp.shared_expert.gate_proj": {
1078 "quant_algo": "W4A16_NVFP4",
1079 "group_size": 16
1080 },
1081 "model.language_model.layers.24.mlp.shared_expert.up_proj": {
1082 "quant_algo": "W4A16_NVFP4",
1083 "group_size": 16
1084 },
1085 "model.language_model.layers.24.mlp.shared_expert.down_proj": {
1086 "quant_algo": "W4A16_NVFP4",
1087 "group_size": 16
1088 },
1089 "model.language_model.layers.25.linear_attn.out_proj": {
1090 "quant_algo": "FP8"
1091 },
1092 "model.language_model.layers.25.linear_attn.in_proj_qkv": {
1093 "quant_algo": "FP8"
1094 },
1095 "model.language_model.layers.25.linear_attn.in_proj_z": {
1096 "quant_algo": "FP8"
1097 },
1098 "model.language_model.layers.25.mlp.experts": {
1099 "quant_algo": "W4A16_NVFP4",
1100 "group_size": 16
1101 },
1102 "model.language_model.layers.25.mlp.shared_expert.gate_proj": {
1103 "quant_algo": "W4A16_NVFP4",
1104 "group_size": 16
1105 },
1106 "model.language_model.layers.25.mlp.shared_expert.up_proj": {
1107 "quant_algo": "W4A16_NVFP4",
1108 "group_size": 16
1109 },
1110 "model.language_model.layers.25.mlp.shared_expert.down_proj": {
1111 "quant_algo": "W4A16_NVFP4",
1112 "group_size": 16
1113 },
1114 "model.language_model.layers.26.linear_attn.out_proj": {
1115 "quant_algo": "FP8"
1116 },
1117 "model.language_model.layers.26.linear_attn.in_proj_qkv": {
1118 "quant_algo": "FP8"
1119 },
1120 "model.language_model.layers.26.linear_attn.in_proj_z": {
1121 "quant_algo": "FP8"
1122 },
1123 "model.language_model.layers.26.mlp.experts": {
1124 "quant_algo": "W4A16_NVFP4",
1125 "group_size": 16
1126 },
1127 "model.language_model.layers.26.mlp.shared_expert.gate_proj": {
1128 "quant_algo": "W4A16_NVFP4",
1129 "group_size": 16
1130 },
1131 "model.language_model.layers.26.mlp.shared_expert.up_proj": {
1132 "quant_algo": "W4A16_NVFP4",
1133 "group_size": 16
1134 },
1135 "model.language_model.layers.26.mlp.shared_expert.down_proj": {
1136 "quant_algo": "W4A16_NVFP4",
1137 "group_size": 16
1138 },
1139 "model.language_model.layers.27.self_attn.q_proj": {
1140 "quant_algo": "FP8"
1141 },
1142 "model.language_model.layers.27.self_attn.k_proj": {
1143 "quant_algo": "FP8"
1144 },
1145 "model.language_model.layers.27.self_attn.v_proj": {
1146 "quant_algo": "FP8"
1147 },
1148 "model.language_model.layers.27.self_attn.o_proj": {
1149 "quant_algo": "FP8"
1150 },
1151 "model.language_model.layers.27.mlp.experts": {
1152 "quant_algo": "W4A16_NVFP4",
1153 "group_size": 16
1154 },
1155 "model.language_model.layers.27.mlp.shared_expert.gate_proj": {
1156 "quant_algo": "W4A16_NVFP4",
1157 "group_size": 16
1158 },
1159 "model.language_model.layers.27.mlp.shared_expert.up_proj": {
1160 "quant_algo": "W4A16_NVFP4",
1161 "group_size": 16
1162 },
1163 "model.language_model.layers.27.mlp.shared_expert.down_proj": {
1164 "quant_algo": "W4A16_NVFP4",
1165 "group_size": 16
1166 },
1167 "model.language_model.layers.28.linear_attn.out_proj": {
1168 "quant_algo": "FP8"
1169 },
1170 "model.language_model.layers.28.linear_attn.in_proj_qkv": {
1171 "quant_algo": "FP8"
1172 },
1173 "model.language_model.layers.28.linear_attn.in_proj_z": {
1174 "quant_algo": "FP8"
1175 },
1176 "model.language_model.layers.28.mlp.experts": {
1177 "quant_algo": "W4A16_NVFP4",
1178 "group_size": 16
1179 },
1180 "model.language_model.layers.28.mlp.shared_expert.gate_proj": {
1181 "quant_algo": "W4A16_NVFP4",
1182 "group_size": 16
1183 },
1184 "model.language_model.layers.28.mlp.shared_expert.up_proj": {
1185 "quant_algo": "W4A16_NVFP4",
1186 "group_size": 16
1187 },
1188 "model.language_model.layers.28.mlp.shared_expert.down_proj": {
1189 "quant_algo": "W4A16_NVFP4",
1190 "group_size": 16
1191 },
1192 "model.language_model.layers.29.linear_attn.out_proj": {
1193 "quant_algo": "FP8"
1194 },
1195 "model.language_model.layers.29.linear_attn.in_proj_qkv": {
1196 "quant_algo": "FP8"
1197 },
1198 "model.language_model.layers.29.linear_attn.in_proj_z": {
1199 "quant_algo": "FP8"
1200 },
1201 "model.language_model.layers.29.mlp.experts": {
1202 "quant_algo": "W4A16_NVFP4",
1203 "group_size": 16
1204 },
1205 "model.language_model.layers.29.mlp.shared_expert.gate_proj": {
1206 "quant_algo": "W4A16_NVFP4",
1207 "group_size": 16
1208 },
1209 "model.language_model.layers.29.mlp.shared_expert.up_proj": {
1210 "quant_algo": "W4A16_NVFP4",
1211 "group_size": 16
1212 },
1213 "model.language_model.layers.29.mlp.shared_expert.down_proj": {
1214 "quant_algo": "W4A16_NVFP4",
1215 "group_size": 16
1216 },
1217 "model.language_model.layers.30.linear_attn.out_proj": {
1218 "quant_algo": "FP8"
1219 },
1220 "model.language_model.layers.30.linear_attn.in_proj_qkv": {
1221 "quant_algo": "FP8"
1222 },
1223 "model.language_model.layers.30.linear_attn.in_proj_z": {
1224 "quant_algo": "FP8"
1225 },
1226 "model.language_model.layers.30.mlp.experts": {
1227 "quant_algo": "W4A16_NVFP4",
1228 "group_size": 16
1229 },
1230 "model.language_model.layers.30.mlp.shared_expert.gate_proj": {
1231 "quant_algo": "W4A16_NVFP4",
1232 "group_size": 16
1233 },
1234 "model.language_model.layers.30.mlp.shared_expert.up_proj": {
1235 "quant_algo": "W4A16_NVFP4",
1236 "group_size": 16
1237 },
1238 "model.language_model.layers.30.mlp.shared_expert.down_proj": {
1239 "quant_algo": "W4A16_NVFP4",
1240 "group_size": 16
1241 },
1242 "model.language_model.layers.31.self_attn.q_proj": {
1243 "quant_algo": "FP8"
1244 },
1245 "model.language_model.layers.31.self_attn.k_proj": {
1246 "quant_algo": "FP8"
1247 },
1248 "model.language_model.layers.31.self_attn.v_proj": {
1249 "quant_algo": "FP8"
1250 },
1251 "model.language_model.layers.31.self_attn.o_proj": {
1252 "quant_algo": "FP8"
1253 },
1254 "model.language_model.layers.31.mlp.experts": {
1255 "quant_algo": "W4A16_NVFP4",
1256 "group_size": 16
1257 },
1258 "model.language_model.layers.31.mlp.shared_expert.gate_proj": {
1259 "quant_algo": "W4A16_NVFP4",
1260 "group_size": 16
1261 },
1262 "model.language_model.layers.31.mlp.shared_expert.up_proj": {
1263 "quant_algo": "W4A16_NVFP4",
1264 "group_size": 16
1265 },
1266 "model.language_model.layers.31.mlp.shared_expert.down_proj": {
1267 "quant_algo": "W4A16_NVFP4",
1268 "group_size": 16
1269 },
1270 "model.language_model.layers.32.linear_attn.out_proj": {
1271 "quant_algo": "FP8"
1272 },
1273 "model.language_model.layers.32.linear_attn.in_proj_qkv": {
1274 "quant_algo": "FP8"
1275 },
1276 "model.language_model.layers.32.linear_attn.in_proj_z": {
1277 "quant_algo": "FP8"
1278 },
1279 "model.language_model.layers.32.mlp.experts": {
1280 "quant_algo": "W4A16_NVFP4",
1281 "group_size": 16
1282 },
1283 "model.language_model.layers.32.mlp.shared_expert.gate_proj": {
1284 "quant_algo": "W4A16_NVFP4",
1285 "group_size": 16
1286 },
1287 "model.language_model.layers.32.mlp.shared_expert.up_proj": {
1288 "quant_algo": "W4A16_NVFP4",
1289 "group_size": 16
1290 },
1291 "model.language_model.layers.32.mlp.shared_expert.down_proj": {
1292 "quant_algo": "W4A16_NVFP4",
1293 "group_size": 16
1294 },
1295 "model.language_model.layers.33.linear_attn.out_proj": {
1296 "quant_algo": "FP8"
1297 },
1298 "model.language_model.layers.33.linear_attn.in_proj_qkv": {
1299 "quant_algo": "FP8"
1300 },
1301 "model.language_model.layers.33.linear_attn.in_proj_z": {
1302 "quant_algo": "FP8"
1303 },
1304 "model.language_model.layers.33.mlp.experts": {
1305 "quant_algo": "W4A16_NVFP4",
1306 "group_size": 16
1307 },
1308 "model.language_model.layers.33.mlp.shared_expert.gate_proj": {
1309 "quant_algo": "W4A16_NVFP4",
1310 "group_size": 16
1311 },
1312 "model.language_model.layers.33.mlp.shared_expert.up_proj": {
1313 "quant_algo": "W4A16_NVFP4",
1314 "group_size": 16
1315 },
1316 "model.language_model.layers.33.mlp.shared_expert.down_proj": {
1317 "quant_algo": "W4A16_NVFP4",
1318 "group_size": 16
1319 },
1320 "model.language_model.layers.34.linear_attn.out_proj": {
1321 "quant_algo": "FP8"
1322 },
1323 "model.language_model.layers.34.linear_attn.in_proj_qkv": {
1324 "quant_algo": "FP8"
1325 },
1326 "model.language_model.layers.34.linear_attn.in_proj_z": {
1327 "quant_algo": "FP8"
1328 },
1329 "model.language_model.layers.34.mlp.experts": {
1330 "quant_algo": "W4A16_NVFP4",
1331 "group_size": 16
1332 },
1333 "model.language_model.layers.34.mlp.shared_expert.gate_proj": {
1334 "quant_algo": "W4A16_NVFP4",
1335 "group_size": 16
1336 },
1337 "model.language_model.layers.34.mlp.shared_expert.up_proj": {
1338 "quant_algo": "W4A16_NVFP4",
1339 "group_size": 16
1340 },
1341 "model.language_model.layers.34.mlp.shared_expert.down_proj": {
1342 "quant_algo": "W4A16_NVFP4",
1343 "group_size": 16
1344 },
1345 "model.language_model.layers.35.self_attn.q_proj": {
1346 "quant_algo": "FP8"
1347 },
1348 "model.language_model.layers.35.self_attn.k_proj": {
1349 "quant_algo": "FP8"
1350 },
1351 "model.language_model.layers.35.self_attn.v_proj": {
1352 "quant_algo": "FP8"
1353 },
1354 "model.language_model.layers.35.self_attn.o_proj": {
1355 "quant_algo": "FP8"
1356 },
1357 "model.language_model.layers.35.mlp.experts": {
1358 "quant_algo": "W4A16_NVFP4",
1359 "group_size": 16
1360 },
1361 "model.language_model.layers.35.mlp.shared_expert.gate_proj": {
1362 "quant_algo": "W4A16_NVFP4",
1363 "group_size": 16
1364 },
1365 "model.language_model.layers.35.mlp.shared_expert.up_proj": {
1366 "quant_algo": "W4A16_NVFP4",
1367 "group_size": 16
1368 },
1369 "model.language_model.layers.35.mlp.shared_expert.down_proj": {
1370 "quant_algo": "W4A16_NVFP4",
1371 "group_size": 16
1372 },
1373 "model.language_model.layers.36.linear_attn.out_proj": {
1374 "quant_algo": "FP8"
1375 },
1376 "model.language_model.layers.36.linear_attn.in_proj_qkv": {
1377 "quant_algo": "FP8"
1378 },
1379 "model.language_model.layers.36.linear_attn.in_proj_z": {
1380 "quant_algo": "FP8"
1381 },
1382 "model.language_model.layers.36.mlp.experts": {
1383 "quant_algo": "W4A16_NVFP4",
1384 "group_size": 16
1385 },
1386 "model.language_model.layers.36.mlp.shared_expert.gate_proj": {
1387 "quant_algo": "W4A16_NVFP4",
1388 "group_size": 16
1389 },
1390 "model.language_model.layers.36.mlp.shared_expert.up_proj": {
1391 "quant_algo": "W4A16_NVFP4",
1392 "group_size": 16
1393 },
1394 "model.language_model.layers.36.mlp.shared_expert.down_proj": {
1395 "quant_algo": "W4A16_NVFP4",
1396 "group_size": 16
1397 },
1398 "model.language_model.layers.37.linear_attn.out_proj": {
1399 "quant_algo": "FP8"
1400 },
1401 "model.language_model.layers.37.linear_attn.in_proj_qkv": {
1402 "quant_algo": "FP8"
1403 },
1404 "model.language_model.layers.37.linear_attn.in_proj_z": {
1405 "quant_algo": "FP8"
1406 },
1407 "model.language_model.layers.37.mlp.experts": {
1408 "quant_algo": "W4A16_NVFP4",
1409 "group_size": 16
1410 },
1411 "model.language_model.layers.37.mlp.shared_expert.gate_proj": {
1412 "quant_algo": "W4A16_NVFP4",
1413 "group_size": 16
1414 },
1415 "model.language_model.layers.37.mlp.shared_expert.up_proj": {
1416 "quant_algo": "W4A16_NVFP4",
1417 "group_size": 16
1418 },
1419 "model.language_model.layers.37.mlp.shared_expert.down_proj": {
1420 "quant_algo": "W4A16_NVFP4",
1421 "group_size": 16
1422 },
1423 "model.language_model.layers.38.linear_attn.out_proj": {
1424 "quant_algo": "FP8"
1425 },
1426 "model.language_model.layers.38.linear_attn.in_proj_qkv": {
1427 "quant_algo": "FP8"
1428 },
1429 "model.language_model.layers.38.linear_attn.in_proj_z": {
1430 "quant_algo": "FP8"
1431 },
1432 "model.language_model.layers.38.mlp.experts": {
1433 "quant_algo": "W4A16_NVFP4",
1434 "group_size": 16
1435 },
1436 "model.language_model.layers.38.mlp.shared_expert.gate_proj": {
1437 "quant_algo": "W4A16_NVFP4",
1438 "group_size": 16
1439 },
1440 "model.language_model.layers.38.mlp.shared_expert.up_proj": {
1441 "quant_algo": "W4A16_NVFP4",
1442 "group_size": 16
1443 },
1444 "model.language_model.layers.38.mlp.shared_expert.down_proj": {
1445 "quant_algo": "W4A16_NVFP4",
1446 "group_size": 16
1447 },
1448 "model.language_model.layers.39.self_attn.q_proj": {
1449 "quant_algo": "FP8"
1450 },
1451 "model.language_model.layers.39.self_attn.k_proj": {
1452 "quant_algo": "FP8"
1453 },
1454 "model.language_model.layers.39.self_attn.v_proj": {
1455 "quant_algo": "FP8"
1456 },
1457 "model.language_model.layers.39.self_attn.o_proj": {
1458 "quant_algo": "FP8"
1459 },
1460 "model.language_model.layers.39.mlp.experts": {
1461 "quant_algo": "W4A16_NVFP4",
1462 "group_size": 16
1463 },
1464 "model.language_model.layers.39.mlp.shared_expert.gate_proj": {
1465 "quant_algo": "W4A16_NVFP4",
1466 "group_size": 16
1467 },
1468 "model.language_model.layers.39.mlp.shared_expert.up_proj": {
1469 "quant_algo": "W4A16_NVFP4",
1470 "group_size": 16
1471 },
1472 "model.language_model.layers.39.mlp.shared_expert.down_proj": {
1473 "quant_algo": "W4A16_NVFP4",
1474 "group_size": 16
1475 },
1476 "lm_head": {
1477 "quant_algo": "W4A16_NVFP4",
1478 "group_size": 16
1479 }
1480 },
1481 "ignore": [
1482 "mtp.layers.0*",
1483 "mtp*"
1484 ],
1485 "quant_algo": "MIXED_PRECISION",
1486 "producer": {
1487 "name": "modelopt",
1488 "version": "0.37.0"
1489 },
1490 "quant_method": "modelopt"
1491 }
1492 }
1493