config.json
23.5 KB · 494 lines · json Raw
1 {
2 "architectures": [
3 "Gemma4ForConditionalGeneration"
4 ],
5 "audio_config": null,
6 "audio_token_id": 258881,
7 "boa_token_id": 256000,
8 "boi_token_id": 255999,
9 "dtype": "bfloat16",
10 "eoa_token_id": 258883,
11 "eoa_token_index": 258883,
12 "eoi_token_id": 258882,
13 "eos_token_id": [
14 1,
15 106
16 ],
17 "image_token_id": 258880,
18 "initializer_range": 0.02,
19 "model_type": "gemma4",
20 "quantization_config": {
21 "config_groups": {
22 "group_0": {
23 "format": "pack-quantized",
24 "input_activations": null,
25 "output_activations": null,
26 "targets": [
27 "Linear"
28 ],
29 "weights": {
30 "actorder": null,
31 "block_structure": null,
32 "dynamic": false,
33 "group_size": 32,
34 "num_bits": 4,
35 "observer": "mse",
36 "observer_kwargs": {},
37 "scale_dtype": null,
38 "strategy": "group",
39 "symmetric": true,
40 "type": "int",
41 "zp_dtype": null
42 }
43 }
44 },
45 "format": "pack-quantized",
46 "global_compression_ratio": null,
47 "ignore": [
48 "model.language_model.layers.0.mlp.gate_proj",
49 "model.language_model.layers.0.mlp.up_proj",
50 "model.language_model.layers.0.mlp.down_proj",
51 "model.language_model.layers.0.router.proj",
52 "model.language_model.layers.1.mlp.gate_proj",
53 "model.language_model.layers.1.mlp.up_proj",
54 "model.language_model.layers.1.mlp.down_proj",
55 "model.language_model.layers.1.router.proj",
56 "model.language_model.layers.2.mlp.gate_proj",
57 "model.language_model.layers.2.mlp.up_proj",
58 "model.language_model.layers.2.mlp.down_proj",
59 "model.language_model.layers.2.router.proj",
60 "model.language_model.layers.3.mlp.gate_proj",
61 "model.language_model.layers.3.mlp.up_proj",
62 "model.language_model.layers.3.mlp.down_proj",
63 "model.language_model.layers.3.router.proj",
64 "model.language_model.layers.4.mlp.gate_proj",
65 "model.language_model.layers.4.mlp.up_proj",
66 "model.language_model.layers.4.mlp.down_proj",
67 "model.language_model.layers.4.router.proj",
68 "model.language_model.layers.5.mlp.gate_proj",
69 "model.language_model.layers.5.mlp.up_proj",
70 "model.language_model.layers.5.mlp.down_proj",
71 "model.language_model.layers.5.router.proj",
72 "model.language_model.layers.6.mlp.gate_proj",
73 "model.language_model.layers.6.mlp.up_proj",
74 "model.language_model.layers.6.mlp.down_proj",
75 "model.language_model.layers.6.router.proj",
76 "model.language_model.layers.7.mlp.gate_proj",
77 "model.language_model.layers.7.mlp.up_proj",
78 "model.language_model.layers.7.mlp.down_proj",
79 "model.language_model.layers.7.router.proj",
80 "model.language_model.layers.8.mlp.gate_proj",
81 "model.language_model.layers.8.mlp.up_proj",
82 "model.language_model.layers.8.mlp.down_proj",
83 "model.language_model.layers.8.router.proj",
84 "model.language_model.layers.9.mlp.gate_proj",
85 "model.language_model.layers.9.mlp.up_proj",
86 "model.language_model.layers.9.mlp.down_proj",
87 "model.language_model.layers.9.router.proj",
88 "model.language_model.layers.10.mlp.gate_proj",
89 "model.language_model.layers.10.mlp.up_proj",
90 "model.language_model.layers.10.mlp.down_proj",
91 "model.language_model.layers.10.router.proj",
92 "model.language_model.layers.11.mlp.gate_proj",
93 "model.language_model.layers.11.mlp.up_proj",
94 "model.language_model.layers.11.mlp.down_proj",
95 "model.language_model.layers.11.router.proj",
96 "model.language_model.layers.12.mlp.gate_proj",
97 "model.language_model.layers.12.mlp.up_proj",
98 "model.language_model.layers.12.mlp.down_proj",
99 "model.language_model.layers.12.router.proj",
100 "model.language_model.layers.13.mlp.gate_proj",
101 "model.language_model.layers.13.mlp.up_proj",
102 "model.language_model.layers.13.mlp.down_proj",
103 "model.language_model.layers.13.router.proj",
104 "model.language_model.layers.14.mlp.gate_proj",
105 "model.language_model.layers.14.mlp.up_proj",
106 "model.language_model.layers.14.mlp.down_proj",
107 "model.language_model.layers.14.router.proj",
108 "model.language_model.layers.15.mlp.gate_proj",
109 "model.language_model.layers.15.mlp.up_proj",
110 "model.language_model.layers.15.mlp.down_proj",
111 "model.language_model.layers.15.router.proj",
112 "model.language_model.layers.16.mlp.gate_proj",
113 "model.language_model.layers.16.mlp.up_proj",
114 "model.language_model.layers.16.mlp.down_proj",
115 "model.language_model.layers.16.router.proj",
116 "model.language_model.layers.17.mlp.gate_proj",
117 "model.language_model.layers.17.mlp.up_proj",
118 "model.language_model.layers.17.mlp.down_proj",
119 "model.language_model.layers.17.router.proj",
120 "model.language_model.layers.18.mlp.gate_proj",
121 "model.language_model.layers.18.mlp.up_proj",
122 "model.language_model.layers.18.mlp.down_proj",
123 "model.language_model.layers.18.router.proj",
124 "model.language_model.layers.19.mlp.gate_proj",
125 "model.language_model.layers.19.mlp.up_proj",
126 "model.language_model.layers.19.mlp.down_proj",
127 "model.language_model.layers.19.router.proj",
128 "model.language_model.layers.20.mlp.gate_proj",
129 "model.language_model.layers.20.mlp.up_proj",
130 "model.language_model.layers.20.mlp.down_proj",
131 "model.language_model.layers.20.router.proj",
132 "model.language_model.layers.21.mlp.gate_proj",
133 "model.language_model.layers.21.mlp.up_proj",
134 "model.language_model.layers.21.mlp.down_proj",
135 "model.language_model.layers.21.router.proj",
136 "model.language_model.layers.22.mlp.gate_proj",
137 "model.language_model.layers.22.mlp.up_proj",
138 "model.language_model.layers.22.mlp.down_proj",
139 "model.language_model.layers.22.router.proj",
140 "model.language_model.layers.23.mlp.gate_proj",
141 "model.language_model.layers.23.mlp.up_proj",
142 "model.language_model.layers.23.mlp.down_proj",
143 "model.language_model.layers.23.router.proj",
144 "model.language_model.layers.24.mlp.gate_proj",
145 "model.language_model.layers.24.mlp.up_proj",
146 "model.language_model.layers.24.mlp.down_proj",
147 "model.language_model.layers.24.router.proj",
148 "model.language_model.layers.25.mlp.gate_proj",
149 "model.language_model.layers.25.mlp.up_proj",
150 "model.language_model.layers.25.mlp.down_proj",
151 "model.language_model.layers.25.router.proj",
152 "model.language_model.layers.26.mlp.gate_proj",
153 "model.language_model.layers.26.mlp.up_proj",
154 "model.language_model.layers.26.mlp.down_proj",
155 "model.language_model.layers.26.router.proj",
156 "model.language_model.layers.27.mlp.gate_proj",
157 "model.language_model.layers.27.mlp.up_proj",
158 "model.language_model.layers.27.mlp.down_proj",
159 "model.language_model.layers.27.router.proj",
160 "model.language_model.layers.28.mlp.gate_proj",
161 "model.language_model.layers.28.mlp.up_proj",
162 "model.language_model.layers.28.mlp.down_proj",
163 "model.language_model.layers.28.router.proj",
164 "model.language_model.layers.29.mlp.gate_proj",
165 "model.language_model.layers.29.mlp.up_proj",
166 "model.language_model.layers.29.mlp.down_proj",
167 "model.language_model.layers.29.router.proj",
168 "model.vision_tower.patch_embedder.input_proj",
169 "model.vision_tower.encoder.layers.0.self_attn.q_proj.linear",
170 "model.vision_tower.encoder.layers.0.self_attn.k_proj.linear",
171 "model.vision_tower.encoder.layers.0.self_attn.v_proj.linear",
172 "model.vision_tower.encoder.layers.0.self_attn.o_proj.linear",
173 "model.vision_tower.encoder.layers.0.mlp.gate_proj.linear",
174 "model.vision_tower.encoder.layers.0.mlp.up_proj.linear",
175 "model.vision_tower.encoder.layers.0.mlp.down_proj.linear",
176 "model.vision_tower.encoder.layers.1.self_attn.q_proj.linear",
177 "model.vision_tower.encoder.layers.1.self_attn.k_proj.linear",
178 "model.vision_tower.encoder.layers.1.self_attn.v_proj.linear",
179 "model.vision_tower.encoder.layers.1.self_attn.o_proj.linear",
180 "model.vision_tower.encoder.layers.1.mlp.gate_proj.linear",
181 "model.vision_tower.encoder.layers.1.mlp.up_proj.linear",
182 "model.vision_tower.encoder.layers.1.mlp.down_proj.linear",
183 "model.vision_tower.encoder.layers.2.self_attn.q_proj.linear",
184 "model.vision_tower.encoder.layers.2.self_attn.k_proj.linear",
185 "model.vision_tower.encoder.layers.2.self_attn.v_proj.linear",
186 "model.vision_tower.encoder.layers.2.self_attn.o_proj.linear",
187 "model.vision_tower.encoder.layers.2.mlp.gate_proj.linear",
188 "model.vision_tower.encoder.layers.2.mlp.up_proj.linear",
189 "model.vision_tower.encoder.layers.2.mlp.down_proj.linear",
190 "model.vision_tower.encoder.layers.3.self_attn.q_proj.linear",
191 "model.vision_tower.encoder.layers.3.self_attn.k_proj.linear",
192 "model.vision_tower.encoder.layers.3.self_attn.v_proj.linear",
193 "model.vision_tower.encoder.layers.3.self_attn.o_proj.linear",
194 "model.vision_tower.encoder.layers.3.mlp.gate_proj.linear",
195 "model.vision_tower.encoder.layers.3.mlp.up_proj.linear",
196 "model.vision_tower.encoder.layers.3.mlp.down_proj.linear",
197 "model.vision_tower.encoder.layers.4.self_attn.q_proj.linear",
198 "model.vision_tower.encoder.layers.4.self_attn.k_proj.linear",
199 "model.vision_tower.encoder.layers.4.self_attn.v_proj.linear",
200 "model.vision_tower.encoder.layers.4.self_attn.o_proj.linear",
201 "model.vision_tower.encoder.layers.4.mlp.gate_proj.linear",
202 "model.vision_tower.encoder.layers.4.mlp.up_proj.linear",
203 "model.vision_tower.encoder.layers.4.mlp.down_proj.linear",
204 "model.vision_tower.encoder.layers.5.self_attn.q_proj.linear",
205 "model.vision_tower.encoder.layers.5.self_attn.k_proj.linear",
206 "model.vision_tower.encoder.layers.5.self_attn.v_proj.linear",
207 "model.vision_tower.encoder.layers.5.self_attn.o_proj.linear",
208 "model.vision_tower.encoder.layers.5.mlp.gate_proj.linear",
209 "model.vision_tower.encoder.layers.5.mlp.up_proj.linear",
210 "model.vision_tower.encoder.layers.5.mlp.down_proj.linear",
211 "model.vision_tower.encoder.layers.6.self_attn.q_proj.linear",
212 "model.vision_tower.encoder.layers.6.self_attn.k_proj.linear",
213 "model.vision_tower.encoder.layers.6.self_attn.v_proj.linear",
214 "model.vision_tower.encoder.layers.6.self_attn.o_proj.linear",
215 "model.vision_tower.encoder.layers.6.mlp.gate_proj.linear",
216 "model.vision_tower.encoder.layers.6.mlp.up_proj.linear",
217 "model.vision_tower.encoder.layers.6.mlp.down_proj.linear",
218 "model.vision_tower.encoder.layers.7.self_attn.q_proj.linear",
219 "model.vision_tower.encoder.layers.7.self_attn.k_proj.linear",
220 "model.vision_tower.encoder.layers.7.self_attn.v_proj.linear",
221 "model.vision_tower.encoder.layers.7.self_attn.o_proj.linear",
222 "model.vision_tower.encoder.layers.7.mlp.gate_proj.linear",
223 "model.vision_tower.encoder.layers.7.mlp.up_proj.linear",
224 "model.vision_tower.encoder.layers.7.mlp.down_proj.linear",
225 "model.vision_tower.encoder.layers.8.self_attn.q_proj.linear",
226 "model.vision_tower.encoder.layers.8.self_attn.k_proj.linear",
227 "model.vision_tower.encoder.layers.8.self_attn.v_proj.linear",
228 "model.vision_tower.encoder.layers.8.self_attn.o_proj.linear",
229 "model.vision_tower.encoder.layers.8.mlp.gate_proj.linear",
230 "model.vision_tower.encoder.layers.8.mlp.up_proj.linear",
231 "model.vision_tower.encoder.layers.8.mlp.down_proj.linear",
232 "model.vision_tower.encoder.layers.9.self_attn.q_proj.linear",
233 "model.vision_tower.encoder.layers.9.self_attn.k_proj.linear",
234 "model.vision_tower.encoder.layers.9.self_attn.v_proj.linear",
235 "model.vision_tower.encoder.layers.9.self_attn.o_proj.linear",
236 "model.vision_tower.encoder.layers.9.mlp.gate_proj.linear",
237 "model.vision_tower.encoder.layers.9.mlp.up_proj.linear",
238 "model.vision_tower.encoder.layers.9.mlp.down_proj.linear",
239 "model.vision_tower.encoder.layers.10.self_attn.q_proj.linear",
240 "model.vision_tower.encoder.layers.10.self_attn.k_proj.linear",
241 "model.vision_tower.encoder.layers.10.self_attn.v_proj.linear",
242 "model.vision_tower.encoder.layers.10.self_attn.o_proj.linear",
243 "model.vision_tower.encoder.layers.10.mlp.gate_proj.linear",
244 "model.vision_tower.encoder.layers.10.mlp.up_proj.linear",
245 "model.vision_tower.encoder.layers.10.mlp.down_proj.linear",
246 "model.vision_tower.encoder.layers.11.self_attn.q_proj.linear",
247 "model.vision_tower.encoder.layers.11.self_attn.k_proj.linear",
248 "model.vision_tower.encoder.layers.11.self_attn.v_proj.linear",
249 "model.vision_tower.encoder.layers.11.self_attn.o_proj.linear",
250 "model.vision_tower.encoder.layers.11.mlp.gate_proj.linear",
251 "model.vision_tower.encoder.layers.11.mlp.up_proj.linear",
252 "model.vision_tower.encoder.layers.11.mlp.down_proj.linear",
253 "model.vision_tower.encoder.layers.12.self_attn.q_proj.linear",
254 "model.vision_tower.encoder.layers.12.self_attn.k_proj.linear",
255 "model.vision_tower.encoder.layers.12.self_attn.v_proj.linear",
256 "model.vision_tower.encoder.layers.12.self_attn.o_proj.linear",
257 "model.vision_tower.encoder.layers.12.mlp.gate_proj.linear",
258 "model.vision_tower.encoder.layers.12.mlp.up_proj.linear",
259 "model.vision_tower.encoder.layers.12.mlp.down_proj.linear",
260 "model.vision_tower.encoder.layers.13.self_attn.q_proj.linear",
261 "model.vision_tower.encoder.layers.13.self_attn.k_proj.linear",
262 "model.vision_tower.encoder.layers.13.self_attn.v_proj.linear",
263 "model.vision_tower.encoder.layers.13.self_attn.o_proj.linear",
264 "model.vision_tower.encoder.layers.13.mlp.gate_proj.linear",
265 "model.vision_tower.encoder.layers.13.mlp.up_proj.linear",
266 "model.vision_tower.encoder.layers.13.mlp.down_proj.linear",
267 "model.vision_tower.encoder.layers.14.self_attn.q_proj.linear",
268 "model.vision_tower.encoder.layers.14.self_attn.k_proj.linear",
269 "model.vision_tower.encoder.layers.14.self_attn.v_proj.linear",
270 "model.vision_tower.encoder.layers.14.self_attn.o_proj.linear",
271 "model.vision_tower.encoder.layers.14.mlp.gate_proj.linear",
272 "model.vision_tower.encoder.layers.14.mlp.up_proj.linear",
273 "model.vision_tower.encoder.layers.14.mlp.down_proj.linear",
274 "model.vision_tower.encoder.layers.15.self_attn.q_proj.linear",
275 "model.vision_tower.encoder.layers.15.self_attn.k_proj.linear",
276 "model.vision_tower.encoder.layers.15.self_attn.v_proj.linear",
277 "model.vision_tower.encoder.layers.15.self_attn.o_proj.linear",
278 "model.vision_tower.encoder.layers.15.mlp.gate_proj.linear",
279 "model.vision_tower.encoder.layers.15.mlp.up_proj.linear",
280 "model.vision_tower.encoder.layers.15.mlp.down_proj.linear",
281 "model.vision_tower.encoder.layers.16.self_attn.q_proj.linear",
282 "model.vision_tower.encoder.layers.16.self_attn.k_proj.linear",
283 "model.vision_tower.encoder.layers.16.self_attn.v_proj.linear",
284 "model.vision_tower.encoder.layers.16.self_attn.o_proj.linear",
285 "model.vision_tower.encoder.layers.16.mlp.gate_proj.linear",
286 "model.vision_tower.encoder.layers.16.mlp.up_proj.linear",
287 "model.vision_tower.encoder.layers.16.mlp.down_proj.linear",
288 "model.vision_tower.encoder.layers.17.self_attn.q_proj.linear",
289 "model.vision_tower.encoder.layers.17.self_attn.k_proj.linear",
290 "model.vision_tower.encoder.layers.17.self_attn.v_proj.linear",
291 "model.vision_tower.encoder.layers.17.self_attn.o_proj.linear",
292 "model.vision_tower.encoder.layers.17.mlp.gate_proj.linear",
293 "model.vision_tower.encoder.layers.17.mlp.up_proj.linear",
294 "model.vision_tower.encoder.layers.17.mlp.down_proj.linear",
295 "model.vision_tower.encoder.layers.18.self_attn.q_proj.linear",
296 "model.vision_tower.encoder.layers.18.self_attn.k_proj.linear",
297 "model.vision_tower.encoder.layers.18.self_attn.v_proj.linear",
298 "model.vision_tower.encoder.layers.18.self_attn.o_proj.linear",
299 "model.vision_tower.encoder.layers.18.mlp.gate_proj.linear",
300 "model.vision_tower.encoder.layers.18.mlp.up_proj.linear",
301 "model.vision_tower.encoder.layers.18.mlp.down_proj.linear",
302 "model.vision_tower.encoder.layers.19.self_attn.q_proj.linear",
303 "model.vision_tower.encoder.layers.19.self_attn.k_proj.linear",
304 "model.vision_tower.encoder.layers.19.self_attn.v_proj.linear",
305 "model.vision_tower.encoder.layers.19.self_attn.o_proj.linear",
306 "model.vision_tower.encoder.layers.19.mlp.gate_proj.linear",
307 "model.vision_tower.encoder.layers.19.mlp.up_proj.linear",
308 "model.vision_tower.encoder.layers.19.mlp.down_proj.linear",
309 "model.vision_tower.encoder.layers.20.self_attn.q_proj.linear",
310 "model.vision_tower.encoder.layers.20.self_attn.k_proj.linear",
311 "model.vision_tower.encoder.layers.20.self_attn.v_proj.linear",
312 "model.vision_tower.encoder.layers.20.self_attn.o_proj.linear",
313 "model.vision_tower.encoder.layers.20.mlp.gate_proj.linear",
314 "model.vision_tower.encoder.layers.20.mlp.up_proj.linear",
315 "model.vision_tower.encoder.layers.20.mlp.down_proj.linear",
316 "model.vision_tower.encoder.layers.21.self_attn.q_proj.linear",
317 "model.vision_tower.encoder.layers.21.self_attn.k_proj.linear",
318 "model.vision_tower.encoder.layers.21.self_attn.v_proj.linear",
319 "model.vision_tower.encoder.layers.21.self_attn.o_proj.linear",
320 "model.vision_tower.encoder.layers.21.mlp.gate_proj.linear",
321 "model.vision_tower.encoder.layers.21.mlp.up_proj.linear",
322 "model.vision_tower.encoder.layers.21.mlp.down_proj.linear",
323 "model.vision_tower.encoder.layers.22.self_attn.q_proj.linear",
324 "model.vision_tower.encoder.layers.22.self_attn.k_proj.linear",
325 "model.vision_tower.encoder.layers.22.self_attn.v_proj.linear",
326 "model.vision_tower.encoder.layers.22.self_attn.o_proj.linear",
327 "model.vision_tower.encoder.layers.22.mlp.gate_proj.linear",
328 "model.vision_tower.encoder.layers.22.mlp.up_proj.linear",
329 "model.vision_tower.encoder.layers.22.mlp.down_proj.linear",
330 "model.vision_tower.encoder.layers.23.self_attn.q_proj.linear",
331 "model.vision_tower.encoder.layers.23.self_attn.k_proj.linear",
332 "model.vision_tower.encoder.layers.23.self_attn.v_proj.linear",
333 "model.vision_tower.encoder.layers.23.self_attn.o_proj.linear",
334 "model.vision_tower.encoder.layers.23.mlp.gate_proj.linear",
335 "model.vision_tower.encoder.layers.23.mlp.up_proj.linear",
336 "model.vision_tower.encoder.layers.23.mlp.down_proj.linear",
337 "model.vision_tower.encoder.layers.24.self_attn.q_proj.linear",
338 "model.vision_tower.encoder.layers.24.self_attn.k_proj.linear",
339 "model.vision_tower.encoder.layers.24.self_attn.v_proj.linear",
340 "model.vision_tower.encoder.layers.24.self_attn.o_proj.linear",
341 "model.vision_tower.encoder.layers.24.mlp.gate_proj.linear",
342 "model.vision_tower.encoder.layers.24.mlp.up_proj.linear",
343 "model.vision_tower.encoder.layers.24.mlp.down_proj.linear",
344 "model.vision_tower.encoder.layers.25.self_attn.q_proj.linear",
345 "model.vision_tower.encoder.layers.25.self_attn.k_proj.linear",
346 "model.vision_tower.encoder.layers.25.self_attn.v_proj.linear",
347 "model.vision_tower.encoder.layers.25.self_attn.o_proj.linear",
348 "model.vision_tower.encoder.layers.25.mlp.gate_proj.linear",
349 "model.vision_tower.encoder.layers.25.mlp.up_proj.linear",
350 "model.vision_tower.encoder.layers.25.mlp.down_proj.linear",
351 "model.vision_tower.encoder.layers.26.self_attn.q_proj.linear",
352 "model.vision_tower.encoder.layers.26.self_attn.k_proj.linear",
353 "model.vision_tower.encoder.layers.26.self_attn.v_proj.linear",
354 "model.vision_tower.encoder.layers.26.self_attn.o_proj.linear",
355 "model.vision_tower.encoder.layers.26.mlp.gate_proj.linear",
356 "model.vision_tower.encoder.layers.26.mlp.up_proj.linear",
357 "model.vision_tower.encoder.layers.26.mlp.down_proj.linear",
358 "model.embed_vision.embedding_projection",
359 "lm_head"
360 ],
361 "kv_cache_scheme": null,
362 "quant_method": "compressed-tensors",
363 "quantization_status": "compressed",
364 "sparsity_config": {},
365 "transform_config": {},
366 "version": "0.14.1.a20260326"
367 },
368 "text_config": {
369 "attention_bias": false,
370 "attention_dropout": 0.0,
371 "attention_k_eq_v": true,
372 "bos_token_id": 2,
373 "dtype": "bfloat16",
374 "enable_moe_block": true,
375 "eos_token_id": 1,
376 "final_logit_softcapping": 30.0,
377 "global_head_dim": 512,
378 "head_dim": 256,
379 "hidden_activation": "gelu_pytorch_tanh",
380 "hidden_size": 2816,
381 "hidden_size_per_layer_input": 0,
382 "initializer_range": 0.02,
383 "intermediate_size": 2112,
384 "layer_types": [
385 "sliding_attention",
386 "sliding_attention",
387 "sliding_attention",
388 "sliding_attention",
389 "sliding_attention",
390 "full_attention",
391 "sliding_attention",
392 "sliding_attention",
393 "sliding_attention",
394 "sliding_attention",
395 "sliding_attention",
396 "full_attention",
397 "sliding_attention",
398 "sliding_attention",
399 "sliding_attention",
400 "sliding_attention",
401 "sliding_attention",
402 "full_attention",
403 "sliding_attention",
404 "sliding_attention",
405 "sliding_attention",
406 "sliding_attention",
407 "sliding_attention",
408 "full_attention",
409 "sliding_attention",
410 "sliding_attention",
411 "sliding_attention",
412 "sliding_attention",
413 "sliding_attention",
414 "full_attention"
415 ],
416 "max_position_embeddings": 262144,
417 "model_type": "gemma4_text",
418 "moe_intermediate_size": 704,
419 "num_attention_heads": 16,
420 "num_experts": 128,
421 "num_global_key_value_heads": 2,
422 "num_hidden_layers": 30,
423 "num_key_value_heads": 8,
424 "num_kv_shared_layers": 0,
425 "pad_token_id": 0,
426 "rms_norm_eps": 1e-06,
427 "rope_parameters": {
428 "full_attention": {
429 "partial_rotary_factor": 0.25,
430 "rope_theta": 1000000.0,
431 "rope_type": "proportional"
432 },
433 "sliding_attention": {
434 "rope_theta": 10000.0,
435 "rope_type": "default"
436 }
437 },
438 "sliding_window": 1024,
439 "tie_word_embeddings": true,
440 "top_k_experts": 8,
441 "use_bidirectional_attention": "vision",
442 "use_cache": true,
443 "use_double_wide_mlp": false,
444 "vocab_size": 262144,
445 "vocab_size_per_layer_input": 262144
446 },
447 "tie_word_embeddings": true,
448 "transformers_version": "5.5.0.dev0",
449 "video_token_id": 258884,
450 "vision_config": {
451 "_name_or_path": "",
452 "architectures": null,
453 "attention_bias": false,
454 "attention_dropout": 0.0,
455 "chunk_size_feed_forward": 0,
456 "default_output_length": 280,
457 "dtype": "bfloat16",
458 "global_head_dim": 72,
459 "head_dim": 72,
460 "hidden_activation": "gelu_pytorch_tanh",
461 "hidden_size": 1152,
462 "id2label": {
463 "0": "LABEL_0",
464 "1": "LABEL_1"
465 },
466 "initializer_range": 0.02,
467 "intermediate_size": 4304,
468 "is_encoder_decoder": false,
469 "label2id": {
470 "LABEL_0": 0,
471 "LABEL_1": 1
472 },
473 "max_position_embeddings": 131072,
474 "model_type": "gemma4_vision",
475 "num_attention_heads": 16,
476 "num_hidden_layers": 27,
477 "num_key_value_heads": 16,
478 "output_attentions": false,
479 "output_hidden_states": false,
480 "patch_size": 16,
481 "pooling_kernel_size": 3,
482 "position_embedding_size": 10240,
483 "problem_type": null,
484 "return_dict": true,
485 "rms_norm_eps": 1e-06,
486 "rope_parameters": {
487 "rope_theta": 100.0,
488 "rope_type": "default"
489 },
490 "standardize": true,
491 "use_clipped_linears": false
492 },
493 "vision_soft_tokens_per_image": 280
494 }