{
  "architectures": [
    "Gemma4ForConditionalGeneration"
  ],
  "audio_config": null,
  "audio_token_id": 258881,
  "boa_token_id": 256000,
  "boi_token_id": 255999,
  "dtype": "bfloat16",
  "eoa_token_id": 258883,
  "eoa_token_index": 258883,
  "eoi_token_id": 258882,
  "eos_token_id": [
    1,
    106
  ],
  "image_token_id": 258880,
  "initializer_range": 0.02,
  "model_type": "gemma4",
  "quantization_config": {
    "config_groups": {
      "FP8_BLOCK": {
        "format": "float-quantized",
        "input_activations": {
          "actorder": null,
          "block_structure": null,
          "dynamic": true,
          "group_size": 128,
          "num_bits": 8,
          "observer": null,
          "observer_kwargs": {},
          "scale_dtype": null,
          "strategy": "group",
          "symmetric": true,
          "type": "float",
          "zp_dtype": null
        },
        "output_activations": null,
        "targets": [
          "Linear"
        ],
        "weights": {
          "actorder": null,
          "block_structure": [
            128,
            128
          ],
          "dynamic": false,
          "group_size": null,
          "num_bits": 8,
          "observer": "memoryless_minmax",
          "observer_kwargs": {},
          "scale_dtype": null,
          "strategy": "block",
          "symmetric": true,
          "type": "float",
          "zp_dtype": null
        }
      }
    },
    "format": "float-quantized",
    "global_compression_ratio": null,
    "ignore": [
      "re:.*vision.*",
      "lm_head",
      "re:.*embed_tokens.*"
    ],
    "kv_cache_scheme": null,
    "quant_method": "compressed-tensors",
    "quantization_status": "compressed",
    "sparsity_config": {},
    "transform_config": {},
    "version": "0.14.0.1"
  },
  "text_config": {
    "attention_bias": false,
    "attention_dropout": 0.0,
    "attention_k_eq_v": true,
    "bos_token_id": 2,
    "dtype": "bfloat16",
    "enable_moe_block": false,
    "eos_token_id": 1,
    "expert_intermediate_size": null,
    "final_logit_softcapping": 30.0,
    "global_head_dim": 512,
    "head_dim": 256,
    "hidden_activation": "gelu_pytorch_tanh",
    "hidden_size": 5376,
    "hidden_size_per_layer_input": 0,
    "initializer_range": 0.02,
    "intermediate_size": 21504,
    "layer_types": [
      "sliding_attention",
      "sliding_attention",
      "sliding_attention",
      "sliding_attention",
      "sliding_attention",
      "full_attention",
      "sliding_attention",
      "sliding_attention",
      "sliding_attention",
      "sliding_attention",
      "sliding_attention",
      "full_attention",
      "sliding_attention",
      "sliding_attention",
      "sliding_attention",
      "sliding_attention",
      "sliding_attention",
      "full_attention",
      "sliding_attention",
      "sliding_attention",
      "sliding_attention",
      "sliding_attention",
      "sliding_attention",
      "full_attention",
      "sliding_attention",
      "sliding_attention",
      "sliding_attention",
      "sliding_attention",
      "sliding_attention",
      "full_attention",
      "sliding_attention",
      "sliding_attention",
      "sliding_attention",
      "sliding_attention",
      "sliding_attention",
      "full_attention",
      "sliding_attention",
      "sliding_attention",
      "sliding_attention",
      "sliding_attention",
      "sliding_attention",
      "full_attention",
      "sliding_attention",
      "sliding_attention",
      "sliding_attention",
      "sliding_attention",
      "sliding_attention",
      "full_attention",
      "sliding_attention",
      "sliding_attention",
      "sliding_attention",
      "sliding_attention",
      "sliding_attention",
      "full_attention",
      "sliding_attention",
      "sliding_attention",
      "sliding_attention",
      "sliding_attention",
      "sliding_attention",
      "full_attention"
    ],
    "max_position_embeddings": 262144,
    "model_type": "gemma4_text",
    "num_attention_heads": 32,
    "num_experts": null,
    "num_global_key_value_heads": 4,
    "num_hidden_layers": 60,
    "num_key_value_heads": 16,
    "num_kv_shared_layers": 0,
    "pad_token_id": 0,
    "rms_norm_eps": 1e-06,
    "rope_parameters": {
      "full_attention": {
        "partial_rotary_factor": 0.25,
        "rope_theta": 1000000.0,
        "rope_type": "proportional"
      },
      "sliding_attention": {
        "rope_theta": 10000.0,
        "rope_type": "default"
      }
    },
    "sliding_window": 1024,
    "tie_word_embeddings": true,
    "top_k_experts": null,
    "use_bidirectional_attention": "vision",
    "use_cache": true,
    "use_double_wide_mlp": false,
    "vocab_size": 262144,
    "vocab_size_per_layer_input": 262144
  },
  "tie_word_embeddings": true,
  "transformers_version": "5.5.0.dev0",
  "video_token_id": 258884,
  "vision_config": {
    "_name_or_path": "",
    "architectures": null,
    "attention_bias": false,
    "attention_dropout": 0.0,
    "chunk_size_feed_forward": 0,
    "default_output_length": 280,
    "dtype": "bfloat16",
    "global_head_dim": 72,
    "head_dim": 72,
    "hidden_activation": "gelu_pytorch_tanh",
    "hidden_size": 1152,
    "id2label": {
      "0": "LABEL_0",
      "1": "LABEL_1"
    },
    "initializer_range": 0.02,
    "intermediate_size": 4304,
    "is_encoder_decoder": false,
    "label2id": {
      "LABEL_0": 0,
      "LABEL_1": 1
    },
    "max_position_embeddings": 131072,
    "model_type": "gemma4_vision",
    "num_attention_heads": 16,
    "num_hidden_layers": 27,
    "num_key_value_heads": 16,
    "output_attentions": false,
    "output_hidden_states": false,
    "patch_size": 16,
    "pooling_kernel_size": 3,
    "position_embedding_size": 10240,
    "problem_type": null,
    "return_dict": true,
    "rms_norm_eps": 1e-06,
    "rope_parameters": {
      "rope_theta": 100.0,
      "rope_type": "default"
    },
    "standardize": true,
    "use_clipped_linears": false
  },
  "vision_soft_tokens_per_image": 280
}