{
  "architectures": [
    "Gemma4ForConditionalGeneration"
  ],
  "audio_config": {
    "_name_or_path": "",
    "architectures": null,
    "attention_chunk_size": 12,
    "attention_context_left": 13,
    "attention_context_right": 0,
    "attention_invalid_logits_value": -1000000000.0,
    "attention_logit_cap": 50.0,
    "chunk_size_feed_forward": 0,
    "conv_kernel_size": 5,
    "dtype": "bfloat16",
    "gradient_clipping": 10000000000.0,
    "hidden_act": "silu",
    "hidden_size": 1024,
    "id2label": {
      "0": "LABEL_0",
      "1": "LABEL_1"
    },
    "initializer_range": 0.02,
    "is_encoder_decoder": false,
    "label2id": {
      "LABEL_0": 0,
      "LABEL_1": 1
    },
    "model_type": "gemma4_audio",
    "num_attention_heads": 8,
    "num_hidden_layers": 12,
    "output_attentions": false,
    "output_hidden_states": false,
    "output_proj_dims": 1536,
    "problem_type": null,
    "residual_weight": 0.5,
    "return_dict": true,
    "rms_norm_eps": 1e-06,
    "subsampling_conv_channels": [
      128,
      32
    ],
    "use_clipped_linears": true
  },
  "audio_token_id": 258881,
  "boa_token_id": 256000,
  "boi_token_id": 255999,
  "dtype": "bfloat16",
  "eoa_token_id": 258883,
  "eoa_token_index": 258883,
  "eoi_token_id": 258882,
  "eos_token_id": [
    1,
    106
  ],
  "image_token_id": 258880,
  "initializer_range": 0.02,
  "model_type": "gemma4",
  "text_config": {
    "attention_bias": false,
    "attention_dropout": 0.0,
    "attention_k_eq_v": false,
    "bos_token_id": 2,
    "dtype": "bfloat16",
    "enable_moe_block": false,
    "eos_token_id": 1,
    "expert_intermediate_size": null,
    "final_logit_softcapping": 30.0,
    "global_head_dim": 512,
    "head_dim": 256,
    "hidden_activation": "gelu_pytorch_tanh",
    "hidden_size": 2560,
    "hidden_size_per_layer_input": 256,
    "initializer_range": 0.02,
    "intermediate_size": 10240,
    "layer_types": [
      "sliding_attention",
      "sliding_attention",
      "sliding_attention",
      "sliding_attention",
      "sliding_attention",
      "full_attention",
      "sliding_attention",
      "sliding_attention",
      "sliding_attention",
      "sliding_attention",
      "sliding_attention",
      "full_attention",
      "sliding_attention",
      "sliding_attention",
      "sliding_attention",
      "sliding_attention",
      "sliding_attention",
      "full_attention",
      "sliding_attention",
      "sliding_attention",
      "sliding_attention",
      "sliding_attention",
      "sliding_attention",
      "full_attention",
      "sliding_attention",
      "sliding_attention",
      "sliding_attention",
      "sliding_attention",
      "sliding_attention",
      "full_attention",
      "sliding_attention",
      "sliding_attention",
      "sliding_attention",
      "sliding_attention",
      "sliding_attention",
      "full_attention",
      "sliding_attention",
      "sliding_attention",
      "sliding_attention",
      "sliding_attention",
      "sliding_attention",
      "full_attention"
    ],
    "max_position_embeddings": 131072,
    "model_type": "gemma4_text",
    "num_attention_heads": 8,
    "num_experts": null,
    "num_global_key_value_heads": null,
    "num_hidden_layers": 42,
    "num_key_value_heads": 2,
    "num_kv_shared_layers": 18,
    "pad_token_id": 0,
    "rms_norm_eps": 1e-06,
    "rope_parameters": {
      "full_attention": {
        "partial_rotary_factor": 0.25,
        "rope_theta": 1000000.0,
        "rope_type": "proportional"
      },
      "sliding_attention": {
        "rope_theta": 10000.0,
        "rope_type": "default"
      }
    },
    "sliding_window": 512,
    "tie_word_embeddings": true,
    "top_k_experts": null,
    "use_bidirectional_attention": null,
    "use_cache": true,
    "use_double_wide_mlp": false,
    "vocab_size": 262144,
    "vocab_size_per_layer_input": 262144
  },
  "tie_word_embeddings": true,
  "transformers_version": "5.5.0.dev0",
  "video_token_id": 258884,
  "vision_config": {
    "_name_or_path": "",
    "architectures": null,
    "attention_bias": false,
    "attention_dropout": 0.0,
    "chunk_size_feed_forward": 0,
    "default_output_length": 280,
    "dtype": "bfloat16",
    "global_head_dim": 64,
    "head_dim": 64,
    "hidden_activation": "gelu_pytorch_tanh",
    "hidden_size": 768,
    "id2label": {
      "0": "LABEL_0",
      "1": "LABEL_1"
    },
    "initializer_range": 0.02,
    "intermediate_size": 3072,
    "is_encoder_decoder": false,
    "label2id": {
      "LABEL_0": 0,
      "LABEL_1": 1
    },
    "max_position_embeddings": 131072,
    "model_type": "gemma4_vision",
    "num_attention_heads": 12,
    "num_hidden_layers": 16,
    "num_key_value_heads": 12,
    "output_attentions": false,
    "output_hidden_states": false,
    "patch_size": 16,
    "pooling_kernel_size": 3,
    "position_embedding_size": 10240,
    "problem_type": null,
    "return_dict": true,
    "rms_norm_eps": 1e-06,
    "rope_parameters": {
      "rope_theta": 100.0,
      "rope_type": "default"
    },
    "standardize": false,
    "use_clipped_linears": true
  },
  "vision_soft_tokens_per_image": 280
}