config.json
4.8 KB · 191 lines · json Raw
1 {
2 "architectures": [
3 "Gemma4ForConditionalGeneration"
4 ],
5 "audio_config": {
6 "_name_or_path": "",
7 "architectures": null,
8 "attention_chunk_size": 12,
9 "attention_context_left": 13,
10 "attention_context_right": 0,
11 "attention_invalid_logits_value": -1000000000.0,
12 "attention_logit_cap": 50.0,
13 "chunk_size_feed_forward": 0,
14 "conv_kernel_size": 5,
15 "dtype": "bfloat16",
16 "gradient_clipping": 10000000000.0,
17 "hidden_act": "silu",
18 "hidden_size": 1024,
19 "id2label": {
20 "0": "LABEL_0",
21 "1": "LABEL_1"
22 },
23 "initializer_range": 0.02,
24 "is_encoder_decoder": false,
25 "label2id": {
26 "LABEL_0": 0,
27 "LABEL_1": 1
28 },
29 "model_type": "gemma4_audio",
30 "num_attention_heads": 8,
31 "num_hidden_layers": 12,
32 "output_attentions": false,
33 "output_hidden_states": false,
34 "output_proj_dims": 1536,
35 "problem_type": null,
36 "residual_weight": 0.5,
37 "return_dict": true,
38 "rms_norm_eps": 1e-06,
39 "subsampling_conv_channels": [
40 128,
41 32
42 ],
43 "use_clipped_linears": true
44 },
45 "audio_token_id": 258881,
46 "boa_token_id": 256000,
47 "boi_token_id": 255999,
48 "dtype": "bfloat16",
49 "eoa_token_id": 258883,
50 "eoa_token_index": 258883,
51 "eoi_token_id": 258882,
52 "eos_token_id": [
53 1,
54 106
55 ],
56 "image_token_id": 258880,
57 "initializer_range": 0.02,
58 "model_type": "gemma4",
59 "text_config": {
60 "attention_bias": false,
61 "attention_dropout": 0.0,
62 "attention_k_eq_v": false,
63 "bos_token_id": 2,
64 "dtype": "bfloat16",
65 "enable_moe_block": false,
66 "eos_token_id": 1,
67 "expert_intermediate_size": null,
68 "final_logit_softcapping": 30.0,
69 "global_head_dim": 512,
70 "head_dim": 256,
71 "hidden_activation": "gelu_pytorch_tanh",
72 "hidden_size": 1536,
73 "hidden_size_per_layer_input": 256,
74 "initializer_range": 0.02,
75 "intermediate_size": 6144,
76 "layer_types": [
77 "sliding_attention",
78 "sliding_attention",
79 "sliding_attention",
80 "sliding_attention",
81 "full_attention",
82 "sliding_attention",
83 "sliding_attention",
84 "sliding_attention",
85 "sliding_attention",
86 "full_attention",
87 "sliding_attention",
88 "sliding_attention",
89 "sliding_attention",
90 "sliding_attention",
91 "full_attention",
92 "sliding_attention",
93 "sliding_attention",
94 "sliding_attention",
95 "sliding_attention",
96 "full_attention",
97 "sliding_attention",
98 "sliding_attention",
99 "sliding_attention",
100 "sliding_attention",
101 "full_attention",
102 "sliding_attention",
103 "sliding_attention",
104 "sliding_attention",
105 "sliding_attention",
106 "full_attention",
107 "sliding_attention",
108 "sliding_attention",
109 "sliding_attention",
110 "sliding_attention",
111 "full_attention"
112 ],
113 "max_position_embeddings": 131072,
114 "model_type": "gemma4_text",
115 "num_attention_heads": 8,
116 "num_experts": null,
117 "num_global_key_value_heads": null,
118 "num_hidden_layers": 35,
119 "num_key_value_heads": 1,
120 "num_kv_shared_layers": 20,
121 "pad_token_id": 0,
122 "rms_norm_eps": 1e-06,
123 "rope_parameters": {
124 "full_attention": {
125 "partial_rotary_factor": 0.25,
126 "rope_theta": 1000000.0,
127 "rope_type": "proportional"
128 },
129 "sliding_attention": {
130 "rope_theta": 10000.0,
131 "rope_type": "default"
132 }
133 },
134 "sliding_window": 512,
135 "tie_word_embeddings": true,
136 "top_k_experts": null,
137 "use_bidirectional_attention": null,
138 "use_cache": true,
139 "use_double_wide_mlp": true,
140 "vocab_size": 262144,
141 "vocab_size_per_layer_input": 262144
142 },
143 "tie_word_embeddings": true,
144 "transformers_version": "5.5.0.dev0",
145 "video_token_id": 258884,
146 "vision_config": {
147 "_name_or_path": "",
148 "architectures": null,
149 "attention_bias": false,
150 "attention_dropout": 0.0,
151 "chunk_size_feed_forward": 0,
152 "default_output_length": 280,
153 "dtype": "bfloat16",
154 "global_head_dim": 64,
155 "head_dim": 64,
156 "hidden_activation": "gelu_pytorch_tanh",
157 "hidden_size": 768,
158 "id2label": {
159 "0": "LABEL_0",
160 "1": "LABEL_1"
161 },
162 "initializer_range": 0.02,
163 "intermediate_size": 3072,
164 "is_encoder_decoder": false,
165 "label2id": {
166 "LABEL_0": 0,
167 "LABEL_1": 1
168 },
169 "max_position_embeddings": 131072,
170 "model_type": "gemma4_vision",
171 "num_attention_heads": 12,
172 "num_hidden_layers": 16,
173 "num_key_value_heads": 12,
174 "output_attentions": false,
175 "output_hidden_states": false,
176 "patch_size": 16,
177 "pooling_kernel_size": 3,
178 "position_embedding_size": 10240,
179 "problem_type": null,
180 "return_dict": true,
181 "rms_norm_eps": 1e-06,
182 "rope_parameters": {
183 "rope_theta": 100.0,
184 "rope_type": "default"
185 },
186 "standardize": false,
187 "use_clipped_linears": true
188 },
189 "vision_soft_tokens_per_image": 280
190 }
191