{
  "architectures": [
    "Gemma4ForConditionalGeneration"
  ],
  "audio_config": null,
  "audio_token_id": 258881,
  "boa_token_id": 256000,
  "boi_token_id": 255999,
  "dtype": "bfloat16",
  "eoa_token_id": 258883,
  "eoa_token_index": 258883,
  "eoi_token_id": 258882,
  "eos_token_id": [
    1,
    106
  ],
  "image_token_id": 258880,
  "initializer_range": 0.02,
  "model_type": "gemma4",
  "text_config": {
    "attention_bias": false,
    "attention_dropout": 0.0,
    "attention_k_eq_v": true,
    "bos_token_id": 2,
    "dtype": "bfloat16",
    "enable_moe_block": true,
    "eos_token_id": 1,
    "final_logit_softcapping": 30.0,
    "global_head_dim": 512,
    "head_dim": 256,
    "hidden_activation": "gelu_pytorch_tanh",
    "hidden_size": 2816,
    "hidden_size_per_layer_input": 0,
    "initializer_range": 0.02,
    "intermediate_size": 2112,
    "layer_types": [
      "sliding_attention",
      "sliding_attention",
      "sliding_attention",
      "sliding_attention",
      "sliding_attention",
      "full_attention",
      "sliding_attention",
      "sliding_attention",
      "sliding_attention",
      "sliding_attention",
      "sliding_attention",
      "full_attention",
      "sliding_attention",
      "sliding_attention",
      "sliding_attention",
      "sliding_attention",
      "sliding_attention",
      "full_attention",
      "sliding_attention",
      "sliding_attention",
      "sliding_attention",
      "sliding_attention",
      "sliding_attention",
      "full_attention",
      "sliding_attention",
      "sliding_attention",
      "sliding_attention",
      "sliding_attention",
      "sliding_attention",
      "full_attention"
    ],
    "max_position_embeddings": 262144,
    "model_type": "gemma4_text",
    "moe_intermediate_size": 704,
    "num_attention_heads": 16,
    "num_experts": 128,
    "num_global_key_value_heads": 2,
    "num_hidden_layers": 30,
    "num_key_value_heads": 8,
    "num_kv_shared_layers": 0,
    "pad_token_id": 0,
    "rms_norm_eps": 1e-06,
    "rope_parameters": {
      "full_attention": {
        "partial_rotary_factor": 0.25,
        "rope_theta": 1000000.0,
        "rope_type": "proportional"
      },
      "sliding_attention": {
        "rope_theta": 10000.0,
        "rope_type": "default"
      }
    },
    "sliding_window": 1024,
    "tie_word_embeddings": true,
    "top_k_experts": 8,
    "use_bidirectional_attention": "vision",
    "use_cache": true,
    "use_double_wide_mlp": false,
    "vocab_size": 262144,
    "vocab_size_per_layer_input": 262144
  },
  "tie_word_embeddings": true,
  "transformers_version": "5.5.0.dev0",
  "video_token_id": 258884,
  "vision_config": {
    "_name_or_path": "",
    "architectures": null,
    "attention_bias": false,
    "attention_dropout": 0.0,
    "chunk_size_feed_forward": 0,
    "default_output_length": 280,
    "dtype": "bfloat16",
    "global_head_dim": 72,
    "head_dim": 72,
    "hidden_activation": "gelu_pytorch_tanh",
    "hidden_size": 1152,
    "id2label": {
      "0": "LABEL_0",
      "1": "LABEL_1"
    },
    "initializer_range": 0.02,
    "intermediate_size": 4304,
    "is_encoder_decoder": false,
    "label2id": {
      "LABEL_0": 0,
      "LABEL_1": 1
    },
    "max_position_embeddings": 131072,
    "model_type": "gemma4_vision",
    "num_attention_heads": 16,
    "num_hidden_layers": 27,
    "num_key_value_heads": 16,
    "output_attentions": false,
    "output_hidden_states": false,
    "patch_size": 16,
    "pooling_kernel_size": 3,
    "position_embedding_size": 10240,
    "problem_type": null,
    "return_dict": true,
    "rms_norm_eps": 1e-06,
    "rope_parameters": {
      "rope_theta": 100.0,
      "rope_type": "default"
    },
    "standardize": true,
    "use_clipped_linears": false
  },
  "vision_soft_tokens_per_image": 280
}