{
  "architectures": [
    "Gemma4ForConditionalGeneration"
  ],
  "audio_config": null,
  "audio_token_id": 258881,
  "boa_token_id": 256000,
  "boi_token_id": 255999,
  "torch_dtype": "bfloat16",
  "eoa_token_id": 258883,
  "eoa_token_index": 258883,
  "eoi_token_id": 258882,
  "eos_token_id": 106,
  "image_token_id": 258880,
  "initializer_range": 0.02,
  "model_type": "gemma4",
  "pad_token_id": 0,
  "text_config": {
    "attention_bias": false,
    "attention_dropout": 0.0,
    "attention_k_eq_v": true,
    "bos_token_id": 2,
    "torch_dtype": "bfloat16",
    "enable_moe_block": true,
    "eos_token_id": 1,
    "final_logit_softcapping": 30.0,
    "global_head_dim": 512,
    "head_dim": 256,
    "hidden_activation": "gelu_pytorch_tanh",
    "hidden_size": 2816,
    "hidden_size_per_layer_input": 0,
    "initializer_range": 0.02,
    "intermediate_size": 2112,
    "layer_types": [
      "sliding_attention",
      "sliding_attention",
      "sliding_attention",
      "sliding_attention",
      "sliding_attention",
      "full_attention",
      "sliding_attention",
      "sliding_attention",
      "sliding_attention",
      "sliding_attention",
      "sliding_attention",
      "full_attention",
      "sliding_attention",
      "sliding_attention",
      "sliding_attention",
      "sliding_attention",
      "sliding_attention",
      "full_attention",
      "sliding_attention",
      "sliding_attention",
      "sliding_attention",
      "sliding_attention",
      "sliding_attention",
      "full_attention",
      "sliding_attention",
      "sliding_attention",
      "sliding_attention",
      "sliding_attention",
      "sliding_attention",
      "full_attention"
    ],
    "max_position_embeddings": 262144,
    "model_type": "gemma4_text",
    "moe_intermediate_size": 704,
    "num_attention_heads": 16,
    "num_experts": 128,
    "num_global_key_value_heads": 2,
    "num_hidden_layers": 30,
    "num_key_value_heads": 8,
    "num_kv_shared_layers": 0,
    "pad_token_id": 0,
    "rms_norm_eps": 1e-06,
    "rope_parameters": {
      "full_attention": {
        "partial_rotary_factor": 0.25,
        "rope_theta": 1000000.0,
        "rope_type": "proportional"
      },
      "sliding_attention": {
        "rope_theta": 10000.0,
        "rope_type": "default"
      }
    },
    "sliding_window": 1024,
    "tie_word_embeddings": true,
    "top_k_experts": 8,
    "use_bidirectional_attention": "vision",
    "use_cache": true,
    "use_double_wide_mlp": false,
    "vocab_size": 262144,
    "vocab_size_per_layer_input": 262144
  },
  "tie_word_embeddings": true,
  "transformers_version": "5.5.0.dev0",
  "unsloth_fixed": true,
  "video_token_id": 258884,
  "vision_config": {
    "_name_or_path": "",
    "architectures": null,
    "attention_bias": false,
    "attention_dropout": 0.0,
    "chunk_size_feed_forward": 0,
    "default_output_length": 280,
    "torch_dtype": "bfloat16",
    "global_head_dim": 72,
    "head_dim": 72,
    "hidden_activation": "gelu_pytorch_tanh",
    "hidden_size": 1152,
    "id2label": {
      "0": "LABEL_0",
      "1": "LABEL_1"
    },
    "initializer_range": 0.02,
    "intermediate_size": 4304,
    "is_encoder_decoder": false,
    "label2id": {
      "LABEL_0": 0,
      "LABEL_1": 1
    },
    "max_position_embeddings": 131072,
    "model_type": "gemma4_vision",
    "num_attention_heads": 16,
    "num_hidden_layers": 27,
    "num_key_value_heads": 16,
    "output_attentions": false,
    "output_hidden_states": false,
    "patch_size": 16,
    "pooling_kernel_size": 3,
    "position_embedding_size": 10240,
    "problem_type": null,
    "return_dict": true,
    "rms_norm_eps": 1e-06,
    "rope_parameters": {
      "rope_theta": 100.0,
      "rope_type": "default"
    },
    "standardize": true,
    "use_clipped_linears": false
  },
  "vision_soft_tokens_per_image": 280
}