config.json · gemma-4-26B-A4B-it-GGUF

config.json

3.8 KB · 145 lines · json Raw

1	`{`
2	`"architectures": [`
3	`"Gemma4ForConditionalGeneration"`
4	`],`
5	`"audio_config": null,`
6	`"audio_token_id": 258881,`
7	`"boa_token_id": 256000,`
8	`"boi_token_id": 255999,`
9	`"torch_dtype": "bfloat16",`
10	`"eoa_token_id": 258883,`
11	`"eoa_token_index": 258883,`
12	`"eoi_token_id": 258882,`
13	`"eos_token_id": 106,`
14	`"image_token_id": 258880,`
15	`"initializer_range": 0.02,`
16	`"model_type": "gemma4",`
17	`"pad_token_id": 0,`
18	`"text_config": {`
19	`"attention_bias": false,`
20	`"attention_dropout": 0.0,`
21	`"attention_k_eq_v": true,`
22	`"bos_token_id": 2,`
23	`"torch_dtype": "bfloat16",`
24	`"enable_moe_block": true,`
25	`"eos_token_id": 1,`
26	`"final_logit_softcapping": 30.0,`
27	`"global_head_dim": 512,`
28	`"head_dim": 256,`
29	`"hidden_activation": "gelu_pytorch_tanh",`
30	`"hidden_size": 2816,`
31	`"hidden_size_per_layer_input": 0,`
32	`"initializer_range": 0.02,`
33	`"intermediate_size": 2112,`
34	`"layer_types": [`
35	`"sliding_attention",`
36	`"sliding_attention",`
37	`"sliding_attention",`
38	`"sliding_attention",`
39	`"sliding_attention",`
40	`"full_attention",`
41	`"sliding_attention",`
42	`"sliding_attention",`
43	`"sliding_attention",`
44	`"sliding_attention",`
45	`"sliding_attention",`
46	`"full_attention",`
47	`"sliding_attention",`
48	`"sliding_attention",`
49	`"sliding_attention",`
50	`"sliding_attention",`
51	`"sliding_attention",`
52	`"full_attention",`
53	`"sliding_attention",`
54	`"sliding_attention",`
55	`"sliding_attention",`
56	`"sliding_attention",`
57	`"sliding_attention",`
58	`"full_attention",`
59	`"sliding_attention",`
60	`"sliding_attention",`
61	`"sliding_attention",`
62	`"sliding_attention",`
63	`"sliding_attention",`
64	`"full_attention"`
65	`],`
66	`"max_position_embeddings": 262144,`
67	`"model_type": "gemma4_text",`
68	`"moe_intermediate_size": 704,`
69	`"num_attention_heads": 16,`
70	`"num_experts": 128,`
71	`"num_global_key_value_heads": 2,`
72	`"num_hidden_layers": 30,`
73	`"num_key_value_heads": 8,`
74	`"num_kv_shared_layers": 0,`
75	`"pad_token_id": 0,`
76	`"rms_norm_eps": 1e-06,`
77	`"rope_parameters": {`
78	`"full_attention": {`
79	`"partial_rotary_factor": 0.25,`
80	`"rope_theta": 1000000.0,`
81	`"rope_type": "proportional"`
82	`},`
83	`"sliding_attention": {`
84	`"rope_theta": 10000.0,`
85	`"rope_type": "default"`
86	`}`
87	`},`
88	`"sliding_window": 1024,`
89	`"tie_word_embeddings": true,`
90	`"top_k_experts": 8,`
91	`"use_bidirectional_attention": "vision",`
92	`"use_cache": true,`
93	`"use_double_wide_mlp": false,`
94	`"vocab_size": 262144,`
95	`"vocab_size_per_layer_input": 262144`
96	`},`
97	`"tie_word_embeddings": true,`
98	`"transformers_version": "5.5.0.dev0",`
99	`"unsloth_fixed": true,`
100	`"video_token_id": 258884,`
101	`"vision_config": {`
102	`"_name_or_path": "",`
103	`"architectures": null,`
104	`"attention_bias": false,`
105	`"attention_dropout": 0.0,`
106	`"chunk_size_feed_forward": 0,`
107	`"default_output_length": 280,`
108	`"torch_dtype": "bfloat16",`
109	`"global_head_dim": 72,`
110	`"head_dim": 72,`
111	`"hidden_activation": "gelu_pytorch_tanh",`
112	`"hidden_size": 1152,`
113	`"id2label": {`
114	`"0": "LABEL_0",`
115	`"1": "LABEL_1"`
116	`},`
117	`"initializer_range": 0.02,`
118	`"intermediate_size": 4304,`
119	`"is_encoder_decoder": false,`
120	`"label2id": {`
121	`"LABEL_0": 0,`
122	`"LABEL_1": 1`
123	`},`
124	`"max_position_embeddings": 131072,`
125	`"model_type": "gemma4_vision",`
126	`"num_attention_heads": 16,`
127	`"num_hidden_layers": 27,`
128	`"num_key_value_heads": 16,`
129	`"output_attentions": false,`
130	`"output_hidden_states": false,`
131	`"patch_size": 16,`
132	`"pooling_kernel_size": 3,`
133	`"position_embedding_size": 10240,`
134	`"problem_type": null,`
135	`"return_dict": true,`
136	`"rms_norm_eps": 1e-06,`
137	`"rope_parameters": {`
138	`"rope_theta": 100.0,`
139	`"rope_type": "default"`
140	`},`
141	`"standardize": true,`
142	`"use_clipped_linears": false`
143	`},`
144	`"vision_soft_tokens_per_image": 280`
145	`}`