text_encoder/config.json
2.9 KB · 115 lines · json Raw
1 {
2 "architectures": [
3 "Gemma3ForConditionalGeneration"
4 ],
5 "boi_token_index": 255999,
6 "dtype": "float32",
7 "eoi_token_index": 256000,
8 "eos_token_id": [
9 1,
10 106
11 ],
12 "image_token_index": 262144,
13 "initializer_range": 0.02,
14 "mm_tokens_per_image": 256,
15 "model_type": "gemma3",
16 "text_config": {
17 "_sliding_window_pattern": 6,
18 "attention_bias": false,
19 "attention_dropout": 0.0,
20 "attn_logit_softcapping": null,
21 "cache_implementation": "hybrid",
22 "dtype": "float32",
23 "final_logit_softcapping": null,
24 "head_dim": 256,
25 "hidden_activation": "gelu_pytorch_tanh",
26 "hidden_size": 3840,
27 "initializer_range": 0.02,
28 "intermediate_size": 15360,
29 "layer_types": [
30 "sliding_attention",
31 "sliding_attention",
32 "sliding_attention",
33 "sliding_attention",
34 "sliding_attention",
35 "full_attention",
36 "sliding_attention",
37 "sliding_attention",
38 "sliding_attention",
39 "sliding_attention",
40 "sliding_attention",
41 "full_attention",
42 "sliding_attention",
43 "sliding_attention",
44 "sliding_attention",
45 "sliding_attention",
46 "sliding_attention",
47 "full_attention",
48 "sliding_attention",
49 "sliding_attention",
50 "sliding_attention",
51 "sliding_attention",
52 "sliding_attention",
53 "full_attention",
54 "sliding_attention",
55 "sliding_attention",
56 "sliding_attention",
57 "sliding_attention",
58 "sliding_attention",
59 "full_attention",
60 "sliding_attention",
61 "sliding_attention",
62 "sliding_attention",
63 "sliding_attention",
64 "sliding_attention",
65 "full_attention",
66 "sliding_attention",
67 "sliding_attention",
68 "sliding_attention",
69 "sliding_attention",
70 "sliding_attention",
71 "full_attention",
72 "sliding_attention",
73 "sliding_attention",
74 "sliding_attention",
75 "sliding_attention",
76 "sliding_attention",
77 "full_attention"
78 ],
79 "max_position_embeddings": 131072,
80 "model_type": "gemma3_text",
81 "num_attention_heads": 16,
82 "num_hidden_layers": 48,
83 "num_key_value_heads": 8,
84 "query_pre_attn_scalar": 256,
85 "rms_norm_eps": 1e-06,
86 "rope_local_base_freq": 10000,
87 "rope_scaling": {
88 "factor": 8.0,
89 "rope_type": "linear"
90 },
91 "rope_theta": 1000000,
92 "sliding_window": 1024,
93 "sliding_window_pattern": 6,
94 "use_bidirectional_attention": false,
95 "use_cache": true,
96 "vocab_size": 262208
97 },
98 "transformers_version": "4.57.3",
99 "vision_config": {
100 "attention_dropout": 0.0,
101 "dtype": "float32",
102 "hidden_act": "gelu_pytorch_tanh",
103 "hidden_size": 1152,
104 "image_size": 896,
105 "intermediate_size": 4304,
106 "layer_norm_eps": 1e-06,
107 "model_type": "siglip_vision_model",
108 "num_attention_heads": 16,
109 "num_channels": 3,
110 "num_hidden_layers": 27,
111 "patch_size": 14,
112 "vision_use_head": false
113 }
114 }
115