text_encoder/config.json
3.1 KB · 136 lines · json Raw
1 {
2 "architectures": [
3 "Qwen2_5_VLForConditionalGeneration"
4 ],
5 "attention_dropout": 0.0,
6 "bos_token_id": 151643,
7 "eos_token_id": 151645,
8 "hidden_act": "silu",
9 "hidden_size": 3584,
10 "image_token_id": 151655,
11 "initializer_range": 0.02,
12 "intermediate_size": 18944,
13 "max_position_embeddings": 128000,
14 "max_window_layers": 28,
15 "model_type": "qwen2_5_vl",
16 "num_attention_heads": 28,
17 "num_hidden_layers": 28,
18 "num_key_value_heads": 4,
19 "rms_norm_eps": 1e-06,
20 "rope_scaling": {
21 "mrope_section": [
22 16,
23 24,
24 24
25 ],
26 "rope_type": "default",
27 "type": "default"
28 },
29 "rope_theta": 1000000.0,
30 "sliding_window": 32768,
31 "text_config": {
32 "architectures": [
33 "Qwen2_5_VLForConditionalGeneration"
34 ],
35 "attention_dropout": 0.0,
36 "bos_token_id": 151643,
37 "eos_token_id": 151645,
38 "hidden_act": "silu",
39 "hidden_size": 3584,
40 "image_token_id": null,
41 "initializer_range": 0.02,
42 "intermediate_size": 18944,
43 "layer_types": [
44 "full_attention",
45 "full_attention",
46 "full_attention",
47 "full_attention",
48 "full_attention",
49 "full_attention",
50 "full_attention",
51 "full_attention",
52 "full_attention",
53 "full_attention",
54 "full_attention",
55 "full_attention",
56 "full_attention",
57 "full_attention",
58 "full_attention",
59 "full_attention",
60 "full_attention",
61 "full_attention",
62 "full_attention",
63 "full_attention",
64 "full_attention",
65 "full_attention",
66 "full_attention",
67 "full_attention",
68 "full_attention",
69 "full_attention",
70 "full_attention",
71 "full_attention"
72 ],
73 "max_position_embeddings": 128000,
74 "max_window_layers": 28,
75 "model_type": "qwen2_5_vl_text",
76 "num_attention_heads": 28,
77 "num_hidden_layers": 28,
78 "num_key_value_heads": 4,
79 "rms_norm_eps": 1e-06,
80 "rope_scaling": {
81 "mrope_section": [
82 16,
83 24,
84 24
85 ],
86 "rope_type": "default",
87 "type": "default"
88 },
89 "rope_theta": 1000000.0,
90 "sliding_window": null,
91 "torch_dtype": "float32",
92 "use_cache": true,
93 "use_sliding_window": false,
94 "video_token_id": null,
95 "vision_end_token_id": 151653,
96 "vision_start_token_id": 151652,
97 "vision_token_id": 151654,
98 "vocab_size": 152064
99 },
100 "tie_word_embeddings": false,
101 "torch_dtype": "bfloat16",
102 "transformers_version": "4.53.1",
103 "use_cache": true,
104 "use_sliding_window": false,
105 "video_token_id": 151656,
106 "vision_config": {
107 "depth": 32,
108 "fullatt_block_indexes": [
109 7,
110 15,
111 23,
112 31
113 ],
114 "hidden_act": "silu",
115 "hidden_size": 1280,
116 "in_channels": 3,
117 "in_chans": 3,
118 "initializer_range": 0.02,
119 "intermediate_size": 3420,
120 "model_type": "qwen2_5_vl",
121 "num_heads": 16,
122 "out_hidden_size": 3584,
123 "patch_size": 14,
124 "spatial_merge_size": 2,
125 "spatial_patch_size": 14,
126 "temporal_patch_size": 2,
127 "tokens_per_second": 2,
128 "torch_dtype": "float32",
129 "window_size": 112
130 },
131 "vision_end_token_id": 151653,
132 "vision_start_token_id": 151652,
133 "vision_token_id": 151654,
134 "vocab_size": 152064
135 }
136