config.json
4.2 KB · 140 lines · json Raw
1 {
2 "architectures": [
3 "Qwen3_5ForConditionalGeneration"
4 ],
5 "image_token_id": 248056,
6 "language_model_only": false,
7 "model_type": "qwen3_5",
8 "text_config": {
9 "attention_bias": false,
10 "attention_dropout": 0.0,
11 "attn_output_gate": true,
12 "bos_token_id": 248044,
13 "dtype": "bfloat16",
14 "eos_token_id": 248044,
15 "full_attention_interval": 4,
16 "head_dim": 256,
17 "hidden_act": "silu",
18 "hidden_size": 5120,
19 "initializer_range": 0.02,
20 "intermediate_size": 17408,
21 "layer_types": [
22 "linear_attention",
23 "linear_attention",
24 "linear_attention",
25 "full_attention",
26 "linear_attention",
27 "linear_attention",
28 "linear_attention",
29 "full_attention",
30 "linear_attention",
31 "linear_attention",
32 "linear_attention",
33 "full_attention",
34 "linear_attention",
35 "linear_attention",
36 "linear_attention",
37 "full_attention",
38 "linear_attention",
39 "linear_attention",
40 "linear_attention",
41 "full_attention",
42 "linear_attention",
43 "linear_attention",
44 "linear_attention",
45 "full_attention",
46 "linear_attention",
47 "linear_attention",
48 "linear_attention",
49 "full_attention",
50 "linear_attention",
51 "linear_attention",
52 "linear_attention",
53 "full_attention",
54 "linear_attention",
55 "linear_attention",
56 "linear_attention",
57 "full_attention",
58 "linear_attention",
59 "linear_attention",
60 "linear_attention",
61 "full_attention",
62 "linear_attention",
63 "linear_attention",
64 "linear_attention",
65 "full_attention",
66 "linear_attention",
67 "linear_attention",
68 "linear_attention",
69 "full_attention",
70 "linear_attention",
71 "linear_attention",
72 "linear_attention",
73 "full_attention",
74 "linear_attention",
75 "linear_attention",
76 "linear_attention",
77 "full_attention",
78 "linear_attention",
79 "linear_attention",
80 "linear_attention",
81 "full_attention",
82 "linear_attention",
83 "linear_attention",
84 "linear_attention",
85 "full_attention"
86 ],
87 "linear_conv_kernel_dim": 4,
88 "linear_key_head_dim": 128,
89 "linear_num_key_heads": 16,
90 "linear_num_value_heads": 48,
91 "linear_value_head_dim": 128,
92 "mamba_ssm_dtype": "float32",
93 "max_position_embeddings": 262144,
94 "model_type": "qwen3_5_text",
95 "mtp_num_hidden_layers": 1,
96 "mtp_use_dedicated_embeddings": false,
97 "num_attention_heads": 24,
98 "num_hidden_layers": 64,
99 "num_key_value_heads": 4,
100 "output_gate_type": "swish",
101 "pad_token_id": null,
102 "partial_rotary_factor": 0.25,
103 "rms_norm_eps": 1e-06,
104 "rope_parameters": {
105 "mrope_interleaved": true,
106 "mrope_section": [
107 11,
108 11,
109 10
110 ],
111 "partial_rotary_factor": 0.25,
112 "rope_theta": 10000000,
113 "rope_type": "default"
114 },
115 "tie_word_embeddings": false,
116 "use_cache": true,
117 "vocab_size": 248320
118 },
119 "tie_word_embeddings": false,
120 "transformers_version": "4.57.1",
121 "video_token_id": 248057,
122 "vision_config": {
123 "deepstack_visual_indexes": [],
124 "depth": 27,
125 "hidden_act": "gelu_pytorch_tanh",
126 "hidden_size": 1152,
127 "in_channels": 3,
128 "initializer_range": 0.02,
129 "intermediate_size": 4304,
130 "model_type": "qwen3_5",
131 "num_heads": 16,
132 "num_position_embeddings": 2304,
133 "out_hidden_size": 5120,
134 "patch_size": 16,
135 "spatial_merge_size": 2,
136 "temporal_patch_size": 2
137 },
138 "vision_end_token_id": 248054,
139 "vision_start_token_id": 248053
140 }