{
  "metadata": {
    "total_size": 65248815744
  },
  "weight_map": {
    "block.0.attn.norm.scale": "model--00001-of-00007.safetensors",
    "block.0.attn.out.bias": "model--00001-of-00007.safetensors",
    "block.0.attn.out.weight": "model--00001-of-00007.safetensors",
    "block.0.attn.qkv.bias": "model--00001-of-00007.safetensors",
    "block.0.attn.qkv.weight": "model--00001-of-00007.safetensors",
    "block.0.attn.sinks": "model--00001-of-00007.safetensors",
    "block.0.mlp.gate.bias": "model--00001-of-00007.safetensors",
    "block.0.mlp.gate.weight": "model--00001-of-00007.safetensors",
    "block.0.mlp.mlp1_bias": "model--00001-of-00007.safetensors",
    "block.0.mlp.mlp1_weight.blocks": "model--00001-of-00007.safetensors",
    "block.0.mlp.mlp1_weight.scales": "model--00001-of-00007.safetensors",
    "block.0.mlp.mlp2_bias": "model--00001-of-00007.safetensors",
    "block.0.mlp.mlp2_weight.blocks": "model--00001-of-00007.safetensors",
    "block.0.mlp.mlp2_weight.scales": "model--00001-of-00007.safetensors",
    "block.0.mlp.norm.scale": "model--00001-of-00007.safetensors",
    "block.1.attn.norm.scale": "model--00001-of-00007.safetensors",
    "block.1.attn.out.bias": "model--00001-of-00007.safetensors",
    "block.1.attn.out.weight": "model--00001-of-00007.safetensors",
    "block.1.attn.qkv.bias": "model--00001-of-00007.safetensors",
    "block.1.attn.qkv.weight": "model--00001-of-00007.safetensors",
    "block.1.attn.sinks": "model--00001-of-00007.safetensors",
    "block.1.mlp.gate.bias": "model--00001-of-00007.safetensors",
    "block.1.mlp.gate.weight": "model--00001-of-00007.safetensors",
    "block.1.mlp.mlp1_bias": "model--00001-of-00007.safetensors",
    "block.1.mlp.mlp1_weight.blocks": "model--00001-of-00007.safetensors",
    "block.1.mlp.mlp1_weight.scales": "model--00001-of-00007.safetensors",
    "block.1.mlp.mlp2_bias": "model--00001-of-00007.safetensors",
    "block.1.mlp.mlp2_weight.blocks": "model--00001-of-00007.safetensors",
    "block.1.mlp.mlp2_weight.scales": "model--00001-of-00007.safetensors",
    "block.1.mlp.norm.scale": "model--00001-of-00007.safetensors",
    "block.10.attn.norm.scale": "model--00001-of-00007.safetensors",
    "block.10.attn.out.bias": "model--00001-of-00007.safetensors",
    "block.10.attn.out.weight": "model--00001-of-00007.safetensors",
    "block.10.attn.qkv.bias": "model--00001-of-00007.safetensors",
    "block.10.attn.qkv.weight": "model--00001-of-00007.safetensors",
    "block.10.attn.sinks": "model--00001-of-00007.safetensors",
    "block.10.mlp.gate.bias": "model--00001-of-00007.safetensors",
    "block.10.mlp.gate.weight": "model--00001-of-00007.safetensors",
    "block.10.mlp.mlp1_bias": "model--00001-of-00007.safetensors",
    "block.10.mlp.mlp1_weight.blocks": "model--00001-of-00007.safetensors",
    "block.10.mlp.mlp1_weight.scales": "model--00001-of-00007.safetensors",
    "block.10.mlp.mlp2_bias": "model--00001-of-00007.safetensors",
    "block.10.mlp.mlp2_weight.blocks": "model--00001-of-00007.safetensors",
    "block.10.mlp.mlp2_weight.scales": "model--00001-of-00007.safetensors",
    "block.10.mlp.norm.scale": "model--00001-of-00007.safetensors",
    "block.11.attn.norm.scale": "model--00001-of-00007.safetensors",
    "block.11.attn.out.bias": "model--00001-of-00007.safetensors",
    "block.11.attn.out.weight": "model--00001-of-00007.safetensors",
    "block.11.attn.qkv.bias": "model--00001-of-00007.safetensors",
    "block.11.attn.qkv.weight": "model--00001-of-00007.safetensors",
    "block.11.attn.sinks": "model--00001-of-00007.safetensors",
    "block.11.mlp.gate.bias": "model--00001-of-00007.safetensors",
    "block.11.mlp.gate.weight": "model--00001-of-00007.safetensors",
    "block.11.mlp.mlp1_bias": "model--00001-of-00007.safetensors",
    "block.11.mlp.mlp1_weight.blocks": "model--00001-of-00007.safetensors",
    "block.11.mlp.mlp1_weight.scales": "model--00001-of-00007.safetensors",
    "block.11.mlp.mlp2_bias": "model--00001-of-00007.safetensors",
    "block.11.mlp.mlp2_weight.blocks": "model--00001-of-00007.safetensors",
    "block.11.mlp.mlp2_weight.scales": "model--00001-of-00007.safetensors",
    "block.11.mlp.norm.scale": "model--00001-of-00007.safetensors",
    "block.12.attn.norm.scale": "model--00001-of-00007.safetensors",
    "block.12.attn.out.bias": "model--00001-of-00007.safetensors",
    "block.12.attn.out.weight": "model--00001-of-00007.safetensors",
    "block.12.attn.qkv.bias": "model--00001-of-00007.safetensors",
    "block.12.attn.qkv.weight": "model--00001-of-00007.safetensors",
    "block.12.attn.sinks": "model--00001-of-00007.safetensors",
    "block.12.mlp.gate.bias": "model--00001-of-00007.safetensors",
    "block.12.mlp.gate.weight": "model--00001-of-00007.safetensors",
    "block.12.mlp.mlp1_bias": "model--00001-of-00007.safetensors",
    "block.12.mlp.mlp1_weight.blocks": "model--00001-of-00007.safetensors",
    "block.12.mlp.mlp1_weight.scales": "model--00001-of-00007.safetensors",
    "block.12.mlp.mlp2_bias": "model--00001-of-00007.safetensors",
    "block.12.mlp.mlp2_weight.blocks": "model--00001-of-00007.safetensors",
    "block.12.mlp.mlp2_weight.scales": "model--00001-of-00007.safetensors",
    "block.12.mlp.norm.scale": "model--00001-of-00007.safetensors",
    "block.13.attn.norm.scale": "model--00001-of-00007.safetensors",
    "block.13.attn.out.bias": "model--00001-of-00007.safetensors",
    "block.13.attn.out.weight": "model--00001-of-00007.safetensors",
    "block.13.attn.qkv.bias": "model--00001-of-00007.safetensors",
    "block.13.attn.qkv.weight": "model--00001-of-00007.safetensors",
    "block.13.attn.sinks": "model--00001-of-00007.safetensors",
    "block.13.mlp.gate.bias": "model--00001-of-00007.safetensors",
    "block.13.mlp.gate.weight": "model--00001-of-00007.safetensors",
    "block.13.mlp.mlp1_bias": "model--00001-of-00007.safetensors",
    "block.13.mlp.mlp1_weight.blocks": "model--00001-of-00007.safetensors",
    "block.13.mlp.mlp1_weight.scales": "model--00001-of-00007.safetensors",
    "block.13.mlp.mlp2_bias": "model--00001-of-00007.safetensors",
    "block.13.mlp.mlp2_weight.blocks": "model--00001-of-00007.safetensors",
    "block.13.mlp.mlp2_weight.scales": "model--00001-of-00007.safetensors",
    "block.13.mlp.norm.scale": "model--00001-of-00007.safetensors",
    "block.14.attn.norm.scale": "model--00001-of-00007.safetensors",
    "block.14.attn.out.bias": "model--00001-of-00007.safetensors",
    "block.14.attn.out.weight": "model--00001-of-00007.safetensors",
    "block.14.attn.qkv.bias": "model--00001-of-00007.safetensors",
    "block.14.attn.qkv.weight": "model--00001-of-00007.safetensors",
    "block.14.attn.sinks": "model--00001-of-00007.safetensors",
    "block.14.mlp.gate.bias": "model--00001-of-00007.safetensors",
    "block.14.mlp.gate.weight": "model--00001-of-00007.safetensors",
    "block.14.mlp.mlp1_bias": "model--00001-of-00007.safetensors",
    "block.14.mlp.mlp1_weight.blocks": "model--00002-of-00007.safetensors",
    "block.14.mlp.mlp1_weight.scales": "model--00002-of-00007.safetensors",
    "block.14.mlp.mlp2_bias": "model--00002-of-00007.safetensors",
    "block.14.mlp.mlp2_weight.blocks": "model--00002-of-00007.safetensors",
    "block.14.mlp.mlp2_weight.scales": "model--00002-of-00007.safetensors",
    "block.14.mlp.norm.scale": "model--00002-of-00007.safetensors",
    "block.15.attn.norm.scale": "model--00002-of-00007.safetensors",
    "block.15.attn.out.bias": "model--00002-of-00007.safetensors",
    "block.15.attn.out.weight": "model--00002-of-00007.safetensors",
    "block.15.attn.qkv.bias": "model--00002-of-00007.safetensors",
    "block.15.attn.qkv.weight": "model--00002-of-00007.safetensors",
    "block.15.attn.sinks": "model--00002-of-00007.safetensors",
    "block.15.mlp.gate.bias": "model--00002-of-00007.safetensors",
    "block.15.mlp.gate.weight": "model--00002-of-00007.safetensors",
    "block.15.mlp.mlp1_bias": "model--00002-of-00007.safetensors",
    "block.15.mlp.mlp1_weight.blocks": "model--00002-of-00007.safetensors",
    "block.15.mlp.mlp1_weight.scales": "model--00002-of-00007.safetensors",
    "block.15.mlp.mlp2_bias": "model--00002-of-00007.safetensors",
    "block.15.mlp.mlp2_weight.blocks": "model--00002-of-00007.safetensors",
    "block.15.mlp.mlp2_weight.scales": "model--00002-of-00007.safetensors",
    "block.15.mlp.norm.scale": "model--00002-of-00007.safetensors",
    "block.16.attn.norm.scale": "model--00002-of-00007.safetensors",
    "block.16.attn.out.bias": "model--00002-of-00007.safetensors",
    "block.16.attn.out.weight": "model--00002-of-00007.safetensors",
    "block.16.attn.qkv.bias": "model--00002-of-00007.safetensors",
    "block.16.attn.qkv.weight": "model--00002-of-00007.safetensors",
    "block.16.attn.sinks": "model--00002-of-00007.safetensors",
    "block.16.mlp.gate.bias": "model--00002-of-00007.safetensors",
    "block.16.mlp.gate.weight": "model--00002-of-00007.safetensors",
    "block.16.mlp.mlp1_bias": "model--00002-of-00007.safetensors",
    "block.16.mlp.mlp1_weight.blocks": "model--00002-of-00007.safetensors",
    "block.16.mlp.mlp1_weight.scales": "model--00002-of-00007.safetensors",
    "block.16.mlp.mlp2_bias": "model--00002-of-00007.safetensors",
    "block.16.mlp.mlp2_weight.blocks": "model--00002-of-00007.safetensors",
    "block.16.mlp.mlp2_weight.scales": "model--00002-of-00007.safetensors",
    "block.16.mlp.norm.scale": "model--00002-of-00007.safetensors",
    "block.17.attn.norm.scale": "model--00002-of-00007.safetensors",
    "block.17.attn.out.bias": "model--00002-of-00007.safetensors",
    "block.17.attn.out.weight": "model--00002-of-00007.safetensors",
    "block.17.attn.qkv.bias": "model--00002-of-00007.safetensors",
    "block.17.attn.qkv.weight": "model--00002-of-00007.safetensors",
    "block.17.attn.sinks": "model--00002-of-00007.safetensors",
    "block.17.mlp.gate.bias": "model--00002-of-00007.safetensors",
    "block.17.mlp.gate.weight": "model--00002-of-00007.safetensors",
    "block.17.mlp.mlp1_bias": "model--00002-of-00007.safetensors",
    "block.17.mlp.mlp1_weight.blocks": "model--00002-of-00007.safetensors",
    "block.17.mlp.mlp1_weight.scales": "model--00002-of-00007.safetensors",
    "block.17.mlp.mlp2_bias": "model--00002-of-00007.safetensors",
    "block.17.mlp.mlp2_weight.blocks": "model--00002-of-00007.safetensors",
    "block.17.mlp.mlp2_weight.scales": "model--00002-of-00007.safetensors",
    "block.17.mlp.norm.scale": "model--00002-of-00007.safetensors",
    "block.18.attn.norm.scale": "model--00002-of-00007.safetensors",
    "block.18.attn.out.bias": "model--00002-of-00007.safetensors",
    "block.18.attn.out.weight": "model--00002-of-00007.safetensors",
    "block.18.attn.qkv.bias": "model--00002-of-00007.safetensors",
    "block.18.attn.qkv.weight": "model--00002-of-00007.safetensors",
    "block.18.attn.sinks": "model--00002-of-00007.safetensors",
    "block.18.mlp.gate.bias": "model--00002-of-00007.safetensors",
    "block.18.mlp.gate.weight": "model--00002-of-00007.safetensors",
    "block.18.mlp.mlp1_bias": "model--00002-of-00007.safetensors",
    "block.18.mlp.mlp1_weight.blocks": "model--00002-of-00007.safetensors",
    "block.18.mlp.mlp1_weight.scales": "model--00002-of-00007.safetensors",
    "block.18.mlp.mlp2_bias": "model--00002-of-00007.safetensors",
    "block.18.mlp.mlp2_weight.blocks": "model--00002-of-00007.safetensors",
    "block.18.mlp.mlp2_weight.scales": "model--00002-of-00007.safetensors",
    "block.18.mlp.norm.scale": "model--00002-of-00007.safetensors",
    "block.19.attn.norm.scale": "model--00002-of-00007.safetensors",
    "block.19.attn.out.bias": "model--00002-of-00007.safetensors",
    "block.19.attn.out.weight": "model--00002-of-00007.safetensors",
    "block.19.attn.qkv.bias": "model--00002-of-00007.safetensors",
    "block.19.attn.qkv.weight": "model--00002-of-00007.safetensors",
    "block.19.attn.sinks": "model--00002-of-00007.safetensors",
    "block.19.mlp.gate.bias": "model--00002-of-00007.safetensors",
    "block.19.mlp.gate.weight": "model--00002-of-00007.safetensors",
    "block.19.mlp.mlp1_bias": "model--00002-of-00007.safetensors",
    "block.19.mlp.mlp1_weight.blocks": "model--00002-of-00007.safetensors",
    "block.19.mlp.mlp1_weight.scales": "model--00002-of-00007.safetensors",
    "block.19.mlp.mlp2_bias": "model--00002-of-00007.safetensors",
    "block.19.mlp.mlp2_weight.blocks": "model--00002-of-00007.safetensors",
    "block.19.mlp.mlp2_weight.scales": "model--00002-of-00007.safetensors",
    "block.19.mlp.norm.scale": "model--00002-of-00007.safetensors",
    "block.2.attn.norm.scale": "model--00002-of-00007.safetensors",
    "block.2.attn.out.bias": "model--00002-of-00007.safetensors",
    "block.2.attn.out.weight": "model--00002-of-00007.safetensors",
    "block.2.attn.qkv.bias": "model--00002-of-00007.safetensors",
    "block.2.attn.qkv.weight": "model--00002-of-00007.safetensors",
    "block.2.attn.sinks": "model--00002-of-00007.safetensors",
    "block.2.mlp.gate.bias": "model--00002-of-00007.safetensors",
    "block.2.mlp.gate.weight": "model--00002-of-00007.safetensors",
    "block.2.mlp.mlp1_bias": "model--00002-of-00007.safetensors",
    "block.2.mlp.mlp1_weight.blocks": "model--00003-of-00007.safetensors",
    "block.2.mlp.mlp1_weight.scales": "model--00003-of-00007.safetensors",
    "block.2.mlp.mlp2_bias": "model--00003-of-00007.safetensors",
    "block.2.mlp.mlp2_weight.blocks": "model--00003-of-00007.safetensors",
    "block.2.mlp.mlp2_weight.scales": "model--00003-of-00007.safetensors",
    "block.2.mlp.norm.scale": "model--00003-of-00007.safetensors",
    "block.20.attn.norm.scale": "model--00003-of-00007.safetensors",
    "block.20.attn.out.bias": "model--00003-of-00007.safetensors",
    "block.20.attn.out.weight": "model--00003-of-00007.safetensors",
    "block.20.attn.qkv.bias": "model--00003-of-00007.safetensors",
    "block.20.attn.qkv.weight": "model--00003-of-00007.safetensors",
    "block.20.attn.sinks": "model--00003-of-00007.safetensors",
    "block.20.mlp.gate.bias": "model--00003-of-00007.safetensors",
    "block.20.mlp.gate.weight": "model--00003-of-00007.safetensors",
    "block.20.mlp.mlp1_bias": "model--00003-of-00007.safetensors",
    "block.20.mlp.mlp1_weight.blocks": "model--00003-of-00007.safetensors",
    "block.20.mlp.mlp1_weight.scales": "model--00003-of-00007.safetensors",
    "block.20.mlp.mlp2_bias": "model--00003-of-00007.safetensors",
    "block.20.mlp.mlp2_weight.blocks": "model--00003-of-00007.safetensors",
    "block.20.mlp.mlp2_weight.scales": "model--00003-of-00007.safetensors",
    "block.20.mlp.norm.scale": "model--00003-of-00007.safetensors",
    "block.21.attn.norm.scale": "model--00003-of-00007.safetensors",
    "block.21.attn.out.bias": "model--00003-of-00007.safetensors",
    "block.21.attn.out.weight": "model--00003-of-00007.safetensors",
    "block.21.attn.qkv.bias": "model--00003-of-00007.safetensors",
    "block.21.attn.qkv.weight": "model--00003-of-00007.safetensors",
    "block.21.attn.sinks": "model--00003-of-00007.safetensors",
    "block.21.mlp.gate.bias": "model--00003-of-00007.safetensors",
    "block.21.mlp.gate.weight": "model--00003-of-00007.safetensors",
    "block.21.mlp.mlp1_bias": "model--00003-of-00007.safetensors",
    "block.21.mlp.mlp1_weight.blocks": "model--00003-of-00007.safetensors",
    "block.21.mlp.mlp1_weight.scales": "model--00003-of-00007.safetensors",
    "block.21.mlp.mlp2_bias": "model--00003-of-00007.safetensors",
    "block.21.mlp.mlp2_weight.blocks": "model--00003-of-00007.safetensors",
    "block.21.mlp.mlp2_weight.scales": "model--00003-of-00007.safetensors",
    "block.21.mlp.norm.scale": "model--00003-of-00007.safetensors",
    "block.22.attn.norm.scale": "model--00003-of-00007.safetensors",
    "block.22.attn.out.bias": "model--00003-of-00007.safetensors",
    "block.22.attn.out.weight": "model--00003-of-00007.safetensors",
    "block.22.attn.qkv.bias": "model--00003-of-00007.safetensors",
    "block.22.attn.qkv.weight": "model--00003-of-00007.safetensors",
    "block.22.attn.sinks": "model--00003-of-00007.safetensors",
    "block.22.mlp.gate.bias": "model--00003-of-00007.safetensors",
    "block.22.mlp.gate.weight": "model--00003-of-00007.safetensors",
    "block.22.mlp.mlp1_bias": "model--00003-of-00007.safetensors",
    "block.22.mlp.mlp1_weight.blocks": "model--00003-of-00007.safetensors",
    "block.22.mlp.mlp1_weight.scales": "model--00003-of-00007.safetensors",
    "block.22.mlp.mlp2_bias": "model--00003-of-00007.safetensors",
    "block.22.mlp.mlp2_weight.blocks": "model--00003-of-00007.safetensors",
    "block.22.mlp.mlp2_weight.scales": "model--00003-of-00007.safetensors",
    "block.22.mlp.norm.scale": "model--00003-of-00007.safetensors",
    "block.23.attn.norm.scale": "model--00003-of-00007.safetensors",
    "block.23.attn.out.bias": "model--00003-of-00007.safetensors",
    "block.23.attn.out.weight": "model--00003-of-00007.safetensors",
    "block.23.attn.qkv.bias": "model--00003-of-00007.safetensors",
    "block.23.attn.qkv.weight": "model--00003-of-00007.safetensors",
    "block.23.attn.sinks": "model--00003-of-00007.safetensors",
    "block.23.mlp.gate.bias": "model--00003-of-00007.safetensors",
    "block.23.mlp.gate.weight": "model--00003-of-00007.safetensors",
    "block.23.mlp.mlp1_bias": "model--00003-of-00007.safetensors",
    "block.23.mlp.mlp1_weight.blocks": "model--00003-of-00007.safetensors",
    "block.23.mlp.mlp1_weight.scales": "model--00003-of-00007.safetensors",
    "block.23.mlp.mlp2_bias": "model--00003-of-00007.safetensors",
    "block.23.mlp.mlp2_weight.blocks": "model--00003-of-00007.safetensors",
    "block.23.mlp.mlp2_weight.scales": "model--00003-of-00007.safetensors",
    "block.23.mlp.norm.scale": "model--00003-of-00007.safetensors",
    "block.24.attn.norm.scale": "model--00003-of-00007.safetensors",
    "block.24.attn.out.bias": "model--00003-of-00007.safetensors",
    "block.24.attn.out.weight": "model--00003-of-00007.safetensors",
    "block.24.attn.qkv.bias": "model--00003-of-00007.safetensors",
    "block.24.attn.qkv.weight": "model--00003-of-00007.safetensors",
    "block.24.attn.sinks": "model--00003-of-00007.safetensors",
    "block.24.mlp.gate.bias": "model--00003-of-00007.safetensors",
    "block.24.mlp.gate.weight": "model--00003-of-00007.safetensors",
    "block.24.mlp.mlp1_bias": "model--00003-of-00007.safetensors",
    "block.24.mlp.mlp1_weight.blocks": "model--00003-of-00007.safetensors",
    "block.24.mlp.mlp1_weight.scales": "model--00003-of-00007.safetensors",
    "block.24.mlp.mlp2_bias": "model--00003-of-00007.safetensors",
    "block.24.mlp.mlp2_weight.blocks": "model--00003-of-00007.safetensors",
    "block.24.mlp.mlp2_weight.scales": "model--00003-of-00007.safetensors",
    "block.24.mlp.norm.scale": "model--00003-of-00007.safetensors",
    "block.25.attn.norm.scale": "model--00003-of-00007.safetensors",
    "block.25.attn.out.bias": "model--00003-of-00007.safetensors",
    "block.25.attn.out.weight": "model--00003-of-00007.safetensors",
    "block.25.attn.qkv.bias": "model--00003-of-00007.safetensors",
    "block.25.attn.qkv.weight": "model--00003-of-00007.safetensors",
    "block.25.attn.sinks": "model--00003-of-00007.safetensors",
    "block.25.mlp.gate.bias": "model--00003-of-00007.safetensors",
    "block.25.mlp.gate.weight": "model--00003-of-00007.safetensors",
    "block.25.mlp.mlp1_bias": "model--00003-of-00007.safetensors",
    "block.25.mlp.mlp1_weight.blocks": "model--00004-of-00007.safetensors",
    "block.25.mlp.mlp1_weight.scales": "model--00004-of-00007.safetensors",
    "block.25.mlp.mlp2_bias": "model--00004-of-00007.safetensors",
    "block.25.mlp.mlp2_weight.blocks": "model--00004-of-00007.safetensors",
    "block.25.mlp.mlp2_weight.scales": "model--00004-of-00007.safetensors",
    "block.25.mlp.norm.scale": "model--00004-of-00007.safetensors",
    "block.26.attn.norm.scale": "model--00004-of-00007.safetensors",
    "block.26.attn.out.bias": "model--00004-of-00007.safetensors",
    "block.26.attn.out.weight": "model--00004-of-00007.safetensors",
    "block.26.attn.qkv.bias": "model--00004-of-00007.safetensors",
    "block.26.attn.qkv.weight": "model--00004-of-00007.safetensors",
    "block.26.attn.sinks": "model--00004-of-00007.safetensors",
    "block.26.mlp.gate.bias": "model--00004-of-00007.safetensors",
    "block.26.mlp.gate.weight": "model--00004-of-00007.safetensors",
    "block.26.mlp.mlp1_bias": "model--00004-of-00007.safetensors",
    "block.26.mlp.mlp1_weight.blocks": "model--00004-of-00007.safetensors",
    "block.26.mlp.mlp1_weight.scales": "model--00004-of-00007.safetensors",
    "block.26.mlp.mlp2_bias": "model--00004-of-00007.safetensors",
    "block.26.mlp.mlp2_weight.blocks": "model--00004-of-00007.safetensors",
    "block.26.mlp.mlp2_weight.scales": "model--00004-of-00007.safetensors",
    "block.26.mlp.norm.scale": "model--00004-of-00007.safetensors",
    "block.27.attn.norm.scale": "model--00004-of-00007.safetensors",
    "block.27.attn.out.bias": "model--00004-of-00007.safetensors",
    "block.27.attn.out.weight": "model--00004-of-00007.safetensors",
    "block.27.attn.qkv.bias": "model--00004-of-00007.safetensors",
    "block.27.attn.qkv.weight": "model--00004-of-00007.safetensors",
    "block.27.attn.sinks": "model--00004-of-00007.safetensors",
    "block.27.mlp.gate.bias": "model--00004-of-00007.safetensors",
    "block.27.mlp.gate.weight": "model--00004-of-00007.safetensors",
    "block.27.mlp.mlp1_bias": "model--00004-of-00007.safetensors",
    "block.27.mlp.mlp1_weight.blocks": "model--00004-of-00007.safetensors",
    "block.27.mlp.mlp1_weight.scales": "model--00004-of-00007.safetensors",
    "block.27.mlp.mlp2_bias": "model--00004-of-00007.safetensors",
    "block.27.mlp.mlp2_weight.blocks": "model--00004-of-00007.safetensors",
    "block.27.mlp.mlp2_weight.scales": "model--00004-of-00007.safetensors",
    "block.27.mlp.norm.scale": "model--00004-of-00007.safetensors",
    "block.28.attn.norm.scale": "model--00004-of-00007.safetensors",
    "block.28.attn.out.bias": "model--00004-of-00007.safetensors",
    "block.28.attn.out.weight": "model--00004-of-00007.safetensors",
    "block.28.attn.qkv.bias": "model--00004-of-00007.safetensors",
    "block.28.attn.qkv.weight": "model--00004-of-00007.safetensors",
    "block.28.attn.sinks": "model--00004-of-00007.safetensors",
    "block.28.mlp.gate.bias": "model--00004-of-00007.safetensors",
    "block.28.mlp.gate.weight": "model--00004-of-00007.safetensors",
    "block.28.mlp.mlp1_bias": "model--00004-of-00007.safetensors",
    "block.28.mlp.mlp1_weight.blocks": "model--00004-of-00007.safetensors",
    "block.28.mlp.mlp1_weight.scales": "model--00004-of-00007.safetensors",
    "block.28.mlp.mlp2_bias": "model--00004-of-00007.safetensors",
    "block.28.mlp.mlp2_weight.blocks": "model--00004-of-00007.safetensors",
    "block.28.mlp.mlp2_weight.scales": "model--00004-of-00007.safetensors",
    "block.28.mlp.norm.scale": "model--00004-of-00007.safetensors",
    "block.29.attn.norm.scale": "model--00004-of-00007.safetensors",
    "block.29.attn.out.bias": "model--00004-of-00007.safetensors",
    "block.29.attn.out.weight": "model--00004-of-00007.safetensors",
    "block.29.attn.qkv.bias": "model--00004-of-00007.safetensors",
    "block.29.attn.qkv.weight": "model--00004-of-00007.safetensors",
    "block.29.attn.sinks": "model--00004-of-00007.safetensors",
    "block.29.mlp.gate.bias": "model--00004-of-00007.safetensors",
    "block.29.mlp.gate.weight": "model--00004-of-00007.safetensors",
    "block.29.mlp.mlp1_bias": "model--00004-of-00007.safetensors",
    "block.29.mlp.mlp1_weight.blocks": "model--00004-of-00007.safetensors",
    "block.29.mlp.mlp1_weight.scales": "model--00004-of-00007.safetensors",
    "block.29.mlp.mlp2_bias": "model--00004-of-00007.safetensors",
    "block.29.mlp.mlp2_weight.blocks": "model--00004-of-00007.safetensors",
    "block.29.mlp.mlp2_weight.scales": "model--00004-of-00007.safetensors",
    "block.29.mlp.norm.scale": "model--00004-of-00007.safetensors",
    "block.3.attn.norm.scale": "model--00004-of-00007.safetensors",
    "block.3.attn.out.bias": "model--00004-of-00007.safetensors",
    "block.3.attn.out.weight": "model--00004-of-00007.safetensors",
    "block.3.attn.qkv.bias": "model--00004-of-00007.safetensors",
    "block.3.attn.qkv.weight": "model--00004-of-00007.safetensors",
    "block.3.attn.sinks": "model--00004-of-00007.safetensors",
    "block.3.mlp.gate.bias": "model--00004-of-00007.safetensors",
    "block.3.mlp.gate.weight": "model--00004-of-00007.safetensors",
    "block.3.mlp.mlp1_bias": "model--00004-of-00007.safetensors",
    "block.3.mlp.mlp1_weight.blocks": "model--00004-of-00007.safetensors",
    "block.3.mlp.mlp1_weight.scales": "model--00004-of-00007.safetensors",
    "block.3.mlp.mlp2_bias": "model--00004-of-00007.safetensors",
    "block.3.mlp.mlp2_weight.blocks": "model--00004-of-00007.safetensors",
    "block.3.mlp.mlp2_weight.scales": "model--00004-of-00007.safetensors",
    "block.3.mlp.norm.scale": "model--00004-of-00007.safetensors",
    "block.30.attn.norm.scale": "model--00004-of-00007.safetensors",
    "block.30.attn.out.bias": "model--00004-of-00007.safetensors",
    "block.30.attn.out.weight": "model--00004-of-00007.safetensors",
    "block.30.attn.qkv.bias": "model--00004-of-00007.safetensors",
    "block.30.attn.qkv.weight": "model--00004-of-00007.safetensors",
    "block.30.attn.sinks": "model--00004-of-00007.safetensors",
    "block.30.mlp.gate.bias": "model--00004-of-00007.safetensors",
    "block.30.mlp.gate.weight": "model--00004-of-00007.safetensors",
    "block.30.mlp.mlp1_bias": "model--00004-of-00007.safetensors",
    "block.30.mlp.mlp1_weight.blocks": "model--00005-of-00007.safetensors",
    "block.30.mlp.mlp1_weight.scales": "model--00005-of-00007.safetensors",
    "block.30.mlp.mlp2_bias": "model--00005-of-00007.safetensors",
    "block.30.mlp.mlp2_weight.blocks": "model--00005-of-00007.safetensors",
    "block.30.mlp.mlp2_weight.scales": "model--00005-of-00007.safetensors",
    "block.30.mlp.norm.scale": "model--00005-of-00007.safetensors",
    "block.31.attn.norm.scale": "model--00005-of-00007.safetensors",
    "block.31.attn.out.bias": "model--00005-of-00007.safetensors",
    "block.31.attn.out.weight": "model--00005-of-00007.safetensors",
    "block.31.attn.qkv.bias": "model--00005-of-00007.safetensors",
    "block.31.attn.qkv.weight": "model--00005-of-00007.safetensors",
    "block.31.attn.sinks": "model--00005-of-00007.safetensors",
    "block.31.mlp.gate.bias": "model--00005-of-00007.safetensors",
    "block.31.mlp.gate.weight": "model--00005-of-00007.safetensors",
    "block.31.mlp.mlp1_bias": "model--00005-of-00007.safetensors",
    "block.31.mlp.mlp1_weight.blocks": "model--00005-of-00007.safetensors",
    "block.31.mlp.mlp1_weight.scales": "model--00005-of-00007.safetensors",
    "block.31.mlp.mlp2_bias": "model--00005-of-00007.safetensors",
    "block.31.mlp.mlp2_weight.blocks": "model--00005-of-00007.safetensors",
    "block.31.mlp.mlp2_weight.scales": "model--00005-of-00007.safetensors",
    "block.31.mlp.norm.scale": "model--00005-of-00007.safetensors",
    "block.32.attn.norm.scale": "model--00005-of-00007.safetensors",
    "block.32.attn.out.bias": "model--00005-of-00007.safetensors",
    "block.32.attn.out.weight": "model--00005-of-00007.safetensors",
    "block.32.attn.qkv.bias": "model--00005-of-00007.safetensors",
    "block.32.attn.qkv.weight": "model--00005-of-00007.safetensors",
    "block.32.attn.sinks": "model--00005-of-00007.safetensors",
    "block.32.mlp.gate.bias": "model--00005-of-00007.safetensors",
    "block.32.mlp.gate.weight": "model--00005-of-00007.safetensors",
    "block.32.mlp.mlp1_bias": "model--00005-of-00007.safetensors",
    "block.32.mlp.mlp1_weight.blocks": "model--00005-of-00007.safetensors",
    "block.32.mlp.mlp1_weight.scales": "model--00005-of-00007.safetensors",
    "block.32.mlp.mlp2_bias": "model--00005-of-00007.safetensors",
    "block.32.mlp.mlp2_weight.blocks": "model--00005-of-00007.safetensors",
    "block.32.mlp.mlp2_weight.scales": "model--00005-of-00007.safetensors",
    "block.32.mlp.norm.scale": "model--00005-of-00007.safetensors",
    "block.33.attn.norm.scale": "model--00005-of-00007.safetensors",
    "block.33.attn.out.bias": "model--00005-of-00007.safetensors",
    "block.33.attn.out.weight": "model--00005-of-00007.safetensors",
    "block.33.attn.qkv.bias": "model--00005-of-00007.safetensors",
    "block.33.attn.qkv.weight": "model--00005-of-00007.safetensors",
    "block.33.attn.sinks": "model--00005-of-00007.safetensors",
    "block.33.mlp.gate.bias": "model--00005-of-00007.safetensors",
    "block.33.mlp.gate.weight": "model--00005-of-00007.safetensors",
    "block.33.mlp.mlp1_bias": "model--00005-of-00007.safetensors",
    "block.33.mlp.mlp1_weight.blocks": "model--00005-of-00007.safetensors",
    "block.33.mlp.mlp1_weight.scales": "model--00005-of-00007.safetensors",
    "block.33.mlp.mlp2_bias": "model--00005-of-00007.safetensors",
    "block.33.mlp.mlp2_weight.blocks": "model--00005-of-00007.safetensors",
    "block.33.mlp.mlp2_weight.scales": "model--00005-of-00007.safetensors",
    "block.33.mlp.norm.scale": "model--00005-of-00007.safetensors",
    "block.34.attn.norm.scale": "model--00005-of-00007.safetensors",
    "block.34.attn.out.bias": "model--00005-of-00007.safetensors",
    "block.34.attn.out.weight": "model--00005-of-00007.safetensors",
    "block.34.attn.qkv.bias": "model--00005-of-00007.safetensors",
    "block.34.attn.qkv.weight": "model--00005-of-00007.safetensors",
    "block.34.attn.sinks": "model--00005-of-00007.safetensors",
    "block.34.mlp.gate.bias": "model--00005-of-00007.safetensors",
    "block.34.mlp.gate.weight": "model--00005-of-00007.safetensors",
    "block.34.mlp.mlp1_bias": "model--00005-of-00007.safetensors",
    "block.34.mlp.mlp1_weight.blocks": "model--00005-of-00007.safetensors",
    "block.34.mlp.mlp1_weight.scales": "model--00005-of-00007.safetensors",
    "block.34.mlp.mlp2_bias": "model--00005-of-00007.safetensors",
    "block.34.mlp.mlp2_weight.blocks": "model--00005-of-00007.safetensors",
    "block.34.mlp.mlp2_weight.scales": "model--00005-of-00007.safetensors",
    "block.34.mlp.norm.scale": "model--00005-of-00007.safetensors",
    "block.35.attn.norm.scale": "model--00005-of-00007.safetensors",
    "block.35.attn.out.bias": "model--00005-of-00007.safetensors",
    "block.35.attn.out.weight": "model--00005-of-00007.safetensors",
    "block.35.attn.qkv.bias": "model--00005-of-00007.safetensors",
    "block.35.attn.qkv.weight": "model--00005-of-00007.safetensors",
    "block.35.attn.sinks": "model--00005-of-00007.safetensors",
    "block.35.mlp.gate.bias": "model--00005-of-00007.safetensors",
    "block.35.mlp.gate.weight": "model--00005-of-00007.safetensors",
    "block.35.mlp.mlp1_bias": "model--00005-of-00007.safetensors",
    "block.35.mlp.mlp1_weight.blocks": "model--00005-of-00007.safetensors",
    "block.35.mlp.mlp1_weight.scales": "model--00005-of-00007.safetensors",
    "block.35.mlp.mlp2_bias": "model--00005-of-00007.safetensors",
    "block.35.mlp.mlp2_weight.blocks": "model--00005-of-00007.safetensors",
    "block.35.mlp.mlp2_weight.scales": "model--00005-of-00007.safetensors",
    "block.35.mlp.norm.scale": "model--00005-of-00007.safetensors",
    "block.4.attn.norm.scale": "model--00005-of-00007.safetensors",
    "block.4.attn.out.bias": "model--00005-of-00007.safetensors",
    "block.4.attn.out.weight": "model--00005-of-00007.safetensors",
    "block.4.attn.qkv.bias": "model--00005-of-00007.safetensors",
    "block.4.attn.qkv.weight": "model--00005-of-00007.safetensors",
    "block.4.attn.sinks": "model--00005-of-00007.safetensors",
    "block.4.mlp.gate.bias": "model--00005-of-00007.safetensors",
    "block.4.mlp.gate.weight": "model--00005-of-00007.safetensors",
    "block.4.mlp.mlp1_bias": "model--00005-of-00007.safetensors",
    "block.4.mlp.mlp1_weight.blocks": "model--00006-of-00007.safetensors",
    "block.4.mlp.mlp1_weight.scales": "model--00006-of-00007.safetensors",
    "block.4.mlp.mlp2_bias": "model--00006-of-00007.safetensors",
    "block.4.mlp.mlp2_weight.blocks": "model--00006-of-00007.safetensors",
    "block.4.mlp.mlp2_weight.scales": "model--00006-of-00007.safetensors",
    "block.4.mlp.norm.scale": "model--00006-of-00007.safetensors",
    "block.5.attn.norm.scale": "model--00006-of-00007.safetensors",
    "block.5.attn.out.bias": "model--00006-of-00007.safetensors",
    "block.5.attn.out.weight": "model--00006-of-00007.safetensors",
    "block.5.attn.qkv.bias": "model--00006-of-00007.safetensors",
    "block.5.attn.qkv.weight": "model--00006-of-00007.safetensors",
    "block.5.attn.sinks": "model--00006-of-00007.safetensors",
    "block.5.mlp.gate.bias": "model--00006-of-00007.safetensors",
    "block.5.mlp.gate.weight": "model--00006-of-00007.safetensors",
    "block.5.mlp.mlp1_bias": "model--00006-of-00007.safetensors",
    "block.5.mlp.mlp1_weight.blocks": "model--00006-of-00007.safetensors",
    "block.5.mlp.mlp1_weight.scales": "model--00006-of-00007.safetensors",
    "block.5.mlp.mlp2_bias": "model--00006-of-00007.safetensors",
    "block.5.mlp.mlp2_weight.blocks": "model--00006-of-00007.safetensors",
    "block.5.mlp.mlp2_weight.scales": "model--00006-of-00007.safetensors",
    "block.5.mlp.norm.scale": "model--00006-of-00007.safetensors",
    "block.6.attn.norm.scale": "model--00006-of-00007.safetensors",
    "block.6.attn.out.bias": "model--00006-of-00007.safetensors",
    "block.6.attn.out.weight": "model--00006-of-00007.safetensors",
    "block.6.attn.qkv.bias": "model--00006-of-00007.safetensors",
    "block.6.attn.qkv.weight": "model--00006-of-00007.safetensors",
    "block.6.attn.sinks": "model--00006-of-00007.safetensors",
    "block.6.mlp.gate.bias": "model--00006-of-00007.safetensors",
    "block.6.mlp.gate.weight": "model--00006-of-00007.safetensors",
    "block.6.mlp.mlp1_bias": "model--00006-of-00007.safetensors",
    "block.6.mlp.mlp1_weight.blocks": "model--00006-of-00007.safetensors",
496 "block.6.mlp.mlp1_weight.scales": "model--00006-of-00007.safetensors",
497 "block.6.mlp.mlp2_bias": "model--00006-of-00007.safetensors",
498 "block.6.mlp.mlp2_weight.blocks": "model--00006-of-00007.safetensors",
499 "block.6.mlp.mlp2_weight.scales": "model--00006-of-00007.safetensors",
500 "block.6.mlp.norm.scale": "model--00006-of-00007.safetensors",
501 "block.7.attn.norm.scale": "model--00006-of-00007.safetensors",
502 "block.7.attn.out.bias": "model--00006-of-00007.safetensors",
503 "block.7.attn.out.weight": "model--00006-of-00007.safetensors",
504 "block.7.attn.qkv.bias": "model--00006-of-00007.safetensors",
505 "block.7.attn.qkv.weight": "model--00006-of-00007.safetensors",
506 "block.7.attn.sinks": "model--00006-of-00007.safetensors",
507 "block.7.mlp.gate.bias": "model--00006-of-00007.safetensors",
508 "block.7.mlp.gate.weight": "model--00006-of-00007.safetensors",
509 "block.7.mlp.mlp1_bias": "model--00006-of-00007.safetensors",
510 "block.7.mlp.mlp1_weight.blocks": "model--00006-of-00007.safetensors",
511 "block.7.mlp.mlp1_weight.scales": "model--00006-of-00007.safetensors",
512 "block.7.mlp.mlp2_bias": "model--00006-of-00007.safetensors",
513 "block.7.mlp.mlp2_weight.blocks": "model--00006-of-00007.safetensors",
514 "block.7.mlp.mlp2_weight.scales": "model--00006-of-00007.safetensors",
515 "block.7.mlp.norm.scale": "model--00006-of-00007.safetensors",
516 "block.8.attn.norm.scale": "model--00006-of-00007.safetensors",
517 "block.8.attn.out.bias": "model--00006-of-00007.safetensors",
518 "block.8.attn.out.weight": "model--00006-of-00007.safetensors",
519 "block.8.attn.qkv.bias": "model--00006-of-00007.safetensors",
520 "block.8.attn.qkv.weight": "model--00006-of-00007.safetensors",
521 "block.8.attn.sinks": "model--00006-of-00007.safetensors",
522 "block.8.mlp.gate.bias": "model--00006-of-00007.safetensors",
523 "block.8.mlp.gate.weight": "model--00006-of-00007.safetensors",
524 "block.8.mlp.mlp1_bias": "model--00006-of-00007.safetensors",
525 "block.8.mlp.mlp1_weight.blocks": "model--00006-of-00007.safetensors",
526 "block.8.mlp.mlp1_weight.scales": "model--00006-of-00007.safetensors",
527 "block.8.mlp.mlp2_bias": "model--00006-of-00007.safetensors",
528 "block.8.mlp.mlp2_weight.blocks": "model--00006-of-00007.safetensors",
529 "block.8.mlp.mlp2_weight.scales": "model--00006-of-00007.safetensors",
530 "block.8.mlp.norm.scale": "model--00006-of-00007.safetensors",
531 "block.9.attn.norm.scale": "model--00006-of-00007.safetensors",
532 "block.9.attn.out.bias": "model--00006-of-00007.safetensors",
533 "block.9.attn.out.weight": "model--00006-of-00007.safetensors",
534 "block.9.attn.qkv.bias": "model--00006-of-00007.safetensors",
535 "block.9.attn.qkv.weight": "model--00006-of-00007.safetensors",
536 "block.9.attn.sinks": "model--00006-of-00007.safetensors",
537 "block.9.mlp.gate.bias": "model--00006-of-00007.safetensors",
538 "block.9.mlp.gate.weight": "model--00006-of-00007.safetensors",
539 "block.9.mlp.mlp1_bias": "model--00006-of-00007.safetensors",
540 "block.9.mlp.mlp1_weight.blocks": "model--00006-of-00007.safetensors",
541 "block.9.mlp.mlp1_weight.scales": "model--00006-of-00007.safetensors",
542 "block.9.mlp.mlp2_bias": "model--00006-of-00007.safetensors",
543 "block.9.mlp.mlp2_weight.blocks": "model--00006-of-00007.safetensors",
544 "block.9.mlp.mlp2_weight.scales": "model--00006-of-00007.safetensors",
545 "block.9.mlp.norm.scale": "model--00006-of-00007.safetensors",
546 "embedding.weight": "model--00007-of-00007.safetensors",
547 "norm.scale": "model--00007-of-00007.safetensors",
548 "unembedding.weight": "model--00007-of-00007.safetensors"
549 }
550 }