inference/config_671B_v3.2.json
605 B · 26 lines · json Raw
1 {
2 "vocab_size": 129280,
3 "dim": 7168,
4 "inter_dim": 18432,
5 "moe_inter_dim": 2048,
6 "n_layers": 61,
7 "n_dense_layers": 3,
8 "n_heads": 128,
9 "n_routed_experts": 256,
10 "n_shared_experts": 1,
11 "n_activated_experts": 8,
12 "n_expert_groups": 8,
13 "n_limited_groups": 4,
14 "route_scale": 2.5,
15 "score_func": "sigmoid",
16 "q_lora_rank": 1536,
17 "kv_lora_rank": 512,
18 "qk_nope_head_dim": 128,
19 "qk_rope_head_dim": 64,
20 "v_head_dim": 128,
21 "dtype": "fp8",
22 "scale_fmt": "ue8m0",
23 "index_n_heads": 64,
24 "index_head_dim": 128,
25 "index_topk": 2048
26 }