inference/config_671B_v3.2.json
| 1 | { |
| 2 | "vocab_size": 129280, |
| 3 | "dim": 7168, |
| 4 | "inter_dim": 18432, |
| 5 | "moe_inter_dim": 2048, |
| 6 | "n_layers": 61, |
| 7 | "n_dense_layers": 3, |
| 8 | "n_heads": 128, |
| 9 | "n_routed_experts": 256, |
| 10 | "n_shared_experts": 1, |
| 11 | "n_activated_experts": 8, |
| 12 | "n_expert_groups": 8, |
| 13 | "n_limited_groups": 4, |
| 14 | "route_scale": 2.5, |
| 15 | "score_func": "sigmoid", |
| 16 | "q_lora_rank": 1536, |
| 17 | "kv_lora_rank": 512, |
| 18 | "qk_nope_head_dim": 128, |
| 19 | "qk_rope_head_dim": 64, |
| 20 | "v_head_dim": 128, |
| 21 | "dtype": "fp8", |
| 22 | "scale_fmt": "ue8m0", |
| 23 | "index_n_heads": 64, |
| 24 | "index_head_dim": 128, |
| 25 | "index_topk": 2048 |
| 26 | } |