config.json
{
  "architectures": [
    "Qwen2Model"
  ],
  "attention_dropout": 0.0,
  "auto_map": {
    "AutoModel": "modeling_qwen.Qwen2Model",
    "AutoModelForCausalLM": "modeling_qwen.Qwen2ForCausalLM",
    "AutoModelForSequenceClassification": "modeling_qwen.Qwen2ForSequenceClassification"
  },
  "bos_token_id": 151643,
  "eos_token_id": 151643,
  "hidden_act": "silu",
  "hidden_size": 3584,
  "initializer_range": 0.02,
  "intermediate_size": 18944,
  "max_position_embeddings": 131072,
  "max_window_layers": 28,
  "model_type": "qwen2",
  "num_attention_heads": 28,
  "num_hidden_layers": 28,
  "num_key_value_heads": 4,
  "rms_norm_eps": 1e-06,
  "rope_scaling": null,
  "rope_theta": 1000000.0,
  "sliding_window": 131072,
  "tie_word_embeddings": false,
  "torch_dtype": "float16",
  "transformers_version": "4.51.0",
  "use_cache": true,
  "use_sliding_window": false,
  "vocab_size": 151646
}
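Because "auto_map" routes the Auto classes to custom code in modeling_qwen.py, this checkpoint must be loaded with trust_remote_code=True. Below is a minimal loading sketch; the repo id is a placeholder assumption (the config does not name the repository), and the dtype mirrors the "torch_dtype": "float16" entry above.

import torch
from transformers import AutoModel, AutoTokenizer

# Placeholder repo id -- substitute the actual model path on the Hub or on disk.
repo_id = "path/to/this-qwen2-model"

tokenizer = AutoTokenizer.from_pretrained(repo_id, trust_remote_code=True)
model = AutoModel.from_pretrained(
    repo_id,
    torch_dtype=torch.float16,   # matches "torch_dtype": "float16" in the config
    trust_remote_code=True,      # required so auto_map can resolve modeling_qwen.Qwen2Model
)
model.eval()

Note the grouped-query attention layout implied by the config: with hidden_size 3584 and 28 attention heads, each head has dimension 3584 / 28 = 128, and the 4 key/value heads mean every KV head is shared by 28 / 4 = 7 query heads, shrinking the KV cache 7x relative to full multi-head attention.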