config.json
{
  "architectures": [
    "Qwen3ForCausalLM"
  ],
  "attention_bias": false,
  "attention_dropout": 0.0,
  "bos_token_id": 151643,
  "eos_token_id": 151645,
  "head_dim": 128,
  "hidden_act": "silu",
  "hidden_size": 1024,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "max_position_embeddings": 40960,
  "max_window_layers": 28,
  "model_type": "qwen3",
  "num_attention_heads": 16,
  "num_hidden_layers": 28,
  "num_key_value_heads": 8,
  "rms_norm_eps": 1e-06,
  "rope_scaling": null,
  "rope_theta": 1000000,
  "sliding_window": null,
  "tie_word_embeddings": true,
  "torch_dtype": "bfloat16",
  "transformers_version": "4.51.0",
  "use_cache": true,
  "use_sliding_window": false,
  "vocab_size": 151936
}
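
For reference, a minimal sketch of reading these values back through the transformers AutoConfig API. It assumes the file lives in a Hugging Face model repo; the repo id "Qwen/Qwen3-0.6B" used below is only an assumption based on the hyperparameters and should be replaced with the actual repo id or a local directory containing this config.json.

```python
# Sketch: load the config above and inspect a few of its fields.
# Assumption: "Qwen/Qwen3-0.6B" stands in for the repo (or local path)
# that actually contains this config.json.
from transformers import AutoConfig

config = AutoConfig.from_pretrained("Qwen/Qwen3-0.6B")

print(config.model_type)               # "qwen3"
print(config.hidden_size)              # 1024
print(config.num_hidden_layers)        # 28
print(config.num_attention_heads)      # 16
print(config.num_key_value_heads)      # 8 (grouped-query attention)
print(config.max_position_embeddings)  # 40960
```

Note that num_key_value_heads (8) is smaller than num_attention_heads (16), i.e. grouped-query attention, and tie_word_embeddings is true, so the output projection shares weights with the input embedding matrix.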