config.json
{
  "_name_or_path": "NousResearch/Hermes-3-Llama-3.1-8B",
  "architectures": [
    "LlamaForCausalLM"
  ],
  "attention_bias": false,
  "attention_dropout": 0.0,
  "bos_token_id": 128000,
  "eos_token_id": 128040,
  "hidden_act": "silu",
  "hidden_size": 4096,
  "initializer_range": 0.02,
  "intermediate_size": 14336,
  "max_position_embeddings": 131072,
  "mlp_bias": false,
  "model_type": "llama",
  "num_attention_heads": 32,
  "num_hidden_layers": 32,
  "num_key_value_heads": 8,
  "pretraining_tp": 1,
  "rms_norm_eps": 1e-05,
  "rope_scaling": {
    "factor": 8.0,
    "high_freq_factor": 4.0,
    "low_freq_factor": 1.0,
    "original_max_position_embeddings": 8192,
    "rope_type": "llama3"
  },
  "rope_theta": 500000.0,
  "tie_word_embeddings": false,
  "torch_dtype": "bfloat16",
  "transformers_version": "4.44.0.dev0",
  "use_cache": true,
  "vocab_size": 128256
}
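A minimal sketch (not part of the original file) of how a config like this might be consumed with Hugging Face transformers; the repo id is taken from "_name_or_path" above, and installing transformers plus torch is assumed.

# Sketch: inspect the config, then load the model with the dtype it declares.
import torch
from transformers import AutoConfig, AutoModelForCausalLM, AutoTokenizer

repo_id = "NousResearch/Hermes-3-Llama-3.1-8B"

# Reading the config alone exposes the fields listed above
# (hidden_size, num_hidden_layers, rope_scaling, ...).
config = AutoConfig.from_pretrained(repo_id)
print(config.model_type, config.hidden_size, config.num_hidden_layers)

# Loading the full model; torch_dtype matches the "bfloat16" entry in the config.
tokenizer = AutoTokenizer.from_pretrained(repo_id)
model = AutoModelForCausalLM.from_pretrained(repo_id, torch_dtype=torch.bfloat16)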