{
  "activation_fn_name": "swish",
  "architectures": [
    "OpenELMForCausalLM"
  ],
  "auto_map": {
    "AutoConfig": "configuration_openelm.OpenELMConfig",
    "AutoModelForCausalLM": "modeling_openelm.OpenELMForCausalLM"
  },
  "bos_token_id": 1,
  "eos_token_id": 2,
  "ffn_dim_divisor": 256,
  "ffn_multipliers": [
    0.5,
    0.63,
    0.76,
    0.89,
    1.02,
    1.15,
    1.28,
    1.41,
    1.54,
    1.67,
    1.8,
    1.93,
    2.06,
    2.19,
    2.31,
    2.44,
    2.57,
    2.7,
    2.83,
    2.96,
    3.09,
    3.22,
    3.35,
    3.48,
    3.61,
    3.74,
    3.87,
    4.0
  ],
  "ffn_with_glu": true,
  "head_dim": 64,
  "initializer_range": 0.02,
  "max_context_length": 2048,
  "model_dim": 2048,
  "model_type": "openelm",
  "normalization_layer_name": "rms_norm",
  "normalize_qk_projections": true,
  "num_gqa_groups": 4,
  "num_kv_heads": [
    4,
    4,
    4,
    5,
    5,
    5,
    5,
    5,
    5,
    5,
    6,
    6,
    6,
    6,
    6,
    6,
    6,
    6,
    7,
    7,
    7,
    7,
    7,
    7,
    8,
    8,
    8,
    8
  ],
  "num_query_heads": [
    16,
    16,
    16,
    20,
    20,
    20,
    20,
    20,
    20,
    20,
    24,
    24,
    24,
    24,
    24,
    24,
    24,
    24,
    28,
    28,
    28,
    28,
    28,
    28,
    32,
    32,
    32,
    32
  ],
  "num_transformer_layers": 28,
  "qkv_multipliers": [
    0.5,
    1.0
  ],
  "rope_freq_constant": 10000,
  "rope_max_length": 4096,
  "share_input_output_layers": true,
  "torch_dtype": "bfloat16",
  "transformers_version": "4.39.3",
  "use_cache": true,
  "vocab_size": 32000
}