config.json
{
  "architectures": [
    "LlamaForCausalLM"
  ],
  "attention_bias": false,
  "attention_dropout": 0.0,
  "bos_token_id": 128000,
  "eos_token_id": [
    128001,
    128008,
    128009
  ],
  "head_dim": 128,
  "hidden_act": "silu",
  "hidden_size": 8192,
  "initializer_range": 0.02,
  "intermediate_size": 28672,
  "max_position_embeddings": 131072,
  "mlp_bias": false,
  "model_type": "llama",
  "num_attention_heads": 64,
  "num_hidden_layers": 80,
  "num_key_value_heads": 8,
  "pretraining_tp": 1,
  "quantization_config": {
    "config_groups": {
      "group_0": {
        "input_activations": {
          "actorder": null,
          "block_structure": null,
          "dynamic": true,
          "group_size": null,
          "num_bits": 8,
          "observer": null,
          "observer_kwargs": {},
          "strategy": "token",
          "symmetric": true,
          "type": "float"
        },
        "output_activations": null,
        "targets": [
          "Linear"
        ],
        "weights": {
          "actorder": null,
          "block_structure": null,
          "dynamic": false,
          "group_size": null,
          "num_bits": 8,
          "observer": "minmax",
          "observer_kwargs": {},
          "strategy": "channel",
          "symmetric": true,
          "type": "float"
        }
      }
    },
    "format": "float-quantized",
    "global_compression_ratio": 1.463543865167781,
    "ignore": [
      "lm_head"
    ],
    "kv_cache_scheme": null,
    "quant_method": "compressed-tensors",
    "quantization_status": "compressed",
    "sparsity_config": {}
  },
  "rms_norm_eps": 1e-05,
  "rope_scaling": {
    "factor": 8.0,
    "high_freq_factor": 4.0,
    "low_freq_factor": 1.0,
    "original_max_position_embeddings": 8192,
    "rope_type": "llama3"
  },
  "rope_theta": 500000.0,
  "tie_word_embeddings": false,
  "torch_dtype": "float16",
  "transformers_version": "4.47.1",
  "use_cache": true,
  "vocab_size": 128256
}
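For orientation, a minimal sketch of reading this file and summarizing the quantization scheme it declares (FP8 per-channel weight quantization, dynamic per-token FP8 activation quantization, with lm_head left unquantized). Only the Python standard library is used; the commented-out transformers load at the end is a hypothetical usage note, and <repo-id> is a placeholder rather than a name taken from this file.

import json

# Inspect the compressed-tensors quantization scheme declared in config.json.
with open("config.json") as f:
    cfg = json.load(f)

qcfg = cfg["quantization_config"]
group = qcfg["config_groups"]["group_0"]

print(qcfg["quant_method"])            # "compressed-tensors"
print(group["weights"]["num_bits"],    # 8-bit float weights, per-channel, static scales
      group["weights"]["strategy"],
      group["weights"]["dynamic"])
print(group["input_activations"]["num_bits"],   # 8-bit float activations, per-token, dynamic scales
      group["input_activations"]["strategy"],
      group["input_activations"]["dynamic"])
print(qcfg["ignore"])                  # layers kept in higher precision, e.g. lm_head

# Hypothetical load path (assumes a transformers build with compressed-tensors
# support and enough accelerator memory for this 80-layer, 8192-hidden model):
# from transformers import AutoModelForCausalLM, AutoTokenizer
# model = AutoModelForCausalLM.from_pretrained("<repo-id>", device_map="auto")
# tokenizer = AutoTokenizer.from_pretrained("<repo-id>")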