{
  "_name_or_path": "/home/eldar/.cache/huggingface/hub/models--meta-llama--Llama-3.2-1B-Instruct/snapshots/e9f8effbab1cbdc515c11ee6e098e3d5a9f51e14",
  "architectures": [
    "LlamaForCausalLM"
  ],
  "attention_bias": false,
  "attention_dropout": 0.0,
  "bos_token_id": 128000,
  "eos_token_id": [
    128001,
    128008,
    128009
  ],
  "head_dim": 64,
  "hidden_act": "silu",
  "hidden_size": 2048,
  "initializer_range": 0.02,
  "intermediate_size": 8192,
  "max_position_embeddings": 131072,
  "mlp_bias": false,
  "model_type": "llama",
  "num_attention_heads": 32,
  "num_hidden_layers": 16,
  "num_key_value_heads": 8,
  "pretraining_tp": 1,
  "rms_norm_eps": 1e-05,
  "rope_scaling": {
    "factor": 32.0,
    "high_freq_factor": 4.0,
    "low_freq_factor": 1.0,
    "original_max_position_embeddings": 8192,
    "rope_type": "llama3"
  },
  "rope_theta": 500000.0,
  "tie_word_embeddings": true,
  "torch_dtype": "bfloat16",
  "transformers_version": "4.45.0",
  "use_cache": true,
  "vocab_size": 128256,
  "quantization_config": {
    "config_groups": {
      "group_0": {
        "input_activations": {
          "actorder": null,
          "block_structure": null,
          "dynamic": true,
          "group_size": null,
          "num_bits": 8,
          "observer": "memoryless",
          "observer_kwargs": {},
          "strategy": "token",
          "symmetric": true,
          "type": "float"
        },
        "output_activations": null,
        "targets": [
          "Linear"
        ],
        "weights": {
          "actorder": null,
          "block_structure": null,
          "dynamic": false,
          "group_size": null,
          "num_bits": 8,
          "observer": "minmax",
          "observer_kwargs": {},
          "strategy": "channel",
          "symmetric": true,
          "type": "float"
        }
      }
    },
    "format": "float-quantized",
    "global_compression_ratio": 1.2372433954777822,
    "ignore": [
      "lm_head"
    ],
    "kv_cache_scheme": null,
    "quant_method": "compressed-tensors",
    "quantization_status": "compressed"
  }
}