{
  "_name_or_path": "/home/eldar/.cache/huggingface/hub/models--meta-llama--Llama-3.2-1B-Instruct/snapshots/e9f8effbab1cbdc515c11ee6e098e3d5a9f51e14",
  "architectures": [
    "LlamaForCausalLM"
  ],
  "attention_bias": false,
  "attention_dropout": 0.0,
  "bos_token_id": 128000,
  "eos_token_id": [
    128001,
    128008,
    128009
  ],
  "head_dim": 64,
  "hidden_act": "silu",
  "hidden_size": 2048,
  "initializer_range": 0.02,
  "intermediate_size": 8192,
  "max_position_embeddings": 131072,
  "mlp_bias": false,
  "model_type": "llama",
  "num_attention_heads": 32,
  "num_hidden_layers": 16,
  "num_key_value_heads": 8,
  "pretraining_tp": 1,
  "rms_norm_eps": 1e-05,
  "rope_scaling": {
    "factor": 32.0,
    "high_freq_factor": 4.0,
    "low_freq_factor": 1.0,
    "original_max_position_embeddings": 8192,
    "rope_type": "llama3"
  },
  "rope_theta": 500000.0,
  "tie_word_embeddings": true,
  "torch_dtype": "bfloat16",
  "transformers_version": "4.45.0",
  "use_cache": true,
  "vocab_size": 128256,
  "quantization_config": {
    "config_groups": {
      "group_0": {
        "input_activations": {
          "actorder": null,
          "block_structure": null,
          "dynamic": true,
          "group_size": null,
          "num_bits": 8,
          "observer": "memoryless",
          "observer_kwargs": {},
          "strategy": "token",
          "symmetric": true,
          "type": "float"
        },
        "output_activations": null,
        "targets": [
          "Linear"
        ],
        "weights": {
          "actorder": null,
          "block_structure": null,
          "dynamic": false,
          "group_size": null,
          "num_bits": 8,
          "observer": "minmax",
          "observer_kwargs": {},
          "strategy": "channel",
          "symmetric": true,
          "type": "float"
        }
      }
    },
    "format": "float-quantized",
    "global_compression_ratio": 1.2372433954777822,
    "ignore": [
      "lm_head"
    ],
    "kv_cache_scheme": null,
    "quant_method": "compressed-tensors",
    "quantization_status": "compressed"
  }
}