{
  "_name_or_path": "Phi-3.5-vision-instruct",
  "architectures": [
    "Phi3VForCausalLM"
  ],
  "attention_dropout": 0.0,
  "auto_map": {
    "AutoConfig": "configuration_phi3_v.Phi3VConfig",
    "AutoModelForCausalLM": "modeling_phi3_v.Phi3VForCausalLM"
  },
  "bos_token_id": 1,
  "embd_layer": {
    "embedding_cls": "image",
    "hd_transform_order": "sub_glb",
    "projection_cls": "mlp",
    "use_hd_transform": true,
    "with_learnable_separator": true
  },
  "embd_pdrop": 0.0,
  "eos_token_id": 2,
  "hidden_act": "silu",
  "hidden_size": 3072,
  "img_processor": {
    "image_dim_out": 1024,
    "model_name": "openai/clip-vit-large-patch14-336",
    "name": "clip_vision_model",
    "num_img_tokens": 144
  },
  "initializer_range": 0.02,
  "intermediate_size": 8192,
  "max_position_embeddings": 131072,
  "model_type": "phi3_v",
  "num_attention_heads": 32,
  "num_hidden_layers": 32,
  "num_key_value_heads": 32,
  "original_max_position_embeddings": 4096,
  "pad_token_id": 32000,
  "resid_pdrop": 0.0,
  "rms_norm_eps": 1e-05,
  "rope_scaling": {
    "long_factor": [
      1.0800000429153442,
      1.1100000143051147,
      1.1399999856948853,
      1.340000033378601,
      1.5899999141693115,
      1.600000023841858,
      1.6200000047683716,
      2.620000123977661,
      3.2300000190734863,
      3.2300000190734863,
      4.789999961853027,
      7.400000095367432,
      7.700000286102295,
      9.09000015258789,
      12.199999809265137,
      17.670000076293945,
      24.46000099182129,
      28.57000160217285,
      30.420001983642578,
      30.840002059936523,
      32.590003967285156,
      32.93000411987305,
      42.320003509521484,
      44.96000289916992,
      50.340003967285156,
      50.45000457763672,
      57.55000305175781,
      57.93000411987305,
      58.21000289916992,
      60.1400032043457,
      62.61000442504883,
      62.62000274658203,
      62.71000289916992,
      63.1400032043457,
      63.1400032043457,
      63.77000427246094,
      63.93000411987305,
      63.96000289916992,
      63.970001220703125,
      64.02999877929688,
      64.06999969482422,
      64.08000183105469,
      64.12000274658203,
      64.41000366210938,
      64.4800033569336,
      64.51000213623047,
      64.52999877929688,
      64.83999633789062
    ],
    "short_factor": [
      1.08,
      1.1,
      1.1300000000000001,
      1.2800000000000002,
      1.3100000000000003,
      1.4500000000000004,
      1.4500000000000004,
      1.9500000000000008,
      2.030000000000001,
      2.4299999999999926,
      2.5699999999999896,
      2.9499999999999815,
      3.729999999999965,
      3.869999999999962,
      4.189999999999955,
      4.43999999999995,
      4.6399999999999455,
      4.979999999999938,
      5.159999999999934,
      5.279999999999932,
      5.759999999999922,
      5.889999999999919,
      5.889999999999919,
      5.969999999999917,
      6.089999999999915,
      6.2799999999999105,
      6.7699999999999,
      6.8899999999998975,
      7.109999999999893,
      7.129999999999892,
      7.179999999999891,
      7.289999999999889,
      7.339999999999888,
      7.559999999999883,
      7.619999999999882,
      7.69999999999988,
      7.879999999999876,
      7.879999999999876,
      7.879999999999876,
      7.939999999999875,
      7.949999999999875,
      7.979999999999874,
      8.19999999999987,
      8.439999999999864,
      8.469999999999864,
      8.589999999999861,
      8.809999999999857,
      8.999999999999853
    ],
    "type": "su"
  },
  "rope_theta": 10000.0,
  "sliding_window": 262144,
  "tie_word_embeddings": false,
  "torch_dtype": "bfloat16",
  "transformers_version": "4.38.1",
  "use_cache": true,
  "vocab_size": 32064,
  "_attn_implementation": "flash_attention_2"
}