{
  "_name_or_path": "openvla/openvla-7b",
  "arch_specifier": "no-align+fused-gelu-mlp",
  "architectures": [
    "OpenVLAForActionPrediction"
  ],
  "auto_map": {
    "AutoConfig": "openvla/openvla-7b--configuration_prismatic.OpenVLAConfig",
    "AutoModelForVision2Seq": "openvla/openvla-7b--modeling_prismatic.OpenVLAForActionPrediction"
  },
  "hf_llm_id": "meta-llama/Llama-2-7b-hf",
  "image_resize_strategy": "resize-naive",
  "image_sizes": [
    224,
    224
  ],
  "llm_backbone_id": "llama2-7b-pure",
  "llm_max_length": 2048,
  "model_type": "openvla",
  "n_action_bins": 256,
  "norm_stats": {
    "libero_object": {
      "action": {
        "mean": [
          0.07096529006958008,
          0.13498851656913757,
          -0.04601382836699486,
          0.00123520044144243,
          0.006998839322477579,
          -0.015027612447738647,
          0.46428999304771423
        ],
        "std": [
          0.2681235373020172,
          0.43846824765205383,
          0.4474974274635315,
          0.024446550756692886,
          0.049355510622262955,
          0.042107198387384415,
          0.49879148602485657
        ],
        "max": [
          0.9375,
          0.8919642567634583,
          0.9375,
          0.17678570747375488,
          0.35035714507102966,
          0.1810714304447174,
          1.0
        ],
        "min": [
          -0.8839285969734192,
          -0.9375,
          -0.9375,
          -0.15000000596046448,
          -0.29035714268684387,
          -0.32892856001853943,
          0.0
        ],
        "q01": [
          -0.5383928418159485,
          -0.8758928775787354,
          -0.9375,
          -0.06964285671710968,
          -0.11678571254014969,
          -0.15964286029338837,
          0.0
        ],
        "q99": [
          0.8464285731315613,
          0.84375,
          0.9375,
          0.08142857253551483,
          0.14892856776714325,
          0.0867857113480568,
          1.0
        ],
        "mask": [
          true,
          true,
          true,
          true,
          true,
          true,
          false
        ]
      },
      "proprio": {
        "mean": [
          0.0,
          0.0,
          0.0,
          0.0,
          0.0,
          0.0,
          0.0
        ],
        "std": [
          0.0,
          0.0,
          0.0,
          0.0,
          0.0,
          0.0,
          0.0
        ],
        "max": [
          0.0,
          0.0,
          0.0,
          0.0,
          0.0,
          0.0,
          0.0
        ],
        "min": [
          0.0,
          0.0,
          0.0,
          0.0,
          0.0,
          0.0,
          0.0
        ],
        "q01": [
          0.0,
          0.0,
          0.0,
          0.0,
          0.0,
          0.0,
          0.0
        ],
        "q99": [
          0.0,
          0.0,
          0.0,
          0.0,
          0.0,
          0.0,
          0.0
        ]
      },
      "num_transitions": 66984,
      "num_trajectories": 454
    }
  },
  "output_projector_states": false,
  "pad_to_multiple_of": 64,
  "pad_token_id": 32000,
  "text_config": {
    "model_type": "llama",
    "pad_token_id": 32000,
    "torch_dtype": "bfloat16",
    "vocab_size": 32064
  },
  "timm_model_ids": [
    "vit_large_patch14_reg4_dinov2.lvd142m",
    "vit_so400m_patch14_siglip_224"
  ],
  "timm_override_act_layers": [
    null,
    null
  ],
  "torch_dtype": "bfloat16",
  "transformers_version": "4.40.1",
  "use_fused_vision_backbone": true,
  "vision_backbone_id": "dinosiglip-vit-so-224px"
}