config.json · smolvla_libero

config.json

2.1 KB · 88 lines · json Raw

1	`{`
2	`"type": "smolvla",`
3	`"n_obs_steps": 1,`
4	`"input_features": {`
5	`"observation.images.image": {`
6	`"type": "VISUAL",`
7	`"shape": [`
8	`3,`
9	`256,`
10	`256`
11	`]`
12	`},`
13	`"observation.images.image2": {`
14	`"type": "VISUAL",`
15	`"shape": [`
16	`3,`
17	`256,`
18	`256`
19	`]`
20	`},`
21	`"observation.state": {`
22	`"type": "STATE",`
23	`"shape": [`
24	`8`
25	`]`
26	`}`
27	`},`
28	`"output_features": {`
29	`"action": {`
30	`"type": "ACTION",`
31	`"shape": [`
32	`7`
33	`]`
34	`}`
35	`},`
36	`"device": "cuda",`
37	`"use_amp": false,`
38	`"push_to_hub": true,`
39	`"repo_id": null,`
40	`"private": null,`
41	`"tags": null,`
42	`"license": null,`
43	`"chunk_size": 50,`
44	`"n_action_steps": 1,`
45	`"normalization_mapping": {`
46	`"VISUAL": "IDENTITY",`
47	`"STATE": "MEAN_STD",`
48	`"ACTION": "MEAN_STD"`
49	`},`
50	`"max_state_dim": 32,`
51	`"max_action_dim": 32,`
52	`"resize_imgs_with_padding": [`
53	`512,`
54	`512`
55	`],`
56	`"empty_cameras": 0,`
57	`"adapt_to_pi_aloha": false,`
58	`"use_delta_joint_actions_aloha": false,`
59	`"tokenizer_max_length": 48,`
60	`"num_steps": 10,`
61	`"use_cache": true,`
62	`"freeze_vision_encoder": true,`
63	`"train_expert_only": true,`
64	`"train_state_proj": true,`
65	`"optimizer_lr": 0.0001,`
66	`"optimizer_betas": [`
67	`0.9,`
68	`0.95`
69	`],`
70	`"optimizer_eps": 1e-08,`
71	`"optimizer_weight_decay": 1e-10,`
72	`"optimizer_grad_clip_norm": 10,`
73	`"scheduler_warmup_steps": 1000,`
74	`"scheduler_decay_steps": 30000,`
75	`"scheduler_decay_lr": 2.5e-06,`
76	`"vlm_model_name": "HuggingFaceTB/SmolVLM2-500M-Instruct",`
77	`"load_vlm_weights": true,`
78	`"add_image_special_tokens": false,`
79	`"attention_mode": "cross_attn",`
80	`"prefix_length": 0,`
81	`"pad_language_to": "longest",`
82	`"num_expert_layers": -1,`
83	`"num_vlm_layers": 0,`
84	`"self_attn_every_n_layers": 2,`
85	`"expert_width_multiplier": 0.5,`
86	`"min_period": 0.004,`
87	`"max_period": 4.0`
88	`}`