experiment_cfg/config.yaml
34.2 KB · 1342 lines · yaml Raw
1 !!python/object:groot.vla.omni.configs.base_config.Config
2 data: !!python/object:groot.vla.omni.configs.data.data_config.DataConfig
3 datasets:
4 - !!python/object:groot.vla.omni.configs.data.data_config.SingleDatasetConfig
5 dataset_paths:
6 - /mnt/aws-lfs-02/shared/datasets/xdof.yam_v7_all_merged_global_task_exclude_bad_subtasks
7 dataset_type: physical_embodiment
8 embodiment_tag: xdof_relative_eef_relative_joint
9 mix_ratio: 0.1
10 - !!python/object:groot.vla.omni.configs.data.data_config.SingleDatasetConfig
11 dataset_paths:
12 - /mnt/aws-lfs-02/shared/datasets/xdof.yam_v7_subtask_only_merged_global_task
13 dataset_type: physical_embodiment
14 embodiment_tag: xdof_relative_eef_relative_joint_subtask
15 mix_ratio: 0.2
16 - !!python/object:groot.vla.omni.configs.data.data_config.SingleDatasetConfig
17 dataset_paths:
18 - /mnt/aws-lfs-02/shared/datasets/droid_101_success_idlefiltered_n17
19 - /mnt/aws-lfs-02/shared/datasets/droid_101_success_idlefiltered_n17_swapped
20 dataset_type: physical_embodiment
21 embodiment_tag: oxe_droid_relative_eef_relative_joint
22 mix_ratio: 0.1
23 - !!python/object:groot.vla.omni.configs.data.data_config.SingleDatasetConfig
24 dataset_paths:
25 - /mnt/aws-lfs-02/shared/datasets/galaxea_sharpa/real_g1.g1-in-the-wild-merged
26 dataset_type: physical_embodiment
27 embodiment_tag: real_g1_relative_eef_relative_joints
28 mix_ratio: 0.05
29 - !!python/object:groot.vla.omni.configs.data.data_config.SingleDatasetConfig
30 dataset_paths:
31 - /mnt/aws-lfs-02/shared/datasets/galaxea_sharpa/real_r1_pro_sharpa.inlab_play_real_robot_batch_1
32 - /mnt/aws-lfs-02/shared/datasets/galaxea_sharpa/real_r1_pro_sharpa.inlab_play_real_robot_batch_2
33 - /mnt/aws-lfs-02/shared/datasets/galaxea_sharpa/real_r1_pro_sharpa.miscellaneous_1k_trajectories
34 dataset_type: physical_embodiment
35 embodiment_tag: real_r1_pro_sharpa_relative_eef
36 mix_ratio: 0.05
37 - !!python/object:groot.vla.omni.configs.data.data_config.SingleDatasetConfig
38 dataset_paths:
39 - /mnt/aws-lfs-02/shared/datasets/galaxea_sharpa/mecka_lerobot/real_r1_pro_sharpa.mecka_batch1-2025-12-10-merged
40 - /mnt/aws-lfs-02/shared/datasets/galaxea_sharpa/mecka_lerobot/real_r1_pro_sharpa.mecka_batch3_2026-01-04-merged_backup
41 - /mnt/aws-lfs-02/shared/datasets/galaxea_sharpa/mecka_lerobot/real_r1_pro_sharpa.mecka_batch4_2026-01-05-merged_backup
42 - /mnt/aws-lfs-02/shared/datasets/galaxea_sharpa/mecka_lerobot/real_r1_pro_sharpa.mecka_batch5_2026-01-05-merged_backup
43 - /mnt/aws-lfs-02/shared/datasets/galaxea_sharpa/mecka_lerobot/real_r1_pro_sharpa.mecka_batch6_2026-01-05-merged_backup
44 - /mnt/aws-lfs-02/shared/datasets/galaxea_sharpa/mecka_lerobot/real_r1_pro_sharpa.mecka_batch10_2026-01-10-merged_backup
45 - /mnt/aws-lfs-02/shared/datasets/galaxea_sharpa/mecka_lerobot/real_r1_pro_sharpa.mecka_batch11_2026-01-10-merged_backup
46 - /mnt/aws-lfs-02/shared/datasets/galaxea_sharpa/mecka_lerobot/real_r1_pro_sharpa.mecka_batch12_2026-01-10-merged_backup
47 - /mnt/aws-lfs-02/shared/datasets/galaxea_sharpa/mecka_lerobot/real_r1_pro_sharpa.mecka_batch8_2026-01-10-merged_backup
48 - /mnt/aws-lfs-02/shared/datasets/galaxea_sharpa/mecka_lerobot/real_r1_pro_sharpa.mecka_batch9_2026-01-10-merged_backup
49 dataset_type: physical_embodiment
50 embodiment_tag: real_r1_pro_sharpa_relative_eef_mecka
51 mix_ratio: 0.25
52 - !!python/object:groot.vla.omni.configs.data.data_config.SingleDatasetConfig
53 dataset_paths:
54 - /mnt/aws-lfs-02/shared/datasets/galaxea_sharpa/maxinsights_lerobot_updated/1530hrs/real_r1_pro_sharpa.maxinsights_1530hrs_updated_train_set_merged
55 dataset_type: physical_embodiment
56 embodiment_tag: real_r1_pro_sharpa_relative_eef_maxinsights
57 mix_ratio: 0.2
58 - !!python/object:groot.vla.omni.configs.data.data_config.SingleDatasetConfig
59 dataset_paths:
60 - /mnt/aws-lfs-02/shared/datasets/galaxea_sharpa/real_r1_pro_sharpa.inlab_play_human_batch1
61 - /mnt/aws-lfs-02/shared/datasets/galaxea_sharpa/real_r1_pro_sharpa.inlab_play_human_batch2
62 - /mnt/aws-lfs-02/shared/datasets/galaxea_sharpa/real_r1_pro_sharpa.shirt_rolling_task24_2000_human_video_filter_n6_keep1619_demo_stats
63 - /mnt/aws-lfs-02/shared/datasets/galaxea_sharpa/real_r1_pro_sharpa.shirt_rolling_task15_2000_human_video_filter_n6_keep572_demo_stats
64 - /mnt/aws-lfs-02/shared/datasets/galaxea_sharpa/real_r1_pro_sharpa.sort_cards_human_filter_n6_keep523_demo_stats_overwrite_left_side_stats
65 - /mnt/aws-lfs-02/shared/datasets/galaxea_sharpa/real_r1_pro_sharpa.tong_task38_2000_human_video_overwrite_left_side_stats
66 - /mnt/aws-lfs-02/shared/datasets/galaxea_sharpa/real_r1_pro_sharpa.syringe_task30i_2000_human_video_filtered
67 - /mnt/aws-lfs-02/shared/datasets/galaxea_sharpa/real_r1_pro_sharpa.unscrew_bottle_task43_2000_human_video_fixed-duration
68 - /mnt/aws-lfs-02/shared/datasets/galaxea_sharpa/real_r1_pro_sharpa.unscrew_Jim_bottle_task47_600_human_video
69 - /mnt/aws-lfs-02/shared/datasets/galaxea_sharpa/real_r1_pro_sharpa.fold_shirt_task30b_500_human_video_halfdone
70 - /mnt/aws-lfs-02/shared/datasets/galaxea_sharpa/real_r1_pro_sharpa.fold_towel_task30c_500_human_video_halfdone
71 - /mnt/aws-lfs-02/shared/datasets/galaxea_sharpa/real_r1_pro_sharpa.sort_cards_task32e_1000_human_video
72 dataset_type: physical_embodiment
73 embodiment_tag: real_r1_pro_sharpa_relative_eef_human
74 mix_ratio: 0.05
75 download_cache: false
76 episode_sampling_rate: 0.1
77 image_crop_size:
78 - 244
79 - 244
80 image_target_size:
81 - 224
82 - 224
83 max_prompt_trajectories: 5
84 mock_dataset_mode: false
85 modality_configs:
86 oxe_droid_relative_eef_relative_joint:
87 action: !!python/object:groot.vla.omni.data.types.ModalityConfig
88 action_format:
89 - &id004 !!python/object/apply:groot.vla.omni.data.types.ActionFormat
90 - xyz+rot6d
91 - &id001 !!python/object/apply:groot.vla.omni.data.types.ActionFormat
92 - default
93 - *id001
94 action_representation:
95 - &id002 !!python/object/apply:groot.vla.omni.data.types.ActionRepresentation
96 - relative
97 - &id005 !!python/object/apply:groot.vla.omni.data.types.ActionRepresentation
98 - absolute
99 - *id002
100 action_type:
101 - &id006 !!python/object/apply:groot.vla.omni.data.types.ActionType
102 - eef
103 - &id003 !!python/object/apply:groot.vla.omni.data.types.ActionType
104 - non_eef
105 - *id003
106 delta_indices:
107 - 0
108 - 1
109 - 2
110 - 3
111 - 4
112 - 5
113 - 6
114 - 7
115 - 8
116 - 9
117 - 10
118 - 11
119 - 12
120 - 13
121 - 14
122 - 15
123 - 16
124 - 17
125 - 18
126 - 19
127 - 20
128 - 21
129 - 22
130 - 23
131 - 24
132 - 25
133 - 26
134 - 27
135 - 28
136 - 29
137 - 30
138 - 31
139 - 32
140 - 33
141 - 34
142 - 35
143 - 36
144 - 37
145 - 38
146 - 39
147 exclude_state: false
148 extra_keys:
149 - joint_position
150 hand_keys:
151 - gripper_position
152 loss_weights: null
153 modality_keys:
154 - eef_9d
155 - gripper_position
156 - joint_position
157 normalization_mode: null
158 normalize_rotation: true
159 wrist_keys:
160 - eef_9d
161 language: !!python/object:groot.vla.omni.data.types.ModalityConfig
162 action_format: null
163 action_representation: null
164 action_type: null
165 delta_indices:
166 - 0
167 exclude_state: false
168 extra_keys: null
169 hand_keys: null
170 loss_weights: null
171 modality_keys:
172 - annotation.language.language_instruction
173 - annotation.language.language_instruction_2
174 - annotation.language.language_instruction_3
175 normalization_mode: null
176 normalize_rotation: true
177 wrist_keys: null
178 state: !!python/object:groot.vla.omni.data.types.ModalityConfig
179 action_format: null
180 action_representation: null
181 action_type: null
182 delta_indices:
183 - 0
184 exclude_state: false
185 extra_keys: null
186 hand_keys: null
187 loss_weights: null
188 modality_keys:
189 - eef_9d
190 - gripper_position
191 - joint_position
192 normalization_mode: null
193 normalize_rotation: true
194 wrist_keys: null
195 video: !!python/object:groot.vla.omni.data.types.ModalityConfig
196 action_format: null
197 action_representation: null
198 action_type: null
199 delta_indices:
200 - -15
201 - 0
202 exclude_state: false
203 extra_keys: null
204 hand_keys: null
205 loss_weights: null
206 modality_keys:
207 - exterior_image_1_left
208 - wrist_image_left
209 normalization_mode: null
210 normalize_rotation: true
211 wrist_keys: null
212 real_g1_relative_eef_relative_joints:
213 action: !!python/object:groot.vla.omni.data.types.ModalityConfig
214 action_format:
215 - *id004
216 - *id004
217 - *id001
218 - *id001
219 - *id001
220 - *id001
221 - *id001
222 - *id001
223 - *id001
224 action_representation:
225 - *id002
226 - *id002
227 - *id005
228 - *id005
229 - *id002
230 - *id002
231 - *id005
232 - *id005
233 - *id005
234 action_type:
235 - *id006
236 - *id006
237 - *id003
238 - *id003
239 - *id003
240 - *id003
241 - *id003
242 - *id003
243 - *id003
244 delta_indices:
245 - 0
246 - 1
247 - 2
248 - 3
249 - 4
250 - 5
251 - 6
252 - 7
253 - 8
254 - 9
255 - 10
256 - 11
257 - 12
258 - 13
259 - 14
260 - 15
261 - 16
262 - 17
263 - 18
264 - 19
265 - 20
266 - 21
267 - 22
268 - 23
269 - 24
270 - 25
271 - 26
272 - 27
273 - 28
274 - 29
275 - 30
276 - 31
277 - 32
278 - 33
279 - 34
280 - 35
281 - 36
282 - 37
283 - 38
284 - 39
285 exclude_state: false
286 extra_keys:
287 - left_arm
288 - right_arm
289 - waist
290 - base_height_command
291 - navigate_command
292 hand_keys:
293 - left_hand
294 - right_hand
295 loss_weights: null
296 modality_keys:
297 - left_wrist_eef_9d
298 - right_wrist_eef_9d
299 - left_hand
300 - right_hand
301 - left_arm
302 - right_arm
303 - waist
304 - base_height_command
305 - navigate_command
306 normalization_mode: null
307 normalize_rotation: true
308 wrist_keys:
309 - left_wrist_eef_9d
310 - right_wrist_eef_9d
311 language: !!python/object:groot.vla.omni.data.types.ModalityConfig
312 action_format: null
313 action_representation: null
314 action_type: null
315 delta_indices:
316 - 0
317 exclude_state: false
318 extra_keys: null
319 hand_keys: null
320 loss_weights: null
321 modality_keys:
322 - annotation.human.task_description
323 normalization_mode: null
324 normalize_rotation: true
325 wrist_keys: null
326 state: !!python/object:groot.vla.omni.data.types.ModalityConfig
327 action_format: null
328 action_representation: null
329 action_type: null
330 delta_indices:
331 - 0
332 exclude_state: false
333 extra_keys: null
334 hand_keys: null
335 loss_weights: null
336 modality_keys:
337 - left_wrist_eef_9d
338 - right_wrist_eef_9d
339 - left_hand
340 - right_hand
341 - left_arm
342 - right_arm
343 - waist
344 normalization_mode: null
345 normalize_rotation: true
346 wrist_keys: null
347 video: !!python/object:groot.vla.omni.data.types.ModalityConfig
348 action_format: null
349 action_representation: null
350 action_type: null
351 delta_indices:
352 - -20
353 - 0
354 exclude_state: false
355 extra_keys: null
356 hand_keys: null
357 loss_weights: null
358 modality_keys:
359 - ego_view
360 normalization_mode: null
361 normalize_rotation: true
362 wrist_keys: null
363 real_r1_pro_sharpa_relative_eef:
364 action: !!python/object:groot.vla.omni.data.types.ModalityConfig
365 action_format:
366 - *id004
367 - *id004
368 - *id001
369 - *id001
370 action_representation:
371 - *id002
372 - *id002
373 - *id005
374 - *id005
375 action_type:
376 - *id006
377 - *id006
378 - *id003
379 - *id003
380 delta_indices:
381 - 0
382 - 1
383 - 2
384 - 3
385 - 4
386 - 5
387 - 6
388 - 7
389 - 8
390 - 9
391 - 10
392 - 11
393 - 12
394 - 13
395 - 14
396 - 15
397 - 16
398 - 17
399 - 18
400 - 19
401 - 20
402 - 21
403 - 22
404 - 23
405 - 24
406 - 25
407 - 26
408 - 27
409 - 28
410 - 29
411 - 30
412 - 31
413 - 32
414 - 33
415 - 34
416 - 35
417 - 36
418 - 37
419 - 38
420 - 39
421 exclude_state: false
422 extra_keys: []
423 hand_keys:
424 - left_hand_joints
425 - right_hand_joints
426 loss_weights: null
427 modality_keys:
428 - left_wrist_eef
429 - right_wrist_eef
430 - left_hand_joints
431 - right_hand_joints
432 normalization_mode: null
433 normalize_rotation: true
434 wrist_keys:
435 - left_wrist_eef
436 - right_wrist_eef
437 language: !!python/object:groot.vla.omni.data.types.ModalityConfig
438 action_format: null
439 action_representation: null
440 action_type: null
441 delta_indices:
442 - 0
443 exclude_state: false
444 extra_keys: null
445 hand_keys: null
446 loss_weights: null
447 modality_keys:
448 - annotation.human.coarse_action
449 normalization_mode: null
450 normalize_rotation: true
451 wrist_keys: null
452 state: !!python/object:groot.vla.omni.data.types.ModalityConfig
453 action_format: null
454 action_representation: null
455 action_type: null
456 delta_indices:
457 - 0
458 exclude_state: false
459 extra_keys: null
460 hand_keys: null
461 loss_weights: null
462 modality_keys:
463 - left_wrist_eef
464 - right_wrist_eef
465 - left_hand_joints
466 - right_hand_joints
467 normalization_mode: null
468 normalize_rotation: true
469 wrist_keys: null
470 video: !!python/object:groot.vla.omni.data.types.ModalityConfig
471 action_format: null
472 action_representation: null
473 action_type: null
474 delta_indices:
475 - -20
476 - 0
477 exclude_state: false
478 extra_keys: null
479 hand_keys: null
480 loss_weights: null
481 modality_keys:
482 - ego_view_res320x240_freq20
483 - left_wrist_view_res320x240_freq20
484 - right_wrist_view_res320x240_freq20
485 normalization_mode: null
486 normalize_rotation: true
487 wrist_keys: null
488 real_r1_pro_sharpa_relative_eef_human:
489 action: !!python/object:groot.vla.omni.data.types.ModalityConfig
490 action_format:
491 - *id004
492 - *id004
493 - *id001
494 - *id001
495 action_representation:
496 - *id002
497 - *id002
498 - *id005
499 - *id005
500 action_type:
501 - *id006
502 - *id006
503 - *id003
504 - *id003
505 delta_indices:
506 - 0
507 - 1
508 - 2
509 - 3
510 - 4
511 - 5
512 - 6
513 - 7
514 - 8
515 - 9
516 - 10
517 - 11
518 - 12
519 - 13
520 - 14
521 - 15
522 - 16
523 - 17
524 - 18
525 - 19
526 - 20
527 - 21
528 - 22
529 - 23
530 - 24
531 - 25
532 - 26
533 - 27
534 - 28
535 - 29
536 - 30
537 - 31
538 - 32
539 - 33
540 - 34
541 - 35
542 - 36
543 - 37
544 - 38
545 - 39
546 exclude_state: false
547 extra_keys: []
548 hand_keys:
549 - left_hand_joints
550 - right_hand_joints
551 loss_weights: null
552 modality_keys:
553 - left_wrist_eef
554 - right_wrist_eef
555 - left_hand_joints
556 - right_hand_joints
557 normalization_mode: null
558 normalize_rotation: true
559 wrist_keys:
560 - left_wrist_eef
561 - right_wrist_eef
562 language: !!python/object:groot.vla.omni.data.types.ModalityConfig
563 action_format: null
564 action_representation: null
565 action_type: null
566 delta_indices:
567 - 0
568 exclude_state: false
569 extra_keys: null
570 hand_keys: null
571 loss_weights: null
572 modality_keys:
573 - annotation.human.coarse_action
574 normalization_mode: null
575 normalize_rotation: true
576 wrist_keys: null
577 state: !!python/object:groot.vla.omni.data.types.ModalityConfig
578 action_format: null
579 action_representation: null
580 action_type: null
581 delta_indices:
582 - 0
583 exclude_state: true
584 extra_keys: null
585 hand_keys: null
586 loss_weights: null
587 modality_keys:
588 - left_wrist_eef
589 - right_wrist_eef
590 - left_hand_joints
591 - right_hand_joints
592 normalization_mode: null
593 normalize_rotation: true
594 wrist_keys: null
595 video: !!python/object:groot.vla.omni.data.types.ModalityConfig
596 action_format: null
597 action_representation: null
598 action_type: null
599 delta_indices:
600 - -20
601 - 0
602 exclude_state: false
603 extra_keys: null
604 hand_keys: null
605 loss_weights: null
606 modality_keys:
607 - ego_view_res320x240_freq20
608 - left_wrist_view_res320x240_freq20
609 - right_wrist_view_res320x240_freq20
610 normalization_mode: null
611 normalize_rotation: true
612 wrist_keys: null
613 real_r1_pro_sharpa_relative_eef_maxinsights:
614 action: !!python/object:groot.vla.omni.data.types.ModalityConfig
615 action_format:
616 - *id004
617 - *id004
618 - *id001
619 - *id001
620 action_representation:
621 - *id002
622 - *id002
623 - *id005
624 - *id005
625 action_type:
626 - *id006
627 - *id006
628 - *id003
629 - *id003
630 delta_indices:
631 - 0
632 - 1
633 - 2
634 - 3
635 - 4
636 - 5
637 - 6
638 - 7
639 - 8
640 - 9
641 - 10
642 - 11
643 - 12
644 - 13
645 - 14
646 - 15
647 - 16
648 - 17
649 - 18
650 - 19
651 - 20
652 - 21
653 - 22
654 - 23
655 - 24
656 - 25
657 - 26
658 - 27
659 - 28
660 - 29
661 - 30
662 - 31
663 - 32
664 - 33
665 - 34
666 - 35
667 - 36
668 - 37
669 - 38
670 - 39
671 exclude_state: false
672 extra_keys: []
673 hand_keys:
674 - left_hand_joints
675 - right_hand_joints
676 loss_weights: null
677 modality_keys:
678 - left_wrist_eef
679 - right_wrist_eef
680 - left_hand_joints
681 - right_hand_joints
682 normalization_mode: null
683 normalize_rotation: true
684 wrist_keys:
685 - left_wrist_eef
686 - right_wrist_eef
687 language: !!python/object:groot.vla.omni.data.types.ModalityConfig
688 action_format: null
689 action_representation: null
690 action_type: null
691 delta_indices:
692 - 0
693 exclude_state: false
694 extra_keys: null
695 hand_keys: null
696 loss_weights: null
697 modality_keys:
698 - annotation.human.coarse_action
699 normalization_mode: null
700 normalize_rotation: true
701 wrist_keys: null
702 state: !!python/object:groot.vla.omni.data.types.ModalityConfig
703 action_format: null
704 action_representation: null
705 action_type: null
706 delta_indices:
707 - 0
708 exclude_state: true
709 extra_keys: null
710 hand_keys: null
711 loss_weights: null
712 modality_keys:
713 - left_wrist_eef
714 - right_wrist_eef
715 - left_hand_joints
716 - right_hand_joints
717 normalization_mode: null
718 normalize_rotation: true
719 wrist_keys: null
720 video: !!python/object:groot.vla.omni.data.types.ModalityConfig
721 action_format: null
722 action_representation: null
723 action_type: null
724 delta_indices:
725 - -30
726 - 0
727 exclude_state: false
728 extra_keys: null
729 hand_keys: null
730 loss_weights: null
731 modality_keys:
732 - ego_view_cropratio_res320x240_freq30
733 normalization_mode: null
734 normalize_rotation: true
735 wrist_keys: null
736 real_r1_pro_sharpa_relative_eef_mecka:
737 action: !!python/object:groot.vla.omni.data.types.ModalityConfig
738 action_format:
739 - *id004
740 - *id004
741 - *id001
742 - *id001
743 action_representation:
744 - *id002
745 - *id002
746 - *id005
747 - *id005
748 action_type:
749 - *id006
750 - *id006
751 - *id003
752 - *id003
753 delta_indices:
754 - 0
755 - 1
756 - 2
757 - 3
758 - 4
759 - 5
760 - 6
761 - 7
762 - 8
763 - 9
764 - 10
765 - 11
766 - 12
767 - 13
768 - 14
769 - 15
770 - 16
771 - 17
772 - 18
773 - 19
774 - 20
775 - 21
776 - 22
777 - 23
778 - 24
779 - 25
780 - 26
781 - 27
782 - 28
783 - 29
784 - 30
785 - 31
786 - 32
787 - 33
788 - 34
789 - 35
790 - 36
791 - 37
792 - 38
793 - 39
794 exclude_state: false
795 extra_keys: []
796 hand_keys:
797 - left_hand_joints
798 - right_hand_joints
799 loss_weights: null
800 modality_keys:
801 - left_wrist_eef
802 - right_wrist_eef
803 - left_hand_joints
804 - right_hand_joints
805 normalization_mode: null
806 normalize_rotation: true
807 wrist_keys:
808 - left_wrist_eef
809 - right_wrist_eef
810 language: !!python/object:groot.vla.omni.data.types.ModalityConfig
811 action_format: null
812 action_representation: null
813 action_type: null
814 delta_indices:
815 - 0
816 exclude_state: false
817 extra_keys: null
818 hand_keys: null
819 loss_weights: null
820 modality_keys:
821 - annotation.human.coarse_action
822 normalization_mode: null
823 normalize_rotation: true
824 wrist_keys: null
825 state: !!python/object:groot.vla.omni.data.types.ModalityConfig
826 action_format: null
827 action_representation: null
828 action_type: null
829 delta_indices:
830 - 0
831 exclude_state: true
832 extra_keys: null
833 hand_keys: null
834 loss_weights: null
835 modality_keys:
836 - left_wrist_eef
837 - right_wrist_eef
838 - left_hand_joints
839 - right_hand_joints
840 normalization_mode: null
841 normalize_rotation: true
842 wrist_keys: null
843 video: !!python/object:groot.vla.omni.data.types.ModalityConfig
844 action_format: null
845 action_representation: null
846 action_type: null
847 delta_indices:
848 - -30
849 - 0
850 exclude_state: false
851 extra_keys: null
852 hand_keys: null
853 loss_weights: null
854 modality_keys:
855 - ego_view_cropratio_res320x240_freq30
856 normalization_mode: null
857 normalize_rotation: true
858 wrist_keys: null
859 xdof_relative_eef_relative_joint:
860 action: !!python/object:groot.vla.omni.data.types.ModalityConfig
861 action_format:
862 - *id004
863 - *id004
864 - *id001
865 - *id001
866 - *id001
867 - *id001
868 action_representation:
869 - *id002
870 - *id002
871 - *id005
872 - *id005
873 - *id002
874 - *id002
875 action_type:
876 - *id006
877 - *id006
878 - *id003
879 - *id003
880 - *id003
881 - *id003
882 delta_indices:
883 - 0
884 - 1
885 - 2
886 - 3
887 - 4
888 - 5
889 - 6
890 - 7
891 - 8
892 - 9
893 - 10
894 - 11
895 - 12
896 - 13
897 - 14
898 - 15
899 - 16
900 - 17
901 - 18
902 - 19
903 - 20
904 - 21
905 - 22
906 - 23
907 - 24
908 - 25
909 - 26
910 - 27
911 - 28
912 - 29
913 - 30
914 - 31
915 - 32
916 - 33
917 - 34
918 - 35
919 - 36
920 - 37
921 - 38
922 - 39
923 exclude_state: false
924 extra_keys:
925 - left_joint_pos
926 - right_joint_pos
927 hand_keys:
928 - left_gripper_pos
929 - right_gripper_pos
930 loss_weights: null
931 modality_keys:
932 - left_wrist_eef
933 - right_wrist_eef
934 - left_gripper_pos
935 - right_gripper_pos
936 - left_joint_pos
937 - right_joint_pos
938 normalization_mode: null
939 normalize_rotation: true
940 wrist_keys:
941 - left_wrist_eef
942 - right_wrist_eef
943 language: !!python/object:groot.vla.omni.data.types.ModalityConfig
944 action_format: null
945 action_representation: null
946 action_type: null
947 delta_indices:
948 - 0
949 exclude_state: false
950 extra_keys: null
951 hand_keys: null
952 loss_weights: null
953 modality_keys:
954 - annotation.task
955 normalization_mode: null
956 normalize_rotation: true
957 wrist_keys: null
958 state: !!python/object:groot.vla.omni.data.types.ModalityConfig
959 action_format: null
960 action_representation: null
961 action_type: null
962 delta_indices:
963 - 0
964 exclude_state: false
965 extra_keys: null
966 hand_keys: null
967 loss_weights: null
968 modality_keys:
969 - left_wrist_eef
970 - right_wrist_eef
971 - left_gripper_pos
972 - right_gripper_pos
973 - left_joint_pos
974 - right_joint_pos
975 normalization_mode: null
976 normalize_rotation: true
977 wrist_keys: null
978 video: !!python/object:groot.vla.omni.data.types.ModalityConfig
979 action_format: null
980 action_representation: null
981 action_type: null
982 delta_indices:
983 - -30
984 - 0
985 exclude_state: false
986 extra_keys: null
987 hand_keys: null
988 loss_weights: null
989 modality_keys:
990 - top_camera-images-rgb_320_240
991 - left_camera-images-rgb_320_240
992 - right_camera-images-rgb_320_240
993 normalization_mode: null
994 normalize_rotation: true
995 wrist_keys: null
996 xdof_relative_eef_relative_joint_subtask:
997 action: !!python/object:groot.vla.omni.data.types.ModalityConfig
998 action_format:
999 - *id004
1000 - *id004
1001 - *id001
1002 - *id001
1003 - *id001
1004 - *id001
1005 action_representation:
1006 - *id002
1007 - *id002
1008 - *id005
1009 - *id005
1010 - *id002
1011 - *id002
1012 action_type:
1013 - *id006
1014 - *id006
1015 - *id003
1016 - *id003
1017 - *id003
1018 - *id003
1019 delta_indices:
1020 - 0
1021 - 1
1022 - 2
1023 - 3
1024 - 4
1025 - 5
1026 - 6
1027 - 7
1028 - 8
1029 - 9
1030 - 10
1031 - 11
1032 - 12
1033 - 13
1034 - 14
1035 - 15
1036 - 16
1037 - 17
1038 - 18
1039 - 19
1040 - 20
1041 - 21
1042 - 22
1043 - 23
1044 - 24
1045 - 25
1046 - 26
1047 - 27
1048 - 28
1049 - 29
1050 - 30
1051 - 31
1052 - 32
1053 - 33
1054 - 34
1055 - 35
1056 - 36
1057 - 37
1058 - 38
1059 - 39
1060 exclude_state: false
1061 extra_keys:
1062 - left_joint_pos
1063 - right_joint_pos
1064 hand_keys:
1065 - left_gripper_pos
1066 - right_gripper_pos
1067 loss_weights: null
1068 modality_keys:
1069 - left_wrist_eef
1070 - right_wrist_eef
1071 - left_gripper_pos
1072 - right_gripper_pos
1073 - left_joint_pos
1074 - right_joint_pos
1075 normalization_mode: null
1076 normalize_rotation: true
1077 wrist_keys:
1078 - left_wrist_eef
1079 - right_wrist_eef
1080 language: !!python/object:groot.vla.omni.data.types.ModalityConfig
1081 action_format: null
1082 action_representation: null
1083 action_type: null
1084 delta_indices:
1085 - 0
1086 exclude_state: false
1087 extra_keys: null
1088 hand_keys: null
1089 loss_weights: null
1090 modality_keys:
1091 - annotation.sub_task
1092 normalization_mode: null
1093 normalize_rotation: true
1094 wrist_keys: null
1095 state: !!python/object:groot.vla.omni.data.types.ModalityConfig
1096 action_format: null
1097 action_representation: null
1098 action_type: null
1099 delta_indices:
1100 - 0
1101 exclude_state: false
1102 extra_keys: null
1103 hand_keys: null
1104 loss_weights: null
1105 modality_keys:
1106 - left_wrist_eef
1107 - right_wrist_eef
1108 - left_gripper_pos
1109 - right_gripper_pos
1110 - left_joint_pos
1111 - right_joint_pos
1112 normalization_mode: null
1113 normalize_rotation: true
1114 wrist_keys: null
1115 video: !!python/object:groot.vla.omni.data.types.ModalityConfig
1116 action_format: null
1117 action_representation: null
1118 action_type: null
1119 delta_indices:
1120 - -30
1121 - 0
1122 exclude_state: false
1123 extra_keys: null
1124 hand_keys: null
1125 loss_weights: null
1126 modality_keys:
1127 - top_camera-images-rgb_320_240
1128 - left_camera-images-rgb_320_240
1129 - right_camera-images-rgb_320_240
1130 normalization_mode: null
1131 normalize_rotation: true
1132 wrist_keys: null
1133 mode: single_turn
1134 num_prompt_trajectories: 2
1135 num_shards_per_epoch: 100000
1136 override_pretraining_statistics: false
1137 random_chop: 0.0
1138 seed: 24
1139 shard_size: 1024
1140 shuffle: true
1141 subsample_ratio: 1.0
1142 variable_num_demos: false
1143 video_backend: torchcodec
1144 load_config_path: groot/vla/omni/configs/experiments/r1_pro/sharpa/n17_pretrain/n17_pretrain_human_robot_cross_embodiment_fix_yam_absolute_hand_2step.yaml
1145 model: !!python/object:groot.vla.omni.configs.model.groot_n1d5_qwen.GrootN1d5QwenConfig
1146 _attn_implementation_internal: null
1147 _commit_hash: null
1148 _name_or_path: ''
1149 _output_attentions: false
1150 action_horizon: 40
1151 action_space_prompt: false
1152 add_cross_attention: false
1153 add_pos_embed: true
1154 apply_sincos_state_encoding: false
1155 architectures: null
1156 attn_dropout: 0.2
1157 backbone_embedding_dim: 2048
1158 bad_words_ids: null
1159 begin_suppress_tokens: null
1160 bos_token_id: null
1161 chunk_size_feed_forward: 0
1162 color_jitter_params:
1163 brightness: 0.3
1164 contrast: 0.4
1165 hue: 0.08
1166 saturation: 0.5
1167 crop_fraction: 0.95
1168 cross_attention_hidden_size: null
1169 decoder_start_token_id: null
1170 diffusion_model_cfg:
1171 attention_head_dim: 48
1172 cross_attention_dim: 2048
1173 dropout: 0.2
1174 final_dropout: true
1175 interleave_self_attention: true
1176 norm_type: ada_norm
1177 num_attention_heads: 32
1178 num_layers: 32
1179 output_dim: 1024
1180 positional_embeddings: null
1181 dit_latent_dim: 1536
1182 diversity_penalty: 0.0
1183 do_human_interpolation: false
1184 do_sample: false
1185 dtype: null
1186 early_stopping: false
1187 encoder_no_repeat_ngram_size: 0
1188 eos_token_id: null
1189 exclude_state: false
1190 exponential_decay_length_penalty: null
1191 finetuning_task: null
1192 forced_bos_token_id: null
1193 forced_eos_token_id: null
1194 formalize_language: true
1195 hidden_size: 1024
1196 human_embodiment_tags: null
1197 id2label:
1198 0: LABEL_0
1199 1: LABEL_1
1200 image_crop_size: !!python/tuple
1201 - 230
1202 - 230
1203 image_target_size: !!python/tuple
1204 - 256
1205 - 256
1206 interpolation_steps: 20
1207 is_decoder: false
1208 is_encoder_decoder: false
1209 label2id:
1210 LABEL_0: 0
1211 LABEL_1: 1
1212 language_dropout_prob: 0.0
1213 length_penalty: 1.0
1214 letter_box_transform: false
1215 load_bf16: true
1216 max_action_dim: 132
1217 max_length: 20
1218 max_num_embodiments: 32
1219 max_seq_len: 1024
1220 max_state_dim: 132
1221 min_length: 0
1222 model_dtype: bfloat16
1223 model_type: GrootN1d5Qwen
1224 no_repeat_ngram_size: 0
1225 noise_beta_alpha: 1.5
1226 noise_beta_beta: 1.0
1227 noise_s: 0.999
1228 num_beam_groups: 1
1229 num_beams: 1
1230 num_inference_timesteps: 4
1231 num_return_sequences: 1
1232 num_timestep_buckets: 1000
1233 output_hidden_states: false
1234 output_scores: false
1235 pad_token_id: null
1236 prefix: null
1237 problem_type: null
1238 pruned_heads: {}
1239 random_history_crop: true
1240 random_rotation_angle: 0
1241 remove_invalid_values: false
1242 repetition_penalty: 1.0
1243 reproject_vision: false
1244 return_dict: true
1245 return_dict_in_generate: false
1246 rtc_ramp_rate: 6.0
1247 select_layer: 16
1248 sep_token_id: null
1249 shortest_image_edge: 256
1250 state_dropout_prob: 0.2
1251 state_gaussian_noise_std: 0.0
1252 suppress_tokens: null
1253 task_specific_params: null
1254 temperature: 1.0
1255 tf_legacy_loss: false
1256 tie_encoder_decoder: false
1257 tie_word_embeddings: true
1258 tokenizer_class: null
1259 top_k: 50
1260 top_p: 1.0
1261 torchscript: false
1262 transformers_version: null
1263 tune_diffusion_model: true
1264 tune_linear: true
1265 tune_llm: false
1266 tune_projector: true
1267 tune_top_llm_layers: 0
1268 tune_visual: false
1269 tune_vlln: true
1270 typical_p: 1.0
1271 use_albumentations: true
1272 use_alternate_vl_dit: true
1273 use_bfloat16: false
1274 use_flash_attention: true
1275 use_future_tokens: false
1276 use_mean_std: false
1277 use_percentiles: true
1278 use_vl_self_attention: true
1279 use_vlln: true
1280 vl_self_attention_cfg:
1281 attention_head_dim: 64
1282 dropout: 0.2
1283 final_dropout: true
1284 num_attention_heads: 32
1285 num_layers: 4
1286 positional_embeddings: null
1287 vlm_backend: qwen3
1288 vlm_model_path: nvidia/Cosmos-Reason2-2B
1289 training: !!python/object:groot.vla.omni.configs.training.training_config.TrainingConfig
1290 assert_loss_less_than: null
1291 batch_size: 32
1292 bf16: true
1293 dataloader_num_workers: 4
1294 deepspeed_stage: 2
1295 enable_profiling: false
1296 eval_batch_size: 2
1297 eval_bf16: true
1298 eval_set_split_ratio: 0.1
1299 eval_steps: 500
1300 eval_strategy: 'no'
1301 experiment_name: null
1302 fp16: false
1303 global_batch_size: 1024
1304 gradient_accumulation_steps: 1
1305 gradient_checkpointing: false
1306 learning_rate: 5.0e-05
1307 logging_steps: 10
1308 lr_scheduler_type: cosine
1309 max_concurrent_uploads: 2
1310 max_grad_norm: 1.0
1311 max_retries: 3
1312 max_steps: 200000
1313 muon_lr: 0.005
1314 num_gpus: 256
1315 optim: adamw_torch_fused
1316 output_dir: nvidia/Cosmos-Reason2-2B
1317 remove_unused_columns: false
1318 save_best_eval_metric_greater_is_better: true
1319 save_best_eval_metric_name: ''
1320 save_steps: 1000
1321 save_total_limit: 5
1322 save_vl_model: false
1323 skip_spike: true
1324 skip_spike_ema_alpha: 0.99
1325 skip_spike_max_consecutive: 10
1326 skip_spike_threshold: 5.0
1327 start_from_checkpoint: null
1328 tf32: true
1329 upload_checkpoints: true
1330 upload_every: 1000
1331 upload_last_n_checkpoints: 5
1332 use_ddp: false
1333 use_legacy_wd_application: false
1334 use_muon: false
1335 use_wandb: true
1336 wandb_project: human_pretraining_n15_galaxea_sharpa
1337 warmup_ratio: 0.05
1338 warmup_steps: 0
1339 weight_decay: 1.0e-05
1340 wsd_decay_type: cosine
1341 wsd_stable_ratio: 0.8
1342