Question about MIMO / Action Chunk configuration in bc_transformer

Thank you for your great project!
I am currently experimenting with using the bc_transformer's MIMO module to generate multiple actions at once (similar to action chunking). To achieve this, I modified the following configuration options:
    seq_length: 10 - To generate 10 actions in one forward pass.
    frame_stack: 10 - To receive 10 observations at a time.
    supervise_all_steps: true - To supervise all actions in the sequence, not just the current step.
    pred_future_acs: true - I found that when this is false, the model only predicts actions before the current step.
Although the model trains without errors, I have encountered an issue with the generated action trajectory:
    Within a chunk: The changes between consecutive action frames are very small (almost static).
    Between chunks: When inferring the next chunk, there is a large jump/discontinuity relative to the last frame of the previous action chunk.
This results in a very "choppy" motion, as demonstrated in the attached video.

https://github.com/user-attachments/assets/857822ed-b45e-44c9-8907-1b473c7033e4

I would like to verify whether my configuration is correct for this use case. Below is my full configuration for reference:

{
    "algo_name": "bc",
    "experiment": {
        "name": "test",
        "validate": false,
        "logging": {
            "terminal_output_to_txt": true,
            "log_tb": true,
            "log_wandb": false,
            "wandb_proj_name": "debug"
        },
        "save": {
            "enabled": true,
            "every_n_seconds": null,
            "every_n_epochs": 50,
            "epochs": [],
            "on_best_validation": false,
            "on_best_rollout_return": false,
            "on_best_rollout_success_rate": true
        },
        "epoch_every_n_steps": 200,
        "validation_epoch_every_n_steps": 10,
        "env": null,
        "additional_envs": null,
        "render": false,
        "render_video": true,
        "keep_all_videos": false,
        "video_skip": 5,
        "rollout": {
            "enabled": true,
            "n": 50,
            "horizon": 400,
            "rate": 50,
            "warmstart": 0,
            "terminate_on_success": true
        }
    },
    "train": {
        "data": null,
        "output_dir": "../bc_transformer_trained_models",
        "num_data_workers": 0,
        "hdf5_cache_mode": "all",
        "hdf5_use_swmr": true,
        "hdf5_load_next_obs": false,
        "hdf5_normalize_obs": false,
        "hdf5_filter_key": null,
        "seq_length": 10,
        "pad_seq_length": true,
        "frame_stack": 10,
        "pad_frame_stack": true,
        "dataset_keys": [
            "actions"
        ],
        "goal_mode": null,
        "cuda": true,
        "batch_size": 100,
        "num_epochs": 2000,
        "max_grad_norm": 100.0,
        "seed": 1
    },
    "algo": {
        "optim_params": {
            "policy": {
                "optimizer_type": "adamw",
                "learning_rate": {
                    "initial": 0.0001,
                    "decay_factor": 1.0,
                    "epoch_schedule": [100],
                    "scheduler_type": "linear"
                },
                "regularization": {
                    "L2": 0.01
                }
            }
        },
        "loss": {
            "l2_weight": 1.0,
            "l1_weight": 0.0,
            "cos_weight": 0.0
        },
        "actor_layer_dims": [],
        "gaussian": {
            "enabled": false
        },
        "gmm": {
            "enabled": true,
            "num_modes": 5,
            "min_std": 0.0001,
            "std_activation": "softplus",
            "low_noise_eval": true
        },
        "vae": {
            "enabled": false
        },
        "rnn": {
            "enabled": false
        },
        "transformer": {
            "enabled": true,
            "supervise_all_steps": true,
            "pred_future_acs": true,
            "num_layers": 6,
            "embed_dim": 512,
            "num_heads": 8
        }
    },
    "observation": {
        "modalities": {
            "obs": {
                "low_dim": [
                    "eef_pos",
                    "eef_ori",
                    "gripper_states"
                ],
                "rgb": [
                    "agentview_rgb",
                    "wrist_rgb"
                ],
                "depth": [],
                "scan": []
            },
            "goal": {
                "low_dim": [],
                "rgb": [],
                "depth": [],
                "scan": []
            }
        },
        "encoder": {
            "low_dim": {
                "core_class": null,
                "core_kwargs": {},
                "obs_randomizer_class": null,
                "obs_randomizer_kwargs": {}
            },
            "rgb": {
                "core_class": "VisualCore",
                "core_kwargs": {
                    "feature_dimension": 64,
                    "backbone_class": "ResNet18Conv",
                    "backbone_kwargs": {
                        "pretrained": false,
                        "input_coord_conv": false
                    },
                    "pool_class": "SpatialSoftmax",
                    "pool_kwargs": {
                        "num_kp": 32,
                        "learnable_temperature": false,
                        "temperature": 1.0,
                        "noise_std": 0.0
                    }
                },
                "obs_randomizer_class": "CropRandomizer",
                "obs_randomizer_kwargs": {
                    "crop_height": 120,
                    "crop_width": 120,
                    "num_crops": 1,
                    "pos_enc": false
                }
            },
            "depth": {
                "core_class": "VisualCore",
                "core_kwargs": {},
                "obs_randomizer_class": null,
                "obs_randomizer_kwargs": {}
            },
            "scan": {
                "core_class": "ScanCore",
                "core_kwargs": {},
                "obs_randomizer_class": null,
                "obs_randomizer_kwargs": {}
            }
        }
    }
}

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Question about MIMO / Action Chunk configuration in bc_transformer #292

Metadata

Assignees

Labels

Type

Projects

Milestone

Relationships

Development

Question about MIMO / Action Chunk configuration in bc_transformer #292

Description

Metadata

Metadata

Assignees

Labels

Type

Projects

Milestone

Relationships

Development

Issue actions