-
Notifications
You must be signed in to change notification settings - Fork 367
Description
Thank you for your great project!
I am currently experimenting with the MIMO module of bc_transformer to generate multiple actions at once (similar to action chunking). To achieve this, I modified the following configuration options:
seq_length: 10 - To generate 10 actions in one forward pass.
frame_stack: 10 - To receive 10 observations at a time.
supervise_all_steps: true - To supervise all actions in the sequence, not just the current step.
pred_future_acs: true - To predict future actions. I found that when this is false, the model only predicts actions before the current step.
Although the model trains, I encountered an issue with the generated action trajectory:
Within a chunk: The changes between consecutive actions are very small (the motion is almost static).
Between chunks: When inferring the next chunk, there is a large jump/discontinuity relative to the last frame of the previous action chunk.
This results in a very "choppy" motion, as demonstrated in the attached video.
episode0.mp4
I would like to verify whether my configuration is correct for this use case. Below is my full configuration for reference:
{
"algo_name": "bc",
"experiment": {
"name": "test",
"validate": false,
"logging": {
"terminal_output_to_txt": true,
"log_tb": true,
"log_wandb": false,
"wandb_proj_name": "debug"
},
"save": {
"enabled": true,
"every_n_seconds": null,
"every_n_epochs": 50,
"epochs": [],
"on_best_validation": false,
"on_best_rollout_return": false,
"on_best_rollout_success_rate": true
},
"epoch_every_n_steps": 200,
"validation_epoch_every_n_steps": 10,
"env": null,
"additional_envs": null,
"render": false,
"render_video": true,
"keep_all_videos": false,
"video_skip": 5,
"rollout": {
"enabled": true,
"n": 50,
"horizon": 400,
"rate": 50,
"warmstart": 0,
"terminate_on_success": true
}
},
"train": {
"data": null,
"output_dir": "../bc_transformer_trained_models",
"num_data_workers": 0,
"hdf5_cache_mode": "all",
"hdf5_use_swmr": true,
"hdf5_load_next_obs": false,
"hdf5_normalize_obs": false,
"hdf5_filter_key": null,
"seq_length": 10,
"pad_seq_length": true,
"frame_stack": 10,
"pad_frame_stack": true,
"dataset_keys": [
"actions"
],
"goal_mode": null,
"cuda": true,
"batch_size": 100,
"num_epochs": 2000,
"max_grad_norm": 100.0,
"seed": 1
},
"algo": {
"optim_params": {
"policy": {
"optimizer_type": "adamw",
"learning_rate": {
"initial": 0.0001,
"decay_factor": 1.0,
"epoch_schedule": [100],
"scheduler_type": "linear"
},
"regularization": {
"L2": 0.01
}
}
},
"loss": {
"l2_weight": 1.0,
"l1_weight": 0.0,
"cos_weight": 0.0
},
"actor_layer_dims": [],
"gaussian": {
"enabled": false
},
"gmm": {
"enabled": true,
"num_modes": 5,
"min_std": 0.0001,
"std_activation": "softplus",
"low_noise_eval": true
},
"vae": {
"enabled": false
},
"rnn": {
"enabled": false
},
"transformer": {
"enabled": true,
"supervise_all_steps": true,
"pred_future_acs": true,
"num_layers": 6,
"embed_dim": 512,
"num_heads": 8
}
},
"observation": {
"modalities": {
"obs": {
"low_dim": [
"eef_pos",
"eef_ori",
"gripper_states"
],
"rgb": [
"agentview_rgb",
"wrist_rgb"
],
"depth": [],
"scan": []
},
"goal": {
"low_dim": [],
"rgb": [],
"depth": [],
"scan": []
}
},
"encoder": {
"low_dim": {
"core_class": null,
"core_kwargs": {},
"obs_randomizer_class": null,
"obs_randomizer_kwargs": {}
},
"rgb": {
"core_class": "VisualCore",
"core_kwargs": {
"feature_dimension": 64,
"backbone_class": "ResNet18Conv",
"backbone_kwargs": {
"pretrained": false,
"input_coord_conv": false
},
"pool_class": "SpatialSoftmax",
"pool_kwargs": {
"num_kp": 32,
"learnable_temperature": false,
"temperature": 1.0,
"noise_std": 0.0
}
},
"obs_randomizer_class": "CropRandomizer",
"obs_randomizer_kwargs": {
"crop_height": 120,
"crop_width": 120,
"num_crops": 1,
"pos_enc": false
}
},
"depth": {
"core_class": "VisualCore",
"core_kwargs": {},
"obs_randomizer_class": null,
"obs_randomizer_kwargs": {}
},
"scan": {
"core_class": "ScanCore",
"core_kwargs": {},
"obs_randomizer_class": null,
"obs_randomizer_kwargs": {}
}
}
}
}