Skip to content

Commit 90c37b3

Browse files
committed
Update agent prompt, tests, and add domino integration test
- Update agent_option_learning system prompt with new skill factory signatures and canonical params - Fix test_skill_factories to match current defaults (grasp_tol, use_motion_planning) and new factory signatures - Add end-to-end integration test: human_interaction approach with scripted domino2.txt plan solves pybullet_domino test task
1 parent 96fc7a6 commit 90c37b3

3 files changed

Lines changed: 138 additions & 81 deletions

File tree

predicators/approaches/agent_option_learning_approach.py

Lines changed: 22 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -77,31 +77,32 @@ def _get_agent_system_prompt(self) -> str:
7777
Read the reference files in /sandbox/reference/skill_factories/ for the
7878
full API. Key factory functions available in the exec context for
7979
propose_options:
80-
- `create_pick_skill(name, types, params_space, config, \
81-
get_target_pose_fn, transport_z=0.7, grasp_z_offset=0.0, \
82-
grasp_terminal_fn=None)` — pick up an object (move above, descend, \
83-
grasp, lift)
84-
- `create_place_skill(name, types, params_space, config, \
85-
get_target_pose_fn, transport_z, drop_z)` — place a held object \
86-
(move above, descend, release, retreat)
87-
- `create_push_skill(name, types, params_space, config, \
88-
get_target_pose_fn, offset_x, offset_z, transport_z=0.7, \
89-
offset_rot=0.0, push_through_frac=0.25)` — push with standard \
90-
4-waypoint trajectory. Requires `config.robot_home_pos` to be set. \
91-
Facing direction is `(sin(yaw), cos(yaw))` from the yaw returned by \
92-
`get_target_pose_fn`. The robot approaches from `offset_x` behind, \
93-
pushes `offset_x * push_through_frac` past the target, then retreats \
94-
to `robot_home_pos`.
95-
- `create_pour_skill(name, types, params_space, config, \
96-
get_target_pose_fn, pour_tilt, transport_z)` — pour from a held \
97-
container (move above, descend, tilt)
80+
- `create_pick_skill(name, types, config, get_target_pose_fn)` — \
81+
pick up an object (move above, descend, grasp, lift). \
82+
Continuous params: `(grasp_z_offset,)`.
83+
- `create_place_skill(name, types, config)` — place a held object \
84+
(move above, descend, release, retreat). No get_target_pose_fn; \
85+
target comes from continuous params: `(x, y, yaw, drop_z)`.
86+
- `create_push_skill(name, types, config, get_target_pose_fn)` — \
87+
push with standard 4-waypoint trajectory. Requires \
88+
`config.robot_home_pos` to be set. Facing direction is \
89+
`(sin(yaw), cos(yaw))` from `get_target_pose_fn`. \
90+
Continuous params: `(offset_x, offset_z, offset_rot, \
91+
push_through_frac)`.
92+
- `create_pour_skill(name, types, config, get_target_pose_fn, \
93+
tilt_terminal_fn=None)` — pour from a held container \
94+
(move above, descend, tilt). Continuous params: `(pour_tilt,)`.
9895
- `create_move_to_skill(name, types, params_space, config, \
9996
pose_fn)` — move end-effector to a target pose
10097
- `create_wait_option(name, config, robot_type)` — hold current pose
10198
102-
All factories take a `SkillConfig` (available as `skill_config` in the
103-
exec context) and a `get_target_pose_fn` callback with signature
104-
`(state, objects, params, config) -> (x, y, z, yaw)`.
99+
All factories (except `create_place_skill`) take a `SkillConfig` \
100+
(available as `skill_config` in the exec context) and a \
101+
`get_target_pose_fn` callback with signature \
102+
`(state, objects, params, config) -> (x, y, z, yaw)`. The callback \
103+
receives empty params; geometry params are now continuous params of \
104+
the output ParameterizedOption. `config.transport_z` controls the \
105+
transport height.
105106
106107
Also available: `Phase`, `PhaseSkill`, `PhaseAction`,
107108
`make_move_to_phase` for building custom multi-phase skills, and

tests/test_skill_factories.py

Lines changed: 42 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -185,7 +185,7 @@ def test_default_tolerances(self, robot_scene):
185185
cfg = _make_config(robot)
186186
assert cfg.move_to_pose_tol == pytest.approx(1e-4)
187187
assert cfg.max_vel_norm == pytest.approx(0.05)
188-
assert cfg.grasp_tol == pytest.approx(1e-3)
188+
assert cfg.grasp_tol == pytest.approx(5e-4)
189189
assert cfg.collision_bodies == ()
190190
assert cfg.ik_validate is True
191191
assert cfg.robot_init_tilt == pytest.approx(0.0)
@@ -232,7 +232,7 @@ def dummy_target(state, objects, params, cfg):
232232
assert phase.name == "TestMove"
233233
assert phase.action_type == PhaseAction.MOVE_TO_POSE
234234
assert phase.terminal_fn is None
235-
assert phase.use_motion_planning is True # default
235+
assert phase.use_motion_planning is False # default from CFG
236236

237237
def test_change_fingers_phase(self):
238238
def dummy_target(state, objects, params, cfg):
@@ -745,7 +745,7 @@ def test_returns_phase_with_move_action_type(self):
745745
assert isinstance(phase, Phase)
746746
assert phase.action_type == PhaseAction.MOVE_TO_POSE
747747
assert phase.name == "MoveTest"
748-
assert phase.use_motion_planning is True # default
748+
assert phase.use_motion_planning is False # default from CFG
749749

750750
def test_explicit_open_finger_status(self, robot_scene):
751751
_, robot = robot_scene
@@ -865,18 +865,19 @@ def test_policy_returns_valid_action(self, robot_scene):
865865

866866
class TestCreatePickSkill:
867867

868-
def _make_pick(self, robot, with_grasp_terminal=False):
869-
config = _make_config(robot)
870-
grasp_fn = (lambda s, o, p_, c: True) if with_grasp_terminal else None
868+
def _make_pick(self, robot):
869+
config = SkillConfig(
870+
robot=robot,
871+
open_fingers_joint=robot.open_fingers,
872+
closed_fingers_joint=robot.closed_fingers,
873+
fingers_state_to_joint=_fingers_state_to_joint,
874+
transport_z=0.8,
875+
)
871876
return create_pick_skill(
872877
name="Pick",
873878
types=[_ROBOT_TYPE, _OBJ_TYPE],
874-
params_space=Box(0, 1, (0,)),
875879
config=config,
876880
get_target_pose_fn=lambda s, o, p_, c: (1.35, 0.75, 0.4, 0.0),
877-
transport_z=0.8,
878-
grasp_z_offset=0.02,
879-
grasp_terminal_fn=grasp_fn,
880881
)
881882

882883
def test_returns_parameterized_option(self, robot_scene):
@@ -888,18 +889,12 @@ def test_returns_parameterized_option(self, robot_scene):
888889
def test_pick_policy_returns_valid_action(self, robot_scene):
889890
_, robot = robot_scene
890891
utils.reset_config({"seed": 123})
891-
config = _make_config(robot)
892892
robot_obj = _make_robot_obj()
893893
obj = _make_obj()
894-
opt = create_pick_skill(
895-
name="Pick",
896-
types=[_ROBOT_TYPE, _OBJ_TYPE],
897-
params_space=Box(0, 1, (0,)),
898-
config=config,
899-
get_target_pose_fn=lambda s, o, p_, c: (1.35, 0.75, 0.4, 0.0),
900-
transport_z=0.8,
901-
)
902-
grounded = opt.ground([robot_obj, obj], np.zeros(0))
894+
opt = self._make_pick(robot)
895+
# Pick params: (grasp_z_offset,) — use 0.02
896+
grounded = opt.ground([robot_obj, obj], np.array([0.02],
897+
dtype=np.float32))
903898
state = _make_home_state(robot_obj, robot, obj=obj,
904899
obj_xyz=(1.35, 0.75, 0.4))
905900
grounded.initiable(state)
@@ -915,15 +910,17 @@ def test_pick_policy_returns_valid_action(self, robot_scene):
915910
class TestCreatePlaceSkill:
916911

917912
def _make_place(self, robot):
918-
config = _make_config(robot)
913+
config = SkillConfig(
914+
robot=robot,
915+
open_fingers_joint=robot.open_fingers,
916+
closed_fingers_joint=robot.closed_fingers,
917+
fingers_state_to_joint=_fingers_state_to_joint,
918+
transport_z=0.8,
919+
)
919920
return create_place_skill(
920921
name="Place",
921922
types=[_ROBOT_TYPE],
922-
params_space=Box(0, 1, (0,)),
923923
config=config,
924-
get_target_pose_fn=lambda s, o, p_, c: (1.35, 0.75, 0.4, 0.0),
925-
transport_z=0.8,
926-
drop_z=0.45,
927924
)
928925

929926
def test_returns_parameterized_option(self, robot_scene):
@@ -935,18 +932,11 @@ def test_returns_parameterized_option(self, robot_scene):
935932
def test_place_policy_returns_valid_action(self, robot_scene):
936933
_, robot = robot_scene
937934
utils.reset_config({"seed": 123})
938-
config = _make_config(robot)
939935
robot_obj = _make_robot_obj()
940-
opt = create_place_skill(
941-
name="Place",
942-
types=[_ROBOT_TYPE],
943-
params_space=Box(0, 1, (0,)),
944-
config=config,
945-
get_target_pose_fn=lambda s, o, p_, c: (1.35, 0.75, 0.4, 0.0),
946-
transport_z=0.8,
947-
drop_z=0.45,
948-
)
949-
grounded = opt.ground([robot_obj], np.zeros(0))
936+
opt = self._make_place(robot)
937+
# Place params: (x, y, yaw, drop_z) — within bounds
938+
grounded = opt.ground([robot_obj], np.array([0.75, 1.35, 0.0, 0.45],
939+
dtype=np.float32))
950940
state = _make_home_state(robot_obj, robot)
951941
grounded.initiable(state)
952942
action = grounded.policy(state)
@@ -962,50 +952,42 @@ class TestCreatePushSkill:
962952

963953
@staticmethod
964954
def _make_push_config(robot):
965-
config = _make_config(robot)
966955
# robot_home_pos is required for create_push_skill
967956
return SkillConfig(
968-
robot=config.robot,
969-
open_fingers_joint=config.open_fingers_joint,
970-
closed_fingers_joint=config.closed_fingers_joint,
971-
fingers_state_to_joint=config.fingers_state_to_joint,
957+
robot=robot,
958+
open_fingers_joint=robot.open_fingers,
959+
closed_fingers_joint=robot.closed_fingers,
960+
fingers_state_to_joint=_fingers_state_to_joint,
972961
robot_home_pos=_EE_HOME,
962+
transport_z=0.8,
973963
)
974964

975-
def test_returns_parameterized_option(self, robot_scene):
976-
_, robot = robot_scene
965+
def _make_push(self, robot):
977966
config = self._make_push_config(robot)
978-
opt = create_push_skill(
967+
return create_push_skill(
979968
name="Push",
980969
types=[_ROBOT_TYPE, _OBJ_TYPE],
981-
params_space=Box(0, 1, (0,)),
982970
config=config,
983971
get_target_pose_fn=lambda s, o, p_, c: (1.35, 0.75, 0.4, 0.0),
984-
offset_x=0.05,
985-
offset_z=0.02,
986-
transport_z=0.8,
987972
)
973+
974+
def test_returns_parameterized_option(self, robot_scene):
975+
_, robot = robot_scene
976+
opt = self._make_push(robot)
988977
assert isinstance(opt, ParameterizedOption)
989978
assert opt.name == "Push"
990979

991980
def test_push_policy_close_fingers_returns_valid_action(self, robot_scene):
992981
"""First call lands in CloseFingers phase -> action within bounds."""
993982
_, robot = robot_scene
994983
utils.reset_config({"seed": 123})
995-
config = self._make_push_config(robot)
996984
robot_obj = _make_robot_obj()
997985
obj = _make_obj()
998-
opt = create_push_skill(
999-
name="Push",
1000-
types=[_ROBOT_TYPE, _OBJ_TYPE],
1001-
params_space=Box(0, 1, (0,)),
1002-
config=config,
1003-
get_target_pose_fn=lambda s, o, p_, c: (1.35, 0.75, 0.4, 0.0),
1004-
offset_x=0.05,
1005-
offset_z=0.02,
1006-
transport_z=0.8,
1007-
)
1008-
grounded = opt.ground([robot_obj, obj], np.zeros(0))
986+
opt = self._make_push(robot)
987+
# Push params: (offset_x, offset_z, offset_rot, push_through_frac)
988+
grounded = opt.ground([robot_obj, obj],
989+
np.array([0.05, 0.02, 0.0, 0.25],
990+
dtype=np.float32))
1009991
state = _build_state(robot_obj, robot, *_EE_HOME,
1010992
finger_state=_OPEN_STATE, obj=obj,
1011993
obj_xyz=(1.35, 0.75, 0.4))

tests/test_skill_factories_integration.py

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1185,3 +1185,77 @@ class _ExposedDominoEnv(_ExposedEnvMixin, PyBulletDominoEnv):
11851185
f"Domino should be held after Pick without motion planning, "
11861186
f"is_held={is_held}"
11871187
)
1188+
1189+
1190+
def test_human_interaction_scripted_domino_solves_task():
1191+
"""Full pipeline: human_interaction approach with scripted option plan
1192+
(domino2.txt) solves the 1st test task in pybullet_domino."""
1193+
try:
1194+
from predicators.envs.pybullet_domino import PyBulletDominoEnv
1195+
except ImportError:
1196+
pytest.skip("pybullet_domino not available")
1197+
1198+
from predicators.approaches import create_approach
1199+
from predicators.cogman import CogMan, run_episode_and_get_observations
1200+
from predicators.execution_monitoring import create_execution_monitor
1201+
from predicators.ground_truth_models import get_gt_options
1202+
from predicators.perception import create_perceiver
1203+
1204+
utils.reset_config({
1205+
"env": "pybullet_domino",
1206+
"approach": "human_interaction",
1207+
"seed": 0,
1208+
"use_gui": False,
1209+
"pybullet_control_mode": "position",
1210+
"pybullet_ik_validate": False,
1211+
"num_train_tasks": 1,
1212+
"num_test_tasks": 1,
1213+
"horizon": 200,
1214+
"domino_use_skill_factories": True,
1215+
"domino_initialize_at_finished_state": False,
1216+
"domino_use_domino_blocks_as_target": True,
1217+
"domino_use_grid": True,
1218+
"domino_include_connected_predicate": False,
1219+
"domino_use_continuous_place": True,
1220+
"domino_restricted_push": True,
1221+
"domino_prune_actions": False,
1222+
"domino_has_glued_dominos": False,
1223+
"keep_failed_demos": True,
1224+
"skill_phase_use_motion_planning": True,
1225+
"human_interaction_approach_use_scripted_option": True,
1226+
"human_interaction_approach_use_all_options": True,
1227+
"scripted_option_dir": "scripted_option_policies",
1228+
"script_option_file_name": "domino2.txt",
1229+
})
1230+
1231+
env = PyBulletDominoEnv(use_gui=False)
1232+
_MOST_RECENT_ENV_INSTANCE[env.get_name()] = env
1233+
1234+
perceiver = create_perceiver("trivial")
1235+
train_tasks = [perceiver.reset(t) for t in env.get_train_tasks()]
1236+
1237+
options = get_gt_options(env.get_name())
1238+
approach = create_approach(
1239+
"human_interaction",
1240+
env.predicates,
1241+
options,
1242+
env.types,
1243+
env.action_space,
1244+
train_tasks,
1245+
)
1246+
1247+
cogman = CogMan(approach, perceiver,
1248+
create_execution_monitor("trivial"))
1249+
1250+
test_env_task = env.get_test_tasks()[0]
1251+
cogman.reset(test_env_task)
1252+
1253+
traj, solved, metrics = run_episode_and_get_observations(
1254+
cogman, env, "test", task_idx=0,
1255+
max_num_steps=200,
1256+
terminate_on_goal_reached=True,
1257+
)
1258+
1259+
assert solved, (
1260+
"Scripted domino2.txt plan should solve the 1st test task"
1261+
)

0 commit comments

Comments
 (0)