Update agent prompt, tests, and add domino integration test

yichao-liang · yichao-liang · commit 90c37b31983f · 2026-03-07T16:44:11.000Z
- Update agent_option_learning system prompt with new skill factory
  signatures and canonical params
- Fix test_skill_factories to match current defaults (grasp_tol,
  use_motion_planning) and new factory signatures
- Add end-to-end integration test: human_interaction approach with
  scripted domino2.txt plan solves pybullet_domino test task
diff --git a/predicators/approaches/agent_option_learning_approach.py b/predicators/approaches/agent_option_learning_approach.py
@@ -77,31 +77,32 @@ def _get_agent_system_prompt(self) -> str:
 Read the reference files in /sandbox/reference/skill_factories/ for the
 full API. Key factory functions available in the exec context for
 propose_options:
-- `create_pick_skill(name, types, params_space, config, \
-get_target_pose_fn, transport_z=0.7, grasp_z_offset=0.0, \
-grasp_terminal_fn=None)` — pick up an object (move above, descend, \
-grasp, lift)
-- `create_place_skill(name, types, params_space, config, \
-get_target_pose_fn, transport_z, drop_z)` — place a held object \
-(move above, descend, release, retreat)
-- `create_push_skill(name, types, params_space, config, \
-get_target_pose_fn, offset_x, offset_z, transport_z=0.7, \
-offset_rot=0.0, push_through_frac=0.25)` — push with standard \
-4-waypoint trajectory. Requires `config.robot_home_pos` to be set. \
-Facing direction is `(sin(yaw), cos(yaw))` from the yaw returned by \
-`get_target_pose_fn`. The robot approaches from `offset_x` behind, \
-pushes `offset_x * push_through_frac` past the target, then retreats \
-to `robot_home_pos`.
-- `create_pour_skill(name, types, params_space, config, \
-get_target_pose_fn, pour_tilt, transport_z)` — pour from a held \
-container (move above, descend, tilt)
+- `create_pick_skill(name, types, config, get_target_pose_fn)` — \
+pick up an object (move above, descend, grasp, lift). \
+Continuous params: `(grasp_z_offset,)`.
+- `create_place_skill(name, types, config)` — place a held object \
+(move above, descend, release, retreat). No get_target_pose_fn; \
+target comes from continuous params: `(x, y, yaw, drop_z)`.
+- `create_push_skill(name, types, config, get_target_pose_fn)` — \
+push with standard 4-waypoint trajectory. Requires \
+`config.robot_home_pos` to be set. Facing direction is \
+`(sin(yaw), cos(yaw))` from `get_target_pose_fn`. \
+Continuous params: `(offset_x, offset_z, offset_rot, \
+push_through_frac)`.
+- `create_pour_skill(name, types, config, get_target_pose_fn, \
+tilt_terminal_fn=None)` — pour from a held container \
+(move above, descend, tilt). Continuous params: `(pour_tilt,)`.
 - `create_move_to_skill(name, types, params_space, config, \
 pose_fn)` — move end-effector to a target pose
 - `create_wait_option(name, config, robot_type)` — hold current pose
 
-All factories take a `SkillConfig` (available as `skill_config` in the
-exec context) and a `get_target_pose_fn` callback with signature
-`(state, objects, params, config) -> (x, y, z, yaw)`.
+All factories take a `SkillConfig` (available as `skill_config` in \
+the exec context). The pick, push, and pour factories also take a \
+`get_target_pose_fn` callback with signature \
+`(state, objects, params, config) -> (x, y, z, yaw)`. The callback \
+receives empty params; geometry params are continuous params of \
+the output ParameterizedOption. `config.transport_z` controls the \
+transport height.
 
 Also available: `Phase`, `PhaseSkill`, `PhaseAction`,
 `make_move_to_phase` for building custom multi-phase skills, and
diff --git a/tests/test_skill_factories.py b/tests/test_skill_factories.py
@@ -185,7 +185,7 @@ def test_default_tolerances(self, robot_scene):
         cfg = _make_config(robot)
         assert cfg.move_to_pose_tol == pytest.approx(1e-4)
         assert cfg.max_vel_norm == pytest.approx(0.05)
-        assert cfg.grasp_tol == pytest.approx(1e-3)
+        assert cfg.grasp_tol == pytest.approx(5e-4)
         assert cfg.collision_bodies == ()
         assert cfg.ik_validate is True
         assert cfg.robot_init_tilt == pytest.approx(0.0)
@@ -232,7 +232,7 @@ def dummy_target(state, objects, params, cfg):
         assert phase.name == "TestMove"
         assert phase.action_type == PhaseAction.MOVE_TO_POSE
         assert phase.terminal_fn is None
-        assert phase.use_motion_planning is True  # default
+        assert phase.use_motion_planning is False  # default from CFG
 
     def test_change_fingers_phase(self):
         def dummy_target(state, objects, params, cfg):
@@ -745,7 +745,7 @@ def test_returns_phase_with_move_action_type(self):
         assert isinstance(phase, Phase)
         assert phase.action_type == PhaseAction.MOVE_TO_POSE
         assert phase.name == "MoveTest"
-        assert phase.use_motion_planning is True  # default
+        assert phase.use_motion_planning is False  # default from CFG
 
     def test_explicit_open_finger_status(self, robot_scene):
         _, robot = robot_scene
@@ -865,18 +865,19 @@ def test_policy_returns_valid_action(self, robot_scene):
 
 class TestCreatePickSkill:
 
-    def _make_pick(self, robot, with_grasp_terminal=False):
-        config = _make_config(robot)
-        grasp_fn = (lambda s, o, p_, c: True) if with_grasp_terminal else None
+    def _make_pick(self, robot):
+        config = SkillConfig(
+            robot=robot,
+            open_fingers_joint=robot.open_fingers,
+            closed_fingers_joint=robot.closed_fingers,
+            fingers_state_to_joint=_fingers_state_to_joint,
+            transport_z=0.8,
+        )
         return create_pick_skill(
             name="Pick",
             types=[_ROBOT_TYPE, _OBJ_TYPE],
-            params_space=Box(0, 1, (0,)),
             config=config,
             get_target_pose_fn=lambda s, o, p_, c: (1.35, 0.75, 0.4, 0.0),
-            transport_z=0.8,
-            grasp_z_offset=0.02,
-            grasp_terminal_fn=grasp_fn,
         )
 
     def test_returns_parameterized_option(self, robot_scene):
@@ -888,18 +889,12 @@ def test_returns_parameterized_option(self, robot_scene):
     def test_pick_policy_returns_valid_action(self, robot_scene):
         _, robot = robot_scene
         utils.reset_config({"seed": 123})
-        config = _make_config(robot)
         robot_obj = _make_robot_obj()
         obj = _make_obj()
-        opt = create_pick_skill(
-            name="Pick",
-            types=[_ROBOT_TYPE, _OBJ_TYPE],
-            params_space=Box(0, 1, (0,)),
-            config=config,
-            get_target_pose_fn=lambda s, o, p_, c: (1.35, 0.75, 0.4, 0.0),
-            transport_z=0.8,
-        )
-        grounded = opt.ground([robot_obj, obj], np.zeros(0))
+        opt = self._make_pick(robot)
+        # Pick params: (grasp_z_offset,) — use 0.02
+        grounded = opt.ground([robot_obj, obj], np.array([0.02],
+                              dtype=np.float32))
         state = _make_home_state(robot_obj, robot, obj=obj,
                                  obj_xyz=(1.35, 0.75, 0.4))
         grounded.initiable(state)
@@ -915,15 +910,17 @@ def test_pick_policy_returns_valid_action(self, robot_scene):
 class TestCreatePlaceSkill:
 
     def _make_place(self, robot):
-        config = _make_config(robot)
+        config = SkillConfig(
+            robot=robot,
+            open_fingers_joint=robot.open_fingers,
+            closed_fingers_joint=robot.closed_fingers,
+            fingers_state_to_joint=_fingers_state_to_joint,
+            transport_z=0.8,
+        )
         return create_place_skill(
             name="Place",
             types=[_ROBOT_TYPE],
-            params_space=Box(0, 1, (0,)),
             config=config,
-            get_target_pose_fn=lambda s, o, p_, c: (1.35, 0.75, 0.4, 0.0),
-            transport_z=0.8,
-            drop_z=0.45,
         )
 
     def test_returns_parameterized_option(self, robot_scene):
@@ -935,18 +932,11 @@ def test_returns_parameterized_option(self, robot_scene):
     def test_place_policy_returns_valid_action(self, robot_scene):
         _, robot = robot_scene
         utils.reset_config({"seed": 123})
-        config = _make_config(robot)
         robot_obj = _make_robot_obj()
-        opt = create_place_skill(
-            name="Place",
-            types=[_ROBOT_TYPE],
-            params_space=Box(0, 1, (0,)),
-            config=config,
-            get_target_pose_fn=lambda s, o, p_, c: (1.35, 0.75, 0.4, 0.0),
-            transport_z=0.8,
-            drop_z=0.45,
-        )
-        grounded = opt.ground([robot_obj], np.zeros(0))
+        opt = self._make_place(robot)
+        # Place params: (x, y, yaw, drop_z) — within bounds
+        grounded = opt.ground([robot_obj], np.array([0.75, 1.35, 0.0, 0.45],
+                              dtype=np.float32))
         state = _make_home_state(robot_obj, robot)
         grounded.initiable(state)
         action = grounded.policy(state)
@@ -962,50 +952,42 @@ class TestCreatePushSkill:
 
     @staticmethod
     def _make_push_config(robot):
-        config = _make_config(robot)
         # robot_home_pos is required for create_push_skill
         return SkillConfig(
-            robot=config.robot,
-            open_fingers_joint=config.open_fingers_joint,
-            closed_fingers_joint=config.closed_fingers_joint,
-            fingers_state_to_joint=config.fingers_state_to_joint,
+            robot=robot,
+            open_fingers_joint=robot.open_fingers,
+            closed_fingers_joint=robot.closed_fingers,
+            fingers_state_to_joint=_fingers_state_to_joint,
             robot_home_pos=_EE_HOME,
+            transport_z=0.8,
         )
 
-    def test_returns_parameterized_option(self, robot_scene):
-        _, robot = robot_scene
+    def _make_push(self, robot):
         config = self._make_push_config(robot)
-        opt = create_push_skill(
+        return create_push_skill(
             name="Push",
             types=[_ROBOT_TYPE, _OBJ_TYPE],
-            params_space=Box(0, 1, (0,)),
             config=config,
             get_target_pose_fn=lambda s, o, p_, c: (1.35, 0.75, 0.4, 0.0),
-            offset_x=0.05,
-            offset_z=0.02,
-            transport_z=0.8,
         )
+
+    def test_returns_parameterized_option(self, robot_scene):
+        _, robot = robot_scene
+        opt = self._make_push(robot)
         assert isinstance(opt, ParameterizedOption)
         assert opt.name == "Push"
 
     def test_push_policy_close_fingers_returns_valid_action(self, robot_scene):
         """First call lands in CloseFingers phase -> action within bounds."""
         _, robot = robot_scene
         utils.reset_config({"seed": 123})
-        config = self._make_push_config(robot)
         robot_obj = _make_robot_obj()
         obj = _make_obj()
-        opt = create_push_skill(
-            name="Push",
-            types=[_ROBOT_TYPE, _OBJ_TYPE],
-            params_space=Box(0, 1, (0,)),
-            config=config,
-            get_target_pose_fn=lambda s, o, p_, c: (1.35, 0.75, 0.4, 0.0),
-            offset_x=0.05,
-            offset_z=0.02,
-            transport_z=0.8,
-        )
-        grounded = opt.ground([robot_obj, obj], np.zeros(0))
+        opt = self._make_push(robot)
+        # Push params: (offset_x, offset_z, offset_rot, push_through_frac)
+        grounded = opt.ground([robot_obj, obj],
+                              np.array([0.05, 0.02, 0.0, 0.25],
+                                       dtype=np.float32))
         state = _build_state(robot_obj, robot, *_EE_HOME,
                               finger_state=_OPEN_STATE, obj=obj,
                               obj_xyz=(1.35, 0.75, 0.4))
diff --git a/tests/test_skill_factories_integration.py b/tests/test_skill_factories_integration.py
@@ -1185,3 +1185,77 @@ class _ExposedDominoEnv(_ExposedEnvMixin, PyBulletDominoEnv):
         f"Domino should be held after Pick without motion planning, "
         f"is_held={is_held}"
     )
+
+
+def test_human_interaction_scripted_domino_solves_task():
+    """Full pipeline: human_interaction approach with scripted option plan
+    (domino2.txt) solves the 1st test task in pybullet_domino."""
+    try:
+        from predicators.envs.pybullet_domino import PyBulletDominoEnv
+    except ImportError:
+        pytest.skip("pybullet_domino not available")
+
+    from predicators.approaches import create_approach
+    from predicators.cogman import CogMan, run_episode_and_get_observations
+    from predicators.execution_monitoring import create_execution_monitor
+    from predicators.ground_truth_models import get_gt_options
+    from predicators.perception import create_perceiver
+
+    utils.reset_config({
+        "env": "pybullet_domino",
+        "approach": "human_interaction",
+        "seed": 0,
+        "use_gui": False,
+        "pybullet_control_mode": "position",
+        "pybullet_ik_validate": False,
+        "num_train_tasks": 1,
+        "num_test_tasks": 1,
+        "horizon": 200,
+        "domino_use_skill_factories": True,
+        "domino_initialize_at_finished_state": False,
+        "domino_use_domino_blocks_as_target": True,
+        "domino_use_grid": True,
+        "domino_include_connected_predicate": False,
+        "domino_use_continuous_place": True,
+        "domino_restricted_push": True,
+        "domino_prune_actions": False,
+        "domino_has_glued_dominos": False,
+        "keep_failed_demos": True,
+        "skill_phase_use_motion_planning": True,
+        "human_interaction_approach_use_scripted_option": True,
+        "human_interaction_approach_use_all_options": True,
+        "scripted_option_dir": "scripted_option_policies",
+        "script_option_file_name": "domino2.txt",
+    })
+
+    env = PyBulletDominoEnv(use_gui=False)
+    _MOST_RECENT_ENV_INSTANCE[env.get_name()] = env
+
+    perceiver = create_perceiver("trivial")
+    train_tasks = [perceiver.reset(t) for t in env.get_train_tasks()]
+
+    options = get_gt_options(env.get_name())
+    approach = create_approach(
+        "human_interaction",
+        env.predicates,
+        options,
+        env.types,
+        env.action_space,
+        train_tasks,
+    )
+
+    cogman = CogMan(approach, perceiver,
+                    create_execution_monitor("trivial"))
+
+    test_env_task = env.get_test_tasks()[0]
+    cogman.reset(test_env_task)
+
+    traj, solved, metrics = run_episode_and_get_observations(
+        cogman, env, "test", task_idx=0,
+        max_num_steps=200,
+        terminate_on_goal_reached=True,
+    )
+
+    assert solved, (
+        "Scripted domino2.txt plan should solve the 1st test task"
+    )