fix: AgentTask deadlock when on_enter awaits generate_reply that triggers another AgentTask (#5377)

longcw · devin-ai-integration[bot] · web-flow · commit 90af80cf60ad · 2026-04-10T10:04:07.000+08:00
Co-authored-by: devin-ai-integration[bot] <158243242+devin-ai-integration[bot]@users.noreply.github.com>
diff --git a/livekit-agents/livekit/agents/voice/agent.py b/livekit-agents/livekit/agents/voice/agent.py
@@ -848,9 +848,14 @@ def _handle_task_done(_: asyncio.Task[Any]) -> None:
 
         # TODO(theomonnom): could the RunResult watcher & the blocked_tasks share the same logic?
         self.__inactive_ev.clear()
+        suspended_handles: list[SpeechHandle | asyncio.Task[Any]] = []
+        pending_on_enter_task: asyncio.Task[None] | None = None
         try:
+            # use wait_on_enter=False to avoid deadlock: on_enter may spawn nested
+            # AgentTasks that require user input, but session.run() can't return until
+            # all watched handles complete — creating a circular wait.
             await session._update_activity(
-                self, previous_activity="pause", blocked_tasks=blocked_tasks
+                self, previous_activity="pause", blocked_tasks=blocked_tasks, wait_on_enter=False
             )
 
             if not self._activity and not self.done():
@@ -860,14 +865,29 @@ def _handle_task_done(_: asyncio.Task[Any]) -> None:
                     )
                 )
 
-            # NOTE: _update_activity is calling the on_enter method, so the RunResult can capture all speeches
             run_state = session._global_run_state
-            if speech_handle and run_state and not run_state.done():
-                # make sure to not deadlock on the current speech handle
-                run_state._unwatch_handle(speech_handle)
-                # it is OK to call _mark_done_if_needed here, the above _update_activity will call on_enter
-                # so handles added inside the on_enter will make sure we're not completing the run_state too early.
-                run_state._mark_done_if_needed(None)
+
+            if self._activity and (on_enter_task := self._activity._on_enter_task):
+                if run_state and not run_state.done():
+                    # watch the on_enter task as a guard so RunResult won't complete
+                    # before on_enter has registered its own speech handles
+                    run_state._watch_handle(on_enter_task)
+                    pending_on_enter_task = on_enter_task
+                else:
+                    # no active run to guard — just wait for on_enter directly
+                    await asyncio.shield(on_enter_task)
+
+            # now unwatch the parent speech handle and blocked tasks that belong to the
+            # old activity — they can't complete while this AgentTask is running, and
+            # keeping them watched would block RunResult from completing.
+            if run_state and not run_state.done():
+                if speech_handle and run_state._unwatch_handle(speech_handle):
+                    suspended_handles.append(speech_handle)
+                for task in blocked_tasks:
+                    if run_state._unwatch_handle(task):
+                        suspended_handles.append(task)
+                if suspended_handles:
+                    run_state._mark_done_if_needed(None)
         except Exception:
             self.__inactive_ev.set()
             raise
@@ -883,24 +903,32 @@ def _handle_task_done(_: asyncio.Task[Any]) -> None:
             # run_state could have changed after self.__fut
             run_state = session._global_run_state
 
+            # re-watch the suspended handles so the resumed parent activity
+            # is tracked by the current RunResult again
+            if run_state and not run_state.done():
+                for handle in suspended_handles:
+                    run_state._watch_handle(handle)
+
+            if pending_on_enter_task:
+                try:
+                    await asyncio.shield(pending_on_enter_task)
+                except BaseException:
+                    logger.exception("error in on_enter task of agent %s", self.id)
+
             if session.current_agent != self:
                 logger.warning(
                     f"{self.__class__.__name__} completed, but the agent has changed in the meantime. "
                     "Ignoring handoff to the previous agent, likely due to `AgentSession.update_agent` being invoked."
                 )
                 await old_activity.aclose()
             else:
-                if speech_handle and run_state and not run_state.done():
-                    run_state._watch_handle(speech_handle)
-
                 merged_chat_ctx = old_agent.chat_ctx.merge(
                     self.chat_ctx,
                     exclude_function_call=not self._preserve_function_call_history,
                     exclude_instructions=True,
                 )
                 # set the chat_ctx directly, `session._update_activity` will sync it to the rt_session if needed
                 old_agent._chat_ctx.items[:] = merged_chat_ctx.items
-                # await old_agent.update_chat_ctx(merged_chat_ctx)
 
                 await session._update_activity(
                     old_agent, new_activity="resume", wait_on_enter=False
diff --git a/livekit-agents/livekit/agents/voice/run_result.py b/livekit-agents/livekit/agents/voice/run_result.py
@@ -169,12 +169,16 @@ def _watch_handle(self, handle: SpeechHandle | asyncio.Task) -> None:
 
         handle.add_done_callback(self._mark_done_if_needed)
 
-    def _unwatch_handle(self, handle: SpeechHandle | asyncio.Task) -> None:
+    def _unwatch_handle(self, handle: SpeechHandle | asyncio.Task) -> bool:
+        if handle not in self._handles:
+            return False
+
         self._handles.discard(handle)
         handle.remove_done_callback(self._mark_done_if_needed)
 
         if isinstance(handle, SpeechHandle):
             handle._remove_item_added_callback(self._item_added)
+        return True
 
     def _mark_done_if_needed(self, handle: SpeechHandle | asyncio.Task | None) -> None:
         if isinstance(handle, SpeechHandle):
diff --git a/tests/test_nested_agent_task.py b/tests/test_nested_agent_task.py
@@ -0,0 +1,126 @@
+from __future__ import annotations
+
+import asyncio
+
+import pytest
+
+from livekit.agents import Agent, AgentSession, AgentTask, RunContext, function_tool
+from livekit.agents.llm import FunctionToolCall
+
+from .fake_llm import FakeLLM, FakeLLMResponse
+
+
+class InnerTask(AgentTask):
+    """A task that needs a second user turn to complete (user must trigger 'finish')."""
+
+    def __init__(self) -> None:
+        super().__init__(instructions="inner task")
+
+    async def on_enter(self) -> None:
+        self.session.generate_reply(instructions="inner_greeting")
+
+    @function_tool
+    async def finish(self, ctx: RunContext) -> str:
+        """Called to complete the inner task."""
+        self.complete(None)
+        return "done"
+
+
+class OuterTask(AgentTask):
+    """A task whose on_enter triggers a tool call that awaits InnerTask."""
+
+    def __init__(self) -> None:
+        super().__init__(instructions="outer task")
+
+    async def on_enter(self) -> None:
+        await self.session.generate_reply(instructions="outer_greeting")
+
+    @function_tool
+    async def start_inner(self, ctx: RunContext) -> str:
+        """Transitions into InnerTask."""
+        await InnerTask()
+        self.complete(None)
+        return "inner completed"
+
+
+class RootAgent(Agent):
+    def __init__(self) -> None:
+        super().__init__(instructions="root agent")
+
+    @function_tool
+    async def start_outer(self, ctx: RunContext) -> str:
+        """Transitions into OuterTask."""
+        await OuterTask()
+        return "outer completed"
+
+
+@pytest.mark.asyncio
+async def test_nested_agent_task_no_deadlock():
+    """session.run() must return when an AgentTask hands off to a nested task
+    that collects additional user input before completing."""
+    llm = _build_fake_llm()
+    async with AgentSession(llm=llm) as sess:
+        await sess.start(RootAgent())
+
+        # This must not deadlock — it should return once the on_enter chain
+        # has started, even though InnerTask is still waiting for user input.
+        first_result = await asyncio.wait_for(sess.run(user_input="go"), timeout=5.0)
+        assert first_result is not None
+
+        # Now complete InnerTask by triggering the finish tool
+        second_result = await asyncio.wait_for(sess.run(user_input="done"), timeout=5.0)
+        assert second_result is not None
+
+
+def _build_fake_llm() -> FakeLLM:
+    return FakeLLM(
+        fake_responses=[
+            # user says "go" -> LLM calls start_outer
+            FakeLLMResponse(
+                input="go",
+                content="",
+                ttft=0,
+                duration=0,
+                tool_calls=[FunctionToolCall(name="start_outer", arguments="{}", call_id="call_1")],
+            ),
+            # OuterTask on_enter generate_reply(instructions="outer_greeting")
+            # -> LLM calls start_inner
+            FakeLLMResponse(
+                input="outer_greeting",
+                content="",
+                ttft=0,
+                duration=0,
+                tool_calls=[FunctionToolCall(name="start_inner", arguments="{}", call_id="call_2")],
+            ),
+            # InnerTask on_enter generate_reply(instructions="inner_greeting")
+            # -> LLM just says hello (no tool call yet — needs user input to finish)
+            FakeLLMResponse(
+                input="inner_greeting",
+                content="hello from inner",
+                ttft=0,
+                duration=0,
+            ),
+            # user says "done" -> LLM calls finish
+            FakeLLMResponse(
+                input="done",
+                content="",
+                ttft=0,
+                duration=0,
+                tool_calls=[FunctionToolCall(name="finish", arguments="{}", call_id="call_3")],
+            ),
+            # after finish tool output, LLM responds to start_inner tool output
+            FakeLLMResponse(
+                input="inner completed",
+                content="",
+                ttft=0,
+                duration=0,
+            ),
+            # after start_outer tool output, LLM responds
+            FakeLLMResponse(
+                input="outer completed",
+                content="all done",
+                ttft=0,
+                duration=0,
+            ),
+        ]
+    )