Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions livekit-agents/livekit/agents/voice/agent_activity.py
Original file line number Diff line number Diff line change
Expand Up @@ -2484,6 +2484,7 @@ def _tool_execution_completed_cb(out: ToolExecutionOutput) -> None:
interrupted=speech_handle.interrupted,
created_at=reply_started_at,
metrics=assistant_metrics,
extra=llm_gen_data.extra if llm_gen_data.extra else NOT_GIVEN,
)
self._agent._chat_ctx.insert(msg)
self._session._conversation_item_added(msg)
Expand Down
4 changes: 4 additions & 0 deletions livekit-agents/livekit/agents/voice/generation.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ class _LLMGenerationData:
function_ch: aio.Chan[llm.FunctionCall]
generated_text: str = ""
generated_functions: list[llm.FunctionCall] = field(default_factory=list)
extra: dict[str, Any] = field(default_factory=dict)
id: str = field(default_factory=lambda: utils.shortuuid("item_"))
started_fut: asyncio.Future[None] = field(default_factory=asyncio.Future)
ttft: float | None = None
Expand Down Expand Up @@ -176,6 +177,9 @@ async def _llm_inference_task(
data.generated_functions.append(fnc_call)
function_ch.send_nowait(fnc_call)

if chunk.delta.extra:
data.extra.update(chunk.delta.extra)

if chunk.delta.content:
data.generated_text += chunk.delta.content
text_ch.send_nowait(chunk.delta.content)
Expand Down
10 changes: 8 additions & 2 deletions tests/fake_llm.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ class FakeLLMResponse(BaseModel):
ttft: float
duration: float
tool_calls: list[FunctionToolCall] = Field(default_factory=list)
extra: dict[str, Any] = Field(default_factory=dict)

def speed_up(self, factor: float) -> FakeLLMResponse:
obj = copy.deepcopy(self)
Expand Down Expand Up @@ -98,10 +99,14 @@ async def _run(self) -> None:

await asyncio.sleep(resp.duration - (time.perf_counter() - start_time))

self._send_chunk(tool_calls=resp.tool_calls)
self._send_chunk(tool_calls=resp.tool_calls, extra=resp.extra if resp.extra else None)

def _send_chunk(
self, *, delta: str | None = None, tool_calls: list[FunctionToolCall] | None = None
self,
*,
delta: str | None = None,
tool_calls: list[FunctionToolCall] | None = None,
extra: dict[str, Any] | None = None,
) -> None:
self._event_ch.send_nowait(
ChatChunk(
Expand All @@ -110,6 +115,7 @@ def _send_chunk(
role="assistant",
content=delta,
tool_calls=tool_calls or [],
extra=extra,
),
)
)
Expand Down
2 changes: 2 additions & 0 deletions tests/fake_session.py
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,7 @@ def add_llm(
input: NotGivenOr[str] = NOT_GIVEN,
ttft: float = 0.1,
duration: float = 0.3,
extra: dict[str, Any] | None = None,
) -> None:
if (
not utils.is_given(input)
Expand All @@ -167,6 +168,7 @@ def add_llm(
ttft=ttft,
duration=duration,
tool_calls=tool_calls or [],
extra=extra or {},
)
)

Expand Down
51 changes: 51 additions & 0 deletions tests/test_agent_session.py
Original file line number Diff line number Diff line change
Expand Up @@ -733,3 +733,54 @@ def check_timestamp(
assert abs(t_event - t_target) <= max_abs_diff, (
f"event timestamp {t_event} is not within {max_abs_diff} of target {t_target}"
)


async def test_llm_extra_propagation() -> None:
    """Verify that ChoiceDelta.extra survives the trip into ChatMessage.extra.

    Message-level metadata (e.g. Gemini thought signatures) must flow through
    the inference proxy path and remain available on the chat-context item.
    """
    speed_factor = 5.0
    fake_actions = FakeActions()
    fake_actions.add_user_speech(0.5, 2.5, "Hello, how are you?", stt_delay=0.2)
    # Attach extra metadata to the fake LLM reply (stand-in for thought signatures).
    llm_extra = {"thought_signature": "test_signature_123", "other_metadata": {"key": "value"}}
    fake_actions.add_llm(
        "I'm doing well, thank you!",
        ttft=0.1,
        duration=0.3,
        extra=llm_extra,
    )
    fake_actions.add_tts(2.0, ttfb=0.2, duration=0.3)

    session = create_session(fake_actions, speed_factor=speed_factor)
    agent = MyAgent()

    added_events: list[ConversationItemAddedEvent] = []
    session.on("conversation_item_added", added_events.append)

    await asyncio.wait_for(run_session(session, agent), timeout=SESSION_TIMEOUT)

    # Exactly one assistant message should have been emitted.
    from_assistant = []
    for ev in added_events:
        if ev.item.type == "message" and ev.item.role == "assistant":
            from_assistant.append(ev)
    assert len(from_assistant) == 1

    msg = from_assistant[0].item
    assert msg.text_content == "I'm doing well, thank you!"

    # The extra payload must be preserved verbatim on the message.
    assert msg.extra is not None
    assert msg.extra.get("thought_signature") == "test_signature_123"
    assert msg.extra.get("other_metadata") == {"key": "value"}

    # The same message must also be reachable through the agent's chat context.
    ctx_assistant = [
        it for it in agent.chat_ctx.items if it.type == "message" and it.role == "assistant"
    ]
    assert len(ctx_assistant) == 1
    assert ctx_assistant[0].extra.get("thought_signature") == "test_signature_123"
Loading