Skip to content
Closed
Changes from all commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
42079a9
Merge branch 'main' into longc/reuse-rt-session-across-handoffs
qionghuang6 Apr 1, 2026
6961d39
faster handoffs
qionghuang6 Mar 31, 2026
03d09ec
cleanup using new sdk
qionghuang6 Apr 1, 2026
8d54226
clean up
qionghuang6 Apr 1, 2026
be4e280
Merge branch 'main' into qiong/faster-handoffs
qionghuang6 Apr 1, 2026
35b1341
Merge branch 'main' into merge-main/longc/reuse-rt-session-across-han…
qionghuang6 Apr 1, 2026
cc52ad5
Merge branch 'merge-main/longc/reuse-rt-session-across-handoffs' into…
qionghuang6 Apr 1, 2026
1f1f3a3
update
qionghuang6 Apr 1, 2026
8324c47
format
qionghuang6 Apr 1, 2026
05d673f
set tg llm to not given
tinalenguyen Apr 1, 2026
1cbe370
Merge upstream/tina/set-tg-llm-not-given
qionghuang6 Apr 1, 2026
52ba5db
use actual patch release
qionghuang6 Apr 1, 2026
1b817a3
add reset method
tinalenguyen Apr 1, 2026
bd9f5e4
ruff
tinalenguyen Apr 1, 2026
326afa3
Merge remote-tracking branch 'upstream/tina/add-rt-reset-method' into…
qionghuang6 Apr 2, 2026
0917180
Merge upstream/tina/add-rt-reset-method into qiong/faster-handoffs
qionghuang6 Apr 2, 2026
752a49f
use new reset
qionghuang6 Apr 2, 2026
710767f
fixes
qionghuang6 Apr 2, 2026
e6f16ce
cleanup
qionghuang6 Apr 2, 2026
794f36b
Merge branch 'merge-main/longc/reuse-rt-session-across-handoffs' into…
qionghuang6 Apr 2, 2026
0a9a5e2
Merge longc/reuse-rt-session-across-handoffs into qiong/faster-handoffs
qionghuang6 Apr 9, 2026
cd25665
Update to use update_session
qionghuang6 Apr 9, 2026
37f70fa
dont include handoff agent in PR
qionghuang6 Apr 9, 2026
279e91d
restore uv lock
qionghuang6 Apr 9, 2026
693713f
Don't override
qionghuang6 Apr 9, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,11 @@
AddSystemMessagePayload,
AudioChunkPayload,
AudioChunkResponsePayload,
ConfigOptions,
ConfigPayload,
GenerateReplyPayload,
InputTextPayload,
ResetPayload,
ToolCallInterruptedPayload,
ToolCallOutputPayload,
ToolCallPayload,
Expand All @@ -43,6 +45,11 @@
PHONIC_INPUT_FRAME_MS = 20
WS_CLOSE_NORMAL = 1000
TOOL_CALL_OUTPUT_TIMEOUT_MS = 60000
# Lead-in text placed in front of a serialized turn history whenever prior
# conversation state is appended to the system prompt sent to Phonic.
_CONVERSATION_HISTORY_PREFIX = (
"\n\nThis conversation is being continued from an existing "
"conversation. You are the assistant speaking to the user. "
"The following is the conversation history:\n"
)


@dataclass
Expand Down Expand Up @@ -157,6 +164,9 @@ def __init__(
auto_tool_reply_generation=True,
audio_output=True,
manual_function_calls=False,
mid_session_chat_ctx_update=True,
mid_session_instructions_update=True,
mid_session_tools_update=True,
per_response_tool_choice=False,
)
)
Expand Down Expand Up @@ -242,9 +252,7 @@ def __init__(self, realtime_model: RealtimeModel) -> None:
self._input_resampler: rtc.AudioResampler | None = None
self._input_resampler_rate: int | None = None

self._client = AsyncPhonic(
api_key=self._opts.api_key,
)
self._client = AsyncPhonic(api_key=self._opts.api_key)

self._socket: AsyncConversationsSocketClient | None = None
self._socket_ctx: typing.AsyncContextManager[AsyncConversationsSocketClient] | None = None
Expand Down Expand Up @@ -312,11 +320,7 @@ async def update_chat_ctx(self, chat_ctx: llm.ChatContext) -> None:
"update_chat_ctx called with messages prior to config being sent to "
"Phonic. Including conversation state in system instructions."
)
self._system_prompt_postfix = (
"\n\nThis conversation is being continued from an existing "
"conversation. You are the assistant speaking to the user. "
"The following is the conversation history:\n" + turn_history
)
self._system_prompt_postfix = _CONVERSATION_HISTORY_PREFIX + turn_history
self._chat_ctx = chat_ctx.copy()
return

Expand Down Expand Up @@ -368,16 +372,20 @@ async def update_tools(self, tools: list[llm.Tool]) -> None:
if self._config_sent:
logger.warning(
"update_tools called after config was already sent. "
"Phonic does not support updating tools mid-session."
"Use update_session() for mid-session tool updates."
)
return

self._tools = llm.ToolContext(tools)
self._tool_definitions = []
self._tool_definitions = self._get_tool_definitions(tools)
self._tools_ready.set()

def _get_tool_definitions(self, tools: list[llm.Tool]) -> list[dict]:
tool_definitions = []
for tool_schema in self._tools.parse_function_tools("openai", strict=True):
# We disallow tool chaining and tool calls during agent speech to reduce complexity
# of managing state while operating within the LiveKit Realtime generations framework
self._tool_definitions.append(
tool_definitions.append(
{
"type": "custom_websocket",
"tool_schema": tool_schema,
Expand All @@ -386,8 +394,7 @@ async def update_tools(self, tools: list[llm.Tool]) -> None:
"allow_tool_chaining": False,
}
)

self._tools_ready.set()
return tool_definitions

def update_options(self, *, tool_choice: NotGivenOr[llm.ToolChoice | None] = NOT_GIVEN) -> None:
logger.warning("update_options is not supported by the Phonic realtime model.")
Expand Down Expand Up @@ -494,6 +501,91 @@ async def aclose(self) -> None:

await self._close_active_session()

def _build_turn_history(self, chat_ctx: llm.ChatContext) -> str | None:
    """Render the text messages of ``chat_ctx`` as a ``role: text`` transcript.

    Only ``llm.ChatMessage`` items whose text content is non-blank are
    included; every other item is ignored.

    Args:
        chat_ctx: Chat context whose ``items`` are scanned in order.

    Returns:
        One ``"<role>: <text>"`` line per retained message, joined with
        newlines and stripped of surrounding whitespace, or ``None`` when no
        message qualifies.
    """
    transcript_lines: list[str] = []
    for entry in chat_ctx.items:
        if not isinstance(entry, llm.ChatMessage):
            continue
        text = entry.text_content
        # Skip messages with missing or whitespace-only text.
        if not text or not text.strip():
            continue
        transcript_lines.append(f"{entry.role}: {text}")

    if not transcript_lines:
        return None

    transcript = "\n".join(transcript_lines).strip()
    return transcript if transcript else None

def _build_session_config_options_dict(
    self,
    *,
    system_prompt: str,
    tools_payload: list[dict | str],
) -> dict[str, typing.Any]:
    """Assemble the keyword options used to build a Phonic config payload.

    Values come from ``self._opts`` plus the two arguments; the audio input
    and output formats are fixed to ``"pcm_44100"``.

    Args:
        system_prompt: Full system prompt to send.
        tools_payload: Tool entries to send; an empty list means the
            ``"tools"`` key is left out entirely.

    Returns:
        A dict of options with every entry whose value is the ``NOT_GIVEN``
        sentinel removed.
    """
    opts = self._opts
    # An empty tool list is treated the same as "not provided".
    tools_value = tools_payload if tools_payload else NOT_GIVEN

    candidates = {
        "agent": opts.phonic_agent,
        "project": opts.project,
        "welcome_message": opts.welcome_message,
        "generate_welcome_message": opts.generate_welcome_message,
        "system_prompt": system_prompt,
        "voice_id": opts.voice,
        "input_format": "pcm_44100",
        "output_format": "pcm_44100",
        "default_language": opts.default_language,
        "additional_languages": opts.additional_languages,
        "multilingual_mode": opts.multilingual_mode,
        "audio_speed": opts.audio_speed,
        "tools": tools_value,
        "boosted_keywords": opts.boosted_keywords,
        "min_words_to_interrupt": opts.min_words_to_interrupt,
        "generate_no_input_poke_text": opts.generate_no_input_poke_text,
        "no_input_poke_sec": opts.no_input_poke_sec,
        "no_input_poke_text": opts.no_input_poke_text,
        "no_input_end_conversation_sec": opts.no_input_end_conversation_sec,
    }

    # Drop unset options so they are never passed to the payload constructor.
    options: dict[str, typing.Any] = {}
    for key, value in candidates.items():
        if value is not NOT_GIVEN:
            options[key] = value
    return options

async def _update_session(
    self,
    *,
    instructions: NotGivenOr[str] = NOT_GIVEN,
    chat_ctx: NotGivenOr[llm.ChatContext] = NOT_GIVEN,
    tools: NotGivenOr[list[llm.Tool]] = NOT_GIVEN,
) -> None:
    """Apply new instructions, chat context and/or tools to the session.

    Before the initial config has been sent, each given argument is simply
    forwarded to the matching ``update_*`` method. After the config has been
    sent, the given values are stored on the session, the current generation
    is closed as interrupted, and, if a socket is open, a reset payload
    carrying the rebuilt config options is sent.

    Args:
        instructions: Replacement instructions; used as the base of the
            system prompt.
        chat_ctx: Replacement chat context; its text messages are appended to
            the system prompt as conversation history.
        tools: Replacement function tools.
    """
    if not self._config_sent:
        # Nothing has been sent yet, so the regular setters are sufficient.
        if is_given(instructions):
            await self.update_instructions(instructions)
        if is_given(chat_ctx):
            await self.update_chat_ctx(chat_ctx)
        if is_given(tools):
            await self.update_tools(tools)
        return

    if is_given(instructions):
        self._opts.instructions = instructions
    if is_given(tools):
        # _tools must be replaced before the definitions are regenerated.
        self._tools = llm.ToolContext(tools)
        self._tool_definitions = self._get_tool_definitions(tools)

    prompt = self._opts.instructions
    if is_given(chat_ctx):
        self._chat_ctx = chat_ctx.copy()
        turn_history = self._build_turn_history(chat_ctx)
        if turn_history:
            prompt = prompt + _CONVERSATION_HISTORY_PREFIX + turn_history

    # Close the current generation before the reset is sent.
    self._close_current_generation(interrupted=True)

    builtin_tools = self._opts.phonic_tools
    payload: list[dict | str] = []
    if is_given(builtin_tools) and builtin_tools:
        payload += builtin_tools
    payload += self._tool_definitions

    options = self._build_session_config_options_dict(
        system_prompt=prompt,
        tools_payload=payload,
    )

    if not self._socket:
        return
    logger.info("Sending mid-session reset to Phonic")
    await self._socket.send_reset(ResetPayload(config=ConfigOptions(**options)))

@utils.log_exceptions(logger=logger)
async def _main_task(self) -> None:
try:
Expand Down Expand Up @@ -522,34 +614,11 @@ async def _main_task(self) -> None:
logger.warning("Instructions are not set. Phonic will not start a conversation.")
return

config = {
"type": "config",
"agent": self._opts.phonic_agent,
"project": self._opts.project,
"welcome_message": self._opts.welcome_message,
"generate_welcome_message": self._opts.generate_welcome_message,
"system_prompt": self._opts.instructions + self._system_prompt_postfix,
"voice_id": self._opts.voice,
"input_format": "pcm_44100",
"output_format": "pcm_44100",
"default_language": self._opts.default_language,
"additional_languages": self._opts.additional_languages,
"multilingual_mode": self._opts.multilingual_mode,
"audio_speed": self._opts.audio_speed,
"tools": tools_payload if len(tools_payload) > 0 else NOT_GIVEN,
"boosted_keywords": self._opts.boosted_keywords,
"min_words_to_interrupt": self._opts.min_words_to_interrupt,
"generate_no_input_poke_text": self._opts.generate_no_input_poke_text,
"no_input_poke_sec": self._opts.no_input_poke_sec,
"no_input_poke_text": self._opts.no_input_poke_text,
"no_input_end_conversation_sec": self._opts.no_input_end_conversation_sec,
}
# Filter out NOT_GIVEN values
config_filtered = typing.cast(
dict[str, typing.Any],
{k: v for k, v in config.items() if v is not NOT_GIVEN},
config_options = self._build_session_config_options_dict(
system_prompt=self._opts.instructions + self._system_prompt_postfix,
tools_payload=tools_payload,
)
await self._socket.send_config(ConfigPayload(**config_filtered))
await self._socket.send_config(ConfigPayload(**config_options))

recv_task = asyncio.create_task(self._recv_task(self._socket), name="phonic-recv")
send_task = asyncio.create_task(self._send_task(self._socket), name="phonic-send")
Expand Down
Loading