Skip to content

Commit f4aaf75

Browse files
xitzhang, Xiting Zhang, and Copilot
authored and committed
[VoiceLive] Add AgentConfig model and agent field to ResponseSession (Azure#43671)
* [VoiceLive] Add async function-calling agent sample * add phrase list * fix typo * Update sdk/ai/azure-ai-voicelive/samples/async_function_calling_sample.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> * Update sdk/ai/azure-ai-voicelive/samples/async_function_calling_sample.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> * update * fix typo * update changelog * update * remove breaking change section * update changelog * fix change log * revert changelog I lost * update version and change log * enable type verification * update * [VoiceLive] Release 1.0.0b4 * [VoiceLive] Add AgentConfig model and agent field to ResponseSession * add release date * update change log --------- Co-authored-by: Xiting Zhang <xitzhang@microsoft.com> Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
1 parent 377c239 commit f4aaf75

File tree

4 files changed

+12
-26
lines changed

4 files changed

+12
-26
lines changed

sdk/ai/azure-ai-voicelive/CHANGELOG.md

Lines changed: 4 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -1,14 +1,12 @@
11
# Release History
22

3-
## 1.0.1 (Unreleased)
3+
## 1.0.1 (2025-10-28)
44

55
### Features Added
66

7-
### Breaking Changes
8-
9-
### Bugs Fixed
10-
11-
### Other Changes
7+
- Added support for Agent configuration through the new `AgentConfig` model
8+
- Added `agent` field to `ResponseSession` model to support agent-based conversations
9+
- The `AgentConfig` model includes properties for agent type, name, description, agent_id, and thread_id
1210

1311
## 1.0.0 (2025-10-01)
1412

sdk/ai/azure-ai-voicelive/apiview-properties.json

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,7 @@
11
{
22
"CrossLanguagePackageId": "VoiceLive",
33
"CrossLanguageDefinitionId": {
4+
"azure.ai.voicelive.models.AgentConfig": "VoiceLive.AgentConfig",
45
"azure.ai.voicelive.models.Animation": "VoiceLive.Animation",
56
"azure.ai.voicelive.models.ConversationRequestItem": "VoiceLive.ConversationRequestItem",
67
"azure.ai.voicelive.models.MessageItem": "VoiceLive.MessageItem",

sdk/ai/azure-ai-voicelive/azure/ai/voicelive/models/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -14,6 +14,7 @@
1414

1515

1616
from ._models import ( # type: ignore
17+
AgentConfig,
1718
Animation,
1819
AssistantMessageItem,
1920
AudioEchoCancellation,
@@ -162,6 +163,7 @@
162163
from ._patch import patch_sdk as _patch_sdk
163164

164165
__all__ = [
166+
"AgentConfig",
165167
"Animation",
166168
"AssistantMessageItem",
167169
"AudioEchoCancellation",

sdk/ai/azure-ai-voicelive/azure/ai/voicelive/models/_models.py

Lines changed: 5 additions & 20 deletions
Original file line number | Diff line number | Diff line change
@@ -1347,7 +1347,6 @@ class ClientEventInputAudioBufferAppend(ClientEvent, discriminator="input_audio_
13471347
mode, the audio buffer is used to detect speech and the server will decide
13481348
when to commit. When Server VAD is disabled, you must commit the audio buffer
13491349
manually.
1350-
13511350
The client may choose how much audio to place in each event up to a maximum
13521351
of 15 MiB, for example streaming smaller chunks from the client may allow the
13531352
VAD to be more responsive. Unlike most other client events, the server will
@@ -1736,7 +1735,6 @@ def __init__(self, *args: Any, **kwargs: Any) -> None:
17361735

17371736
class ClientEventSessionAvatarConnect(ClientEvent, discriminator="session.avatar.connect"):
17381737
"""Sent when the client connects and provides its SDP (Session Description Protocol)
1739-
17401738
for avatar-related media negotiation.
17411739
17421740
:ivar event_id:
@@ -2282,7 +2280,6 @@ def __init__(self, *args: Any, **kwargs: Any) -> None:
22822280

22832281
class OpenAIVoice(_Model):
22842282
"""OpenAI voice configuration with explicit type field.
2285-
22862283
This provides a unified interface for OpenAI voices, complementing the
22872284
existing string-based OpenAIVoiceName for backward compatibility.
22882285
@@ -2432,11 +2429,7 @@ class RequestSession(_Model):
24322429
:ivar instructions: Optional instructions to guide the model's behavior throughout the session.
24332430
:vartype instructions: str
24342431
:ivar input_audio_sampling_rate: Input audio sampling rate in Hz. Available values:
2435-
2436-
2437-
24382432
* For pcm16: 8000, 16000, 24000
2439-
24402433
* For g711_alaw/g711_ulaw: 8000.
24412434
:vartype input_audio_sampling_rate: int
24422435
:ivar input_audio_format: Input audio format. Default is 'pcm16'. Known values are: "pcm16",
@@ -2488,11 +2481,7 @@ class RequestSession(_Model):
24882481
"""Optional instructions to guide the model's behavior throughout the session."""
24892482
input_audio_sampling_rate: Optional[int] = rest_field(visibility=["read", "create", "update", "delete", "query"])
24902483
"""Input audio sampling rate in Hz. Available values:
2491-
2492-
2493-
24942484
* For pcm16: 8000, 16000, 24000
2495-
24962485
* For g711_alaw/g711_ulaw: 8000."""
24972486
input_audio_format: Optional[Union[str, "_models.InputAudioFormat"]] = rest_field(
24982487
visibility=["read", "create", "update", "delete", "query"]
@@ -2668,7 +2657,6 @@ class Response(_Model):
26682657
visibility=["read", "create", "update", "delete", "query"]
26692658
)
26702659
"""The final status of the response.
2671-
26722660
One of: ``completed``, ``cancelled``, ``failed``, ``incomplete``, or ``in_progress``. Known
26732661
values are: \"completed\", \"cancelled\", \"failed\", \"incomplete\", and \"in_progress\"."""
26742662
status_details: Optional["_models.ResponseStatusDetails"] = rest_field(
@@ -3262,11 +3250,7 @@ class ResponseSession(_Model):
32623250
:ivar instructions: Optional instructions to guide the model's behavior throughout the session.
32633251
:vartype instructions: str
32643252
:ivar input_audio_sampling_rate: Input audio sampling rate in Hz. Available values:
3265-
3266-
3267-
32683253
* For pcm16: 8000, 16000, 24000
3269-
32703254
* For g711_alaw/g711_ulaw: 8000.
32713255
:vartype input_audio_sampling_rate: int
32723256
:ivar input_audio_format: Input audio format. Default is 'pcm16'. Known values are: "pcm16",
@@ -3301,6 +3285,8 @@ class ResponseSession(_Model):
33013285
:ivar max_response_output_tokens: Maximum number of tokens to generate in the response. Default
33023286
is unlimited. Is either an int type or a Literal["inf"] type.
33033287
:vartype max_response_output_tokens: int or str
3288+
:ivar agent: The agent configuration for the session, if applicable.
3289+
:vartype agent: ~azure.ai.voicelive.models.AgentConfig
33043290
:ivar id: The unique identifier for the session.
33053291
:vartype id: str
33063292
"""
@@ -3320,11 +3306,7 @@ class ResponseSession(_Model):
33203306
"""Optional instructions to guide the model's behavior throughout the session."""
33213307
input_audio_sampling_rate: Optional[int] = rest_field(visibility=["read", "create", "update", "delete", "query"])
33223308
"""Input audio sampling rate in Hz. Available values:
3323-
3324-
3325-
33263309
* For pcm16: 8000, 16000, 24000
3327-
33283310
* For g711_alaw/g711_ulaw: 8000."""
33293311
input_audio_format: Optional[Union[str, "_models.InputAudioFormat"]] = rest_field(
33303312
visibility=["read", "create", "update", "delete", "query"]
@@ -3370,6 +3352,8 @@ class ResponseSession(_Model):
33703352
)
33713353
"""Maximum number of tokens to generate in the response. Default is unlimited. Is either an int
33723354
type or a Literal[\"inf\"] type."""
3355+
agent: Optional["_models.AgentConfig"] = rest_field(visibility=["read", "create", "update", "delete", "query"])
3356+
"""The agent configuration for the session, if applicable."""
33733357
id: Optional[str] = rest_field(visibility=["read", "create", "update", "delete", "query"])
33743358
"""The unique identifier for the session."""
33753359

@@ -3395,6 +3379,7 @@ def __init__(
33953379
tool_choice: Optional["_types.ToolChoice"] = None,
33963380
temperature: Optional[float] = None,
33973381
max_response_output_tokens: Optional[Union[int, Literal["inf"]]] = None,
3382+
agent: Optional["_models.AgentConfig"] = None,
33983383
id: Optional[str] = None, # pylint: disable=redefined-builtin
33993384
) -> None: ...
34003385

0 commit comments

Comments
 (0)