[VoiceLive] Add AgentConfig model and agent field to ResponseSession (Azure#43671)

xitzhang · Xiting Zhang · Copilot · JennyPng · commit f4aaf75f5ee7 · 2025-10-31T12:08:24.000-07:00
* [VoiceLive] Add async function-calling agent sample

* add phrase list

* fix typo

* Update sdk/ai/azure-ai-voicelive/samples/async_function_calling_sample.py

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>

* Update sdk/ai/azure-ai-voicelive/samples/async_function_calling_sample.py

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>

* update

* fix typo

* update changelog

* update

* remove breaking change section

* update changelog

* fix change log

* revert changelog I lost

* update version and change log

* enable type verification

* update

* [VoiceLive] Release 1.0.0b4

* [VoiceLive] Add AgentConfig model and agent field to ResponseSession

* add release date

* update change log

---------

Co-authored-by: Xiting Zhang <xitzhang@microsoft.com>
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
diff --git a/sdk/ai/azure-ai-voicelive/CHANGELOG.md b/sdk/ai/azure-ai-voicelive/CHANGELOG.md
@@ -1,14 +1,12 @@
 # Release History
 
-## 1.0.1 (Unreleased)
+## 1.0.1 (2025-10-28)
 
 ### Features Added
 
-### Breaking Changes
-
-### Bugs Fixed
-
-### Other Changes
+- Added support for Agent configuration through the new `AgentConfig` model
+- Added `agent` field to `ResponseSession` model to support agent-based conversations
+- The `AgentConfig` model includes properties for agent type, name, description, agent_id, and thread_id
 
 ## 1.0.0 (2025-10-01)
 
diff --git a/sdk/ai/azure-ai-voicelive/apiview-properties.json b/sdk/ai/azure-ai-voicelive/apiview-properties.json
@@ -1,6 +1,7 @@
 {
     "CrossLanguagePackageId": "VoiceLive",
     "CrossLanguageDefinitionId": {
+        "azure.ai.voicelive.models.AgentConfig": "VoiceLive.AgentConfig",
         "azure.ai.voicelive.models.Animation": "VoiceLive.Animation",
         "azure.ai.voicelive.models.ConversationRequestItem": "VoiceLive.ConversationRequestItem",
         "azure.ai.voicelive.models.MessageItem": "VoiceLive.MessageItem",
diff --git a/sdk/ai/azure-ai-voicelive/azure/ai/voicelive/models/__init__.py b/sdk/ai/azure-ai-voicelive/azure/ai/voicelive/models/__init__.py
@@ -14,6 +14,7 @@
 
 
 from ._models import (  # type: ignore
+    AgentConfig,
     Animation,
     AssistantMessageItem,
     AudioEchoCancellation,
@@ -162,6 +163,7 @@
 from ._patch import patch_sdk as _patch_sdk
 
 __all__ = [
+    "AgentConfig",
     "Animation",
     "AssistantMessageItem",
     "AudioEchoCancellation",
diff --git a/sdk/ai/azure-ai-voicelive/azure/ai/voicelive/models/_models.py b/sdk/ai/azure-ai-voicelive/azure/ai/voicelive/models/_models.py
@@ -1347,7 +1347,6 @@ class ClientEventInputAudioBufferAppend(ClientEvent, discriminator="input_audio_
     mode, the audio buffer is used to detect speech and the server will decide
     when to commit. When Server VAD is disabled, you must commit the audio buffer
     manually.
-
     The client may choose how much audio to place in each event up to a maximum
     of 15 MiB, for example streaming smaller chunks from the client may allow the
     VAD to be more responsive. Unlike most other client events, the server will
@@ -1736,7 +1735,6 @@ def __init__(self, *args: Any, **kwargs: Any) -> None:
 
 class ClientEventSessionAvatarConnect(ClientEvent, discriminator="session.avatar.connect"):
     """Sent when the client connects and provides its SDP (Session Description Protocol)
-
     for avatar-related media negotiation.
 
     :ivar event_id:
@@ -2282,7 +2280,6 @@ def __init__(self, *args: Any, **kwargs: Any) -> None:
 
 class OpenAIVoice(_Model):
     """OpenAI voice configuration with explicit type field.
-
     This provides a unified interface for OpenAI voices, complementing the
     existing string-based OpenAIVoiceName for backward compatibility.
 
@@ -2432,11 +2429,7 @@ class RequestSession(_Model):
     :ivar instructions: Optional instructions to guide the model's behavior throughout the session.
     :vartype instructions: str
     :ivar input_audio_sampling_rate: Input audio sampling rate in Hz. Available values:
-
-
-
      * For pcm16: 8000, 16000, 24000
-
      * For g711_alaw/g711_ulaw: 8000.
     :vartype input_audio_sampling_rate: int
     :ivar input_audio_format: Input audio format. Default is 'pcm16'. Known values are: "pcm16",
@@ -2488,11 +2481,7 @@ class RequestSession(_Model):
     """Optional instructions to guide the model's behavior throughout the session."""
     input_audio_sampling_rate: Optional[int] = rest_field(visibility=["read", "create", "update", "delete", "query"])
     """Input audio sampling rate in Hz. Available values:
-     
-     
-     
      * For pcm16: 8000, 16000, 24000
-     
      * For g711_alaw/g711_ulaw: 8000."""
     input_audio_format: Optional[Union[str, "_models.InputAudioFormat"]] = rest_field(
         visibility=["read", "create", "update", "delete", "query"]
@@ -2668,7 +2657,6 @@ class Response(_Model):
         visibility=["read", "create", "update", "delete", "query"]
     )
     """The final status of the response.
-     
      One of: ``completed``, ``cancelled``, ``failed``, ``incomplete``, or ``in_progress``. Known
      values are: \"completed\", \"cancelled\", \"failed\", \"incomplete\", and \"in_progress\"."""
     status_details: Optional["_models.ResponseStatusDetails"] = rest_field(
@@ -3262,11 +3250,7 @@ class ResponseSession(_Model):
     :ivar instructions: Optional instructions to guide the model's behavior throughout the session.
     :vartype instructions: str
     :ivar input_audio_sampling_rate: Input audio sampling rate in Hz. Available values:
-
-
-
      * For pcm16: 8000, 16000, 24000
-
      * For g711_alaw/g711_ulaw: 8000.
     :vartype input_audio_sampling_rate: int
     :ivar input_audio_format: Input audio format. Default is 'pcm16'. Known values are: "pcm16",
@@ -3301,6 +3285,8 @@ class ResponseSession(_Model):
     :ivar max_response_output_tokens: Maximum number of tokens to generate in the response. Default
      is unlimited. Is either a int type or a Literal["inf"] type.
     :vartype max_response_output_tokens: int or str
+    :ivar agent: The agent configuration for the session, if applicable.
+    :vartype agent: ~azure.ai.voicelive.models.AgentConfig
     :ivar id: The unique identifier for the session.
     :vartype id: str
     """
@@ -3320,11 +3306,7 @@ class ResponseSession(_Model):
     """Optional instructions to guide the model's behavior throughout the session."""
     input_audio_sampling_rate: Optional[int] = rest_field(visibility=["read", "create", "update", "delete", "query"])
     """Input audio sampling rate in Hz. Available values:
-     
-     
-     
      * For pcm16: 8000, 16000, 24000
-     
      * For g711_alaw/g711_ulaw: 8000."""
     input_audio_format: Optional[Union[str, "_models.InputAudioFormat"]] = rest_field(
         visibility=["read", "create", "update", "delete", "query"]
@@ -3370,6 +3352,8 @@ class ResponseSession(_Model):
     )
     """Maximum number of tokens to generate in the response. Default is unlimited. Is either a int
      type or a Literal[\"inf\"] type."""
+    agent: Optional["_models.AgentConfig"] = rest_field(visibility=["read", "create", "update", "delete", "query"])
+    """The agent configuration for the session, if applicable."""
     id: Optional[str] = rest_field(visibility=["read", "create", "update", "delete", "query"])
     """The unique identifier for the session."""
 
@@ -3395,6 +3379,7 @@ def __init__(
         tool_choice: Optional["_types.ToolChoice"] = None,
         temperature: Optional[float] = None,
         max_response_output_tokens: Optional[Union[int, Literal["inf"]]] = None,
+        agent: Optional["_models.AgentConfig"] = None,
         id: Optional[str] = None,  # pylint: disable=redefined-builtin
     ) -> None: ...
 

Original file line number	Diff line number	Diff line change
`@@ -1,6 +1,7 @@`
`1`	`1`	`{`
`2`	`2`	`"CrossLanguagePackageId": "VoiceLive",`
`3`	`3`	`"CrossLanguageDefinitionId": {`
	`4`	`+ "azure.ai.voicelive.models.AgentConfig": "VoiceLive.AgentConfig",`
`4`	`5`	`"azure.ai.voicelive.models.Animation": "VoiceLive.Animation",`
`5`	`6`	`"azure.ai.voicelive.models.ConversationRequestItem": "VoiceLive.ConversationRequestItem",`
`6`	`7`	`"azure.ai.voicelive.models.MessageItem": "VoiceLive.MessageItem",`