From 8ea10af1357e8b4606cb9dd3df668e627c4cfefa Mon Sep 17 00:00:00 2001 From: Mark Daoust Date: Mon, 16 Mar 2026 17:24:06 -0700 Subject: [PATCH] feat: Add consent_audio and voice_consent_signature and AsyncSession.setup_complete PiperOrigin-RevId: 884712720 --- google/genai/live.py | 5 ++ google/genai/tests/live/test_live.py | 103 +++++++++++++++++++++++++++ google/genai/types.py | 62 ++++++++++++++++ 3 files changed, 170 insertions(+) diff --git a/google/genai/live.py b/google/genai/live.py index 93953a02f..b35e918ce 100644 --- a/google/genai/live.py +++ b/google/genai/live.py @@ -92,10 +92,12 @@ def __init__( api_client: BaseApiClient, websocket: ClientConnection, session_id: Optional[str] = None, + setup_complete: Optional[types.LiveServerSetupComplete] = None, ): self._api_client = api_client self._ws = websocket self.session_id = session_id + self.setup_complete = setup_complete async def send( self, @@ -1131,12 +1133,15 @@ async def connect( ) if setup_response.setup_complete: session_id = setup_response.setup_complete.session_id + setup_complete = setup_response.setup_complete else: session_id = None + setup_complete = None yield AsyncSession( api_client=self._api_client, websocket=ws, session_id=session_id, + setup_complete=setup_complete, ) diff --git a/google/genai/tests/live/test_live.py b/google/genai/tests/live/test_live.py index 2d59ce900..513a90a3f 100644 --- a/google/genai/tests/live/test_live.py +++ b/google/genai/tests/live/test_live.py @@ -2154,3 +2154,106 @@ async def mock_connect(uri, additional_headers=None, **kwargs): assert capture['headers']['x-goog-api-key'] == 'TEST_API_KEY' assert 'BidiGenerateContent' in capture['uri'] + +@pytest.mark.parametrize('vertexai', [True, False]) +@pytest.mark.asyncio +async def test_async_session_setup_complete_with_voice_consent_signature( + vertexai, +): + mock_ws = AsyncMock() + mock_ws.send = AsyncMock() + mock_ws.recv = AsyncMock( + return_value=( + b'{"setupComplete": {"sessionId": "test_session_id",' + b' "voiceConsentSignature": {"signature": "test_sig_abc123"}}}' + ) + ) + mock_ws.close = AsyncMock() + + mock_google_auth_default = Mock(return_value=(None, None)) + mock_creds = Mock(token='test_token') + mock_google_auth_default.return_value = (mock_creds, None) + + @contextlib.asynccontextmanager + async def mock_connect(uri, additional_headers=None, **kwargs): + yield mock_ws + + @patch('google.auth.default', new=mock_google_auth_default) + @patch.object(live, 'ws_connect', new=mock_connect) + async def _test_connect(): + live_module = live.AsyncLive(mock_api_client(vertexai=vertexai)) + async with live_module.connect(model='test_model') as session: + assert session.setup_complete is not None + assert session.setup_complete.session_id == 'test_session_id' + assert session.setup_complete.voice_consent_signature is not None + assert ( + session.setup_complete.voice_consent_signature.signature + == 'test_sig_abc123' + ) + + await _test_connect() + + +@pytest.mark.parametrize('vertexai', [False]) +@pytest.mark.asyncio +async def test_bidi_setup_replicated_voice_config_with_consent(vertexai): + config = types.LiveConnectConfig( + response_modalities=['AUDIO'], + speech_config=types.SpeechConfig( + voice_config=types.VoiceConfig( + replicated_voice_config=types.ReplicatedVoiceConfig( + mime_type='audio/wav', + voice_sample_audio=b'fake_audio_data', + consent_audio=b'fake_consent_data', + ) + ) + ), + ) + result = await get_connect_message( + mock_api_client(vertexai=vertexai), model='test_model', config=config + ) + + setup = result.get('setup', {}) + gen_config = setup.get('generationConfig', {}) + speech_config = gen_config.get('speechConfig', {}) + voice_config = speech_config.get('voice_config', {}) + replicated = voice_config.get('replicated_voice_config', {}) + + assert replicated.get('mime_type') == 'audio/wav' + assert replicated.get('voice_sample_audio') is not None + assert replicated.get('consent_audio') is not None + + config_with_sig = types.LiveConnectConfig( + response_modalities=['AUDIO'], + speech_config=types.SpeechConfig( + voice_config=types.VoiceConfig( + replicated_voice_config=types.ReplicatedVoiceConfig( + mime_type='audio/wav', + voice_sample_audio=b'fake_audio_data', + voice_consent_signature=types.VoiceConsentSignature( + signature='test_sig_abc123' + ), + ) + ) + ), + ) + result_with_sig = await get_connect_message( + mock_api_client(vertexai=vertexai), + model='test_model', + config=config_with_sig, + ) + + setup_sig = result_with_sig.get('setup', {}) + gen_config_sig = setup_sig.get('generationConfig', {}) + speech_config_sig = gen_config_sig.get('speechConfig', {}) + voice_config_sig = speech_config_sig.get('voice_config', {}) + replicated_sig = voice_config_sig.get('replicated_voice_config', {}) + + assert replicated_sig.get('mime_type') == 'audio/wav' + assert replicated_sig.get('voice_sample_audio') is not None + assert replicated_sig.get('voice_consent_signature') is not None + assert ( + replicated_sig['voice_consent_signature'].get('signature') + == 'test_sig_abc123' + ) + diff --git a/google/genai/types.py b/google/genai/types.py index 609134c58..88e6a7084 100644 --- a/google/genai/types.py +++ b/google/genai/types.py @@ -4901,6 +4901,29 @@ class ToolConfigDict(TypedDict, total=False): ToolConfigOrDict = Union[ToolConfig, ToolConfigDict] +class VoiceConsentSignature(_common.BaseModel): + """The signature of the voice consent check.""" + + signature: Optional[str] = Field( + default=None, + description="""The signature string. + """, + ) + + +class VoiceConsentSignatureDict(TypedDict, total=False): + """The signature of the voice consent check.""" + + signature: Optional[str] + """The signature string. + """ + + +VoiceConsentSignatureOrDict = Union[ + VoiceConsentSignature, VoiceConsentSignatureDict +] + + class ReplicatedVoiceConfig(_common.BaseModel): """ReplicatedVoiceConfig is used to configure replicated voice.""" @@ -4914,6 +4937,20 @@ class ReplicatedVoiceConfig(_common.BaseModel): description="""The sample audio of the replicated voice. """, ) + consent_audio: Optional[bytes] = Field( + default=None, + description="""Recorded consent verifying ownership of the voice. This + represents 16-bit signed little-endian wav data, with a 24kHz sampling + rate.""", + ) + voice_consent_signature: Optional[VoiceConsentSignature] = Field( + default=None, + description="""Signature of a previously verified consent audio. This should be + populated with a signature generated by the server for a previous + request containing the consent_audio field. When provided, the + signature is verified instead of the consent_audio field to reduce + latency. Requests will fail if the signature is invalid or expired.""", + ) class ReplicatedVoiceConfigDict(TypedDict, total=False): @@ -4927,6 +4964,18 @@ class ReplicatedVoiceConfigDict(TypedDict, total=False): """The sample audio of the replicated voice. """ + consent_audio: Optional[bytes] + """Recorded consent verifying ownership of the voice. This + represents 16-bit signed little-endian wav data, with a 24kHz sampling + rate.""" + + voice_consent_signature: Optional[VoiceConsentSignatureDict] + """Signature of a previously verified consent audio. This should be + populated with a signature generated by the server for a previous + request containing the consent_audio field. When provided, the + signature is verified instead of the consent_audio field to reduce + latency. Requests will fail if the signature is invalid or expired.""" + ReplicatedVoiceConfigOrDict = Union[ ReplicatedVoiceConfig, ReplicatedVoiceConfigDict @@ -17481,6 +17530,13 @@ class LiveServerSetupComplete(_common.BaseModel): session_id: Optional[str] = Field( default=None, description="""The session id of the live session.""" ) + voice_consent_signature: Optional[VoiceConsentSignature] = Field( + default=None, + description="""Signature of the verified consent audio. This is populated when the + request has a ReplicatedVoiceConfig with consent_audio set, if the consent + verification was successful. This may be used in a subsequent request + instead of the consent_audio to verify the same consent.""", + ) class LiveServerSetupCompleteDict(TypedDict, total=False): @@ -17489,6 +17545,12 @@ class LiveServerSetupCompleteDict(TypedDict, total=False): session_id: Optional[str] """The session id of the live session.""" + voice_consent_signature: Optional[VoiceConsentSignatureDict] + """Signature of the verified consent audio. This is populated when the + request has a ReplicatedVoiceConfig with consent_audio set, if the consent + verification was successful. This may be used in a subsequent request + instead of the consent_audio to verify the same consent.""" + LiveServerSetupCompleteOrDict = Union[ LiveServerSetupComplete, LiveServerSetupCompleteDict