Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions google/genai/live.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,10 +92,12 @@ def __init__(
api_client: BaseApiClient,
websocket: ClientConnection,
session_id: Optional[str] = None,
setup_complete: Optional[types.LiveServerSetupComplete] = None,
):
self._api_client = api_client
self._ws = websocket
self.session_id = session_id
self.setup_complete = setup_complete

async def send(
self,
Expand Down Expand Up @@ -1131,12 +1133,15 @@ async def connect(
)
if setup_response.setup_complete:
session_id = setup_response.setup_complete.session_id
setup_complete = setup_response.setup_complete
else:
session_id = None
setup_complete = None
yield AsyncSession(
api_client=self._api_client,
websocket=ws,
session_id=session_id,
setup_complete=setup_complete,
)


Expand Down
103 changes: 103 additions & 0 deletions google/genai/tests/live/test_live.py
Original file line number Diff line number Diff line change
Expand Up @@ -2154,3 +2154,106 @@ async def mock_connect(uri, additional_headers=None, **kwargs):
assert capture['headers']['x-goog-api-key'] == 'TEST_API_KEY'
assert 'BidiGenerateContent' in capture['uri']


@pytest.mark.parametrize('vertexai', [True, False])
@pytest.mark.asyncio
async def test_async_session_setup_complete_with_voice_consent_signature(
    vertexai,
):
  """Verifies that the session exposes the server's setupComplete message.

  The mocked websocket answers the handshake with a setupComplete payload
  carrying a session id and a voice consent signature; both must be readable
  from `session.setup_complete` once `connect()` yields the session.
  """
  # Handshake reply returned by every `recv()` call on the fake websocket.
  setup_reply = (
      b'{"setupComplete": {"sessionId": "test_session_id",'
      b' "voiceConsentSignature": {"signature": "test_sig_abc123"}}}'
  )
  fake_socket = AsyncMock()
  fake_socket.send = AsyncMock()
  fake_socket.recv = AsyncMock(return_value=setup_reply)
  fake_socket.close = AsyncMock()

  # Stand-in for `google.auth.default`, handing back credentials with a token.
  fake_auth_default = Mock(return_value=(Mock(token='test_token'), None))

  @contextlib.asynccontextmanager
  async def mock_connect(uri, additional_headers=None, **kwargs):
    yield fake_socket

  with patch.object(live, 'ws_connect', new=mock_connect):
    with patch('google.auth.default', new=fake_auth_default):
      live_module = live.AsyncLive(mock_api_client(vertexai=vertexai))
      async with live_module.connect(model='test_model') as session:
        completed = session.setup_complete
        assert completed is not None
        assert completed.session_id == 'test_session_id'

        consent = completed.voice_consent_signature
        assert consent is not None
        assert consent.signature == 'test_sig_abc123'


@pytest.mark.parametrize('vertexai', [False])
@pytest.mark.asyncio
async def test_bidi_setup_replicated_voice_config_with_consent(vertexai):
  """Verifies the consent fields of a replicated voice reach the setup message.

  Two connect messages are built: one whose replicated voice carries raw
  `consent_audio`, and one carrying a `voice_consent_signature` instead. In
  both cases the fields must be present in the replicated voice section of the
  generated setup payload.
  """

  def replicated_section(message):
    """Walks the connect message down to the replicated voice dict."""
    node = message
    for key in (
        'setup',
        'generationConfig',
        'speechConfig',
        'voice_config',
        'replicated_voice_config',
    ):
      # A missing level degrades to an empty dict rather than raising.
      node = node.get(key, {})
    return node

  def audio_config_for(replicated_voice_config):
    """Wraps a replicated voice in an AUDIO-only live connect config."""
    return types.LiveConnectConfig(
        response_modalities=['AUDIO'],
        speech_config=types.SpeechConfig(
            voice_config=types.VoiceConfig(
                replicated_voice_config=replicated_voice_config
            )
        ),
    )

  # Case 1: consent supplied as recorded audio.
  config = audio_config_for(
      types.ReplicatedVoiceConfig(
          mime_type='audio/wav',
          voice_sample_audio=b'fake_audio_data',
          consent_audio=b'fake_consent_data',
      )
  )
  result = await get_connect_message(
      mock_api_client(vertexai=vertexai), model='test_model', config=config
  )
  replicated = replicated_section(result)

  assert replicated.get('mime_type') == 'audio/wav'
  assert replicated.get('voice_sample_audio') is not None
  assert replicated.get('consent_audio') is not None

  # Case 2: consent supplied as a signature in place of the audio.
  config_with_sig = audio_config_for(
      types.ReplicatedVoiceConfig(
          mime_type='audio/wav',
          voice_sample_audio=b'fake_audio_data',
          voice_consent_signature=types.VoiceConsentSignature(
              signature='test_sig_abc123'
          ),
      )
  )
  result_with_sig = await get_connect_message(
      mock_api_client(vertexai=vertexai),
      model='test_model',
      config=config_with_sig,
  )
  replicated_sig = replicated_section(result_with_sig)

  assert replicated_sig.get('mime_type') == 'audio/wav'
  assert replicated_sig.get('voice_sample_audio') is not None
  assert replicated_sig.get('voice_consent_signature') is not None
  assert (
      replicated_sig['voice_consent_signature'].get('signature')
      == 'test_sig_abc123'
  )

62 changes: 62 additions & 0 deletions google/genai/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -4901,6 +4901,29 @@ class ToolConfigDict(TypedDict, total=False):
ToolConfigOrDict = Union[ToolConfig, ToolConfigDict]


class VoiceConsentSignature(_common.BaseModel):
  """The signature of the voice consent check."""

  # The signature value itself; optional and None when not set. This model is
  # the type of the `voice_consent_signature` fields declared on
  # `ReplicatedVoiceConfig` and `LiveServerSetupComplete` in this module.
  signature: Optional[str] = Field(
      default=None,
      description="""The signature string.
""",
  )


class VoiceConsentSignatureDict(TypedDict, total=False):
  """The signature of the voice consent check."""

  # `total=False` makes every key optional, so `signature` may be omitted
  # from the dict entirely as well as being set to None.
  signature: Optional[str]
  """The signature string.
"""


# Alias for annotations that take the consent signature either as the
# `VoiceConsentSignature` model or as its `VoiceConsentSignatureDict` form.
VoiceConsentSignatureOrDict = Union[
    VoiceConsentSignature, VoiceConsentSignatureDict
]


class ReplicatedVoiceConfig(_common.BaseModel):
"""ReplicatedVoiceConfig is used to configure replicated voice."""

Expand All @@ -4914,6 +4937,20 @@ class ReplicatedVoiceConfig(_common.BaseModel):
description="""The sample audio of the replicated voice.
""",
)
consent_audio: Optional[bytes] = Field(
default=None,
description="""Recorded consent verifying ownership of the voice. This
represents 16-bit signed little-endian wav data, with a 24kHz sampling
rate.""",
)
voice_consent_signature: Optional[VoiceConsentSignature] = Field(
default=None,
description="""Signature of a previously verified consent audio. This should be
populated with a signature generated by the server for a previous
request containing the consent_audio field. When provided, the
signature is verified instead of the consent_audio field to reduce
latency. Requests will fail if the signature is invalid or expired.""",
)


class ReplicatedVoiceConfigDict(TypedDict, total=False):
Expand All @@ -4927,6 +4964,18 @@ class ReplicatedVoiceConfigDict(TypedDict, total=False):
"""The sample audio of the replicated voice.
"""

consent_audio: Optional[bytes]
"""Recorded consent verifying ownership of the voice. This
represents 16-bit signed little-endian wav data, with a 24kHz sampling
rate."""

voice_consent_signature: Optional[VoiceConsentSignatureDict]
"""Signature of a previously verified consent audio. This should be
populated with a signature generated by the server for a previous
request containing the consent_audio field. When provided, the
signature is verified instead of the consent_audio field to reduce
latency. Requests will fail if the signature is invalid or expired."""


ReplicatedVoiceConfigOrDict = Union[
ReplicatedVoiceConfig, ReplicatedVoiceConfigDict
Expand Down Expand Up @@ -17481,6 +17530,13 @@ class LiveServerSetupComplete(_common.BaseModel):
session_id: Optional[str] = Field(
default=None, description="""The session id of the live session."""
)
voice_consent_signature: Optional[VoiceConsentSignature] = Field(
default=None,
description="""Signature of the verified consent audio. This is populated when the
request has a ReplicatedVoiceConfig with consent_audio set, if the consent
verification was successful. This may be used in a subsequent request
instead of the consent_audio to verify the same consent.""",
)


class LiveServerSetupCompleteDict(TypedDict, total=False):
Expand All @@ -17489,6 +17545,12 @@ class LiveServerSetupCompleteDict(TypedDict, total=False):
session_id: Optional[str]
"""The session id of the live session."""

voice_consent_signature: Optional[VoiceConsentSignatureDict]
"""Signature of the verified consent audio. This is populated when the
request has a ReplicatedVoiceConfig with consent_audio set, if the consent
verification was successful. This may be used in a subsequent request
instead of the consent_audio to verify the same consent."""


LiveServerSetupCompleteOrDict = Union[
LiveServerSetupComplete, LiveServerSetupCompleteDict
Expand Down
Loading