diff --git a/examples/voice/cli/cli.py b/examples/voice/cli/cli.py index 5dc53ab..496b072 100644 --- a/examples/voice/cli/cli.py +++ b/examples/voice/cli/cli.py @@ -163,9 +163,6 @@ async def main() -> None: # Set chunk size config.chunk_size = args.chunk_size - # Set common items - config.enable_diarization = True - # Handle config display if args.show_compact: print(config.to_json(indent=2, exclude_unset=True, exclude_none=True)) diff --git a/sdk/voice/speechmatics/voice/_client.py b/sdk/voice/speechmatics/voice/_client.py index 978d130..a376eb8 100644 --- a/sdk/voice/speechmatics/voice/_client.py +++ b/sdk/voice/speechmatics/voice/_client.py @@ -165,11 +165,11 @@ def __init__( # ------------------------------------- # Default to EXTERNAL if no config or preset string provided - if config is None and not preset: + if config is None and preset is None: config = VoiceAgentConfigPreset.EXTERNAL() # Check for preset - elif preset: + elif preset is not None: preset_config = VoiceAgentConfigPreset.load(preset) config = VoiceAgentConfigPreset._merge_configs(preset_config, config) @@ -1085,7 +1085,7 @@ async def _add_speech_fragments(self, message: dict[str, Any], is_final: bool = is_final=is_final, attaches_to=result.get("attaches_to", ""), content=alt.get("content", ""), - speaker=alt.get("speaker", None), + speaker=alt.get("speaker", "UU"), confidence=alt.get("confidence", 1.0), volume=result.get("volume", None), result={"final": is_final, **result}, @@ -1382,8 +1382,7 @@ async def _emit_segments(self, finalize: bool = False) -> None: self._turn_start_time = self._current_view.start_time # Send updated speaker metrics - if self._dz_enabled: - self._calculate_speaker_metrics(partial_segments, final_segments) + self._calculate_speaker_metrics(partial_segments, final_segments) # Emit end of turn if finalize: diff --git a/tests/tts/async_http_test.py b/tests/tts/async_http_test.py index 72b5eff..8fddca2 100644 --- a/tests/tts/async_http_test.py +++ 
b/tests/tts/async_http_test.py @@ -6,7 +6,7 @@ @pytest.mark.asyncio -@pytest.mark.skipif(os.getenv("SPEECHMATICS_API_KEY") is None, reason="Skipping test if API key is not set") +@pytest.mark.skipif(os.getenv("SPEECHMATICS_API_KEY") is None, reason="Skipping test if API key is not set") async def test_async_http(): async with AsyncClient() as client: async with await client.generate(text="Hello world") as response:
diff --git a/tests/voice/test_15_esl.py b/tests/voice/test_15_esl.py
new file mode 100644
index 0000000..bdb79b5
--- /dev/null
+++ b/tests/voice/test_15_esl.py
@@ -0,0 +1,57 @@
+import asyncio
+import os
+
+import pytest
+from _utils import get_client
+from _utils import log_client_messages
+from _utils import send_audio_file
+
+from speechmatics.voice._presets import VoiceAgentConfigPreset
+
+# Module-level skips. Both marks live in ONE list: assigning `pytestmark` twice
+# would silently discard the first mark, because only the final binding is read.
+pytestmark = [
+    # Skip for CI testing
+    pytest.mark.skipif(os.getenv("CI") == "true", reason="Skipping transcription tests in CI"),
+    # Skip if ESL tests are not enabled
+    pytest.mark.skipif(os.getenv("TEST_ESL", "0").lower() not in ["1", "true"], reason="Skipping ESL tests"),
+]
+
+# Constants
+API_KEY = os.getenv("SPEECHMATICS_API_KEY")
+URL = os.getenv("TEST_ESL_URL", "ws://localhost:8080/v2")
+SHOW_LOG = os.getenv("SPEECHMATICS_SHOW_LOG", "0").lower() in ["1", "true"]
+
+
+@pytest.mark.asyncio
+async def test_esl():
+    """Local ESL inference: connect, stream one audio file, then disconnect.
+
+    The endpoint defaults to ``ws://localhost:8080/v2`` and can be overridden
+    with ``TEST_ESL_URL``. The test only checks the connection state before and
+    after the session; it makes no assertion about the transcript.
+    """
+
+    # Client (connect=False: the connection is opened explicitly below)
+    client = await get_client(api_key=API_KEY, url=URL, connect=False, config=VoiceAgentConfigPreset.FAST())
+
+    # Add listeners
+    log_client_messages(client)
+
+    # Connect
+    await client.connect()
+
+    try:
+        # Check we are connected
+        assert client._is_connected
+
+        # Load the audio file `./assets/audio_01_16kHz.wav`
+        await send_audio_file(client, "./assets/audio_01_16kHz.wav")
+
+        # Wait 5 seconds
+        await asyncio.sleep(5)
+    finally:
+        # Close session even when a step above fails, so no connection is left open
+        await client.disconnect()
+
+    assert not client._is_connected