Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,12 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [3.0.6] - 2025-05-20

### Added

- Support end-of-utterance messages (DEL-24982)

## [3.0.5] - 2025-05-15

- cli: fix some config options not being set when defined in a config file: `topic_detection_config` and `speaker_diarization_config`
Expand Down
2 changes: 1 addition & 1 deletion VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
3.0.5
3.0.6
16 changes: 16 additions & 0 deletions speechmatics/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -281,6 +281,13 @@ def get_transcription_config(
]:
config[option] = True if args.get(option) else config.get(option)

if args.get("end_of_utterance_silence_trigger") is not None:
config["conversation_config"] = {
"end_of_utterance_silence_trigger": args.get(
"end_of_utterance_silence_trigger"
)
}

if args.get("volume_threshold") is not None:
config["audio_filtering_config"] = {
"volume_threshold": args.get("volume_threshold")
Expand Down Expand Up @@ -556,6 +563,13 @@ def audio_event_handler(message):
sys.stdout.write(f"{escape_seq}[{event_name}]\n")
transcripts.text += f"[{event_name}] "

def end_of_utterance_handler(message):
    """Handle an EndOfUtterance message received from the server.

    In JSON mode the raw message is dumped to stdout and nothing else
    happens. Otherwise an ``[EndOfUtterance]`` marker line is written to
    stdout and the same marker is appended to the accumulated transcript.

    :param message: The EndOfUtterance message as received from the server.
    """
    if print_json:
        print(json.dumps(message))
        return
    sys.stdout.write("[EndOfUtterance]\n")
    # NOTE(review): the audio event handler appends its marker with a
    # trailing space; this marker has none, so following text is glued to
    # it — confirm whether that is intended.
    transcripts.text += "[EndOfUtterance]"

def partial_translation_handler(message):
if print_json:
print(json.dumps(message))
Expand Down Expand Up @@ -590,6 +604,8 @@ def end_of_transcript_handler(_):
# print transcription (if text was requested without translation)

api.add_event_handler(ServerMessageType.AudioEventStarted, audio_event_handler)
api.add_event_handler(ServerMessageType.EndOfUtterance, end_of_utterance_handler)

if print_json:
if enable_partials or enable_translation_partials:
api.add_event_handler(
Expand Down
9 changes: 9 additions & 0 deletions speechmatics/cli_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -218,6 +218,15 @@ def get_arg_parser():
default=None,
help=("Filter out quiet audio which falls below this threshold (0.0-100.0)"),
)
config_parser.add_argument(
"--end-of-utterance-silence-trigger",
dest="end_of_utterance_silence_trigger",
type=float,
default=None,
help=(
"Generate an EndOfUtterance message from the server after this many seconds of silence (0.0-2.0)"
),
)
config_parser.add_argument(
"--remove-disfluencies",
default=None,
Expand Down
14 changes: 14 additions & 0 deletions speechmatics/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -166,6 +166,14 @@ def asdict(self):
return asdict(self)


@dataclass
class ConversationConfig:
    """Conversation config.

    Holds the settings that control end-of-utterance detection. No range
    validation is performed here; the value is passed through as given.
    """

    end_of_utterance_silence_trigger: Optional[float] = None
    """How much silence in seconds is required to trigger end of utterance detection."""

    def asdict(self):
        """Return this config as a plain dictionary."""
        return asdict(self)


@dataclass
class RTTranslationConfig(TranslationConfig):
"""Real-time mode: Translation config."""
Expand Down Expand Up @@ -287,6 +295,9 @@ class TranscriptionConfig(_TranscriptionConfig):
"""Indicates if partial translation, where words are produced
immediately, is enabled."""

conversation_config: Optional[ConversationConfig] = None
"""Optional configuration for end-of-utterance detection."""

translation_config: Optional[TranslationConfig] = None
"""Optional configuration for translation."""

Expand Down Expand Up @@ -550,6 +561,9 @@ class ServerMessageType(str, Enum):
AddTranscript = "AddTranscript"
"""Indicates the final transcript of a part of the audio."""

EndOfUtterance = "EndOfUtterance"
"""Indicates that an utterance has ended, based on silence"""

AudioEventStarted = "AudioEventStarted"
"""Indicates the start of an audio event."""

Expand Down
12 changes: 11 additions & 1 deletion tests/mock_rt_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,15 @@ def dummy_add_partial_transcript():
}


def dummy_end_of_utterance():
    """Returns a dummy EndOfUtterance message.

    A fresh dictionary is built on every call, so callers may mutate the
    result without affecting later responses.
    """
    return {
        "message": "EndOfUtterance",
        "format": "2.1",
        "metadata": {"start_time": 3.0, "end_time": 3.0},
    }


def dummy_add_transcript():
"""Returns a dummy AddTranscript message."""
return {
Expand Down Expand Up @@ -194,9 +203,10 @@ def get_responses(message, is_binary=False):
)
mock_server_handler.next_audio_seq_no += 1

# Answer immediately with a partial and a final.
# Answer immediately with a partial and a final and an end of utterance.
responses.append(dummy_add_partial_transcript())
responses.append(dummy_add_transcript())
responses.append(dummy_end_of_utterance())
else:
msg_name = message.get("message")
if not msg_name:
Expand Down
4 changes: 4 additions & 0 deletions tests/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -182,6 +182,10 @@
{"enable_translation_partials": True},
),
(["rt", "transcribe", "--enable-entities"], {"enable_entities": True}),
(
["rt", "transcribe", "--end-of-utterance-silence-trigger=1.8"],
{"end_of_utterance_silence_trigger": 1.8},
),
(["batch", "transcribe", "--enable-entities"], {"enable_entities": True}),
(
["batch", "transcribe", "--diarization=speaker"],
Expand Down
14 changes: 14 additions & 0 deletions tests/test_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -220,3 +220,17 @@ def test_notification_config(params, want):
def test_audio_events_config_config(params, want):
audio_events_config = models.AudioEventsConfig(**params)
assert audio_events_config.asdict() == want


@mark.parametrize(
    "params, want",
    [
        param(
            {"end_of_utterance_silence_trigger": 1.8},
            {"end_of_utterance_silence_trigger": 1.8},
        ),
        # With no arguments the trigger falls back to its default of None.
        param(
            {},
            {"end_of_utterance_silence_trigger": None},
        ),
    ],
)
def test_conversation_config(params, want):
    """ConversationConfig converts to the expected plain dictionary."""
    conversation_config = models.ConversationConfig(**params)
    assert asdict(conversation_config) == want