From f83743dd95958374931b2707b3353439f2300d1b Mon Sep 17 00:00:00 2001
From: James Gilmore <jamesg@speechmatics.com>
Date: Fri, 28 Mar 2025 09:29:26 +0000
Subject: [PATCH 1/8] first commit, deal with end-of-utterance markers in
 responses

---
 speechmatics/cli.py    | 8 ++++++++
 speechmatics/models.py | 3 +++
 2 files changed, 11 insertions(+)

diff --git a/speechmatics/cli.py b/speechmatics/cli.py
index 6aff1fd..6fcd700 100755
--- a/speechmatics/cli.py
+++ b/speechmatics/cli.py
@@ -547,6 +547,13 @@ def audio_event_handler(message):
         sys.stdout.write(f"{escape_seq}[{event_name}]\n")
         transcripts.text += f"[{event_name}] "
 
+    def end_of_utterance_handler(message):  # handles EndOfUtterance server messages
+        if print_json:  # JSON mode: emit the raw message and nothing else
+            print(json.dumps(message))
+            return
+        sys.stdout.write("[EndOfUtterance]\n")
+        transcripts.text += "[EndOfUtterance] "  # space separates following text
+
     def partial_translation_handler(message):
         if print_json:
             print(json.dumps(message))
@@ -594,6 +601,7 @@ def end_of_transcript_handler(_):
                 partial_transcript_handler,
             )
         api.add_event_handler(ServerMessageType.AddTranscript, transcript_handler)
+        api.add_event_handler(ServerMessageType.EndOfUtterance, end_of_utterance_handler)
     else:
         if translation_config is not None:
             if enable_partials or enable_translation_partials:
diff --git a/speechmatics/models.py b/speechmatics/models.py
index 0becc9f..156238f 100644
--- a/speechmatics/models.py
+++ b/speechmatics/models.py
@@ -531,6 +531,9 @@ class ServerMessageType(str, Enum):
     AddTranscript = "AddTranscript"
     """Indicates the final transcript of a part of the audio."""
 
+    EndOfUtterance = "EndOfUtterance"
+    """Indicates that an utterance has ended, based on silence."""
+
     AudioEventStarted = "AudioEventStarted"
     """Indicates the start of an audio event."""
 

From f5f8353bcdd31316eac5538ac640dba2095c7266 Mon Sep 17 00:00:00 2001
From: James Gilmore <jamesg@speechmatics.com>
Date: Thu, 15 May 2025 16:56:58 +0100
Subject: [PATCH 2/8] Print to stdout even in non-json mode

---
 speechmatics/cli.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/speechmatics/cli.py b/speechmatics/cli.py
index 6fcd700..6e3b72a 100755
--- a/speechmatics/cli.py
+++ b/speechmatics/cli.py
@@ -588,6 +588,8 @@ def end_of_transcript_handler(_):
     # print transcription (if text was requested without translation)
 
     api.add_event_handler(ServerMessageType.AudioEventStarted, audio_event_handler)
+    api.add_event_handler(ServerMessageType.EndOfUtterance, end_of_utterance_handler)
+
     if print_json:
         if enable_partials or enable_translation_partials:
             api.add_event_handler(
@@ -601,7 +603,6 @@ def end_of_transcript_handler(_):
                 partial_transcript_handler,
             )
         api.add_event_handler(ServerMessageType.AddTranscript, transcript_handler)
-        api.add_event_handler(ServerMessageType.EndOfUtterance, end_of_utterance_handler)
     else:
         if translation_config is not None:
             if enable_partials or enable_translation_partials:

From 704c5ded6bd9f23e35745858151e120c011530c8 Mon Sep 17 00:00:00 2001
From: James Gilmore <jamesg@speechmatics.com>
Date: Fri, 16 May 2025 13:12:37 +0100
Subject: [PATCH 3/8] Add to arguments

---
 speechmatics/cli_parser.py | 7 +++++++
 speechmatics/models.py     | 9 +++++++++
 2 files changed, 16 insertions(+)

diff --git a/speechmatics/cli_parser.py b/speechmatics/cli_parser.py
index e0e5d4b..b1f74fd 100644
--- a/speechmatics/cli_parser.py
+++ b/speechmatics/cli_parser.py
@@ -218,6 +218,13 @@ def get_arg_parser():
         default=None,
         help=("Filter out quiet audio which falls below this threshold (0.0-100.0)"),
     )
+    config_parser.add_argument(
+        "--end-of-utterance-silence-trigger",
+        dest="end_of_utterance_silence_trigger",
+        type=float,
+        default=None,
+        help=("Generate an EndOfUtterance message from the server after this many seconds of silence."),
+    )
     config_parser.add_argument(
         "--remove-disfluencies",
         default=None,
diff --git a/speechmatics/models.py b/speechmatics/models.py
index 156238f..f3e6bba 100644
--- a/speechmatics/models.py
+++ b/speechmatics/models.py
@@ -153,7 +153,13 @@ class TranslationConfig:
     def asdict(self):
         return asdict(self)
 
+@dataclass
+class ConversationConfig:
+    """Conversation config."""
 
+    end_of_utterance_silence_trigger: Optional[float] = None
+    """How much silence in seconds is required to trigger end of utterance detection."""
+    
 @dataclass
 class RTTranslationConfig(TranslationConfig):
     """Real-time mode: Translation config."""
@@ -268,6 +274,9 @@ class TranscriptionConfig(_TranscriptionConfig):
     """Indicates if partial translation, where words are produced
     immediately, is enabled."""
 
+    conversation_config: Optional[ConversationConfig] = None
+    """Optional configuration for end-of-utterance detection."""
+
     translation_config: Optional[TranslationConfig] = None
     """Optional configuration for translation."""
 

From 90e6a45682b1c1777667f034af3bb4d07735b361 Mon Sep 17 00:00:00 2001
From: James Gilmore <jamesg@speechmatics.com>
Date: Fri, 16 May 2025 13:17:14 +0100
Subject: [PATCH 4/8] Pass argument through

---
 speechmatics/cli.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/speechmatics/cli.py b/speechmatics/cli.py
index 6e3b72a..8e1bb8b 100755
--- a/speechmatics/cli.py
+++ b/speechmatics/cli.py
@@ -284,6 +284,11 @@ def get_transcription_config(
     ]:
         config[option] = True if args.get(option) else config.get(option)
 
+    if args.get("end_of_utterance_silence_trigger") is not None:
+        config["conversation_config"] = {
+            "end_of_utterance_silence_trigger": args.get("end_of_utterance_silence_trigger")
+        }
+
     if args.get("volume_threshold") is not None:
         config["audio_filtering_config"] = {
             "volume_threshold": args.get("volume_threshold")

From 342c75fb58760e2e41fcc830aadabe465fe4dea8 Mon Sep 17 00:00:00 2001
From: James Gilmore <jamesg@speechmatics.com>
Date: Mon, 19 May 2025 16:10:15 +0100
Subject: [PATCH 5/8] doc limits

---
 speechmatics/cli_parser.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/speechmatics/cli_parser.py b/speechmatics/cli_parser.py
index b1f74fd..c8bac10 100644
--- a/speechmatics/cli_parser.py
+++ b/speechmatics/cli_parser.py
@@ -223,7 +223,8 @@ def get_arg_parser():
         dest="end_of_utterance_silence_trigger",
         type=float,
         default=None,
-        help=("Generate an EndOfUtterance message from the server after this many seconds of silence."),
+        help=("Generate an EndOfUtterance message from the server after this many seconds of silence (0.0-2.0)"),
+
     )
     config_parser.add_argument(
         "--remove-disfluencies",

From 6529bf9f5baf02b0a428a3f216193be31f5f6506 Mon Sep 17 00:00:00 2001
From: James Gilmore <jamesg@speechmatics.com>
Date: Mon, 19 May 2025 16:11:48 +0100
Subject: [PATCH 6/8] changelog and version

---
 CHANGELOG.md | 6 ++++++
 VERSION      | 2 +-
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index c3a5ff8..383374f 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -5,6 +5,12 @@ All notable changes to this project will be documented in this file.
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
+## [3.0.4] - 2025-05-19
+
+### Added
+
+- Support end-of-utterance messages (DEL-24982)
+
 ## [3.0.3] - 2025-03-03
 
 ### Added
diff --git a/VERSION b/VERSION
index 75a22a2..b0f2dcb 100644
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-3.0.3
+3.0.4

From 76be3cc21470c032d675c854dc6efa865b3b9f5f Mon Sep 17 00:00:00 2001
From: James Gilmore <jamesg@speechmatics.com>
Date: Tue, 20 May 2025 09:54:01 +0100
Subject: [PATCH 7/8] lint

---
 speechmatics/models.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/speechmatics/models.py b/speechmatics/models.py
index f3e6bba..ed5253d 100644
--- a/speechmatics/models.py
+++ b/speechmatics/models.py
@@ -153,13 +153,15 @@ class TranslationConfig:
     def asdict(self):
         return asdict(self)
 
+
 @dataclass
 class ConversationConfig:
     """Conversation config."""
 
     end_of_utterance_silence_trigger: Optional[float] = None
     """How much silence in seconds is required to trigger end of utterance detection."""
-    
+
+
 @dataclass
 class RTTranslationConfig(TranslationConfig):
     """Real-time mode: Translation config."""

From 087dc125efe2a89ccd7fd313132885601cbb72d3 Mon Sep 17 00:00:00 2001
From: James Gilmore <jamesg@speechmatics.com>
Date: Tue, 20 May 2025 10:17:12 +0100
Subject: [PATCH 8/8] CLI tests

---
 speechmatics/cli.py        |  4 +++-
 speechmatics/cli_parser.py |  5 +++--
 tests/mock_rt_server.py    | 12 +++++++++++-
 tests/test_cli.py          |  4 ++++
 tests/test_models.py       | 14 ++++++++++++++
 5 files changed, 35 insertions(+), 4 deletions(-)

diff --git a/speechmatics/cli.py b/speechmatics/cli.py
index 8e1bb8b..3338c63 100755
--- a/speechmatics/cli.py
+++ b/speechmatics/cli.py
@@ -286,7 +286,9 @@ def get_transcription_config(
 
     if args.get("end_of_utterance_silence_trigger") is not None:
         config["conversation_config"] = {
-            "end_of_utterance_silence_trigger": args.get("end_of_utterance_silence_trigger")
+            "end_of_utterance_silence_trigger": args.get(
+                "end_of_utterance_silence_trigger"
+            )
         }
 
     if args.get("volume_threshold") is not None:
diff --git a/speechmatics/cli_parser.py b/speechmatics/cli_parser.py
index c8bac10..3d88dd0 100644
--- a/speechmatics/cli_parser.py
+++ b/speechmatics/cli_parser.py
@@ -223,8 +223,9 @@ def get_arg_parser():
         dest="end_of_utterance_silence_trigger",
         type=float,
         default=None,
-        help=("Generate an EndOfUtterance message from the server after this many seconds of silence (0.0-2.0)"),
-
+        help=(
+            "Generate an EndOfUtterance message from the server after this many seconds of silence (0.0-2.0)"
+        ),
     )
     config_parser.add_argument(
         "--remove-disfluencies",
diff --git a/tests/mock_rt_server.py b/tests/mock_rt_server.py
index 387c452..30f3c24 100644
--- a/tests/mock_rt_server.py
+++ b/tests/mock_rt_server.py
@@ -120,6 +120,15 @@ def dummy_add_partial_transcript():
     }
 
 
+def dummy_end_of_utterance():
+    """Build the canned EndOfUtterance message sent back by the mock server."""
+    response = {}
+    response["message"] = "EndOfUtterance"
+    response["format"] = "2.1"
+    response["metadata"] = {"start_time": 3.0, "end_time": 3.0}
+    return response
+
+
 def dummy_add_transcript():
     """Returns a dummy AddTranscript message."""
     return {
@@ -194,9 +203,10 @@ def get_responses(message, is_binary=False):
             )
             mock_server_handler.next_audio_seq_no += 1
 
-            # Answer immediately with a partial and a final.
+            # Answer immediately with a partial, a final, and an end of utterance.
             responses.append(dummy_add_partial_transcript())
             responses.append(dummy_add_transcript())
+            responses.append(dummy_end_of_utterance())
         else:
             msg_name = message.get("message")
             if not msg_name:
diff --git a/tests/test_cli.py b/tests/test_cli.py
index 2113f33..1698ef1 100644
--- a/tests/test_cli.py
+++ b/tests/test_cli.py
@@ -183,6 +183,10 @@
             {"enable_translation_partials": True},
         ),
         (["rt", "transcribe", "--enable-entities"], {"enable_entities": True}),
+        (
+            ["rt", "transcribe", "--end-of-utterance-silence-trigger=1.8"],
+            {"end_of_utterance_silence_trigger": 1.8},
+        ),
         (["batch", "transcribe", "--enable-entities"], {"enable_entities": True}),
         (
             ["batch", "transcribe", "--speaker-diarization-sensitivity=0.7"],
diff --git a/tests/test_models.py b/tests/test_models.py
index 62703cb..19d3af4 100644
--- a/tests/test_models.py
+++ b/tests/test_models.py
@@ -220,3 +220,17 @@ def test_notification_config(params, want):
 def test_audio_events_config_config(params, want):
     audio_events_config = models.AudioEventsConfig(**params)
     assert audio_events_config.asdict() == want
+
+
+@mark.parametrize(
+    "params, want",
+    [
+        param(
+            {"end_of_utterance_silence_trigger": 1.8},
+            {"end_of_utterance_silence_trigger": 1.8},
+        ),
+    ],
+)
+def test_conversation_config(params, want):
+    """ConversationConfig fields survive a round trip through asdict."""
+    assert asdict(models.ConversationConfig(**params)) == want