Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,10 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [3.0.5] - 2025-05-15

- cli: fix `topic_detection_config` and `speaker_diarization_config` options being ignored when defined in a config file

## [3.0.4] - 2025-04-16

- Support for new parameters `prefer_current_speaker` and `speaker_sensitivity` in Speaker Diarization
Expand Down
2 changes: 1 addition & 1 deletion VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
3.0.4
3.0.5
1 change: 1 addition & 0 deletions asr_metrics/cli.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""Entrypoint for SM metrics"""

import argparse

import asr_metrics.diarization.sm_diarization_metrics.cookbook as diarization_metrics
Expand Down
1 change: 1 addition & 0 deletions asr_metrics/wer/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
Simple script to run WER analysis using Whisper normalisers
Prints results to terminal
"""

import difflib
import json
from pathlib import Path
Expand Down
22 changes: 13 additions & 9 deletions asr_metrics/wer/normalizers/basic.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,15 +50,19 @@ def remove_symbols_and_diacritics(self, s: str, keep=""):
and drop any diacritics (category 'Mn' and some manual mappings)
"""
return "".join(
c
if c in keep
else self.additional_diacritics[c]
if c in self.additional_diacritics
else ""
if unicodedata.category(c) == "Mn"
else " "
if unicodedata.category(c)[0] in "MSP"
else c
(
c
if c in keep
else (
self.additional_diacritics[c]
if c in self.additional_diacritics
else (
""
if unicodedata.category(c) == "Mn"
else " " if unicodedata.category(c)[0] in "MSP" else c
)
)
)
for c in unicodedata.normalize("NFKD", s)
)

Expand Down
1 change: 1 addition & 0 deletions examples/notification_flow/batch_transcription_client.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""
Client module which calls the Speechmatics API
"""

import sqlite3

from speechmatics.batch_client import BatchClient
Expand Down
1 change: 1 addition & 0 deletions examples/notification_flow/cronjob.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

Async fallback checking in case of webhook failure is a common pattern in production systems
"""

import sqlite3

from speechmatics.batch_client import BatchClient
Expand Down
1 change: 1 addition & 0 deletions examples/notification_flow/notification_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

This is a dev example - DO NOT USE IN PRODUCTION
"""

import json
import sqlite3

Expand Down
25 changes: 9 additions & 16 deletions speechmatics/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,8 +36,8 @@
ConnectionSettings,
RTSpeakerDiarizationConfig,
RTTranslationConfig,
ServerMessageType,
SentimentAnalysisConfig,
ServerMessageType,
SummarizationConfig,
TopicDetectionConfig,
TranscriptionConfig,
Expand Down Expand Up @@ -271,18 +271,13 @@ def get_transcription_config(
"max_delay_mode",
"diarization",
"channel_diarization_labels",
"speaker_diarization_sensitivity",
"speaker_diarization_max_speakers",
]:
if args.get(option) is not None:
config[option] = args[option]
for option in [
"streaming_mode",
"enable_partials",
"enable_entities",
"enable_translation_partials",
"enable_transcription_partials",
"speaker_diarization_prefer_current_speaker",
]:
config[option] = True if args.get(option) else config.get(option)

Expand Down Expand Up @@ -355,17 +350,15 @@ def get_transcription_config(
diarization_config = config.get("speaker_diarization_config", {})
if diarization_config or args.get("diarization") == "speaker":
max_speakers = args.get(
"speaker_diarization_max_speakers",
diarization_config.get("speaker_diarization_max_speakers", None),
)
"speaker_diarization_max_speakers"
) or diarization_config.get("max_speakers", None)
prefer_current_speaker = args.get(
"speaker_diarization_prefer_current_speaker",
diarization_config.get("speaker_diarization_prefer_current_speaker", None),
)
"speaker_diarization_prefer_current_speaker"
) or diarization_config.get("prefer_current_speaker", None)
speaker_sensitivity = args.get(
"speaker_diarization_sensitivity",
diarization_config.get("speaker_diarization_sensitivity", None),
)
"speaker_diarization_sensitivity"
) or diarization_config.get("speaker_sensitivity", None)

if args["mode"] == "rt":
config["speaker_diarization_config"] = RTSpeakerDiarizationConfig(
max_speakers=max_speakers,
Expand Down Expand Up @@ -434,7 +427,7 @@ def get_transcription_config(
args_topic_detection = args.get("detect_topics")
if args_topic_detection or file_topic_detection_config is not None:
topic_detection_config = TopicDetectionConfig()
topics = args.get("topics", file_topic_detection_config.get("topics"))
topics = args.get("topics") or file_topic_detection_config.get("topics")
if topics:
topic_detection_config.topics = topics
config["topic_detection_config"] = topic_detection_config
Expand Down
24 changes: 12 additions & 12 deletions speechmatics/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -140,13 +140,13 @@ def _set_recognition_config(self):
"transcription_config": self.transcription_config.as_config(),
}
if self.transcription_config.translation_config is not None:
msg[
"translation_config"
] = self.transcription_config.translation_config.asdict()
msg["translation_config"] = (
self.transcription_config.translation_config.asdict()
)
if self.transcription_config.audio_events_config is not None:
msg[
"audio_events_config"
] = self.transcription_config.audio_events_config.asdict()
msg["audio_events_config"] = (
self.transcription_config.audio_events_config.asdict()
)
self._call_middleware(ClientMessageType.SetRecognitionConfig, msg, False)
return msg

Expand All @@ -168,13 +168,13 @@ def _start_recognition(self, audio_settings):
"transcription_config": self.transcription_config.as_config(),
}
if self.transcription_config.translation_config is not None:
msg[
"translation_config"
] = self.transcription_config.translation_config.asdict()
msg["translation_config"] = (
self.transcription_config.translation_config.asdict()
)
if self.transcription_config.audio_events_config is not None:
msg[
"audio_events_config"
] = self.transcription_config.audio_events_config.asdict()
msg["audio_events_config"] = (
self.transcription_config.audio_events_config.asdict()
)
self.session_running = True
self._call_middleware(ClientMessageType.StartRecognition, msg, False)
LOGGER.debug(msg)
Expand Down
6 changes: 6 additions & 0 deletions tests/data/transcription_config.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,12 @@
"language": "xy",
"domain": "fake",
"enable_entities": true,
"diarization": "speaker",
"speaker_diarization_config": {
"prefer_current_speaker": true,
"max_speakers": 5,
"speaker_sensitivity": 0.3
},
"translation_config": {
"target_languages": ["es"],
"enable_partials": false
Expand Down
17 changes: 15 additions & 2 deletions tests/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,7 @@
import pytest
import toml

from speechmatics import cli
from speechmatics import cli_parser
from speechmatics import cli, cli_parser
from speechmatics.constants import (
BATCH_SELF_SERVICE_URL,
RT_SELF_SERVICE_URL,
Expand Down Expand Up @@ -771,6 +770,12 @@ def test_rt_main_with_config_file(mock_server):
assert msg["transcription_config"]["domain"] == "fake"
assert msg["transcription_config"]["enable_entities"] is True
assert msg["transcription_config"].get("operating_point") is None
assert msg["transcription_config"]["diarization"] == "speaker"
assert msg["transcription_config"]["speaker_diarization_config"] == {
"prefer_current_speaker": True,
"max_speakers": 5,
"speaker_sensitivity": 0.3,
}
assert msg["translation_config"] is not None
assert msg["translation_config"]["enable_partials"] is False
assert msg["translation_config"]["target_languages"] == ["es"]
Expand All @@ -795,6 +800,8 @@ def test_rt_main_with_config_file_cmdline_override(mock_server):
"--output-locale=en-US",
"--domain=different",
"--operating-point=enhanced",
"--speaker-diarization-max-speakers=3",
"--speaker-diarization-sensitivity=0.7",
audio_path,
]

Expand All @@ -816,6 +823,12 @@ def test_rt_main_with_config_file_cmdline_override(mock_server):
assert msg["transcription_config"]["enable_entities"] is True
assert msg["transcription_config"]["output_locale"] == "en-US"
assert msg["transcription_config"]["operating_point"] == "enhanced"
assert msg["transcription_config"]["diarization"] == "speaker"
assert msg["transcription_config"]["speaker_diarization_config"] == {
"prefer_current_speaker": True,
"max_speakers": 3,
"speaker_sensitivity": 0.7,
}
assert msg["translation_config"] is not None
assert msg["translation_config"]["enable_partials"] is True
assert msg["translation_config"]["target_languages"] == ["fr"]
Expand Down