Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,8 @@ This will:
3. Summarize with your local LLM
4. Save everything to `~/ownscribe/YYYY-MM-DD_HHMMSS/`

On first run, WhisperX and pyannote may need to download model files. While this happens, ownscribe shows a `Preparing models` step in the TUI with best-effort download progress.

### Options

```bash
Expand All @@ -117,6 +119,7 @@ ownscribe --template lecture # use the lecture summarization te
```bash
ownscribe devices # list audio devices (uses native CoreAudio when available)
ownscribe apps # list running apps with PIDs for use with --pid
ownscribe warmup # prefetch WhisperX/pyannote models before a meeting
ownscribe transcribe recording.wav # transcribe an audio file (saves alongside the input)
ownscribe summarize transcript.md # summarize a transcript (saves alongside the input)
ownscribe resume ./2026-02-20_1736 # resume a failed/partial pipeline in a directory
Expand All @@ -125,6 +128,14 @@ ownscribe config # open config file in $EDITOR
ownscribe cleanup # remove ownscribe data from disk
```

Use `warmup` ahead of time to avoid first-run model download delays while recording:

```bash
ownscribe warmup # prefetch Whisper model (+ diarization if enabled in config)
ownscribe warmup --language en # also prefetch alignment model for English
ownscribe warmup --with-diarization # force diarization warmup for this run
```

### Searching Meeting Notes

Use `ask` to search across all your meeting notes with natural-language questions:
Expand Down
29 changes: 29 additions & 0 deletions src/ownscribe/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,35 @@ def transcribe(
run_transcribe(config, file)


@cli.command()
@click.option("--model", default=None, help="Whisper model size.")
@click.option(
    "--language",
    default=None,
    help="Language code to prefetch alignment model for (e.g. en, de, fr).",
)
@click.option(
    "--with-diarization/--no-diarization",
    "with_diarization",
    default=None,
    help="Override diarization warmup (defaults to config setting).",
)
@click.pass_context
def warmup(
    ctx: click.Context,
    model: str | None,
    language: str | None,
    with_diarization: bool | None,
) -> None:
    """Prefetch WhisperX/pyannote models to avoid first-run stalls."""
    # NOTE: the docstring above doubles as the command's --help text.
    cfg = ctx.obj["config"]

    # Command-line values take precedence over the loaded configuration, but
    # only when the user actually supplied a non-empty value.
    for field_name, override in (("model", model), ("language", language)):
        if override:
            setattr(cfg.transcription, field_name, override)

    # The diarization flag is tri-state: None means "flag not given", so the
    # configured setting is left untouched in that case.
    if with_diarization is not None:
        cfg.diarization.enabled = with_diarization

    # Imported lazily so the pipeline module is only loaded when this command
    # actually runs.
    from ownscribe.pipeline import run_warmup

    run_warmup(cfg)


@cli.command()
@click.argument("file", type=click.Path(exists=True))
@click.option("--template", default=None, help="Summarization template (meeting, lecture, brief, or custom).")
Expand Down
36 changes: 36 additions & 0 deletions src/ownscribe/pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -222,6 +222,42 @@ def run_transcribe(config: Config, audio_file: str) -> None:
_do_transcribe_and_summarize(config, audio_path, out_dir, summarize=False)


def run_warmup(config: Config) -> None:
    """Download/load the transcription models up front, without any audio.

    Builds a transcriber under a progress display that only includes the
    "prepare" step, asks it to prepare its models, and then prints a short
    report of what is ready.

    Args:
        config: Application configuration. Reads ``transcription.model``,
            ``transcription.language``, ``diarization.enabled`` and
            ``diarization.hf_token``.

    Raises:
        SystemExit: With exit code 1 when creating the transcriber raises
            ``ImportError`` (reported to the user as WhisperX not installed).
    """
    diarization = config.diarization
    # Diarization is only reported as ready when it is enabled AND a Hugging
    # Face token is configured; enabled-without-token yields a warning instead.
    diarization_ready = diarization.enabled and bool(diarization.hf_token)
    token_missing = diarization.enabled and not diarization.hf_token

    progress_display = PipelineProgress(
        diarize=False,
        summarize=False,
        transcribe=False,
        include_prepare=True,
    )
    with progress_display as progress:
        try:
            transcriber = _create_transcriber(config, progress=progress)
        except ImportError:
            install_hint = (
                "Error: WhisperX is not installed. Install with:\n"
                " uv pip install 'ownscribe[transcription]'"
            )
            click.echo(install_hint, err=True)
            # Suppress the ImportError traceback; the hint above is enough.
            raise SystemExit(1) from None

        # An empty language is passed as None.
        transcriber.prepare_models(language=config.transcription.language or None)

    # Status report, printed once the progress display has been closed.
    click.echo(f"Whisper model ready: {config.transcription.model}")
    click.echo(
        f"Alignment model ready: {config.transcription.language}"
        if config.transcription.language
        else "Alignment model not preloaded (language auto-detect)."
    )

    if diarization_ready:
        click.echo("Diarization pipeline ready.")
        return

    if token_missing:
        click.echo(
            "Warning: Diarization enabled but no HF token configured. "
            "Skipping diarization warmup.",
            err=True,
        )


def run_summarize(config: Config, transcript_file: str) -> None:
"""Summarize a transcript file and save the summary alongside the input."""
transcript_path = Path(transcript_file).resolve()
Expand Down
Loading