Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 24 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -102,20 +102,42 @@ openplanter-agent --provider ollama --list-models

The base URL defaults to `http://localhost:11434/v1` and can be overridden with `OPENPLANTER_OLLAMA_BASE_URL` or `--base-url`. The first request may be slow while Ollama loads the model into memory; a 120-second first-byte timeout is used automatically.

Additional service keys: `EXA_API_KEY` (web search), `VOYAGE_API_KEY` (embeddings).
Additional service keys: `EXA_API_KEY` (web search), `VOYAGE_API_KEY` (embeddings), `MISTRAL_TRANSCRIPTION_API_KEY` or `MISTRAL_API_KEY` (audio transcription).

### Audio Transcription

OpenPlanter includes an `audio_transcribe` tool backed by Mistral's offline transcription API. It accepts local workspace audio or video files and returns transcript text plus any timestamps or diarization metadata Mistral provides. When `chunking` is left at `auto`, long recordings automatically fall back to overlapping chunked transcription.

Useful overrides:

```bash
export MISTRAL_API_KEY=...
export OPENPLANTER_MISTRAL_TRANSCRIPTION_MODEL=voxtral-mini-latest
export OPENPLANTER_MISTRAL_TRANSCRIPTION_MAX_BYTES=104857600
export OPENPLANTER_MISTRAL_TRANSCRIPTION_CHUNK_MAX_SECONDS=900
export OPENPLANTER_MISTRAL_TRANSCRIPTION_CHUNK_OVERLAP_SECONDS=2.0
```

Notes:
- The tool only accepts local workspace files.
- Long-form chunking requires `ffmpeg` and `ffprobe` to be available at runtime.
- `chunking: "force"` always chunks, and `chunking: "off"` keeps the single-upload path.
- Video inputs are audio-extracted with `ffmpeg` before transcription.

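All keys can also be set with an `OPENPLANTER_` prefix (e.g. `OPENPLANTER_OPENAI_API_KEY`), via `.env` files in the workspace, or via CLI flags. For transcription, the prefixed form is `OPENPLANTER_MISTRAL_TRANSCRIPTION_API_KEY`; the generic `MISTRAL_API_KEY` fallback has no prefixed variant.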

## Agent Tools

The agent has access to 19 tools, organized around its investigation workflow:
The agent has access to 20 tools, organized around its investigation workflow:

**Dataset ingestion & workspace** — `list_files`, `search_files`, `repo_map`, `read_file`, `write_file`, `edit_file`, `hashline_edit`, `apply_patch` — load, inspect, and transform source datasets; write structured findings.

**Shell execution** — `run_shell`, `run_shell_bg`, `check_shell_bg`, `kill_shell_bg` — run analysis scripts, data pipelines, and validation checks.

**Web** — `web_search` (Exa), `fetch_url` — pull public records, verify entities, and retrieve supplementary data.

**Audio** — `audio_transcribe` — transcribe local audio or video with Mistral, including optional timestamps, diarization, and automatic chunking for long recordings.

**Planning & delegation** — `think`, `subtask`, `execute`, `list_artifacts`, `read_artifact` — decompose investigations into focused sub-tasks, each with acceptance criteria and independent verification.

In **recursive mode** (the default), the agent spawns sub-agents via `subtask` and `execute` to parallelize entity resolution, cross-dataset linking, and evidence-chain construction across large investigations.
Expand Down
6 changes: 6 additions & 0 deletions agent/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -213,6 +213,7 @@ def _load_credentials(
cerebras_api_key=user_creds.cerebras_api_key,
exa_api_key=user_creds.exa_api_key,
voyage_api_key=user_creds.voyage_api_key,
mistral_transcription_api_key=user_creds.mistral_transcription_api_key,
)

store = CredentialStore(workspace=cfg.workspace, session_root_dir=cfg.session_root_dir)
Expand All @@ -229,6 +230,8 @@ def _load_credentials(
creds.exa_api_key = stored.exa_api_key
if stored.voyage_api_key:
creds.voyage_api_key = stored.voyage_api_key
if stored.mistral_transcription_api_key:
creds.mistral_transcription_api_key = stored.mistral_transcription_api_key

env_creds = credentials_from_env()
if env_creds.openai_api_key:
Expand All @@ -243,6 +246,8 @@ def _load_credentials(
creds.exa_api_key = env_creds.exa_api_key
if env_creds.voyage_api_key:
creds.voyage_api_key = env_creds.voyage_api_key
if env_creds.mistral_transcription_api_key:
creds.mistral_transcription_api_key = env_creds.mistral_transcription_api_key

for env_path in discover_env_candidates(cfg.workspace):
file_creds = parse_env_file(env_path)
Expand Down Expand Up @@ -304,6 +309,7 @@ def _apply_runtime_overrides(cfg: AgentConfig, args: argparse.Namespace, creds:
cfg.cerebras_api_key = creds.cerebras_api_key
cfg.exa_api_key = creds.exa_api_key
cfg.voyage_api_key = creds.voyage_api_key
cfg.mistral_transcription_api_key = creds.mistral_transcription_api_key
cfg.api_key = cfg.openai_api_key

if args.base_url:
Expand Down
9 changes: 9 additions & 0 deletions agent/builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -165,6 +165,15 @@ def build_engine(cfg: AgentConfig) -> RLMEngine:
max_search_hits=cfg.max_search_hits,
exa_api_key=cfg.exa_api_key,
exa_base_url=cfg.exa_base_url,
mistral_transcription_api_key=cfg.mistral_transcription_api_key,
mistral_transcription_base_url=cfg.mistral_transcription_base_url,
mistral_transcription_model=cfg.mistral_transcription_model,
mistral_transcription_max_bytes=cfg.mistral_transcription_max_bytes,
mistral_transcription_chunk_max_seconds=cfg.mistral_transcription_chunk_max_seconds,
mistral_transcription_chunk_overlap_seconds=cfg.mistral_transcription_chunk_overlap_seconds,
mistral_transcription_max_chunks=cfg.mistral_transcription_max_chunks,
mistral_transcription_request_timeout_sec=cfg.mistral_transcription_request_timeout_sec,
max_observation_chars=cfg.max_observation_chars,
)

try:
Expand Down
63 changes: 63 additions & 0 deletions agent/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,13 @@
from dataclasses import dataclass
from pathlib import Path

MISTRAL_TRANSCRIPTION_BASE_URL = "https://api.mistral.ai"
MISTRAL_TRANSCRIPTION_DEFAULT_MODEL = "voxtral-mini-latest"
MISTRAL_TRANSCRIPTION_CHUNK_MAX_SECONDS = 900
MISTRAL_TRANSCRIPTION_CHUNK_OVERLAP_SECONDS = 2.0
MISTRAL_TRANSCRIPTION_MAX_CHUNKS = 48
MISTRAL_TRANSCRIPTION_REQUEST_TIMEOUT_SEC = 180

PROVIDER_DEFAULT_MODELS: dict[str, str] = {
"openai": "gpt-5.2",
"anthropic": "claude-opus-4-6",
Expand All @@ -27,12 +34,24 @@ class AgentConfig:
cerebras_base_url: str = "https://api.cerebras.ai/v1"
ollama_base_url: str = "http://localhost:11434/v1"
exa_base_url: str = "https://api.exa.ai"
mistral_transcription_base_url: str = MISTRAL_TRANSCRIPTION_BASE_URL
openai_api_key: str | None = None
anthropic_api_key: str | None = None
openrouter_api_key: str | None = None
cerebras_api_key: str | None = None
exa_api_key: str | None = None
voyage_api_key: str | None = None
mistral_transcription_api_key: str | None = None
mistral_transcription_model: str = MISTRAL_TRANSCRIPTION_DEFAULT_MODEL
mistral_transcription_max_bytes: int = 100 * 1024 * 1024
mistral_transcription_chunk_max_seconds: int = MISTRAL_TRANSCRIPTION_CHUNK_MAX_SECONDS
mistral_transcription_chunk_overlap_seconds: float = (
MISTRAL_TRANSCRIPTION_CHUNK_OVERLAP_SECONDS
)
mistral_transcription_max_chunks: int = MISTRAL_TRANSCRIPTION_MAX_CHUNKS
mistral_transcription_request_timeout_sec: int = (
MISTRAL_TRANSCRIPTION_REQUEST_TIMEOUT_SEC
)
max_depth: int = 4
max_steps_per_call: int = 100
budget_extension_enabled: bool = True
Expand Down Expand Up @@ -71,6 +90,11 @@ def from_env(cls, workspace: str | Path) -> "AgentConfig":
cerebras_api_key = os.getenv("OPENPLANTER_CEREBRAS_API_KEY") or os.getenv("CEREBRAS_API_KEY")
exa_api_key = os.getenv("OPENPLANTER_EXA_API_KEY") or os.getenv("EXA_API_KEY")
voyage_api_key = os.getenv("OPENPLANTER_VOYAGE_API_KEY") or os.getenv("VOYAGE_API_KEY")
mistral_transcription_api_key = (
os.getenv("OPENPLANTER_MISTRAL_TRANSCRIPTION_API_KEY")
or os.getenv("MISTRAL_TRANSCRIPTION_API_KEY")
or os.getenv("MISTRAL_API_KEY")
)
openai_base_url = os.getenv("OPENPLANTER_OPENAI_BASE_URL") or os.getenv(
"OPENPLANTER_BASE_URL",
"https://api.openai.com/v1",
Expand Down Expand Up @@ -100,12 +124,51 @@ def from_env(cls, workspace: str | Path) -> "AgentConfig":
cerebras_base_url=os.getenv("OPENPLANTER_CEREBRAS_BASE_URL", "https://api.cerebras.ai/v1"),
ollama_base_url=os.getenv("OPENPLANTER_OLLAMA_BASE_URL", "http://localhost:11434/v1"),
exa_base_url=os.getenv("OPENPLANTER_EXA_BASE_URL", "https://api.exa.ai"),
mistral_transcription_base_url=os.getenv(
"OPENPLANTER_MISTRAL_TRANSCRIPTION_BASE_URL",
os.getenv("MISTRAL_TRANSCRIPTION_BASE_URL")
or os.getenv("MISTRAL_BASE_URL")
or MISTRAL_TRANSCRIPTION_BASE_URL,
),
openai_api_key=openai_api_key,
anthropic_api_key=anthropic_api_key,
openrouter_api_key=openrouter_api_key,
cerebras_api_key=cerebras_api_key,
exa_api_key=exa_api_key,
voyage_api_key=voyage_api_key,
mistral_transcription_api_key=(mistral_transcription_api_key or "").strip() or None,
mistral_transcription_model=(
os.getenv("OPENPLANTER_MISTRAL_TRANSCRIPTION_MODEL")
or os.getenv("MISTRAL_TRANSCRIPTION_MODEL")
or MISTRAL_TRANSCRIPTION_DEFAULT_MODEL
),
mistral_transcription_max_bytes=int(
os.getenv("OPENPLANTER_MISTRAL_TRANSCRIPTION_MAX_BYTES", "104857600")
),
mistral_transcription_chunk_max_seconds=int(
os.getenv(
"OPENPLANTER_MISTRAL_TRANSCRIPTION_CHUNK_MAX_SECONDS",
str(MISTRAL_TRANSCRIPTION_CHUNK_MAX_SECONDS),
)
),
mistral_transcription_chunk_overlap_seconds=float(
os.getenv(
"OPENPLANTER_MISTRAL_TRANSCRIPTION_CHUNK_OVERLAP_SECONDS",
str(MISTRAL_TRANSCRIPTION_CHUNK_OVERLAP_SECONDS),
)
),
mistral_transcription_max_chunks=int(
os.getenv(
"OPENPLANTER_MISTRAL_TRANSCRIPTION_MAX_CHUNKS",
str(MISTRAL_TRANSCRIPTION_MAX_CHUNKS),
)
),
mistral_transcription_request_timeout_sec=int(
os.getenv(
"OPENPLANTER_MISTRAL_TRANSCRIPTION_REQUEST_TIMEOUT_SEC",
str(MISTRAL_TRANSCRIPTION_REQUEST_TIMEOUT_SEC),
)
),
max_depth=int(os.getenv("OPENPLANTER_MAX_DEPTH", "4")),
max_steps_per_call=int(os.getenv("OPENPLANTER_MAX_STEPS", "100")),
budget_extension_enabled=budget_extension_enabled,
Expand Down
34 changes: 34 additions & 0 deletions agent/credentials.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ class CredentialBundle:
cerebras_api_key: str | None = None
exa_api_key: str | None = None
voyage_api_key: str | None = None
mistral_transcription_api_key: str | None = None

def has_any(self) -> bool:
return bool(
Expand All @@ -26,6 +27,10 @@ def has_any(self) -> bool:
or (self.cerebras_api_key and self.cerebras_api_key.strip())
or (self.exa_api_key and self.exa_api_key.strip())
or (self.voyage_api_key and self.voyage_api_key.strip())
or (
self.mistral_transcription_api_key
and self.mistral_transcription_api_key.strip()
)
)

def merge_missing(self, other: "CredentialBundle") -> None:
Expand All @@ -41,6 +46,11 @@ def merge_missing(self, other: "CredentialBundle") -> None:
self.exa_api_key = other.exa_api_key
if not self.voyage_api_key and other.voyage_api_key:
self.voyage_api_key = other.voyage_api_key
if (
not self.mistral_transcription_api_key
and other.mistral_transcription_api_key
):
self.mistral_transcription_api_key = other.mistral_transcription_api_key

def to_json(self) -> dict[str, str]:
out: dict[str, str] = {}
Expand All @@ -56,6 +66,8 @@ def to_json(self) -> dict[str, str]:
out["exa_api_key"] = self.exa_api_key
if self.voyage_api_key:
out["voyage_api_key"] = self.voyage_api_key
if self.mistral_transcription_api_key:
out["mistral_transcription_api_key"] = self.mistral_transcription_api_key
return out

@classmethod
Expand All @@ -69,6 +81,10 @@ def from_json(cls, payload: dict[str, str] | None) -> "CredentialBundle":
cerebras_api_key=(payload.get("cerebras_api_key") or "").strip() or None,
exa_api_key=(payload.get("exa_api_key") or "").strip() or None,
voyage_api_key=(payload.get("voyage_api_key") or "").strip() or None,
mistral_transcription_api_key=(
payload.get("mistral_transcription_api_key") or ""
).strip()
or None,
)


Expand Down Expand Up @@ -115,6 +131,13 @@ def parse_env_file(path: Path) -> CredentialBundle:
or None,
exa_api_key=(env.get("EXA_API_KEY") or env.get("OPENPLANTER_EXA_API_KEY") or "").strip() or None,
voyage_api_key=(env.get("VOYAGE_API_KEY") or env.get("OPENPLANTER_VOYAGE_API_KEY") or "").strip() or None,
mistral_transcription_api_key=(
env.get("OPENPLANTER_MISTRAL_TRANSCRIPTION_API_KEY")
or env.get("MISTRAL_TRANSCRIPTION_API_KEY")
or env.get("MISTRAL_API_KEY")
or ""
).strip()
or None,
)


Expand All @@ -140,6 +163,13 @@ def credentials_from_env() -> CredentialBundle:
or None,
exa_api_key=(os.getenv("OPENPLANTER_EXA_API_KEY") or os.getenv("EXA_API_KEY") or "").strip() or None,
voyage_api_key=(os.getenv("OPENPLANTER_VOYAGE_API_KEY") or os.getenv("VOYAGE_API_KEY") or "").strip() or None,
mistral_transcription_api_key=(
os.getenv("OPENPLANTER_MISTRAL_TRANSCRIPTION_API_KEY")
or os.getenv("MISTRAL_TRANSCRIPTION_API_KEY")
or os.getenv("MISTRAL_API_KEY")
or ""
).strip()
or None,
)


Expand Down Expand Up @@ -230,6 +260,7 @@ def prompt_for_credentials(
cerebras_api_key=existing.cerebras_api_key,
exa_api_key=existing.exa_api_key,
voyage_api_key=existing.voyage_api_key,
mistral_transcription_api_key=existing.mistral_transcription_api_key,
)

should_prompt = force or not current.has_any()
Expand Down Expand Up @@ -263,6 +294,9 @@ def _ask(label: str, existing_value: str | None) -> str | None:
current.cerebras_api_key = _ask("Cerebras", current.cerebras_api_key)
current.exa_api_key = _ask("Exa", current.exa_api_key)
current.voyage_api_key = _ask("Voyage", current.voyage_api_key)
current.mistral_transcription_api_key = _ask(
"Mistral Transcription", current.mistral_transcription_api_key
)
if not force and current.has_any() and not existing.has_any():
changed = True
return current, changed
75 changes: 75 additions & 0 deletions agent/engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
"fetch_url",
"read_file",
"read_image",
"audio_transcribe",
"list_artifacts",
"read_artifact",
}
Expand Down Expand Up @@ -1261,6 +1262,80 @@ def _apply_tool_call(
self._pending_image.data = (b64, media_type)
return False, text

if name == "audio_transcribe":
path = str(args.get("path", "")).strip()
if not path:
return False, "audio_transcribe requires path"
diarize = args.get("diarize")
diarize = diarize if isinstance(diarize, bool) else None
raw_timestamps = args.get("timestamp_granularities")
if isinstance(raw_timestamps, list):
timestamp_granularities = [
str(v).strip() for v in raw_timestamps if str(v).strip()
]
elif isinstance(raw_timestamps, str) and raw_timestamps.strip():
timestamp_granularities = [raw_timestamps.strip()]
else:
timestamp_granularities = None
raw_context_bias = args.get("context_bias")
if isinstance(raw_context_bias, list):
context_bias = [
str(v).strip() for v in raw_context_bias if str(v).strip()
]
elif isinstance(raw_context_bias, str) and raw_context_bias.strip():
context_bias = [
part.strip()
for part in raw_context_bias.split(",")
if part.strip()
]
else:
context_bias = None
language = str(args.get("language", "")).strip() or None
model = str(args.get("model", "")).strip() or None
raw_temperature = args.get("temperature")
temperature = None
if isinstance(raw_temperature, (int, float)) and not isinstance(
raw_temperature, bool
):
temperature = float(raw_temperature)
chunking = str(args.get("chunking", "")).strip().lower() or None
raw_chunk_max_seconds = args.get("chunk_max_seconds")
chunk_max_seconds = None
if isinstance(raw_chunk_max_seconds, int) and not isinstance(
raw_chunk_max_seconds, bool
):
chunk_max_seconds = raw_chunk_max_seconds
raw_chunk_overlap_seconds = args.get("chunk_overlap_seconds")
chunk_overlap_seconds = None
if isinstance(raw_chunk_overlap_seconds, (int, float)) and not isinstance(
raw_chunk_overlap_seconds, bool
):
chunk_overlap_seconds = float(raw_chunk_overlap_seconds)
raw_max_chunks = args.get("max_chunks")
max_chunks = None
if isinstance(raw_max_chunks, int) and not isinstance(raw_max_chunks, bool):
max_chunks = raw_max_chunks
raw_continue_on_chunk_error = args.get("continue_on_chunk_error")
continue_on_chunk_error = (
raw_continue_on_chunk_error
if isinstance(raw_continue_on_chunk_error, bool)
else None
)
return False, self.tools.audio_transcribe(
path=path,
diarize=diarize,
timestamp_granularities=timestamp_granularities,
context_bias=context_bias,
language=language,
model=model,
temperature=temperature,
chunking=chunking,
chunk_max_seconds=chunk_max_seconds,
chunk_overlap_seconds=chunk_overlap_seconds,
max_chunks=max_chunks,
continue_on_chunk_error=continue_on_chunk_error,
)

if name == "write_file":
path = str(args.get("path", "")).strip()
if not path:
Expand Down
Loading