diff --git a/.dockerignore b/.dockerignore
new file mode 100644
index 000000000..fba4757f7
--- /dev/null
+++ b/.dockerignore
@@ -0,0 +1,62 @@
+# Git
+.git
+.gitignore
+
+# Python virtual environments (including nested)
+**/.venv
+**/venv
+**/*.egg-info
+**/dist
+**/build
+
+# Python cache (including nested)
+**/__pycache__
+**/*.pyc
+**/*.pyo
+**/.pytest_cache
+**/.mypy_cache
+**/.ruff_cache
+**/.coverage
+**/htmlcov
+
+# Node (including nested)
+**/node_modules
+**/.npm
+**/.pnpm-store
+
+# IDE
+.idea
+.vscode
+*.swp
+*.swo
+
+# Local config
+.env
+.env.*
+!.env.example
+
+# Flow tracking
+.flow
+
+# Demo fixtures (large)
+demo/fixtures
+
+# Documentation build
+site
+docs/_build
+
+# Test artifacts
+.hypothesis
+
+# OS files
+.DS_Store
+Thumbs.db
+
+# Logs
+*.log
+logs
+
+# Temporary
+tmp
+temp
+*.tmp
diff --git a/.flow/epics/fn-56.json b/.flow/epics/fn-56.json
new file mode 100644
index 000000000..d38ed890f
--- /dev/null
+++ b/.flow/epics/fn-56.json
@@ -0,0 +1,13 @@
+{
+ "branch_name": "fn-56",
+ "created_at": "2026-02-02T22:00:58.309446Z",
+ "depends_on_epics": [],
+ "id": "fn-56",
+ "next_task": 1,
+ "plan_review_status": "unknown",
+ "plan_reviewed_at": null,
+ "spec_path": ".flow/specs/fn-56.md",
+ "status": "open",
+ "title": "Self-Debugging Chat Widget (Dogfooding)",
+ "updated_at": "2026-02-02T22:43:29.882512Z"
+}
diff --git a/.flow/specs/fn-56.md b/.flow/specs/fn-56.md
new file mode 100644
index 000000000..6f8252ca1
--- /dev/null
+++ b/.flow/specs/fn-56.md
@@ -0,0 +1,278 @@
+# Dataing Assistant (fn-56)
+
+A unified AI assistant for Dataing that handles infrastructure debugging, data questions, and investigation support.
+
+## Overview
+
+**Problem**: Users need help with various Dataing tasks - debugging infrastructure issues, understanding data quality problems, querying connected datasources, and getting context on investigations. Currently they must use external tools or ask for human help.
+
+**Solution**: Persistent chat widget ("Dataing Assistant") that provides a unified AI assistant with access to:
+- Local files, configs, and git history
+- Docker container status and logs
+- Connected datasources (reusing existing query tools)
+- Investigation context and findings
+- User's recent activity for contextual suggestions
+
+## Key Decisions (from interview)
+
+### Agent Configuration
+- **LLM Model**: Claude Sonnet (fast, cost-effective)
+- **Response time target**: First token under 3 seconds
+- **Agent focus**: Balanced - explain root cause AND provide fix steps with code snippets
+- **Out-of-scope handling**: Polite decline, redirect to docs
+- **Tone**: Match existing Dataing UI voice
+
+### Tools & Capabilities (Priority Order)
+
+1. **File Access**
+ - Read any UTF-8 text file in allowlisted directories
+ - Smart chunking: request specific line ranges
+ - Grep-like search across files (max 100 results)
+ - Include logs, data samples (CSV/parquet first N rows)
+ - Centralized parsers in `core/parsing/` organized by file type
+
+2. **Git Access**
+ - Full read access via githunter tools
+ - blame_line, find_pr_discussion, get_file_experts
+ - Recent commits, branches, diffs
+
+3. **Docker Access**
+ - Container status via Docker API
+ - Log reading via pluggable LogProvider interface
+ - Auth: Configurable per deployment (socket, TCP+TLS, env auto-detect)
+
+4. **Log Providers** (pluggable interface)
+ - LocalFileLogProvider
+ - DockerLogProvider
+ - CloudWatchLogProvider (IAM role auth)
+
+5. **Datasource Access**
+ - Reuse existing query tools from investigation agents
+ - Full read access to connected datasources
+ - Unified tool registry for all capabilities
+
+6. **Environment Access**
+ - Read non-sensitive env vars (filter *SECRET*, *KEY*, *PASSWORD*, *TOKEN*)
+ - Compare current config with .env.example defaults
+
+### Security
+
+- **Path canonicalization** before allowlist check (prevent traversal)
+- **Blocked patterns**: `.env`, `*.pem`, `*.key`, `*secret*`, `*credential*`
+- **Security-blocked errors**: Suggest alternatives ("Can't read .env, but can check .env.example")
+- **Security findings**: Alert immediately if exposed secrets discovered
+- **Audit log**: Full log of every file read, search, and tool call
+- **Tool indicators**: Show detailed progress ("Reading docker-compose.yml...")
+
+### Data Model
+
+**Debug chats are investigations** with parent/child relationships:
+- Each chat session gets its own `investigation_id`
+- Can be linked to existing investigations as parent OR child
+- Child chats have full access to parent investigation context
+- DebugChatSession model with FK to Investigation when linked
+
+**Storage**: Hybrid Redis/Postgres
+- Recent sessions in Redis for fast access
+- Old sessions archived to Postgres
+- Retention: Configurable per tenant
+
+**Schema migration**: Add to existing migrations (035_dataing_assistant.sql)
+
+### User Experience
+
+- **Visibility**: All authenticated users (no restriction)
+- **Widget position**: Fixed bottom-20 right-4 (above DemoToggle)
+- **Panel width**: Resizable, remembers size per-user preference
+- **Keyboard shortcut**: None for MVP
+- **Markdown**: Full rendering (headers, lists, code blocks, links, tables)
+
+**Chat behavior**:
+- Smart placeholder text with example questions
+- Permanent history with session list (new sessions start fresh, can reopen old)
+- Minimize to button (badge shows unread), preserves state
+- Collapsible sections for long responses
+- Copy code button always visible on code blocks
+- Edit and resubmit previous messages
+
+**Streaming & errors**:
+- Queue messages if user sends while response streaming
+- Auto-retry 3x on errors before showing error
+- Offline: Retry with exponential backoff + "Reconnecting..." indicator
+
+### Concurrency & Limits
+
+- **Message queueing**: Complete current response, then process next
+- **Context limit**: Token-based, summarize when approaching model limit
+- **Rate limiting**: Admin-set token budget per tenant
+- **Limit exceeded**: Soft block with override for urgent issues
+- **Usage display**: Always visible ("X of Y tokens used this month")
+
+### Context & Memory
+
+- **User context**: Full access to recent investigations, alerts, queries
+- **Memory integration**: User confirms "This was helpful" to save to agent memory (fn-55)
+- **Multi-tenancy**: Tenant isolation - each tenant gets isolated agent instance
+
+### Export
+
+- **Formats**: Both JSON and Markdown export
+- **Sharing**: No sharing for MVP (export and send manually)
+
+### Testing & Telemetry
+
+- **Testing**: Unit tests with mocked LLM
+- **Dry run**: No special mode, use real APIs in test environment
+- **Telemetry**: Full integration with existing Dataing telemetry
+- **Metrics**: Defer to later (analyze datasets first)
+- **Analytics**: No query tracking (privacy-first)
+
+## Architecture
+
+### Backend Components
+
+```
+dataing/
+ agents/
+ assistant.py # DataingAssistant (was SelfDebugAgent)
+ tools/
+ registry.py # Unified tool registry
+ local_files.py # File reading with safety
+ docker.py # Docker API access
+ log_providers/
+ __init__.py # LogProvider protocol
+ local.py # LocalFileLogProvider
+ docker.py # DockerLogProvider
+ cloudwatch.py # CloudWatchLogProvider
+ core/
+ parsing/ # Centralized file parsers
+ yaml_parser.py
+ json_parser.py
+ text_parser.py
+ log_parser.py
+ data_parser.py # CSV, parquet sampling
+ entrypoints/api/routes/
+ assistant.py # API routes (was debug_chat.py)
+ models/
+ assistant.py # DebugChatSession, DebugChatMessage
+```
+
+### Frontend Components
+
+```
+features/assistant/
+ index.ts
+ AssistantWidget.tsx # Floating button + resizable panel
+ AssistantPanel.tsx # Chat interface
+ AssistantMessage.tsx # Message with collapsible sections
+ useAssistant.ts # State management hook
+ SessionList.tsx # Previous session selector
+```
+
+### Database Schema
+
+```sql
+-- 035_dataing_assistant.sql
+
+CREATE TABLE assistant_sessions (
+ id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
+ investigation_id UUID NOT NULL, -- Each session IS an investigation
+ tenant_id UUID NOT NULL,
+ user_id UUID NOT NULL,
+ parent_investigation_id UUID REFERENCES investigations(id),
+ is_parent BOOLEAN DEFAULT false,
+ created_at TIMESTAMPTZ DEFAULT NOW(),
+ last_activity TIMESTAMPTZ DEFAULT NOW(),
+ token_count INTEGER DEFAULT 0,
+ metadata JSONB
+);
+
+CREATE TABLE assistant_messages (
+ id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
+ session_id UUID REFERENCES assistant_sessions(id),
+ role TEXT NOT NULL, -- 'user', 'assistant', 'system', 'tool'
+ content TEXT NOT NULL,
+ tool_calls JSONB, -- For tool execution tracking
+ created_at TIMESTAMPTZ DEFAULT NOW(),
+ token_count INTEGER
+);
+
+CREATE TABLE assistant_audit_log (
+ id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
+ session_id UUID REFERENCES assistant_sessions(id),
+ action TEXT NOT NULL, -- 'file_read', 'search', 'query', 'docker_status'
+ target TEXT NOT NULL, -- File path, query, etc.
+ result_summary TEXT,
+ created_at TIMESTAMPTZ DEFAULT NOW()
+);
+
+CREATE INDEX idx_assistant_sessions_tenant ON assistant_sessions(tenant_id);
+CREATE INDEX idx_assistant_sessions_user ON assistant_sessions(user_id);
+CREATE INDEX idx_assistant_messages_session ON assistant_messages(session_id);
+```
+
+## Quick Commands
+
+```bash
+# Run backend
+just dev-backend
+
+# Run frontend
+just dev-frontend
+
+# Run tests
+uv run pytest python-packages/dataing/tests/unit/agents/test_assistant.py -v
+
+# Generate OpenAPI client
+just generate-client
+
+# Run migrations
+just migrate
+```
+
+## Acceptance Criteria
+
+- [ ] Assistant widget visible on all authenticated pages
+- [ ] Resizable panel that remembers size per-user
+- [ ] Full markdown rendering with syntax-highlighted code blocks
+- [ ] Copy code button on all code blocks
+- [ ] Agent streams response in real-time with tool progress indicators
+- [ ] Can read files from allowlisted directories with smart chunking
+- [ ] Can search across files (grep-like) with result limits
+- [ ] Can access git history via githunter tools
+- [ ] Can check Docker container status via API
+- [ ] Can read logs via pluggable LogProvider interface
+- [ ] Can query connected datasources (reuses existing tools)
+- [ ] Has full context of user's recent activity
+- [ ] Sessions persist permanently with session history browser
+- [ ] Parent/child investigation linking works
+- [ ] Path traversal attempts rejected with helpful alternatives
+- [ ] Security findings alert user immediately
+- [ ] Full audit log of tool usage
+- [ ] Token-based usage tracking with admin-set budgets
+- [ ] Soft block on limit exceeded with override option
+- [ ] Auto-retry 3x on errors
+- [ ] "This was helpful" saves to agent memory
+- [ ] Export to JSON and Markdown works
+
+## Tasks (Updated)
+
+1. **Create unified tool registry** - Central registry for all assistant tools
+2. **Create centralized file parsers** - core/parsing/ module by file type
+3. **Create DataingAssistant agent** - Main agent with unified tools
+4. **Create log provider interface + implementations** - Pluggable log access
+5. **Create Docker status tool** - Container status via Docker API
+6. **Create assistant API routes** - Sessions, messages, streaming
+7. **Create database migration** - 035_dataing_assistant.sql
+8. **Create frontend AssistantWidget** - Resizable floating panel
+9. **Create frontend AssistantPanel** - Chat UI with all features
+10. **Integrate with existing query tools** - Datasource access
+11. **Add investigation linking** - Parent/child relationships
+12. **Add memory integration** - "This was helpful" feedback
+
+## References
+
+- Existing patterns: `agents/client.py`, `routes/investigations.py`
+- Bond-agent tools: `/Users/bordumb/workspace/repositories/bond-agent/src/bond/tools/`
+- SSE-starlette: https://pypi.org/project/sse-starlette/
+- shadcn/ui Sheet: https://ui.shadcn.com/docs/components/sheet
diff --git a/.flow/tasks/fn-56.1.json b/.flow/tasks/fn-56.1.json
new file mode 100644
index 000000000..0f13894e0
--- /dev/null
+++ b/.flow/tasks/fn-56.1.json
@@ -0,0 +1,28 @@
+{
+ "assignee": "bordumbb@gmail.com",
+ "claim_note": "",
+ "claimed_at": "2026-02-02T23:38:26.263668Z",
+ "created_at": "2026-02-02T22:01:48.610812Z",
+ "depends_on": [
+ "fn-56.7",
+ "fn-56.2",
+ "fn-56.9",
+ "fn-56.10"
+ ],
+ "epic": "fn-56",
+ "evidence": {
+ "files_created": [
+ "python-packages/dataing/src/dataing/agents/assistant.py",
+ "python-packages/dataing/tests/unit/agents/test_assistant.py"
+ ],
+ "pre_commit_passed": true,
+ "tests_failed": 0,
+ "tests_passed": 22
+ },
+ "id": "fn-56.1",
+ "priority": null,
+ "spec_path": ".flow/tasks/fn-56.1.md",
+ "status": "done",
+ "title": "Create DataingAssistant agent (agents/assistant.py)",
+ "updated_at": "2026-02-02T23:41:27.015915Z"
+}
diff --git a/.flow/tasks/fn-56.1.md b/.flow/tasks/fn-56.1.md
new file mode 100644
index 000000000..bc6f51617
--- /dev/null
+++ b/.flow/tasks/fn-56.1.md
@@ -0,0 +1,108 @@
+# fn-56.1 Create DataingAssistant agent
+
+## Description
+Create the DataingAssistant agent that provides a unified AI assistant for Dataing with access to local files, Docker status, logs, git history, and connected datasources.
+
+## File to Create
+`python-packages/dataing/src/dataing/agents/assistant.py`
+
+## Implementation
+
+```python
+from bond import BondAgent, StreamHandlers
+from bond.tools.github import github_toolset, GitHubAdapter
+from bond.tools.githunter import githunter_toolset, GitHunterAdapter
+
+from dataing.agents.tools.registry import ToolRegistry, get_default_registry
+from dataing.agents.tools.local_files import register_local_file_tools
+from dataing.agents.tools.docker import register_docker_tools
+
+class DataingAssistant:
+ """Unified AI assistant for Dataing platform.
+
+ Provides help with:
+ - Infrastructure debugging (Docker, logs, config files)
+ - Data questions via connected datasources
+ - Investigation context and findings
+ - Git history and code understanding
+ """
+
+ def __init__(
+ self,
+ tenant_id: str,
+ github_token: str | None = None,
+ repo_path: str = ".",
+ ):
+ # Setup tool registry with all available tools
+ # Create BondAgent with unified toolset
+ pass
+
+ async def ask(
+ self,
+ question: str,
+ session_id: str | None = None,
+ handlers: StreamHandlers | None = None,
+ ) -> str:
+ """Ask the assistant a question."""
+ pass
+```
+
+## Key Points
+- Follow pattern from `agents/client.py:45-127`
+- Use unified tool registry (`agents/tools/registry.py`)
+- Include all tools: local files, Docker, logs, git
+- System prompt should explain Dataing architecture (refer to CLAUDE.md content)
+- Support `StreamHandlers` for real-time streaming output
+- Gracefully degrade if optional dependencies unavailable
+- LLM Model: Claude Sonnet (fast, cost-effective)
+- Response time target: First token under 3 seconds
+
+## Tools to Include
+1. Local file access (read, search, list)
+2. Docker status (containers, health, stats)
+3. Log providers (local, Docker, CloudWatch)
+4. Git access (github_toolset, githunter_toolset)
+5. Datasource queries (reuse from investigation agents)
+
+## References
+- BondAgent pattern: `agents/client.py:45-127`
+- Tool registry: `agents/tools/registry.py`
+- Spec: `.flow/specs/fn-56.md`
+
+## Acceptance
+- [ ] DataingAssistant class created in `agents/assistant.py`
+- [ ] Uses unified tool registry with all tool categories
+- [ ] System prompt includes Dataing architecture overview
+- [ ] Supports StreamHandlers for real-time output
+- [ ] Gracefully handles missing optional dependencies
+- [ ] Unit test passes: `uv run pytest python-packages/dataing/tests/unit/agents/test_assistant.py -v`
+
+## Done summary
+## Summary
+
+Created DataingAssistant agent - the main unified AI assistant for Dataing platform.
+
+### Features:
+- **Multi-tool integration**: Local files, Docker, logs, git (bond-agent)
+- **Streaming support**: Full StreamHandlers integration for real-time output
+- **Multi-tenancy**: Tenant-isolated instances
+- **Graceful degradation**: Works without optional dependencies (github token, etc.)
+
+### Tools included:
+1. **File tools**: read_local_file, search_in_files, list_directory
+2. **Docker tools**: list_containers, get_status, get_health, get_stats, find_unhealthy
+3. **Log tools**: get_logs, search_logs, get_recent_errors
+4. **Git tools**: githunter_toolset, github_toolset (from bond-agent)
+
+### System prompt:
+- Explains Dataing platform capabilities
+- Guides response format and approach
+- Defines tool usage guidelines
+
+## Files Created
+- `agents/assistant.py` - DataingAssistant class with all tools
+- `tests/unit/agents/test_assistant.py` - 22 unit tests
+## Evidence
+- Commits:
+- Tests:
+- PRs:
diff --git a/.flow/tasks/fn-56.10.json b/.flow/tasks/fn-56.10.json
new file mode 100644
index 000000000..83f92c894
--- /dev/null
+++ b/.flow/tasks/fn-56.10.json
@@ -0,0 +1,25 @@
+{
+ "assignee": "bordumbb@gmail.com",
+ "claim_note": "",
+ "claimed_at": "2026-02-02T23:16:17.741869Z",
+ "created_at": "2026-02-02T22:43:38.862577Z",
+ "depends_on": [
+ "fn-56.7"
+ ],
+ "epic": "fn-56",
+ "evidence": {
+ "files_created": [
+ "python-packages/dataing/src/dataing/agents/tools/docker.py",
+ "python-packages/dataing/tests/unit/agents/tools/test_docker.py"
+ ],
+ "pre_commit_passed": true,
+ "tests_failed": 0,
+ "tests_passed": 27
+ },
+ "id": "fn-56.10",
+ "priority": null,
+ "spec_path": ".flow/tasks/fn-56.10.md",
+ "status": "done",
+ "title": "Create Docker status tool",
+ "updated_at": "2026-02-02T23:22:09.352254Z"
+}
diff --git a/.flow/tasks/fn-56.10.md b/.flow/tasks/fn-56.10.md
new file mode 100644
index 000000000..f40b9eb60
--- /dev/null
+++ b/.flow/tasks/fn-56.10.md
@@ -0,0 +1,43 @@
+# fn-56.10 Create Docker status tool
+
+## Description
+TBD
+
+## Acceptance
+- [ ] TBD
+
+## Done summary
+## Summary
+
+Created Docker status tool for the Dataing Assistant with:
+
+1. **DockerStatusTool class** - Core functionality:
+ - `list_containers()` - List all containers with status
+ - `get_container_status()` - Detailed status for a container
+ - `get_container_health()` - Health check information
+ - `get_container_stats()` - Resource usage (CPU, memory, network)
+ - `find_unhealthy_containers()` - Find unhealthy/stopped containers
+
+2. **Agent tool functions** - Human-readable output:
+ - `list_docker_containers()` - Formatted container list with status indicators
+ - `get_docker_container_status()` - Detailed container info
+ - `get_docker_container_health()` - Health check results
+ - `get_docker_container_stats()` - Resource usage display
+ - `find_unhealthy_docker_containers()` - Unhealthy container report
+
+3. **Registry integration**:
+ - `register_docker_tools()` - Registers all tools with ToolRegistry
+
+Features:
+- Async/await support for non-blocking Docker API calls
+- Graceful error handling when Docker is unavailable
+- Human-readable output formatting
+- Emoji status indicators (🟢/🔴) for quick scanning
+
+## Files Created
+- `agents/tools/docker.py` - Main implementation
+- `tests/unit/agents/tools/test_docker.py` - 27 unit tests
+## Evidence
+- Commits:
+- Tests:
+- PRs:
diff --git a/.flow/tasks/fn-56.11.json b/.flow/tasks/fn-56.11.json
new file mode 100644
index 000000000..93730aca3
--- /dev/null
+++ b/.flow/tasks/fn-56.11.json
@@ -0,0 +1,23 @@
+{
+ "assignee": "bordumbb@gmail.com",
+ "claim_note": "",
+ "claimed_at": "2026-02-02T23:43:10.542555Z",
+ "created_at": "2026-02-02T22:43:38.942802Z",
+ "depends_on": [],
+ "epic": "fn-56",
+ "evidence": {
+ "migration_file": "035_dataing_assistant.sql",
+ "tables_created": [
+ "assistant_sessions",
+ "assistant_messages",
+ "assistant_audit_log"
+ ],
+ "tests_passed": true
+ },
+ "id": "fn-56.11",
+ "priority": null,
+ "spec_path": ".flow/tasks/fn-56.11.md",
+ "status": "done",
+  "title": "Create database migration (035_dataing_assistant.sql)",
+ "updated_at": "2026-02-02T23:44:13.931632Z"
+}
diff --git a/.flow/tasks/fn-56.11.md b/.flow/tasks/fn-56.11.md
new file mode 100644
index 000000000..9fa76ca4c
--- /dev/null
+++ b/.flow/tasks/fn-56.11.md
@@ -0,0 +1,54 @@
+# fn-56.11 Create database migration (035_dataing_assistant.sql)
+
+## Description
+Create database migration for Dataing Assistant tables: sessions, messages, and audit log.
+
+## File Created
+`python-packages/dataing/migrations/035_dataing_assistant.sql`
+
+## Schema
+
+### assistant_sessions
+- Links each session to its own investigation (investigation_id)
+- Supports parent/child investigation linking
+- Tracks token usage and last activity
+- Stores user preferences in metadata JSONB
+
+### assistant_messages
+- Messages in sessions (user, assistant, system, tool roles)
+- Tracks tool calls as JSONB array
+- Per-message token count
+
+### assistant_audit_log
+- Audit trail of all tool usage
+- Tracks action type, target, and result summary
+- Full metadata in JSONB
+
+## Acceptance
+- [x] Migration file created at `migrations/035_dataing_assistant.sql`
+- [x] assistant_sessions table with investigation linking
+- [x] assistant_messages table with tool_calls JSONB
+- [x] assistant_audit_log table for security audit
+- [x] Proper indexes for tenant, user, and session queries
+- [x] Trigger to auto-update last_activity on new messages
+- [x] ON DELETE CASCADE for foreign keys
+
+## Done summary
+## Summary
+
+Created database migration for Dataing Assistant (035_dataing_assistant.sql).
+
+### Tables:
+1. **assistant_sessions** - Sessions linked to investigations with parent/child support
+2. **assistant_messages** - Chat messages with tool call tracking
+3. **assistant_audit_log** - Security audit log for all tool usage
+
+### Features:
+- Proper foreign keys to investigations, tenants, users
+- Indexes for fast tenant/user/session queries
+- Auto-update trigger for last_activity
+- ON DELETE CASCADE for cleanup
+## Evidence
+- Commits:
+- Tests:
+- PRs:
diff --git a/.flow/tasks/fn-56.12.json b/.flow/tasks/fn-56.12.json
new file mode 100644
index 000000000..1cea8dc70
--- /dev/null
+++ b/.flow/tasks/fn-56.12.json
@@ -0,0 +1,23 @@
+{
+ "assignee": "bordumbb@gmail.com",
+ "claim_note": "",
+ "claimed_at": "2026-02-03T00:05:42.793766Z",
+ "created_at": "2026-02-02T22:43:39.025602Z",
+ "depends_on": [
+ "fn-56.6"
+ ],
+ "epic": "fn-56",
+ "evidence": {
+ "commit": "6295da89",
+ "frontend_updated": true,
+ "new_endpoint_added": true,
+ "parent_context_loading": true,
+ "tests_passing": 24
+ },
+ "id": "fn-56.12",
+ "priority": null,
+ "spec_path": ".flow/tasks/fn-56.12.md",
+ "status": "done",
+ "title": "Add investigation parent/child linking",
+ "updated_at": "2026-02-03T00:10:36.386823Z"
+}
diff --git a/.flow/tasks/fn-56.12.md b/.flow/tasks/fn-56.12.md
new file mode 100644
index 000000000..39c804331
--- /dev/null
+++ b/.flow/tasks/fn-56.12.md
@@ -0,0 +1,56 @@
+# fn-56.12 Add investigation parent/child linking
+
+## Description
+Add parent/child investigation linking so chat sessions can be linked to existing investigations and have full access to parent investigation context.
+
+## What Was Done
+
+### 1. Backend: Load parent investigation context
+Added `_load_parent_investigation_context()` function in `routes/assistant.py` that:
+- Loads session to check for `parent_investigation_id`
+- Fetches parent investigation data (findings, events, metadata)
+- Returns context dict for the assistant
+
+Updated `_process_message()` to call this and merge context.
+
+### 2. Backend: Add endpoint to list sessions by investigation
+Added `GET /assistant/investigations/{investigation_id}/sessions` endpoint that:
+- Lists all assistant sessions linked to an investigation as parent
+- Returns same `ListSessionsResponse` format as regular list
+
+### 3. Frontend: Support parentInvestigationId
+Updated `useAssistant.ts`:
+- Added `parentInvestigationId` to `AssistantSession` interface
+- Added `parentInvestigationId` to `UseAssistantOptions`
+- Updated `createSession()` to accept and pass `parentInvestigationId`
+- Updated `loadSession()` to load `parent_investigation_id` from API
+
+### 4. Regenerated OpenAPI client
+Ran `just generate-client` to update frontend types.
+
+### 5. Added unit tests
+Added 4 new tests in `test_assistant.py`:
+- `test_load_parent_investigation_context_no_parent`
+- `test_load_parent_investigation_context_session_not_found`
+- `test_load_parent_investigation_context_with_parent`
+- `test_load_parent_investigation_context_parent_not_found`
+
+## Acceptance
+- [x] Sessions can be created with parent_investigation_id
+- [x] Parent investigation context loaded when processing messages
+- [x] Endpoint to list sessions by parent investigation
+- [x] Frontend hook supports parentInvestigationId
+- [x] OpenAPI client regenerated
+- [x] Unit tests added and passing (24 total)
+
+## Done summary
+Added parent/child investigation linking: backend loads parent context for chat, added endpoint to list sessions by investigation, frontend supports parentInvestigationId.
+
+### Files:
+- entrypoints/api/routes/assistant.py - Added context loading + new endpoint
+- features/assistant/useAssistant.ts - Added parentInvestigationId support
+- tests/unit/entrypoints/api/routes/test_assistant.py - Added 4 tests
+## Evidence
+- Commits:
+- Tests:
+- PRs:
diff --git a/.flow/tasks/fn-56.13.json b/.flow/tasks/fn-56.13.json
new file mode 100644
index 000000000..ca39a001c
--- /dev/null
+++ b/.flow/tasks/fn-56.13.json
@@ -0,0 +1,22 @@
+{
+ "assignee": null,
+ "claim_note": "",
+ "claimed_at": null,
+ "created_at": "2026-02-02T22:43:39.106654Z",
+ "depends_on": [
+ "fn-56.12"
+ ],
+ "epic": "fn-56",
+ "evidence": {
+ "commit": "0c918f6f",
+ "feedback_event_type_added": true,
+ "helpful_button_added": true,
+ "tests_passing": 24
+ },
+ "id": "fn-56.13",
+ "priority": null,
+ "spec_path": ".flow/tasks/fn-56.13.md",
+ "status": "done",
+ "title": "Add agent memory integration ('This was helpful')",
+ "updated_at": "2026-02-03T00:14:53.795942Z"
+}
diff --git a/.flow/tasks/fn-56.13.md b/.flow/tasks/fn-56.13.md
new file mode 100644
index 000000000..f82fbf4fb
--- /dev/null
+++ b/.flow/tasks/fn-56.13.md
@@ -0,0 +1,52 @@
+# fn-56.13 Add agent memory integration ('This was helpful')
+
+## Description
+Add "This was helpful" button to assistant messages to enable agent memory integration. Users can mark helpful responses which are stored via the existing feedback system for future memory/fine-tuning use.
+
+## What Was Done
+
+### 1. Backend: Add feedback event type
+- Added `FEEDBACK_ASSISTANT_MESSAGE = "feedback.assistant_message"` to EventType enum
+- Added "assistant_message" target type to FeedbackCreate model
+- Made investigation_id optional (assistant messages don't require one)
+
+### 2. Frontend: Update feedback API
+- Added "assistant_message" to TargetType union
+- Made investigation_id optional in FeedbackCreate interface
+- Added `useSubmitFeedback()` hook for generic feedback submission
+
+### 3. Frontend: Add "This was helpful" button
+- Updated AssistantMessage component to show thumbs up button on assistant messages
+- Button only appears when message is not streaming and has content
+- Shows loading state during submission
+- Shows "Marked as helpful" confirmation after success
+
+### 4. Frontend: Wire up context
+- Updated AssistantPanel to pass sessionInvestigationId to AssistantMessage
+- Updated InvestigationFeedbackButtons REASON_OPTIONS with assistant_message entry
+
+### 5. Regenerated OpenAPI client
+
+## Acceptance
+- [x] "This was helpful" button shows on assistant messages (not user/tool messages)
+- [x] Button disabled during message streaming
+- [x] Clicking button submits feedback via existing API
+- [x] Shows confirmation after successful submission
+- [x] Feedback stored in investigation_feedback_events table
+- [x] OpenAPI client regenerated
+- [x] TypeScript compilation passes
+- [x] All tests pass (24)
+
+## Done summary
+Added "This was helpful" button to assistant messages using existing feedback system. Feedback is stored and can be used for agent memory/fine-tuning.
+
+### Files:
+- adapters/investigation_feedback/types.py - Added FEEDBACK_ASSISTANT_MESSAGE
+- entrypoints/api/routes/investigation_feedback.py - Added assistant_message target
+- features/assistant/AssistantMessage.tsx - Added thumbs up button
+- features/assistant/AssistantPanel.tsx - Pass sessionInvestigationId
+- lib/api/investigation-feedback.ts - Added target type + generic hook
+## Evidence
+- Commits:
+- Tests:
+- PRs:
diff --git a/.flow/tasks/fn-56.2.json b/.flow/tasks/fn-56.2.json
new file mode 100644
index 000000000..14529fb52
--- /dev/null
+++ b/.flow/tasks/fn-56.2.json
@@ -0,0 +1,30 @@
+{
+ "assignee": "bordumbb@gmail.com",
+ "claim_note": "",
+ "claimed_at": "2026-02-02T22:58:49.042925Z",
+ "created_at": "2026-02-02T22:01:48.692788Z",
+ "depends_on": [
+ "fn-56.7",
+ "fn-56.8"
+ ],
+ "epic": "fn-56",
+ "evidence": {
+ "commits": [
+ "7775de88"
+ ],
+ "files_created": [
+ "python-packages/dataing/src/dataing/agents/tools/local_files.py",
+ "python-packages/dataing/tests/unit/agents/tools/test_local_files.py"
+ ],
+ "test_count": 27,
+ "tests": [
+ "tests/unit/agents/tools/test_local_files.py"
+ ]
+ },
+ "id": "fn-56.2",
+ "priority": null,
+ "spec_path": ".flow/tasks/fn-56.2.md",
+ "status": "done",
+ "title": "Create local file reader tool with safety",
+ "updated_at": "2026-02-02T23:02:12.393355Z"
+}
diff --git a/.flow/tasks/fn-56.2.md b/.flow/tasks/fn-56.2.md
new file mode 100644
index 000000000..8068e1839
--- /dev/null
+++ b/.flow/tasks/fn-56.2.md
@@ -0,0 +1,97 @@
+# fn-56.2 Create local file reader tool with safety
+
+## Description
+Create a safe local file reader tool for the DataingAssistant with path allowlist and traversal protection.
+
+## File to Create
+`python-packages/dataing/src/dataing/agents/tools/local_files.py`
+
+## Implementation
+
+```python
+from pathlib import Path
+from pydantic_ai import RunContext
+from pydantic_ai.tools import Tool
+
+ALLOWED_DIRS = [
+ "python-packages/",
+ "frontend/",
+ "demo/",
+ "docs/",
+]
+
+ALLOWED_PATTERNS = [
+ "docker-compose*.yml",
+ "*.md",
+ "justfile",
+ "pyproject.toml",
+ "package.json",
+]
+
+BLOCKED_PATTERNS = [
+ ".env",
+ "*.pem",
+ "*.key",
+ "*secret*",
+ "*credential*",
+ "*password*",
+]
+
+async def read_local_file(ctx: RunContext[Path], file_path: str) -> str:
+ """Read a file from the Dataing repository.
+
+ Args:
+ file_path: Path relative to repository root.
+
+ Returns:
+ File contents (max 100KB) or error message.
+ """
+ # 1. Canonicalize path
+ # 2. Check against allowlist
+ # 3. Check against blocklist
+ # 4. Read and return (truncate if >100KB)
+ pass
+
+local_files_toolset = [Tool(read_local_file)]
+```
+
+## Security Requirements
+- MUST canonicalize paths before allowlist check (Path.resolve())
+- MUST reject any path containing `..` after resolution
+- MUST reject files matching blocked patterns
+- MUST limit file size to 100KB
+- MUST NOT follow symlinks outside allowed directories
+
+## References
+- Path validation pattern from practice-scout findings
+- SQL validator for pattern matching: `safety/validator.py`
+## Acceptance
+- [ ] `local_files.py` created with `read_local_file` tool
+- [ ] Path canonicalization implemented (prevents `../` traversal)
+- [ ] Allowlist enforced for directories
+- [ ] Blocklist enforced for sensitive files (.env, keys, secrets)
+- [ ] File size limit of 100KB
+- [ ] Symlinks outside allowed dirs rejected
+- [ ] Returns helpful error messages for blocked paths
+- [ ] Unit tests cover traversal attempts
+## Done summary
+Created local file reader tool with comprehensive safety features:
+
+**Security Features:**
+- Directory allowlist: python-packages/, frontend/, demo/, docs/
+- Root file patterns: docker-compose*.yml, *.md, pyproject.toml
+- Blocked patterns: .env, *.pem, *.key, *secret*, *credential*
+- Path traversal prevention via canonicalization
+- Symlink target validation
+- File size limit (100KB)
+
+**Tool Functions:**
+- `read_local_file` - Read file with safety checks and line-range support
+- `search_in_files` - Search pattern across repository files
+- `list_directory` - List files in directory
+
+27 unit tests covering all security requirements.
+## Evidence
+- Commits: 7775de88
+- Tests: tests/unit/agents/tools/test_local_files.py
+- PRs:
diff --git a/.flow/tasks/fn-56.3.json b/.flow/tasks/fn-56.3.json
new file mode 100644
index 000000000..3e3b00b42
--- /dev/null
+++ b/.flow/tasks/fn-56.3.json
@@ -0,0 +1,30 @@
+{
+ "assignee": "bordumbb@gmail.com",
+ "claim_note": "",
+ "claimed_at": "2026-02-02T23:44:43.250984Z",
+ "created_at": "2026-02-02T22:01:48.771211Z",
+ "depends_on": [
+ "fn-56.1",
+ "fn-56.11"
+ ],
+ "epic": "fn-56",
+ "evidence": {
+ "endpoints": [
+ "POST /sessions",
+ "GET /sessions",
+ "GET /sessions/{id}",
+ "POST /sessions/{id}/messages",
+ "GET /sessions/{id}/stream",
+ "DELETE /sessions/{id}",
+ "POST /sessions/{id}/export"
+ ],
+ "tests_failed": 0,
+ "tests_passed": 20
+ },
+ "id": "fn-56.3",
+ "priority": null,
+ "spec_path": ".flow/tasks/fn-56.3.md",
+ "status": "done",
+ "title": "Create assistant API routes with SSE streaming (routes/assistant.py)",
+ "updated_at": "2026-02-02T23:48:17.959030Z"
+}
diff --git a/.flow/tasks/fn-56.3.md b/.flow/tasks/fn-56.3.md
new file mode 100644
index 000000000..c67f366c2
--- /dev/null
+++ b/.flow/tasks/fn-56.3.md
@@ -0,0 +1,77 @@
+# fn-56.3 Create assistant API routes with SSE streaming
+
+## Description
+Create assistant API routes with SSE streaming support for the Dataing Assistant.
+
+## File Created
+`python-packages/dataing/src/dataing/entrypoints/api/routes/assistant.py`
+
+## Endpoints
+
+1. `POST /assistant/sessions` - Create new session
+ - Returns: `{session_id: str, investigation_id: str, created_at: datetime}`
+
+2. `GET /assistant/sessions` - List user's sessions
+ - Returns: `{sessions: [{id, created_at, last_activity, message_count}]}`
+
+3. `GET /assistant/sessions/{session_id}` - Get session details
+ - Returns: Full session with messages
+
+4. `POST /assistant/sessions/{session_id}/messages` - Send message
+ - Body: `{content: str}`
+ - Returns: `{message_id: str, status: "processing"}`
+
+5. `GET /assistant/sessions/{session_id}/stream` - SSE stream
+ - Query: `?last_event_id=N` for resumption
+ - Events: `text`, `tool_call`, `tool_result`, `complete`, `error`
+ - Heartbeat: 15 seconds
+
+6. `DELETE /assistant/sessions/{session_id}` - End session
+
+7. `POST /assistant/sessions/{session_id}/export` - Export session
+ - Query: `?format=json|markdown`
+
+## Implementation Details
+
+- Uses EventSourceResponse from sse-starlette
+- Streaming handlers forward text and tool calls to SSE queue
+- Background task processes messages asynchronously
+- Heartbeat sent every 15 seconds
+- Client disconnect detection via request.is_disconnected()
+- Audit logging for all tool calls
+- Pydantic models for all request/response schemas
+
+## Acceptance
+- [x] `assistant.py` created with all endpoints
+- [x] Sessions linked to investigations (each session IS an investigation)
+- [x] SSE streaming works with EventSourceResponse
+- [x] Heartbeat sent every 15 seconds
+- [x] `X-Accel-Buffering: no` header set
+- [x] Client disconnect detection via `request.is_disconnected()`
+- [x] Auth required on all endpoints
+- [x] Pydantic models for request/response schemas
+- [x] Export to JSON and Markdown formats
+- [x] Router registered in routes/__init__.py
+- [x] Unit tests pass: 20 tests
+
+## Done summary
+### Summary
+
+Created assistant API routes with full SSE streaming support.
+
+### Endpoints:
+1. POST/GET/DELETE /sessions - Session management
+2. POST /sessions/{id}/messages - Send messages
+3. GET /sessions/{id}/stream - SSE streaming
+4. POST /sessions/{id}/export - Export (JSON/Markdown)
+
+### Features:
+- Real-time streaming via EventSourceResponse
+- 15-second heartbeat, client disconnect detection
+- Audit logging for all tool calls
+- Full Pydantic model validation
+- 20 unit tests covering models and helpers
+## Evidence
+- Commits:
+- Tests:
+- PRs:
diff --git a/.flow/tasks/fn-56.4.json b/.flow/tasks/fn-56.4.json
new file mode 100644
index 000000000..7e52814c8
--- /dev/null
+++ b/.flow/tasks/fn-56.4.json
@@ -0,0 +1,21 @@
+{
+ "assignee": "bordumbb@gmail.com",
+ "claim_note": "",
+ "claimed_at": "2026-02-02T23:48:53.330056Z",
+ "created_at": "2026-02-02T22:01:48.851384Z",
+ "depends_on": [
+ "fn-56.3"
+ ],
+ "epic": "fn-56",
+ "evidence": {
+ "completed_in": "fn-56.3",
+ "dependency_injection": "per-request with tenant isolation",
+ "routes_registered": true
+ },
+ "id": "fn-56.4",
+ "priority": null,
+ "spec_path": ".flow/tasks/fn-56.4.md",
+ "status": "done",
+ "title": "Register routes and add dependencies",
+ "updated_at": "2026-02-02T23:49:23.117059Z"
+}
diff --git a/.flow/tasks/fn-56.4.md b/.flow/tasks/fn-56.4.md
new file mode 100644
index 000000000..7e6d0c31b
--- /dev/null
+++ b/.flow/tasks/fn-56.4.md
@@ -0,0 +1,52 @@
+# fn-56.4 Register routes and add dependencies
+
+## Description
+Register assistant routes and add dependency injection for DataingAssistant.
+
+## Status
+**Completed** - Routes registered in fn-56.3, dependencies implemented in assistant.py.
+
+## What Was Done
+
+### Route Registration (`routes/__init__.py`)
+```python
+from dataing.entrypoints.api.routes.assistant import router as assistant_router
+
+api_router.include_router(assistant_router) # Dataing Assistant chat API
+```
+
+### Dependency Injection (`routes/assistant.py`)
+The DataingAssistant is created per-request with tenant isolation:
+```python
+async def get_assistant(
+ auth: ApiKeyContext,
+ db: AppDatabase,
+) -> DataingAssistant:
+ return DataingAssistant(
+ api_key=settings.anthropic_api_key,
+ tenant_id=auth.tenant_id,
+ model=settings.llm_model,
+ )
+```
+
+**Note:** A singleton pattern is NOT appropriate for multi-tenancy. Each request creates
+an assistant instance scoped to the authenticated tenant. This matches the pattern used
+by other services like InvestigationService.
+
+## Acceptance
+- [x] `assistant_router` imported and registered in `__init__.py`
+- [x] `get_assistant()` creates per-request assistant with tenant isolation
+- [x] Uses ANTHROPIC_API_KEY from settings
+- [x] Uses LLM_MODEL from settings
+- [x] Tenant-scoped (not singleton) for multi-tenancy
+
+## Done summary
+### Summary
+
+Completed as part of fn-56.3. Routes registered in __init__.py, and get_assistant()
+helper creates per-request assistants with tenant isolation. No singleton needed
+for multi-tenant architecture.
+## Evidence
+- Commits:
+- Tests:
+- PRs:
diff --git a/.flow/tasks/fn-56.5.json b/.flow/tasks/fn-56.5.json
new file mode 100644
index 000000000..7e003c155
--- /dev/null
+++ b/.flow/tasks/fn-56.5.json
@@ -0,0 +1,25 @@
+{
+ "assignee": "bordumbb@gmail.com",
+ "claim_note": "",
+ "claimed_at": "2026-02-02T23:49:52.928183Z",
+ "created_at": "2026-02-02T22:01:48.935587Z",
+ "depends_on": [],
+ "epic": "fn-56",
+ "evidence": {
+ "eslint": "pass",
+ "files_created": [
+ "features/assistant/index.ts",
+ "features/assistant/AssistantWidget.tsx",
+ "features/assistant/AssistantPanel.tsx",
+ "features/assistant/AssistantMessage.tsx",
+ "features/assistant/useAssistant.ts"
+ ],
+ "typescript": "pass"
+ },
+ "id": "fn-56.5",
+ "priority": null,
+ "spec_path": ".flow/tasks/fn-56.5.md",
+ "status": "done",
+ "title": "Create frontend chat widget components",
+ "updated_at": "2026-02-02T23:54:15.514766Z"
+}
diff --git a/.flow/tasks/fn-56.5.md b/.flow/tasks/fn-56.5.md
new file mode 100644
index 000000000..2237c1070
--- /dev/null
+++ b/.flow/tasks/fn-56.5.md
@@ -0,0 +1,71 @@
+# fn-56.5 Create frontend chat widget components
+
+## Description
+Create frontend chat widget components: floating button, slide-in panel, message list, and chat hook.
+
+## Files Created
+
+### `features/assistant/index.ts`
+Exports all components and types.
+
+### `features/assistant/AssistantWidget.tsx`
+Main widget with floating button (bottom-20 right-4 z-50) and Sheet panel.
+
+### `features/assistant/AssistantPanel.tsx`
+Chat interface with:
+- Message history (auto-scroll to bottom)
+- Streaming message display
+- Textarea input with send button (Enter to send, Shift+Enter for newline)
+- Quick question suggestions for empty state
+- Error banner with "New Chat" option
+
+### `features/assistant/AssistantMessage.tsx`
+Message component with:
+- Avatar icons (User/Bot/Tool)
+- Role-based styling
+- Tool call indicators
+- Streaming spinner
+
+### `features/assistant/useAssistant.ts`
+React hook for chat state:
+- Session management (create, load, clear)
+- Session ID persisted in localStorage
+- Message history state
+- SSE subscription for streaming
+- Text and tool_call event handling
+- Error handling with retry
+
+## App Integration
+Updated `App.tsx` to include `<AssistantWidget />` above the DemoToggle.
+
+## Acceptance
+- [x] `features/assistant/` directory created with 5 files
+- [x] Floating button positioned at `bottom-20 right-4 z-50`
+- [x] Sheet opens on button click
+- [x] Chat panel shows message history
+- [x] Streaming messages display as they arrive
+- [x] Textarea input with send button
+- [x] Session ID persisted in localStorage
+- [x] SSE subscription handles text, tool_call, complete events
+- [x] TypeScript strict mode passes
+- [x] ESLint passes
+- [x] Prettier formatting applied
+
+## Done summary
+### Summary
+
+Created Dataing Assistant frontend feature with floating chat widget.
+
+### Components:
+1. **AssistantWidget** - Floating button + Sheet panel
+2. **AssistantPanel** - Chat UI with input, messages, quick questions
+3. **AssistantMessage** - Individual message display with streaming support
+4. **useAssistant** - State hook with SSE streaming
+
+### Integration:
+- Widget added to App.tsx
+- Positioned above DemoToggle (bottom-20 right-4)
+## Evidence
+- Commits:
+- Tests:
+- PRs:
diff --git a/.flow/tasks/fn-56.6.json b/.flow/tasks/fn-56.6.json
new file mode 100644
index 000000000..a9399efde
--- /dev/null
+++ b/.flow/tasks/fn-56.6.json
@@ -0,0 +1,23 @@
+{
+ "assignee": "bordumbb@gmail.com",
+ "claim_note": "",
+ "claimed_at": "2026-02-02T23:56:34.348182Z",
+ "created_at": "2026-02-02T22:01:49.019252Z",
+ "depends_on": [
+ "fn-56.4",
+ "fn-56.5"
+ ],
+ "epic": "fn-56",
+ "evidence": {
+ "api_wrapper_created": true,
+ "eslint_passes": true,
+ "openapi_generated": true,
+ "typescript_passes": true
+ },
+ "id": "fn-56.6",
+ "priority": null,
+ "spec_path": ".flow/tasks/fn-56.6.md",
+ "status": "done",
+ "title": "Generate OpenAPI client and integrate widget into App",
+ "updated_at": "2026-02-03T00:00:16.475213Z"
+}
diff --git a/.flow/tasks/fn-56.6.md b/.flow/tasks/fn-56.6.md
new file mode 100644
index 000000000..930a5e631
--- /dev/null
+++ b/.flow/tasks/fn-56.6.md
@@ -0,0 +1,55 @@
+# fn-56.6 Generate OpenAPI client and integrate widget into App
+
+## Description
+Generate OpenAPI client and integrate AssistantWidget into App.tsx.
+
+## What Was Done
+
+### 1. Generated OpenAPI client
+```bash
+just generate-client
+```
+This created `frontend/app/src/lib/api/generated/assistant/assistant.ts` with:
+- `useCreateSessionApiV1AssistantSessionsPost`
+- `useListSessionsApiV1AssistantSessionsGet`
+- `useGetSessionApiV1AssistantSessionsSessionIdGet`
+- `useDeleteSessionApiV1AssistantSessionsSessionIdDelete`
+- `useSendMessageApiV1AssistantSessionsSessionIdMessagesPost`
+- `useExportSessionApiV1AssistantSessionsSessionIdExportPost`
+
+### 2. Created API wrapper (`lib/api/assistant.ts`)
+```typescript
+export const useCreateAssistantSession = ...
+export const useAssistantSessions = ...
+export const assistantApi = {
+ createSession, getSession, sendMessage, getStreamUrl
+}
+```
+
+### 3. Updated useAssistant hook
+Refactored to use the generated API client instead of raw fetch calls.
+
+### 4. Widget integration (done in fn-56.5)
+`<AssistantWidget />` is already rendered in App.tsx above DemoToggle.
+
+## Acceptance
+- [x] OpenAPI client regenerated: `just generate-client`
+- [x] `lib/api/assistant.ts` wrapper created
+- [x] `AssistantWidget` imported and rendered in App.tsx (done in fn-56.5)
+- [x] Widget visible on all authenticated pages
+- [x] TypeScript compilation passes
+- [x] ESLint passes with no errors
+
+## Done summary
+### Summary
+
+Generated OpenAPI client for assistant endpoints and created API wrapper.
+Updated useAssistant hook to use generated client for type-safe API calls.
+
+### Files:
+- lib/api/assistant.ts - API wrapper with cleaner hook names
+- lib/api/generated/assistant/assistant.ts - Generated client
+## Evidence
+- Commits:
+- Tests:
+- PRs:
diff --git a/.flow/tasks/fn-56.7.json b/.flow/tasks/fn-56.7.json
new file mode 100644
index 000000000..946e10024
--- /dev/null
+++ b/.flow/tasks/fn-56.7.json
@@ -0,0 +1,28 @@
+{
+ "assignee": "bordumbb@gmail.com",
+ "claim_note": "",
+ "claimed_at": "2026-02-02T22:47:40.803413Z",
+ "created_at": "2026-02-02T22:43:38.619900Z",
+ "depends_on": [],
+ "epic": "fn-56",
+ "evidence": {
+ "commits": [
+ "82fd221a"
+ ],
+ "files_created": [
+ "python-packages/dataing/src/dataing/agents/tools/__init__.py",
+ "python-packages/dataing/src/dataing/agents/tools/registry.py",
+ "python-packages/dataing/tests/unit/agents/tools/__init__.py",
+ "python-packages/dataing/tests/unit/agents/tools/test_registry.py"
+ ],
+ "tests": [
+ "python-packages/dataing/tests/unit/agents/tools/test_registry.py"
+ ]
+ },
+ "id": "fn-56.7",
+ "priority": null,
+ "spec_path": ".flow/tasks/fn-56.7.md",
+ "status": "done",
+ "title": "Create unified tool registry",
+ "updated_at": "2026-02-02T22:51:20.897653Z"
+}
diff --git a/.flow/tasks/fn-56.7.md b/.flow/tasks/fn-56.7.md
new file mode 100644
index 000000000..463557bf3
--- /dev/null
+++ b/.flow/tasks/fn-56.7.md
@@ -0,0 +1,21 @@
+# fn-56.7 Create unified tool registry
+
+## Description
+TBD
+
+## Acceptance
+- [ ] TBD
+
+## Done summary
+Created unified tool registry for Dataing Assistant:
+
+- `ToolCategory` enum: FILES, GIT, DOCKER, LOGS, DATASOURCE, ENVIRONMENT
+- `ToolConfig` dataclass for tool metadata (name, category, description, priority)
+- `TenantToolConfig` for per-tenant enable/disable overrides
+- `ToolRegistry` class with methods: register, get_tool, get_tools_by_category, is_tool_enabled, get_enabled_tools, enable_tool, disable_tool
+- Singleton pattern via `get_default_registry()`
+- 27 unit tests covering all functionality
+## Evidence
+- Commits: 82fd221a
+- Tests: python-packages/dataing/tests/unit/agents/tools/test_registry.py
+- PRs:
diff --git a/.flow/tasks/fn-56.8.json b/.flow/tasks/fn-56.8.json
new file mode 100644
index 000000000..1ff6e86fd
--- /dev/null
+++ b/.flow/tasks/fn-56.8.json
@@ -0,0 +1,35 @@
+{
+ "assignee": "bordumbb@gmail.com",
+ "claim_note": "",
+ "claimed_at": "2026-02-02T22:51:34.224400Z",
+ "created_at": "2026-02-02T22:43:38.701427Z",
+ "depends_on": [],
+ "epic": "fn-56",
+ "evidence": {
+ "commits": [
+ "4cf6b939"
+ ],
+ "files_created": [
+ "python-packages/dataing/src/dataing/core/parsing/__init__.py",
+ "python-packages/dataing/src/dataing/core/parsing/text_parser.py",
+ "python-packages/dataing/src/dataing/core/parsing/yaml_parser.py",
+ "python-packages/dataing/src/dataing/core/parsing/json_parser.py",
+ "python-packages/dataing/src/dataing/core/parsing/log_parser.py",
+ "python-packages/dataing/src/dataing/core/parsing/data_parser.py"
+ ],
+ "test_count": 58,
+ "tests": [
+ "tests/unit/core/parsing/test_text_parser.py",
+ "tests/unit/core/parsing/test_yaml_parser.py",
+ "tests/unit/core/parsing/test_json_parser.py",
+ "tests/unit/core/parsing/test_log_parser.py",
+ "tests/unit/core/parsing/test_data_parser.py"
+ ]
+ },
+ "id": "fn-56.8",
+ "priority": null,
+ "spec_path": ".flow/tasks/fn-56.8.md",
+ "status": "done",
+ "title": "Create centralized file parsers (core/parsing/)",
+ "updated_at": "2026-02-02T22:58:37.329162Z"
+}
diff --git a/.flow/tasks/fn-56.8.md b/.flow/tasks/fn-56.8.md
new file mode 100644
index 000000000..73df221f4
--- /dev/null
+++ b/.flow/tasks/fn-56.8.md
@@ -0,0 +1,22 @@
+# fn-56.8 Create centralized file parsers (core/parsing/)
+
+## Description
+TBD
+
+## Acceptance
+- [ ] TBD
+
+## Done summary
+Created centralized file parsers in core/parsing/:
+
+- **TextParser**: UTF-8 text files with line-range chunking, search, and encoding fallback
+- **YamlParser**: Safe YAML loading with multi-document support and format_summary for LLMs
+- **JsonParser**: JSON parsing with schema inference and formatted summaries
+- **LogParser**: Log files with level detection, timestamp parsing, JSON log support
+- **DataParser**: CSV/Parquet sampling without full memory load
+
+All parsers include size limits, consistent error handling, and helpful summaries for LLM consumption.
+## Evidence
+- Commits: 4cf6b939
+- Tests: tests/unit/core/parsing/test_text_parser.py, tests/unit/core/parsing/test_yaml_parser.py, tests/unit/core/parsing/test_json_parser.py, tests/unit/core/parsing/test_log_parser.py, tests/unit/core/parsing/test_data_parser.py
+- PRs:
diff --git a/.flow/tasks/fn-56.9.json b/.flow/tasks/fn-56.9.json
new file mode 100644
index 000000000..4b094b131
--- /dev/null
+++ b/.flow/tasks/fn-56.9.json
@@ -0,0 +1,31 @@
+{
+ "assignee": "bordumbb@gmail.com",
+ "claim_note": "",
+ "claimed_at": "2026-02-02T23:02:29.684094Z",
+ "created_at": "2026-02-02T22:43:38.781579Z",
+ "depends_on": [
+ "fn-56.7"
+ ],
+ "epic": "fn-56",
+ "evidence": {
+ "files_created": [
+ "python-packages/dataing/src/dataing/agents/tools/log_providers/base.py",
+ "python-packages/dataing/src/dataing/agents/tools/log_providers/local.py",
+ "python-packages/dataing/src/dataing/agents/tools/log_providers/docker.py",
+ "python-packages/dataing/src/dataing/agents/tools/log_providers/cloudwatch.py",
+ "python-packages/dataing/src/dataing/agents/tools/log_providers/__init__.py",
+ "python-packages/dataing/tests/unit/agents/tools/log_providers/__init__.py",
+ "python-packages/dataing/tests/unit/agents/tools/log_providers/test_base.py",
+ "python-packages/dataing/tests/unit/agents/tools/log_providers/test_local.py"
+ ],
+ "pre_commit_passed": true,
+ "tests_failed": 0,
+ "tests_passed": 24
+ },
+ "id": "fn-56.9",
+ "priority": null,
+ "spec_path": ".flow/tasks/fn-56.9.md",
+ "status": "done",
+ "title": "Create log provider interface and implementations",
+ "updated_at": "2026-02-02T23:10:38.660807Z"
+}
diff --git a/.flow/tasks/fn-56.9.md b/.flow/tasks/fn-56.9.md
new file mode 100644
index 000000000..7e7d2f3db
--- /dev/null
+++ b/.flow/tasks/fn-56.9.md
@@ -0,0 +1,46 @@
+# fn-56.9 Create log provider interface and implementations
+
+## Description
+TBD
+
+## Acceptance
+- [ ] TBD
+
+## Done summary
+### Summary
+
+Implemented log provider interface with three provider implementations:
+
+1. **LocalFileLogProvider** - Reads logs from local filesystem with:
+ - Pattern filtering on message AND raw line
+ - Time-based filtering
+ - Pagination support with proper truncation logic
+ - Rotation detection
+
+2. **DockerLogProvider** - Reads logs from Docker containers with:
+ - Container listing and status
+ - Log level detection
+ - Timestamp parsing
+
+3. **CloudWatchLogProvider** - Optional provider for AWS CloudWatch Logs with:
+ - IAM role authentication
+ - Log group/stream listing
+ - Filter patterns
+
+Fixed bugs:
+- Pattern filter now checks both message and raw line (fixes level-only searches like "ERROR")
+- Truncation logic now tracks last processed line correctly
+- Added `_matches_pattern` helper for consistent search behavior
+
+## Files Changed
+- `agents/tools/log_providers/base.py` - Protocol, base class, helper method
+- `agents/tools/log_providers/local.py` - Local file provider
+- `agents/tools/log_providers/docker.py` - Docker provider
+- `agents/tools/log_providers/cloudwatch.py` - CloudWatch provider
+- `agents/tools/log_providers/__init__.py` - Re-exports
+- `tests/unit/agents/tools/log_providers/test_base.py` - Base tests
+- `tests/unit/agents/tools/log_providers/test_local.py` - Local provider tests
+## Evidence
+- Commits:
+- Tests:
+- PRs:
diff --git a/demo/docker-compose.demo.yml b/demo/docker-compose.demo.yml
index 2f7cc04f3..4abaa0764 100644
--- a/demo/docker-compose.demo.yml
+++ b/demo/docker-compose.demo.yml
@@ -13,6 +13,33 @@
# 3. Run investigation on the connected datasource
services:
+ # Override API to mount repo files for assistant file reading
+ api:
+ volumes:
+ # Mount repo root for assistant file reading
+ # PWD is set by 'just demo' or defaults to current directory
+ - ${PWD:-.}/demo:/repo/demo:ro
+ - ${PWD:-.}/python-packages:/repo/python-packages:ro
+ - ${PWD:-.}/frontend:/repo/frontend:ro
+ - ${PWD:-.}/docs:/repo/docs:ro
+ # Docker socket for container introspection
+ - /var/run/docker.sock:/var/run/docker.sock:ro
+ environment:
+ DATAING_REPO_ROOT: /repo
+
+ # Override worker to mount repo files for assistant file reading (Temporal agent)
+ worker:
+ volumes:
+ # Mount repo root for assistant file reading
+ # PWD is set by 'just demo' or defaults to current directory
+ - ${PWD:-.}/demo:/repo/demo:ro
+ - ${PWD:-.}/python-packages:/repo/python-packages:ro
+ - ${PWD:-.}/frontend:/repo/frontend:ro
+ - ${PWD:-.}/docs:/repo/docs:ro
+ # Docker socket for container introspection
+ - /var/run/docker.sock:/var/run/docker.sock:ro
+ environment:
+ DATAING_REPO_ROOT: /repo
# Demo PostgreSQL with DuckDB integration for analytics
# Uses pg_duckdb extension - real PostgreSQL with DuckDB query engine
# https://github.com/duckdb/pg_duckdb
diff --git a/demo/fixtures/baseline/manifest.json b/demo/fixtures/baseline/manifest.json
index e97f6dec0..b93ae36e9 100644
--- a/demo/fixtures/baseline/manifest.json
+++ b/demo/fixtures/baseline/manifest.json
@@ -1,7 +1,7 @@
{
"name": "baseline",
"description": "Clean e-commerce data with no anomalies",
- "created_at": "2026-02-01T22:23:30.928901Z",
+ "created_at": "2026-02-02T02:25:55.796747Z",
"simulation_period": {
"start": "2026-01-08",
"end": "2026-01-14"
diff --git a/demo/fixtures/duplicates/manifest.json b/demo/fixtures/duplicates/manifest.json
index e2330f230..3bf8d1b63 100644
--- a/demo/fixtures/duplicates/manifest.json
+++ b/demo/fixtures/duplicates/manifest.json
@@ -1,7 +1,7 @@
{
"name": "duplicates",
"description": "Retry logic creates duplicate order_items",
- "created_at": "2026-02-01T22:23:33.696510Z",
+ "created_at": "2026-02-02T02:26:03.807337Z",
"simulation_period": {
"start": "2026-01-08",
"end": "2026-01-14"
@@ -48,56 +48,56 @@
],
"ground_truth": {
"affected_order_ids": [
- "b8442c42-598a-4114-bcf6-52889371384c",
- "1b25227b-007d-4597-be6c-bc9b6a234bee",
- "168efc49-004b-4b2b-a5bc-54b630cd6260",
- "e24fd01b-b477-4150-84d4-5ace57da0316",
- "cfd604f6-6d3b-4534-a4f8-d4df9661629d",
- "bddd3d2c-2739-4a4a-998e-bd4743529b00",
- "41a95582-b6aa-4a95-91cb-1b6258d0618d",
- "49530e30-4ba2-47d6-a236-04beb7943892",
- "55f9489e-1bc1-42a8-8922-5a542bde7271",
- "667853c4-0228-420d-abe9-e9024436c1f5",
- "f0ac51c5-32af-48c1-bdee-4b43ca8f440f",
- "c87a91fe-3e91-4d4a-b12f-902f5bd84f8c",
- "9a31b301-250c-4177-8004-54e071b15c0b",
- "f95009b0-bba1-4745-8761-0fc836c85147",
- "789d77aa-dee5-44a4-96f9-9ff53727b1c6",
- "fdfd4efe-f0da-4497-a516-28481302d0c9",
- "dde5ca8b-2f9c-4506-99ea-e2167979b3b0",
- "2a9da0fe-3ca8-4637-b28f-3e14ad05a79b",
- "31d6f307-564e-4e2b-80c0-018d379dbfea",
- "2f90592e-3543-46d7-8963-c5acb5055819",
- "f03969ad-724a-4bb0-a383-176b0284dcf7",
- "26593879-2a51-4a8f-8545-fe0c36f509ff",
- "f11b9e5e-63ef-4c15-ab01-adf72b185ebb",
- "8681ddf9-2ab6-4de4-8ee5-d289b2dea07a",
- "ffaf74f5-caa2-4ace-8aff-05b32b162f4a",
- "8fa94019-cfc7-4c72-9dcb-d9f5d2dcbe54",
- "e1ec0dbf-d79d-4181-bb37-0967ee8f8058",
- "b18ecd6c-2297-49f5-ac3f-97a3c6256332",
- "7ea0bf19-de82-4adc-9353-deb1c53648e1",
- "58715e9e-3abc-430d-84d0-cfb110a434a4",
- "9637c1f3-0803-4f78-ac70-de7ecdb72e5d",
- "6e29ee88-342a-4392-a9be-b222635ef119",
- "5f4f6c8b-6a3b-4d2f-a6c4-14399efb34bc",
- "0ef5d034-ed48-452b-aa7b-3db063c14bbb",
- "b07a0dc3-c597-4242-9a0e-0c10331966f8",
- "35d6454a-d338-4e3a-a608-06fb020a8c8c",
- "d150026d-dedd-4b24-9e27-c9d1da989ef4",
- "11713dbf-2abd-4179-9fde-b0772276b7da",
- "757b1afa-3f0b-4598-a056-92c04788fae8",
- "9bc8d67a-9a0c-485f-a442-687b49450356",
- "f8767eef-23d5-4d95-b5ad-d0dc9d67686c",
- "48c70fd5-01af-430e-b669-8489f6914881",
- "9a724d63-9a29-486d-8c5b-95d64bad7a6a",
- "d8937102-640b-47ab-a2d2-33585fef82d2",
- "161b6a51-17fb-4645-ba17-f61bf1c7d4f4",
- "f6afa085-f0df-43dc-97af-9a506f1fde07",
- "de22257d-104a-4ba8-aa70-19cf68cc4a4d",
- "74430204-f467-47c4-ad93-f4988c2dc563",
- "390a483a-7552-4603-9071-1f9f4456d093",
- "414180c5-3f03-453f-b303-f0e52cd51572"
+ "210828a2-cf31-4732-b244-d118001b8671",
+ "b6f178bc-e934-4456-9e2f-4ea2221de71d",
+ "abd40727-116d-470e-9eb6-4a03fb5f8b81",
+ "bfa54f05-2890-426b-88ef-0db9381321a5",
+ "bb4a77b6-aa6b-4584-bfa2-cb2b654c6a49",
+ "da203267-7066-4486-b256-a789bd113637",
+ "e7899412-9880-4705-84c1-f0b328fc0ee2",
+ "77617e2d-94c4-4005-82bf-2d25819631df",
+ "00a6940e-2515-4c06-8d6a-a72bd74fa36d",
+ "c47d0cb5-0e8a-4c92-b5d4-16e41da15362",
+ "ab93e414-d5c3-4635-ad93-b1fa5af23b69",
+ "cb842dcf-4fff-4289-92ff-f71f0157f349",
+ "5e6f01cc-6c4c-44f3-8a28-909605eeeeb8",
+ "78e980dc-23a0-4b31-a3af-084dc6a656fa",
+ "95596b6d-f086-4e04-802c-34af0f689ff5",
+ "0d0bb801-7185-4b8e-8623-a83c923a37fa",
+ "c4274b7a-02f0-4806-b710-f7e3be2ca8c0",
+ "a64350ad-6f50-4926-a4e3-871312749ff7",
+ "47c3b420-c831-4b73-9038-640477590c7e",
+ "7d23cc3b-825a-45a2-97e2-41e2fe55dc92",
+ "141229c1-dfcb-4f06-8d01-db0108252395",
+ "4838c36f-a7e7-48dd-8f9e-cdd69ce344f1",
+ "1984c8c9-e7f0-4241-8df0-13cf5f5c9481",
+ "f9c372ee-8174-4b60-8661-d394d0f4b6f2",
+ "81ec17e1-83c1-4eb3-9bb3-9e5045535a4a",
+ "7d4c2f77-7b18-4474-828c-0120a272b415",
+ "77684a4d-6d03-4207-a4b6-e5075a8416e7",
+ "088413e2-04e5-4b75-ac86-15e09ccd0d74",
+ "2a57a25b-6681-4d10-b6dd-23b4f6557007",
+ "d26d2720-bb2c-4532-b640-40d84eb537a7",
+ "7c325af6-f815-468b-b531-ca8809b0d340",
+ "4b921b18-0031-4957-abd0-dd2724f9e043",
+ "0b23e3a5-d05d-4d0f-8eb7-3251921cc803",
+ "d1ec0fb4-5830-441c-b51c-db3560a10002",
+ "39ce1cdb-b977-494e-92dd-78d0d28116f4",
+ "d908cdbf-db64-40e9-ab70-e9374372f39e",
+ "fffb4922-0388-4247-bc1c-3dc6a7a05d3e",
+ "9f381026-a7e0-413a-bd4c-5ef6db25a6f8",
+ "b9dc9cbd-36f5-4dfe-8308-da1067f9ce54",
+ "21d35a2c-9b39-401b-aea4-9edb9c472887",
+ "f2a70a85-dc8e-4c63-8905-4ab39682657b",
+ "f1d357d8-4dfe-44f0-971b-155875bd8ad2",
+ "1a133370-f77a-4467-8889-ebce02435d28",
+ "4a60d818-eb9c-4d79-b6f5-50ae30571a4b",
+ "e4289200-a9e7-47f4-a196-369a51ef06f6",
+ "60f6a3e8-c696-4740-ab4a-9f895841d1f5",
+ "9aef6619-c372-402d-b144-159040e1fbee",
+ "059d0f48-35b7-4b6a-8b85-3596f497f8e7",
+ "8b3069e5-e88b-42a0-83e8-ad8e3df9aa0c",
+ "5bd4a4df-050a-4418-ba95-a7dc57994d6c"
],
"affected_order_count": 81,
"duplicate_items": 84
diff --git a/demo/fixtures/late_arriving/manifest.json b/demo/fixtures/late_arriving/manifest.json
index dcbbea14a..4d91e01bc 100644
--- a/demo/fixtures/late_arriving/manifest.json
+++ b/demo/fixtures/late_arriving/manifest.json
@@ -1,7 +1,7 @@
{
"name": "late_arriving",
"description": "Mobile app queues events offline, batch uploaded later",
- "created_at": "2026-02-01T22:23:35.087147Z",
+ "created_at": "2026-02-02T02:26:06.908134Z",
"simulation_period": {
"start": "2026-01-08",
"end": "2026-01-14"
diff --git a/demo/fixtures/null_spike/manifest.json b/demo/fixtures/null_spike/manifest.json
index 8bd789ee7..c417358ce 100644
--- a/demo/fixtures/null_spike/manifest.json
+++ b/demo/fixtures/null_spike/manifest.json
@@ -1,7 +1,7 @@
{
"name": "null_spike",
"description": "Mobile app bug causes NULL user_id in orders",
- "created_at": "2026-02-01T22:23:31.794763Z",
+ "created_at": "2026-02-02T02:25:57.814319Z",
"simulation_period": {
"start": "2026-01-08",
"end": "2026-01-14"
@@ -64,106 +64,106 @@
],
"ground_truth": {
"affected_order_ids": [
- "b688ab17-46f9-4441-9714-ed69c62db62d",
- "222a798c-b5a5-4d3d-bed6-d03ecbf88285",
- "3c766d1d-fa1e-4118-8f59-aff2d9b1ffe5",
- "27d1c43a-6243-40e0-9a86-4e1dc9eb807c",
- "c9e1f49d-cae6-475a-8568-1d0f7acc8be5",
- "09e35fe5-6b9c-43c8-a6f6-2149f5895992",
- "e38eecc9-e24f-4b52-b1d2-e592328f3962",
- "6324d705-110a-4d02-89ef-c8225a739ffc",
- "6a48cd61-49c4-4ab2-9c06-c1ca5cb634e1",
- "3ea2bcc5-e695-431b-871c-972083904ba6",
- "ff9cbb73-c129-4b49-ae59-f3e4d0f074f6",
- "e7899ea9-6527-42fd-86fb-7a519e276245",
- "89c9a5af-7d5e-451d-8c0a-e37ac69d1c51",
- "59e97154-9794-4b54-95ce-9bbc875ab4a0",
- "48afa17a-70be-46c4-920c-d2ee8f7e78d0",
- "7f3ae3aa-c108-4045-aa3b-28da46c0990d",
- "dff5a6c5-6ffb-43bd-b5bd-c933d7a0abd5",
- "4f684893-5ba3-4b54-b335-295450dc3e72",
- "49384b3e-9a7e-403f-a671-b73a0c00696f",
- "94025178-7389-4b9b-8c1f-782b0a79bf16",
- "71018331-e4cc-40e1-97c5-6f8edaa25394",
- "dd28754d-d3c5-4957-bde7-2038ff8900e5",
- "00e73881-17ef-4459-9a0d-2ffa3f6c72ef",
- "c4564a14-fb0c-4fa2-b9be-0b6409b89915",
- "29c9f54a-8a89-4509-b130-553c2291ede8",
- "bc12695b-e6ee-4aaf-9925-d07bf09466b1",
- "8a7e3ae2-0d7c-48f5-b4bc-b46840ffc2ed",
- "3aa27b1f-b7df-467a-bfe7-6d81e59ba722",
- "ad05e20e-52b6-4883-a855-2f3430ea9ed1",
- "76a5423c-04c4-4566-8978-8e920a8c623a",
- "62cd2cea-3234-40fb-9813-f44712af540f",
- "f2af95c4-5929-4d96-a341-64218f9d90fb",
- "0f269ed5-95df-4d89-a891-f8058a24ad64",
- "8908ce10-6900-4663-80f2-5582dbcdab0e",
- "4f0adc62-a433-4812-ad2b-9ba2ba60b215",
- "de8f8781-1e12-4f14-87df-948ae0e55eb3",
- "1b5686e2-e7a6-4094-b44d-a91cb6423178",
- "557eab97-80b1-4649-8426-a0a8abc7ad08",
- "c4f3a4d4-7431-40d8-aeb5-029c212316b0",
- "65c18152-0826-4acc-bbc1-9eb754d7b149",
- "f235c3ef-8bc5-470b-98ff-97f6c9a2e540",
- "e857bc39-17d0-45bb-96b6-dd7bcfdeac1f",
- "424d79fb-5412-4c62-9c26-518b290bd627",
- "edf4e95b-669c-438a-a568-5afb0f002e58",
- "b8e19ef8-113b-4f86-8281-30f6656e0cc0",
- "af723078-ecd2-4dfa-8648-93769e203728",
- "b722e3a8-9a8c-47c3-9479-fe6fcb5ae39d",
- "8527c3aa-dd32-4055-9360-5a452d08e953",
- "6aa1ff4f-2920-44ab-bbed-f12ece875821",
- "978fee78-489b-40b6-b198-bb582538bd02",
- "69194003-e741-4964-915d-b8a84e805224",
- "320449db-fd32-405a-a1ac-b5d0a801f959",
- "116cfa9e-e96c-47e5-8674-315a8cd93e00",
- "1ac0619c-a463-4302-b895-545743550634",
- "0a8b3da0-8eaf-4457-9aa9-1683617e7eb5",
- "2b43676b-b8e8-46e3-89d9-e2b623b4796c",
- "569a5f66-aba7-40fc-b5bc-54efb28da82e",
- "806e6135-579e-41fd-b1cd-5a05d5162a01",
- "5d7ceaf1-0f26-46e2-b910-f9e88ddf514c",
- "8c250f6e-16e1-41f6-87f9-ff9987aa0f4a",
- "8ba2eab3-cfeb-44f6-b1c4-972919faeedb",
- "0b2e78d1-e4cb-46a1-90b3-5371330ff500",
- "eb657602-843b-4488-b04d-1f27f1a72854",
- "6356d69c-57af-4edf-b493-616e53d26de6",
- "55c3d05a-5896-4497-b192-69e77bc45e7a",
- "df9d0556-647b-4acd-b51d-1e134d13be2e",
- "19f4c128-b849-42a8-a0ef-2f0bc9140044",
- "0223b211-6249-48d8-ae4e-ed4e6323818a",
- "9bef1760-70de-43e4-94d1-f9768e0dbd3b",
- "3742955d-9290-4464-8cf0-cbf8f9055e6c",
- "67e6d006-f3c4-4a8f-b85b-51679e965680",
- "1fa4f148-607f-4ff7-96a5-2616425e4d14",
- "42790d83-f00a-42db-a485-c101030bfe45",
- "b6162c76-943e-4361-85f6-228c6f77525f",
- "293d37e0-7808-4bcd-a6ab-ee72c6e991c2",
- "a29e6502-8952-4043-a5c2-9b8326415659",
- "ca231126-041d-4289-bbad-c616d7a64b51",
- "9231416a-46ec-4178-a02d-8273824055a8",
- "3675185b-5116-4a0d-a29b-a54b7e436bf3",
- "c613442c-0821-4681-8bc9-a65e0578b933",
- "6ff71e5f-4ff4-4524-97fb-88b7ce8f8b29",
- "c571f97e-7483-411f-a00d-b0dfd546eed1",
- "83df91c2-ce90-4cd5-8ae3-330576dd6dea",
- "a5c4c809-eb89-4e20-a989-0ba4989779e8",
- "4d3bf484-0498-4b91-832f-400890defdf9",
- "14d0d13f-b789-43c3-81a7-d3ebb942f35c",
- "cd087048-1ab4-4411-8354-75d7ee75cc90",
- "5d27fa58-25fe-4fe0-acf5-49e7ec42019d",
- "3d9e434e-9b4b-4174-93b5-c31aadd98983",
- "d385975e-5d63-4f5c-9ec9-53792718800c",
- "296a2f2d-6f52-4461-8dbe-cb34fd6df659",
- "64a97d60-9fe2-4433-b433-ad713a160980",
- "908a2cc4-c75a-4566-ac61-6b98feafd4c8",
- "21bbe94a-52e3-4a28-a7c8-22056b824a92",
- "a2c1c751-e151-454b-87d2-09583e7da070",
- "f2cf8ef5-4c48-4e8b-98f1-38ea3bcede86",
- "a145f4b5-cac3-45fa-8634-dc2420f0bb42",
- "da60fd06-934e-4e7b-82cf-6fe10993c3f4",
- "9d004671-afb1-412f-a7cf-bc5c51029b15",
- "74dcfdba-7ed3-4da6-8d74-247c7a091d5f"
+ "9bd62fb1-efe9-4672-94af-657a23deaffc",
+ "ef48fc1e-718b-42bc-998b-64958ce7ba68",
+ "0cf82c21-258f-44ef-a643-f1ed0c9accb9",
+ "5e2b078c-fea1-4a9c-bcb5-66133c93bad5",
+ "a8222d7a-bc0d-4e4d-97de-baad8222e4dc",
+ "86753645-6595-4106-9768-1901aa531583",
+ "122e39ca-0c7b-4a1a-a308-62c3de0610cb",
+ "1f3f39fa-fc36-4365-b5dc-63c7546af5f3",
+ "acb53253-e005-4b21-93e2-c77dc967d27e",
+ "65970a72-93b1-4723-a248-d8908a9b1ba5",
+ "fccec367-5a4e-4d13-830b-ae1c021d4ca5",
+ "68a396f2-80bf-4dd8-8d99-bceef1c5dee1",
+ "99fd2506-9ca6-42fa-b45a-31b702775806",
+ "6e3c1b5a-59f6-4994-8a1c-9127dbbb77d6",
+ "c1d13afa-2af0-426e-8852-d9a1558318a8",
+ "70f1f2fd-0ba7-4319-93f6-b8232758fe8d",
+ "8f52e185-64ad-40b5-bc11-1966a53f32cf",
+ "eff83042-5937-4147-a080-488c422c102f",
+ "6839db6f-8fad-4547-b53c-d4270bf6dc95",
+ "9be0b9d6-43a0-4251-b2d8-9f2d98d6ad80",
+ "654e583d-aaf9-4d08-868c-685a1d540b2b",
+ "ddc8e975-daa3-44a6-81a9-d81787d6a77c",
+ "c784495e-005b-485b-88e4-191b1e59a2cb",
+ "6742c4ed-edd7-45aa-a67a-8908b5e71604",
+ "51fbaeac-3504-46e6-9cbb-bbba57c452dd",
+ "2a40a796-4fd0-49ae-9321-d9368349d85e",
+ "239a5569-ca70-4966-bc8c-840b824a5763",
+ "37b186e4-2a3c-4bb9-8e8b-2da002cbc4b9",
+ "ae8b1506-e856-43dd-b798-b94bf56ed133",
+ "8afd203d-cd2e-48f3-bcd9-045e693763b5",
+ "3cd56a07-ad06-4a8c-a05b-fe913fe557b9",
+ "c052b5c4-169f-4a0a-84ae-e315e73f4f8d",
+ "f43debe2-2a66-4ca5-98f7-ec8921d481d8",
+ "d347a7bb-f83e-42e0-8b7f-43f74156c521",
+ "4900c072-2e53-49cb-b970-be6b051f2bd6",
+ "a3131b98-f020-455a-b859-b3a2c43ce51e",
+ "3ba52b84-7c59-4172-8f17-eeac832c3cf9",
+ "e570db42-32d5-492a-abb4-1b8322891b55",
+ "19aa9bf5-5727-4e07-85a5-5ded73062a7c",
+ "ade3d6b4-80ab-4608-934e-f00f5d1a79cc",
+ "cf743945-7a90-4c96-b539-6b7f9dfe163f",
+ "d0593a0f-b41d-4ba0-8f66-15e79136d608",
+ "848c22b7-1eae-41f8-8a41-db57ccbfd86f",
+ "97a8e835-c8cb-46a4-be64-1f89c40cdd0e",
+ "a4b7487c-8ca2-4a42-8c39-eba72bda48e6",
+ "18022d03-f0cf-4169-856a-ac4c11c92dc0",
+ "20dbc45a-532e-4770-aacb-5a1e545c6742",
+ "2ba9c0b7-32c2-46b4-bec5-b8ccd9cd9613",
+ "df16319d-ee71-428e-a85d-165c8f179dd2",
+ "60d83e70-cd35-4883-b232-e4cdda3258f3",
+ "2f4118be-8198-4cfe-8eb6-550e5093ba10",
+ "a6e85353-a741-4f9f-b598-65a17de11d1b",
+ "ed3e934a-a704-4a1e-a300-60950f4147b7",
+ "1555adfa-1155-462e-9c57-5d5124eebe47",
+ "84ebe0b1-a19e-496d-a30a-18604b83e5b8",
+ "54ab361a-6c89-41f7-9da5-45923214d881",
+ "13eb13b8-2f26-4d03-a68a-02193e6a8dd7",
+ "fe546de6-3a27-4255-bad8-ceb5097c6573",
+ "f9b815e9-c6c7-44e2-baf3-7785f4a83c4d",
+ "ad03898b-589b-4eeb-bd3f-9a3402821221",
+ "8c22d7f1-40f8-4a41-b77c-5905378331c8",
+ "e01f686e-db2d-4ad5-8916-355ed7dc32d4",
+ "56cdd4ec-9ad5-40e5-ba94-11faf13731dd",
+ "18e9f574-045b-442e-8731-f1ddb981ca02",
+ "eeeb2ed4-832e-48d8-b2f3-4231f7b57788",
+ "19150f30-6259-429a-8b60-961bcb28b524",
+ "c5e9553b-03d5-4cbb-925d-fc7bf7c9912f",
+ "aee23b63-5ff9-432c-b5d2-c71b6064a56a",
+ "b7631907-40b8-46d8-840c-53e45750bb11",
+ "8a2c9773-ea61-4ce6-934e-86a2ae9c0e27",
+ "acb8b7f2-411f-4b34-a96f-4c21d0f9bf1f",
+ "8c481c3b-1beb-4851-b266-9bdb053fdde7",
+ "2b9f6c73-4bda-4bab-bc9f-a00fb7b55176",
+ "d5b90899-654b-4262-8263-c9ddd075747c",
+ "438c46c3-13fb-488d-a4c5-1e4b7b5a4965",
+ "70954d3d-62ae-4e5f-8b78-e05ee2f04f78",
+ "4dd4e464-5d86-4875-ad5c-782b669c86d8",
+ "5d637246-b288-46c6-80c2-f4145d6a3e87",
+ "2adca569-6cee-40e9-930b-868560712462",
+ "e0314784-cb41-44b6-82f5-4a28edb313a6",
+ "9f2325f8-51bb-4466-8d4e-9d7561752fc6",
+ "11a76a1a-a2e3-4da8-837b-a249888ea05b",
+ "0ae5cb9f-89d0-4d1e-981c-20100692616d",
+ "8e3867bc-24aa-439a-8bfe-feb0ec9b70d0",
+ "809b331b-82c0-4470-9cc9-b925a3fecb02",
+ "cfb73e92-34d6-4ef9-942e-f3383f899e5d",
+ "37e411de-7851-4e1c-b572-e6b1b533f75b",
+ "5e495da2-f966-472c-8d29-80d818e6b42b",
+ "8e90f34c-b735-4754-a84a-c9ef7e9c9e8b",
+ "927720e4-80b1-484d-9e97-5f611af1bb05",
+ "5efc5a36-814d-454a-8f8c-3e0273e48227",
+ "1a018694-4f7d-4987-aa41-6de78e6c7250",
+ "0a7e999a-602c-4203-8d53-3779cc46e3e7",
+ "0082d202-ab71-452d-9fdd-3a6ba372ddd4",
+ "45e56db9-a4c9-41b3-9a58-db5926daefd2",
+ "8fb1b1fe-78b8-4687-bdec-e4b5889a0413",
+ "ac69f775-2cb7-4baa-b637-23f3e8dd6dd1",
+ "ed90fe9c-4c78-42c7-9102-bbdde5015f95",
+ "b96e5a21-73b7-487b-9148-85ebd775b374",
+ "1faff227-b173-4ff2-83cb-5c6e2bd49ad0"
],
"affected_row_count": 304
}
diff --git a/demo/fixtures/orphaned_records/manifest.json b/demo/fixtures/orphaned_records/manifest.json
index 292e9423a..7ec95f2de 100644
--- a/demo/fixtures/orphaned_records/manifest.json
+++ b/demo/fixtures/orphaned_records/manifest.json
@@ -1,7 +1,7 @@
{
"name": "orphaned_records",
"description": "User deletion job ran before order archival",
- "created_at": "2026-02-01T22:23:35.954937Z",
+ "created_at": "2026-02-02T02:26:09.522005Z",
"simulation_period": {
"start": "2026-01-08",
"end": "2026-01-14"
@@ -48,44 +48,44 @@
],
"ground_truth": {
"affected_order_ids": [
- "da60fd06-934e-4e7b-82cf-6fe10993c3f4",
- "f48e9c02-a154-44c4-be8d-83be2adf9791",
- "088556ad-c2e2-43a9-91f5-98220167710b",
- "0a7b4efb-933f-4690-8fa4-7e38b9e2523c",
- "e894de0d-e0fd-4af6-b7a3-365d844a6bd9",
- "8a9e718b-9745-4a6b-af57-9a9a6bb44c8f",
- "69ca8fa6-715f-49ca-8f04-007cfe49d5f3",
- "3eaecfe8-8946-4ecc-81d8-c84def48094b",
- "07fb3b19-2463-4f4f-835d-e05280df408a",
- "80fd10af-17c7-4f07-9cb9-e0586fe2e87f",
- "6efb50a0-c622-4367-8b37-5de11a79ceb4",
- "811e2c5f-5b3c-43a3-9f59-193005faf910",
- "237d5d99-ce5a-4bf1-9740-3be74fe04498",
- "cb06073d-fde6-4b66-b161-864d04ecf496",
- "20770f6f-ea61-479d-93e1-7ecf877a653e",
- "4b6bcabe-c247-4f19-8fba-ee1903228987",
- "c4855fc4-21ae-4ae8-baeb-2888043f9280",
- "f20f78f1-1d54-423a-8bf0-0e62d7de5acf",
- "813b3b55-6f3a-4a11-b8e4-cab53331fb89",
- "78c816c6-35a2-4ce9-9976-d3685de02518",
- "dc27e472-458a-436f-9ca7-dc24ecbe3acc",
- "cfd19557-6df9-427c-ba68-887c909f2e41",
- "539161d8-bcce-4664-bca1-f6b150a1af09",
- "6ed313ec-ea56-4c74-9c9a-fb0717454031",
- "99bcc15c-ca5d-4656-8c5d-f949810204ea",
- "fb682c38-f874-4528-8c60-e686b02fd782",
- "27e72667-dbcf-4daf-8113-540c4607dc9c",
- "18844ca9-124a-4ffe-b857-afe371f34d59",
- "716defa3-7ad1-49ce-9d84-6f7f9e8f6ff3",
- "c084db0b-71b8-4242-8c8a-d47ea031145a",
- "b5dd14e0-1641-4664-8e77-adbbe151a8a3",
- "2c59763e-d176-4b40-a376-a26ae9f0b86b",
- "1ff349e3-8d74-4278-8424-a7d5f48e9842",
- "88b0130b-03b2-4114-891b-754529210fd4",
- "5b13771b-dd25-4afe-a23d-7cef201b87c3",
- "91c0d63f-8250-4a1a-afa2-c0ad01d4bd5b",
- "3d17bdea-8ef5-4152-acaa-22430762d7ab",
- "23dd14a0-8e5e-42cd-96c6-eadde927f469"
+ "ed90fe9c-4c78-42c7-9102-bbdde5015f95",
+ "1e08ed9b-9a8b-4ff0-b1ec-e7d8b6743a92",
+ "2b5b9226-04b1-41ec-a185-40ba085c720c",
+ "4768cffb-b740-4673-9216-b6a932521bde",
+ "3452c22e-811a-465b-aa2f-046ae68fb674",
+ "350c8353-c387-4b3a-a048-1f9e9a4a6d14",
+ "5c14f1b6-e2d8-4b73-b30e-18322d6783a7",
+ "1d80e09e-69a2-4840-8f50-77ed05e75129",
+ "ccf07bfb-ed01-49d7-9238-0bfa9c57dc57",
+ "4e23b6ef-ec84-4aab-959f-75911f8c00d4",
+ "f837445b-2ae1-48b1-a0cb-1863d0dbfaf7",
+ "8c1f1264-53fd-4e40-b1d3-1d91bbb88fa0",
+ "660a8f4e-0750-4bb0-bef8-8ad54efb401f",
+ "bc16792c-a66f-4ff5-a157-41073423e9bb",
+ "81144d4b-2e57-4fda-a6f4-d6d65dd862d6",
+ "3c4d992c-817a-440d-b9af-5f2e6d974792",
+ "6473fd1f-e7f0-441f-a348-fb6392c4adc2",
+ "b7cba48e-7a6a-44f6-839a-f28de597cb47",
+ "fab90286-19cf-445a-b9e2-3c8782a7e55d",
+ "136b18f2-fb70-4654-8499-da3735531c04",
+ "d28a745f-776d-438e-bd15-255e96f6f106",
+ "b90b44b2-4ada-4761-a802-c198409da30e",
+ "1c51b1b7-0a18-4a34-baa1-f33c6dfa45a1",
+ "9a6a55f0-681f-4801-b182-d8a6e0806138",
+ "718db26a-9ed2-413e-b5d3-41995ccc56c5",
+ "217eb098-3a9b-49bd-8009-f9fad6ed8950",
+ "39989da1-5732-4549-8f8c-f09b368b606c",
+ "c710e945-d4a3-4c8b-8e02-bd4f437e44ab",
+ "8177ba9d-90f5-4767-a80c-7c8027105cd5",
+ "170d68c7-1ae2-4a46-9e78-4f23169f5076",
+ "cfb79fda-2ffc-43ca-92a3-ebcf1e8af35b",
+ "6934cb84-4506-4705-b516-80798e921184",
+ "204e8dbf-a15a-4c50-b6a3-5a5114c563ad",
+ "272f53af-4f7b-4a3c-96ef-032da6966e63",
+ "e5a8bbe4-3f81-418b-98a0-0ebaccb0c0df",
+ "b96ed613-c0cb-4bb3-8574-1f26515caf3d",
+ "720f6cee-7c95-47d9-8adb-3634af09f7d0",
+ "136ecc1d-804d-4acd-9458-c87004b99d3a"
],
"orphaned_order_count": 38,
"deleted_user_count": 38
diff --git a/demo/fixtures/schema_drift/manifest.json b/demo/fixtures/schema_drift/manifest.json
index 283aaece7..86b5cc7fe 100644
--- a/demo/fixtures/schema_drift/manifest.json
+++ b/demo/fixtures/schema_drift/manifest.json
@@ -1,7 +1,7 @@
{
"name": "schema_drift",
"description": "New product import job inserts price as string with currency",
- "created_at": "2026-02-01T22:23:32.857208Z",
+ "created_at": "2026-02-02T02:25:59.992159Z",
"simulation_period": {
"start": "2026-01-08",
"end": "2026-01-14"
diff --git a/demo/fixtures/volume_drop/manifest.json b/demo/fixtures/volume_drop/manifest.json
index 93de0cf48..b03ebf300 100644
--- a/demo/fixtures/volume_drop/manifest.json
+++ b/demo/fixtures/volume_drop/manifest.json
@@ -1,7 +1,7 @@
{
"name": "volume_drop",
"description": "CDN misconfiguration blocked tracking pixel for EU users",
- "created_at": "2026-02-01T22:23:32.823651Z",
+ "created_at": "2026-02-02T02:25:59.874196Z",
"simulation_period": {
"start": "2026-01-08",
"end": "2026-01-14"
diff --git a/docker-compose.yml b/docker-compose.yml
index 3f50db7d7..e0f33baa3 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -174,6 +174,10 @@ services:
REDIS_HOST: redis
REDIS_PORT: 6379
LLM_MODEL: ${LLM_MODEL:-claude-sonnet-4-20250514}
+ DATAING_REPO_ROOT: /repo
+ volumes:
+ # Docker socket for container introspection (assistant tools)
+ - /var/run/docker.sock:/var/run/docker.sock:ro
depends_on:
db-migrate:
condition: service_completed_successfully
diff --git a/frontend/app/src/App.tsx b/frontend/app/src/App.tsx
index 1e52c9331..503661b29 100644
--- a/frontend/app/src/App.tsx
+++ b/frontend/app/src/App.tsx
@@ -35,6 +35,8 @@ import { UsagePage } from "@/features/usage/usage-page";
import { NotificationsPage } from "@/features/notifications";
import { AdminPage } from "@/features/admin";
import { IssueList, IssueCreate, IssueWorkspace } from "@/features/issues";
+import { AssistantWidget } from "@/features/assistant";
+import { PageContextProvider } from "@/lib/assistant/page-context";
import { JwtLoginPage } from "@/features/auth/jwt-login-page";
import { SSOLoginPage } from "@/features/auth/sso-login-page";
import { SSOCallbackPage } from "@/features/auth/sso-callback-page";
@@ -113,6 +115,7 @@ function AppWithEntitlements() {
return (
+
{/* Public routes */}
} />
@@ -252,6 +255,8 @@ function AppWithEntitlements() {
}
/>
+ {/* Assistant chat widget - bottom-right above DemoToggle */}
+
{/* CRITICAL: DO NOT REMOVE - Demo toggles for testing */}
{/* Bottom-right: Plan tiers (free/pro/enterprise) */}
@@ -262,6 +267,7 @@ function AppWithEntitlements() {
onClose={handleCloseUpgradeModal}
/>
+
);
}
diff --git a/frontend/app/src/components/error-boundary.tsx b/frontend/app/src/components/error-boundary.tsx
index 51c43a14e..d36859b5c 100644
--- a/frontend/app/src/components/error-boundary.tsx
+++ b/frontend/app/src/components/error-boundary.tsx
@@ -2,6 +2,7 @@ import { Component, ReactNode } from "react";
import { Button } from "@/components/ui/Button";
import { Card, CardContent, CardHeader, CardTitle } from "@/components/ui/Card";
import { AlertTriangle } from "lucide-react";
+import { emitReactError } from "@/lib/assistant/error-bus";
interface ErrorBoundaryProps {
children: ReactNode;
@@ -29,6 +30,7 @@ export class ErrorBoundary extends Component<
componentDidCatch(error: Error, errorInfo: React.ErrorInfo): void {
console.error("Error caught by boundary:", error, errorInfo);
+ emitReactError(error, errorInfo.componentStack ?? undefined);
this.props.onError?.(error, errorInfo);
}
diff --git a/frontend/app/src/features/assistant/AssistantMessage.tsx b/frontend/app/src/features/assistant/AssistantMessage.tsx
new file mode 100644
index 000000000..4bdb4d74c
--- /dev/null
+++ b/frontend/app/src/features/assistant/AssistantMessage.tsx
@@ -0,0 +1,132 @@
+/**
+ * Message component for assistant chat.
+ *
+ * Renders user and assistant messages with markdown support.
+ */
+
+import { useState } from "react";
+import { User, Bot, Wrench, Loader2, ThumbsUp, Check } from "lucide-react";
+import { cn } from "@/lib/utils";
+import { Button } from "@/components/ui/Button";
+import { useSubmitFeedback } from "@/lib/api/investigation-feedback";
+import type { AssistantMessage as AssistantMessageType } from "./useAssistant";
+
+interface AssistantMessageProps {
+ message: AssistantMessageType;
+ sessionInvestigationId?: string;
+}
+
+export function AssistantMessage({
+ message,
+ sessionInvestigationId,
+}: AssistantMessageProps) {
+ const isUser = message.role === "user";
+ const isAssistant = message.role === "assistant";
+ const isTool = message.role === "tool";
+
+ const [isMarkedHelpful, setIsMarkedHelpful] = useState(false);
+ const submitFeedback = useSubmitFeedback();
+
+ const handleMarkHelpful = () => {
+ submitFeedback.mutate(
+ {
+ target_type: "assistant_message",
+ target_id: message.id,
+ investigation_id: sessionInvestigationId,
+ rating: 1,
+ },
+ {
+ onSuccess: () => {
+ setIsMarkedHelpful(true);
+ },
+ },
+ );
+ };
+
+ return (
+
+ {/* Avatar */}
+
+ {isUser && }
+ {isAssistant && }
+ {isTool && }
+
+
+ {/* Content */}
+
+ {/* Role label */}
+
+
+ {isUser && "You"}
+ {isAssistant && "Assistant"}
+ {isTool && "Tool"}
+
+ {message.isStreaming && (
+
+ )}
+
+
+ {/* Message content */}
+
+ {message.content || (message.isStreaming && "...")}
+
+
+ {/* Tool calls */}
+ {message.toolCalls && message.toolCalls.length > 0 && (
+
+ {message.toolCalls.map((tool, index) => (
+
+
+ {tool.name}
+
+ ))}
+
+ )}
+
+ {/* Mark as helpful button (assistant messages only, when not streaming) */}
+ {isAssistant && !message.isStreaming && message.content && (
+
+ {isMarkedHelpful ? (
+
+
+ Marked as helpful
+
+ ) : (
+
+ )}
+
+ )}
+
+
+ );
+}
diff --git a/frontend/app/src/features/assistant/AssistantPanel.tsx b/frontend/app/src/features/assistant/AssistantPanel.tsx
new file mode 100644
index 000000000..f96f90175
--- /dev/null
+++ b/frontend/app/src/features/assistant/AssistantPanel.tsx
@@ -0,0 +1,193 @@
+/**
+ * Chat panel component for the assistant widget.
+ *
+ * Contains message history, input field, and streaming indicators.
+ */
+
+import { useState, useRef, useEffect } from "react";
+import { Send, Loader2, Plus, AlertCircle } from "lucide-react";
+import { Button } from "@/components/ui/Button";
+import { Textarea } from "@/components/ui/textarea";
+import { AssistantMessage } from "./AssistantMessage";
+import { useAssistant } from "./useAssistant";
+
+// Example placeholder questions
+const PLACEHOLDER_QUESTIONS = [
+ "Why is my container unhealthy?",
+ "What caused the null spike in orders?",
+ "Show me recent errors in the logs",
+ "Explain the schema for customers table",
+];
+
+export function AssistantPanel() {
+ const [input, setInput] = useState("");
+ const messagesEndRef = useRef(null);
+ const textareaRef = useRef(null);
+
+ const {
+ messages,
+ session,
+ isLoading,
+ isStreaming,
+ error,
+ sendMessage,
+ createSession,
+ clearSession,
+ } = useAssistant({
+ onError: (err) => console.error("Assistant error:", err),
+ });
+
+ // Auto-scroll to bottom on new messages
+ useEffect(() => {
+ messagesEndRef.current?.scrollIntoView({ behavior: "smooth" });
+ }, [messages]);
+
+ // Auto-create session if none exists
+ useEffect(() => {
+ if (!session && !isLoading) {
+ createSession();
+ }
+ }, [session, isLoading, createSession]);
+
+ const handleSubmit = async (e?: React.FormEvent) => {
+ e?.preventDefault();
+ if (!input.trim() || isStreaming) return;
+
+ const message = input;
+ setInput("");
+ await sendMessage(message);
+
+ // Reset textarea height
+ if (textareaRef.current) {
+ textareaRef.current.style.height = "auto";
+ }
+ };
+
+ const handleKeyDown = (e: React.KeyboardEvent) => {
+ if (e.key === "Enter" && !e.shiftKey) {
+ e.preventDefault();
+ handleSubmit();
+ }
+ };
+
+ // Auto-resize textarea
+ const handleInputChange = (e: React.ChangeEvent) => {
+ setInput(e.target.value);
+ e.target.style.height = "auto";
+ e.target.style.height = `${Math.min(e.target.scrollHeight, 150)}px`;
+ };
+
+ const handleQuickQuestion = (question: string) => {
+ setInput(question);
+ textareaRef.current?.focus();
+ };
+
+ return (
+
+ {/* Messages area */}
+
+ {/* Empty state */}
+ {messages.length === 0 && !isLoading && (
+
+
+
How can I help you today?
+
+ Ask about infrastructure, data issues, or investigations.
+
+
+
+ {/* Quick questions */}
+
+ {PLACEHOLDER_QUESTIONS.map((question, index) => (
+
+ ))}
+
+
+ )}
+
+ {/* Loading state */}
+ {isLoading && messages.length === 0 && (
+
+
+
+ )}
+
+ {/* Messages */}
+ {messages.map((message) => (
+
+ ))}
+
+ {/* Scroll anchor */}
+
+
+
+ {/* Error banner */}
+ {error && (
+
+
+
{error}
+
+
+ )}
+
+ {/* Input area */}
+
+
+
+ {/* New chat button */}
+
+
+
+ );
+}
diff --git a/frontend/app/src/features/assistant/AssistantWidget.tsx b/frontend/app/src/features/assistant/AssistantWidget.tsx
new file mode 100644
index 000000000..0acc01395
--- /dev/null
+++ b/frontend/app/src/features/assistant/AssistantWidget.tsx
@@ -0,0 +1,54 @@
+/**
+ * Floating chat widget for the Dataing Assistant.
+ *
+ * Renders a floating button in the bottom-right corner that opens
+ * a slide-out panel for chatting with the assistant.
+ */
+
+import { useState } from "react";
+import { MessageSquare } from "lucide-react";
+import { Button } from "@/components/ui/Button";
+import {
+ Sheet,
+ SheetContent,
+ SheetHeader,
+ SheetTitle,
+ SheetDescription,
+} from "@/components/ui/sheet";
+import { AssistantPanel } from "./AssistantPanel";
+
+export function AssistantWidget() {
+ const [open, setOpen] = useState(false);
+
+ return (
+ <>
+ {/* Floating button - bottom-right, above DemoToggle */}
+
+
+ {/* Slide-out panel */}
+
+
+
+ Dataing Assistant
+
+ Ask about infrastructure, data issues, or investigations
+
+
+
+
+
+ >
+ );
+}
diff --git a/frontend/app/src/features/assistant/index.ts b/frontend/app/src/features/assistant/index.ts
new file mode 100644
index 000000000..41779be5e
--- /dev/null
+++ b/frontend/app/src/features/assistant/index.ts
@@ -0,0 +1,15 @@
+/**
+ * Dataing Assistant feature exports.
+ *
+ * The assistant provides a chat interface for debugging infrastructure,
+ * asking about data issues, and getting help with investigations.
+ */
+
+export { AssistantWidget } from "./AssistantWidget";
+export { AssistantPanel } from "./AssistantPanel";
+export { AssistantMessage } from "./AssistantMessage";
+export { useAssistant } from "./useAssistant";
+export type {
+ AssistantMessage as AssistantMessageType,
+ AssistantSession,
+} from "./useAssistant";
diff --git a/frontend/app/src/features/assistant/useAssistant.ts b/frontend/app/src/features/assistant/useAssistant.ts
new file mode 100644
index 000000000..deb913200
--- /dev/null
+++ b/frontend/app/src/features/assistant/useAssistant.ts
@@ -0,0 +1,308 @@
+/**
+ * React hook for managing assistant chat state.
+ *
+ * Handles session management, message history, and SSE streaming.
+ */
+
+import { useState, useCallback, useRef, useEffect } from "react";
+import { useJwtAuth } from "@/lib/auth/jwt-context";
+import { assistantApi } from "@/lib/api/assistant";
+import type { MessageResponse } from "@/lib/api/assistant";
+import { usePageContext } from "@/lib/assistant/page-context";
+
+// Storage key for session persistence
+const SESSION_STORAGE_KEY = "dataing_assistant_session_id";
+
+export interface AssistantMessage {
+ id: string;
+ role: "user" | "assistant" | "system" | "tool";
+ content: string;
+ toolCalls?: { name: string; arguments: Record }[];
+ createdAt: Date;
+ isStreaming?: boolean;
+}
+
+export interface AssistantSession {
+ id: string;
+ investigationId: string;
+ parentInvestigationId?: string;
+ createdAt: Date;
+}
+
+interface UseAssistantOptions {
+ parentInvestigationId?: string;
+ onError?: (error: string) => void;
+}
+
+interface UseAssistantReturn {
+ messages: AssistantMessage[];
+ session: AssistantSession | null;
+ isLoading: boolean;
+ isStreaming: boolean;
+ error: string | null;
+ sendMessage: (content: string) => Promise;
+ createSession: (parentInvestigationId?: string) => Promise;
+ clearSession: () => void;
+}
+
+// Convert API message to our format
+function toAssistantMessage(msg: MessageResponse): AssistantMessage {
+ return {
+ id: msg.id,
+ role: msg.role as AssistantMessage["role"],
+ content: msg.content,
+ toolCalls: msg.tool_calls?.map((tc) => ({
+ name: String(tc.name || "unknown"),
+ arguments: (tc.arguments as Record) || {},
+ })),
+ createdAt: new Date(msg.created_at),
+ };
+}
+
+export function useAssistant(
+ options: UseAssistantOptions = {},
+): UseAssistantReturn {
+ const { onError } = options;
+ const { accessToken } = useJwtAuth();
+ const pageContext = usePageContext();
+ const [messages, setMessages] = useState([]);
+ const [session, setSession] = useState(null);
+ const [isLoading, setIsLoading] = useState(false);
+ const [isStreaming, setIsStreaming] = useState(false);
+ const [error, setError] = useState(null);
+ const eventSourceRef = useRef(null);
+
+ // Load session from server
+ const loadSession = useCallback(async (sessionId: string) => {
+ try {
+ setIsLoading(true);
+ const data = await assistantApi.getSession(sessionId);
+
+ setSession({
+ id: data.id,
+ investigationId: data.investigation_id,
+ parentInvestigationId: data.parent_investigation_id ?? undefined,
+ createdAt: new Date(data.created_at),
+ });
+
+ // Load messages
+ setMessages(data.messages.map(toAssistantMessage));
+ } catch (err) {
+ console.error("Failed to load session:", err);
+ localStorage.removeItem(SESSION_STORAGE_KEY);
+ } finally {
+ setIsLoading(false);
+ }
+ }, []);
+
+ // Load session from storage on mount
+ useEffect(() => {
+ const storedSessionId = localStorage.getItem(SESSION_STORAGE_KEY);
+ if (storedSessionId && accessToken) {
+ loadSession(storedSessionId);
+ }
+ }, [accessToken, loadSession]);
+
+ // Cleanup EventSource on unmount
+ useEffect(() => {
+ return () => {
+ if (eventSourceRef.current) {
+ eventSourceRef.current.close();
+ }
+ };
+ }, []);
+
+ const createSession = useCallback(
+ async (parentInvestigationId?: string) => {
+ console.log("[Assistant] createSession called");
+ try {
+ setIsLoading(true);
+ setError(null);
+
+ console.log("[Assistant] calling API...");
+ const data = await assistantApi.createSession({
+ parent_investigation_id: parentInvestigationId,
+ });
+ console.log("[Assistant] API response:", data);
+
+ const newSession: AssistantSession = {
+ id: data.session_id,
+ investigationId: data.investigation_id,
+ parentInvestigationId,
+ createdAt: new Date(data.created_at),
+ };
+
+ setSession(newSession);
+ setMessages([]);
+ localStorage.setItem(SESSION_STORAGE_KEY, newSession.id);
+ } catch (err) {
+ const message = err instanceof Error ? err.message : "Unknown error";
+ setError(message);
+ onError?.(message);
+ } finally {
+ setIsLoading(false);
+ }
+ },
+ [onError],
+ );
+
+ const sendMessage = useCallback(
+ async (content: string) => {
+ console.log("[Assistant] sendMessage called, session:", session);
+ if (!session) {
+ console.log("[Assistant] No session, returning early");
+ setError("No active session");
+ return;
+ }
+
+ try {
+ setIsStreaming(true);
+ setError(null);
+
+ // Add user message immediately
+ const userMessage: AssistantMessage = {
+ id: `temp-${Date.now()}`,
+ role: "user",
+ content,
+ createdAt: new Date(),
+ };
+ setMessages((prev) => [...prev, userMessage]);
+
+ // Send message to API
+ console.log("[Assistant] sending message to API...");
+ await assistantApi.sendMessage(session.id, {
+ content,
+ page_context: {
+ route: pageContext.route,
+ route_pattern: pageContext.routePattern,
+ route_params: pageContext.routeParams,
+ page_type: pageContext.pageType,
+ page_title: pageContext.pageTitle,
+ page_data: pageContext.pageData,
+ errors: pageContext.errors.map((e) => ({
+ type: e.type,
+ message: e.message,
+ status: e.status ?? null,
+ url: e.url ?? null,
+ timestamp: e.timestamp,
+ stack_preview: e.stackPreview ?? null,
+ })),
+ },
+ });
+ console.log("[Assistant] message sent successfully");
+
+ // Add assistant message placeholder
+ const assistantMessage: AssistantMessage = {
+ id: `streaming-${Date.now()}`,
+ role: "assistant",
+ content: "",
+ createdAt: new Date(),
+ isStreaming: true,
+ };
+ setMessages((prev) => [...prev, assistantMessage]);
+
+ // Connect to SSE stream
+ const streamUrl = assistantApi.getStreamUrl(
+ session.id,
+ accessToken || undefined,
+ );
+ const eventSource = new EventSource(streamUrl);
+ eventSourceRef.current = eventSource;
+
+ eventSource.addEventListener("text", (event: MessageEvent) => {
+ const data = JSON.parse(event.data);
+ setMessages((prev) => {
+ const updated = [...prev];
+ const lastMsg = updated[updated.length - 1];
+ if (lastMsg && lastMsg.isStreaming) {
+ lastMsg.content += data.text;
+ }
+ return updated;
+ });
+ });
+
+ eventSource.addEventListener("tool_call", (event: MessageEvent) => {
+ const data = JSON.parse(event.data);
+ setMessages((prev) => {
+ const updated = [...prev];
+ const lastMsg = updated[updated.length - 1];
+ if (lastMsg && lastMsg.isStreaming) {
+ lastMsg.toolCalls = [
+ ...(lastMsg.toolCalls || []),
+ { name: data.tool, arguments: data.arguments },
+ ];
+ }
+ return updated;
+ });
+ });
+
+ eventSource.addEventListener("complete", () => {
+ setMessages((prev) => {
+ const updated = [...prev];
+ const lastMsg = updated[updated.length - 1];
+ if (lastMsg) {
+ lastMsg.isStreaming = false;
+ }
+ return updated;
+ });
+ setIsStreaming(false);
+ eventSource.close();
+ eventSourceRef.current = null;
+ });
+
+ eventSource.addEventListener("error", (event: MessageEvent) => {
+ try {
+ const data = JSON.parse(event.data);
+ setError(data.error || "Stream error");
+ } catch {
+ setError("Stream error");
+ }
+ setIsStreaming(false);
+ eventSource.close();
+ eventSourceRef.current = null;
+
+ // Remove streaming message
+ setMessages((prev) => prev.filter((msg) => !msg.isStreaming));
+ });
+
+ eventSource.onerror = () => {
+ setError("Connection lost");
+ setIsStreaming(false);
+ eventSource.close();
+ eventSourceRef.current = null;
+ };
+ } catch (err) {
+ const message = err instanceof Error ? err.message : "Unknown error";
+ setError(message);
+ onError?.(message);
+ setIsStreaming(false);
+
+ // Remove the optimistic user message on error
+ setMessages((prev) => prev.slice(0, -1));
+ }
+ },
+ [session, accessToken, onError, pageContext],
+ );
+
+ const clearSession = useCallback(() => {
+ if (eventSourceRef.current) {
+ eventSourceRef.current.close();
+ eventSourceRef.current = null;
+ }
+ setSession(null);
+ setMessages([]);
+ setError(null);
+ localStorage.removeItem(SESSION_STORAGE_KEY);
+ }, []);
+
+ return {
+ messages,
+ session,
+ isLoading,
+ isStreaming,
+ error,
+ sendMessage,
+ createSession,
+ clearSession,
+ };
+}
diff --git a/frontend/app/src/features/dashboard/dashboard-page.tsx b/frontend/app/src/features/dashboard/dashboard-page.tsx
index ca993a778..30cfc6764 100644
--- a/frontend/app/src/features/dashboard/dashboard-page.tsx
+++ b/frontend/app/src/features/dashboard/dashboard-page.tsx
@@ -1,5 +1,7 @@
+import { useMemo } from "react";
import { useQuery } from "@tanstack/react-query";
import { Link } from "react-router-dom";
+import { useRegisterPageContext } from "@/lib/assistant/page-context";
import {
Search,
Database,
@@ -22,6 +24,22 @@ export function DashboardPage() {
queryFn: fetchDashboardStats,
});
+ useRegisterPageContext(
+ useMemo(
+ () => ({
+ pageType: "dashboard",
+ pageTitle: "Dashboard",
+ pageData: {
+ activeInvestigations: stats?.activeInvestigations ?? 0,
+ totalDatasources: stats?.dataSources ?? 0,
+ completedToday: stats?.completedToday ?? 0,
+ pendingApprovals: stats?.pendingApprovals ?? 0,
+ },
+ }),
+ [stats],
+ ),
+ );
+
return (
({
+ pageType: "dataset_detail",
+ pageTitle: dataset ? `Dataset: ${dataset.name}` : "Dataset",
+ pageData: dataset
+ ? {
+ datasetId: datasetId,
+ datasetName: dataset.name,
+ nativePath: dataset.native_path,
+ columnCount: dataset.column_count ?? (dataset.columns?.length || 0),
+ tableType: dataset.table_type,
+ datasourceType: dataset.datasource_type ?? null,
+ }
+ : { datasetId: datasetId },
+ }),
+ [datasetId, dataset],
+ );
+ useRegisterPageContext(pageCtx);
+
if (isLoading) {
return (
diff --git a/frontend/app/src/features/datasources/datasource-page.tsx b/frontend/app/src/features/datasources/datasource-page.tsx
index 8adf1a3ef..8a9777435 100644
--- a/frontend/app/src/features/datasources/datasource-page.tsx
+++ b/frontend/app/src/features/datasources/datasource-page.tsx
@@ -1,6 +1,7 @@
import * as React from "react";
import { Plus, Database, AlertCircle, RefreshCw } from "lucide-react";
import { Link } from "react-router-dom";
+import { useRegisterPageContext } from "@/lib/assistant/page-context";
import { Button } from "@/components/ui/Button";
import { PageHeader } from "@/components/shared/page-header";
@@ -15,6 +16,26 @@ export function DataSourcePage() {
const [formOpen, setFormOpen] = React.useState(false);
const { data: datasources, isLoading, error, refetch } = useDataSources();
+ const dsTypes = React.useMemo(() => {
+ if (!datasources) return [];
+ const types = new Set(datasources.map((ds) => ds.type ?? "unknown"));
+ return [...types];
+ }, [datasources]);
+
+ useRegisterPageContext(
+ React.useMemo(
+ () => ({
+ pageType: "datasource_list",
+ pageTitle: "Data Sources",
+ pageData: {
+ count: datasources?.length ?? 0,
+ types: dsTypes,
+ },
+ }),
+ [datasources, dsTypes],
+ ),
+ );
+
if (isLoading) {
return (
diff --git a/frontend/app/src/features/investigation/InvestigationDetail.tsx b/frontend/app/src/features/investigation/InvestigationDetail.tsx
index 5bc5ed6ca..71067012a 100644
--- a/frontend/app/src/features/investigation/InvestigationDetail.tsx
+++ b/frontend/app/src/features/investigation/InvestigationDetail.tsx
@@ -1,5 +1,6 @@
-import { useState, useEffect, useRef } from "react";
+import { useState, useEffect, useRef, useMemo } from "react";
import { useParams, Link } from "react-router-dom";
+import { useRegisterPageContext } from "@/lib/assistant/page-context";
import {
useInvestigation,
useSendMessage,
@@ -495,6 +496,27 @@ export function InvestigationDetail() {
}
};
+ // Register page context for assistant (must be before early returns)
+ const pageContextData = useMemo(
+ () => ({
+ pageType: "investigation_detail",
+ pageTitle: data
+ ? `Investigation: ${data.main_branch.matched_patterns?.[0] || data.status}`
+ : "Investigation",
+ pageData: data
+ ? {
+ investigationId: id,
+ status: data.status,
+ hasFindings: !!data.main_branch.synthesis,
+ evidenceCount: data.main_branch.evidence.length,
+ patternCount: data.main_branch.matched_patterns?.length ?? 0,
+ }
+ : { investigationId: id },
+ }),
+ [id, data],
+ );
+ useRegisterPageContext(pageContextData);
+
if (!id) {
return (
diff --git a/frontend/app/src/features/investigation/components/InvestigationFeedbackButtons.tsx b/frontend/app/src/features/investigation/components/InvestigationFeedbackButtons.tsx
index c6f4dbd4a..aa55657be 100644
--- a/frontend/app/src/features/investigation/components/InvestigationFeedbackButtons.tsx
+++ b/frontend/app/src/features/investigation/components/InvestigationFeedbackButtons.tsx
@@ -39,6 +39,10 @@ const REASON_OPTIONS: Record<
positive: ["Will implement", "Good advice"],
negative: ["Not applicable", "Already done"],
},
+ assistant_message: {
+ positive: ["Helpful answer", "Good suggestion"],
+ negative: ["Not relevant", "Incorrect"],
+ },
};
interface InvestigationFeedbackButtonsProps {
diff --git a/frontend/app/src/features/issues/IssueWorkspace.tsx b/frontend/app/src/features/issues/IssueWorkspace.tsx
index 6d1434583..389479e1d 100644
--- a/frontend/app/src/features/issues/IssueWorkspace.tsx
+++ b/frontend/app/src/features/issues/IssueWorkspace.tsx
@@ -1,5 +1,6 @@
-import { useState } from "react";
+import { useState, useMemo } from "react";
import { useParams, Link } from "react-router-dom";
+import { useRegisterPageContext } from "@/lib/assistant/page-context";
import type { UseQueryResult } from "@tanstack/react-query";
import {
ArrowLeft,
@@ -477,6 +478,23 @@ function IssueWorkspaceContent({ issue }: IssueWorkspaceContentProps) {
const invalidate = useInvalidateIssues();
const [isEditingStatus, setIsEditingStatus] = useState(false);
+ const pageCtx = useMemo(
+ () => ({
+ pageType: "issue_detail",
+ pageTitle: `Issue #${issue.number}: ${issue.title}`,
+ pageData: {
+ issueId: issue.id,
+ title: issue.title,
+ status: issue.status,
+ priority: issue.priority ?? null,
+ severity: issue.severity ?? null,
+ labels: issue.labels,
+ },
+ }),
+ [issue],
+ );
+ useRegisterPageContext(pageCtx);
+
const handleStatusChange = async (newStatus: string) => {
try {
await updateIssue.mutateAsync({
diff --git a/frontend/app/src/lib/api/assistant.ts b/frontend/app/src/lib/api/assistant.ts
new file mode 100644
index 000000000..ca695d8d0
--- /dev/null
+++ b/frontend/app/src/lib/api/assistant.ts
@@ -0,0 +1,68 @@
+/**
+ * API client wrapper for Dataing Assistant.
+ * Re-exports generated hooks with cleaner names.
+ */
+
+import { useQueryClient } from "@tanstack/react-query";
+import {
+ useCreateSessionApiV1AssistantSessionsPost,
+ useListSessionsApiV1AssistantSessionsGet,
+ useGetSessionApiV1AssistantSessionsSessionIdGet,
+ useDeleteSessionApiV1AssistantSessionsSessionIdDelete,
+ useSendMessageApiV1AssistantSessionsSessionIdMessagesPost,
+ useExportSessionApiV1AssistantSessionsSessionIdExportPost,
+ createSessionApiV1AssistantSessionsPost,
+ getSessionApiV1AssistantSessionsSessionIdGet,
+ sendMessageApiV1AssistantSessionsSessionIdMessagesPost,
+} from "./generated/assistant/assistant";
+
+// Re-export types from model
+export type {
+ CreateSessionRequest,
+ CreateSessionResponse,
+ ListSessionsResponse,
+ SessionDetailResponse,
+ SessionSummary,
+ SendMessageRequest,
+ SendMessageResponse,
+ MessageResponse,
+ ExportFormat,
+} from "./model";
+
+// Re-export hooks with cleaner names
+export const useCreateAssistantSession =
+ useCreateSessionApiV1AssistantSessionsPost;
+export const useAssistantSessions = useListSessionsApiV1AssistantSessionsGet;
+export const useAssistantSession =
+ useGetSessionApiV1AssistantSessionsSessionIdGet;
+export const useDeleteAssistantSession =
+ useDeleteSessionApiV1AssistantSessionsSessionIdDelete;
+export const useSendAssistantMessage =
+ useSendMessageApiV1AssistantSessionsSessionIdMessagesPost;
+export const useExportAssistantSession =
+ useExportSessionApiV1AssistantSessionsSessionIdExportPost;
+
+// Non-hook API functions for imperative use
+export const assistantApi = {
+ createSession: createSessionApiV1AssistantSessionsPost,
+ getSession: getSessionApiV1AssistantSessionsSessionIdGet,
+ sendMessage: sendMessageApiV1AssistantSessionsSessionIdMessagesPost,
+ getStreamUrl: (sessionId: string, token?: string) =>
+ `/api/v1/assistant/sessions/${sessionId}/stream${token ? `?token=${token}` : ""}`,
+};
+
+// Helper hook to invalidate assistant queries
+export function useInvalidateAssistant() {
+ const queryClient = useQueryClient();
+
+ return {
+ invalidateSessions: () =>
+ queryClient.invalidateQueries({
+ queryKey: ["/api/v1/assistant/sessions"],
+ }),
+ invalidateSession: (sessionId: string) =>
+ queryClient.invalidateQueries({
+ queryKey: [`/api/v1/assistant/sessions/${sessionId}`],
+ }),
+ };
+}
diff --git a/frontend/app/src/lib/api/client.ts b/frontend/app/src/lib/api/client.ts
index be419a1a5..f47ffef0c 100644
--- a/frontend/app/src/lib/api/client.ts
+++ b/frontend/app/src/lib/api/client.ts
@@ -1,3 +1,5 @@
+import { emitApiError } from "@/lib/assistant/error-bus";
+
// API base URL - empty for same-origin (dev), set VITE_API_URL for production
const API_BASE_URL = import.meta.env.VITE_API_URL || "";
@@ -78,11 +80,18 @@ export const customInstance = async (config: RequestConfig): Promise => {
throw new Error(messages || `HTTP error ${response.status}`);
}
- throw new Error(
+ const errorMessage =
errorData.detail?.message ||
- errorData.detail ||
- `HTTP error ${response.status}`,
- );
+ errorData.detail ||
+ `HTTP error ${response.status}`;
+
+ emitApiError({
+ message: String(errorMessage),
+ status: response.status,
+ url: url,
+ });
+
+ throw new Error(String(errorMessage));
}
return response.json();
diff --git a/frontend/app/src/lib/api/generated/assistant/assistant.ts b/frontend/app/src/lib/api/generated/assistant/assistant.ts
new file mode 100644
index 000000000..c0f55cd38
--- /dev/null
+++ b/frontend/app/src/lib/api/generated/assistant/assistant.ts
@@ -0,0 +1,920 @@
+/**
+ * Generated by orval v6.31.0 🍺
+ * Do not edit manually.
+ * dataing
+ * Autonomous Data Quality Investigation
+ * OpenAPI spec version: 2.0.0
+ */
+import { useMutation, useQuery } from "@tanstack/react-query";
+import type {
+ MutationFunction,
+ QueryFunction,
+ QueryKey,
+ UseMutationOptions,
+ UseMutationResult,
+ UseQueryOptions,
+ UseQueryResult,
+} from "@tanstack/react-query";
+import type {
+ CreateSessionRequest,
+ CreateSessionResponse,
+ DataingEntrypointsApiRoutesAssistantSendMessageRequest,
+ DataingEntrypointsApiRoutesAssistantSendMessageResponse,
+ DeleteSessionApiV1AssistantSessionsSessionIdDelete200,
+ ExportSessionApiV1AssistantSessionsSessionIdExportPost200,
+ ExportSessionApiV1AssistantSessionsSessionIdExportPostParams,
+ HTTPValidationError,
+ ListSessionsApiV1AssistantSessionsGetParams,
+ ListSessionsForInvestigationApiV1AssistantInvestigationsInvestigationIdSessionsGetParams,
+ ListSessionsResponse,
+ SessionDetailResponse,
+ StreamResponseApiV1AssistantSessionsSessionIdStreamGetParams,
+} from "../../model";
+import { customInstance } from "../../client";
+
+/**
+ * Create a new assistant session.
+
+Each session is linked to an investigation for tracking and context.
+ * @summary Create Session
+ */
+export const createSessionApiV1AssistantSessionsPost = (
+ createSessionRequest: CreateSessionRequest,
+) => {
+ return customInstance({
+ url: `/api/v1/assistant/sessions`,
+ method: "POST",
+ headers: { "Content-Type": "application/json" },
+ data: createSessionRequest,
+ });
+};
+
+export const getCreateSessionApiV1AssistantSessionsPostMutationOptions = <
+ TError = HTTPValidationError,
+ TContext = unknown,
+>(options?: {
+ mutation?: UseMutationOptions<
+ Awaited>,
+ TError,
+ { data: CreateSessionRequest },
+ TContext
+ >;
+}): UseMutationOptions<
+ Awaited>,
+ TError,
+ { data: CreateSessionRequest },
+ TContext
+> => {
+ const { mutation: mutationOptions } = options ?? {};
+
+ const mutationFn: MutationFunction<
+ Awaited>,
+ { data: CreateSessionRequest }
+ > = (props) => {
+ const { data } = props ?? {};
+
+ return createSessionApiV1AssistantSessionsPost(data);
+ };
+
+ return { mutationFn, ...mutationOptions };
+};
+
+export type CreateSessionApiV1AssistantSessionsPostMutationResult = NonNullable<
+ Awaited>
+>;
+export type CreateSessionApiV1AssistantSessionsPostMutationBody =
+ CreateSessionRequest;
+export type CreateSessionApiV1AssistantSessionsPostMutationError =
+ HTTPValidationError;
+
+/**
+ * @summary Create Session
+ */
+export const useCreateSessionApiV1AssistantSessionsPost = <
+ TError = HTTPValidationError,
+ TContext = unknown,
+>(options?: {
+ mutation?: UseMutationOptions<
+ Awaited>,
+ TError,
+ { data: CreateSessionRequest },
+ TContext
+ >;
+}): UseMutationResult<
+ Awaited>,
+ TError,
+ { data: CreateSessionRequest },
+ TContext
+> => {
+ const mutationOptions =
+ getCreateSessionApiV1AssistantSessionsPostMutationOptions(options);
+
+ return useMutation(mutationOptions);
+};
+/**
+ * List the user's assistant sessions.
+ * @summary List Sessions
+ */
+export const listSessionsApiV1AssistantSessionsGet = (
+ params?: ListSessionsApiV1AssistantSessionsGetParams,
+ signal?: AbortSignal,
+) => {
+ return customInstance({
+ url: `/api/v1/assistant/sessions`,
+ method: "GET",
+ params,
+ signal,
+ });
+};
+
+export const getListSessionsApiV1AssistantSessionsGetQueryKey = (
+ params?: ListSessionsApiV1AssistantSessionsGetParams,
+) => {
+ return [`/api/v1/assistant/sessions`, ...(params ? [params] : [])] as const;
+};
+
+export const getListSessionsApiV1AssistantSessionsGetQueryOptions = <
+ TData = Awaited>,
+ TError = HTTPValidationError,
+>(
+ params?: ListSessionsApiV1AssistantSessionsGetParams,
+ options?: {
+ query?: Partial<
+ UseQueryOptions<
+ Awaited>,
+ TError,
+ TData
+ >
+ >;
+ },
+) => {
+ const { query: queryOptions } = options ?? {};
+
+ const queryKey =
+ queryOptions?.queryKey ??
+ getListSessionsApiV1AssistantSessionsGetQueryKey(params);
+
+ const queryFn: QueryFunction<
+ Awaited>
+ > = ({ signal }) => listSessionsApiV1AssistantSessionsGet(params, signal);
+
+ return { queryKey, queryFn, ...queryOptions } as UseQueryOptions<
+ Awaited>,
+ TError,
+ TData
+ > & { queryKey: QueryKey };
+};
+
+export type ListSessionsApiV1AssistantSessionsGetQueryResult = NonNullable<
+ Awaited>
+>;
+export type ListSessionsApiV1AssistantSessionsGetQueryError =
+ HTTPValidationError;
+
+/**
+ * @summary List Sessions
+ */
+export const useListSessionsApiV1AssistantSessionsGet = <
+ TData = Awaited>,
+ TError = HTTPValidationError,
+>(
+ params?: ListSessionsApiV1AssistantSessionsGetParams,
+ options?: {
+ query?: Partial<
+ UseQueryOptions<
+ Awaited>,
+ TError,
+ TData
+ >
+ >;
+ },
+): UseQueryResult & { queryKey: QueryKey } => {
+ const queryOptions = getListSessionsApiV1AssistantSessionsGetQueryOptions(
+ params,
+ options,
+ );
+
+ const query = useQuery(queryOptions) as UseQueryResult & {
+ queryKey: QueryKey;
+ };
+
+ query.queryKey = queryOptions.queryKey;
+
+ return query;
+};
+
+/**
+ * List assistant sessions linked to an investigation.
+
+Returns sessions where the investigation is the parent.
+ * @summary List Sessions For Investigation
+ */
+export const listSessionsForInvestigationApiV1AssistantInvestigationsInvestigationIdSessionsGet =
+ (
+ investigationId: string,
+ params?: ListSessionsForInvestigationApiV1AssistantInvestigationsInvestigationIdSessionsGetParams,
+ signal?: AbortSignal,
+ ) => {
+ return customInstance({
+ url: `/api/v1/assistant/investigations/${investigationId}/sessions`,
+ method: "GET",
+ params,
+ signal,
+ });
+ };
+
+export const getListSessionsForInvestigationApiV1AssistantInvestigationsInvestigationIdSessionsGetQueryKey =
+ (
+ investigationId: string,
+ params?: ListSessionsForInvestigationApiV1AssistantInvestigationsInvestigationIdSessionsGetParams,
+ ) => {
+ return [
+ `/api/v1/assistant/investigations/${investigationId}/sessions`,
+ ...(params ? [params] : []),
+ ] as const;
+ };
+
+export const getListSessionsForInvestigationApiV1AssistantInvestigationsInvestigationIdSessionsGetQueryOptions =
+ <
+ TData = Awaited<
+ ReturnType<
+ typeof listSessionsForInvestigationApiV1AssistantInvestigationsInvestigationIdSessionsGet
+ >
+ >,
+ TError = HTTPValidationError,
+ >(
+ investigationId: string,
+ params?: ListSessionsForInvestigationApiV1AssistantInvestigationsInvestigationIdSessionsGetParams,
+ options?: {
+ query?: Partial<
+ UseQueryOptions<
+ Awaited<
+ ReturnType<
+ typeof listSessionsForInvestigationApiV1AssistantInvestigationsInvestigationIdSessionsGet
+ >
+ >,
+ TError,
+ TData
+ >
+ >;
+ },
+ ) => {
+ const { query: queryOptions } = options ?? {};
+
+ const queryKey =
+ queryOptions?.queryKey ??
+ getListSessionsForInvestigationApiV1AssistantInvestigationsInvestigationIdSessionsGetQueryKey(
+ investigationId,
+ params,
+ );
+
+ const queryFn: QueryFunction<
+ Awaited<
+ ReturnType<
+ typeof listSessionsForInvestigationApiV1AssistantInvestigationsInvestigationIdSessionsGet
+ >
+ >
+ > = ({ signal }) =>
+ listSessionsForInvestigationApiV1AssistantInvestigationsInvestigationIdSessionsGet(
+ investigationId,
+ params,
+ signal,
+ );
+
+ return {
+ queryKey,
+ queryFn,
+ enabled: !!investigationId,
+ ...queryOptions,
+ } as UseQueryOptions<
+ Awaited<
+ ReturnType<
+ typeof listSessionsForInvestigationApiV1AssistantInvestigationsInvestigationIdSessionsGet
+ >
+ >,
+ TError,
+ TData
+ > & { queryKey: QueryKey };
+ };
+
+export type ListSessionsForInvestigationApiV1AssistantInvestigationsInvestigationIdSessionsGetQueryResult =
+ NonNullable<
+ Awaited<
+ ReturnType<
+ typeof listSessionsForInvestigationApiV1AssistantInvestigationsInvestigationIdSessionsGet
+ >
+ >
+ >;
+export type ListSessionsForInvestigationApiV1AssistantInvestigationsInvestigationIdSessionsGetQueryError =
+ HTTPValidationError;
+
+/**
+ * @summary List Sessions For Investigation
+ */
+export const useListSessionsForInvestigationApiV1AssistantInvestigationsInvestigationIdSessionsGet =
+ <
+ TData = Awaited<
+ ReturnType<
+ typeof listSessionsForInvestigationApiV1AssistantInvestigationsInvestigationIdSessionsGet
+ >
+ >,
+ TError = HTTPValidationError,
+ >(
+ investigationId: string,
+ params?: ListSessionsForInvestigationApiV1AssistantInvestigationsInvestigationIdSessionsGetParams,
+ options?: {
+ query?: Partial<
+ UseQueryOptions<
+ Awaited<
+ ReturnType<
+ typeof listSessionsForInvestigationApiV1AssistantInvestigationsInvestigationIdSessionsGet
+ >
+ >,
+ TError,
+ TData
+ >
+ >;
+ },
+ ): UseQueryResult & { queryKey: QueryKey } => {
+ const queryOptions =
+ getListSessionsForInvestigationApiV1AssistantInvestigationsInvestigationIdSessionsGetQueryOptions(
+ investigationId,
+ params,
+ options,
+ );
+
+ const query = useQuery(queryOptions) as UseQueryResult & {
+ queryKey: QueryKey;
+ };
+
+ query.queryKey = queryOptions.queryKey;
+
+ return query;
+ };
+
+/**
+ * Get full session details with messages.
+ * @summary Get Session
+ */
+export const getSessionApiV1AssistantSessionsSessionIdGet = (
+ sessionId: string,
+ signal?: AbortSignal,
+) => {
+ return customInstance({
+ url: `/api/v1/assistant/sessions/${sessionId}`,
+ method: "GET",
+ signal,
+ });
+};
+
+export const getGetSessionApiV1AssistantSessionsSessionIdGetQueryKey = (
+ sessionId: string,
+) => {
+ return [`/api/v1/assistant/sessions/${sessionId}`] as const;
+};
+
+export const getGetSessionApiV1AssistantSessionsSessionIdGetQueryOptions = <
+ TData = Awaited<
+ ReturnType
+ >,
+ TError = HTTPValidationError,
+>(
+ sessionId: string,
+ options?: {
+ query?: Partial<
+ UseQueryOptions<
+ Awaited<
+ ReturnType
+ >,
+ TError,
+ TData
+ >
+ >;
+ },
+) => {
+ const { query: queryOptions } = options ?? {};
+
+ const queryKey =
+ queryOptions?.queryKey ??
+ getGetSessionApiV1AssistantSessionsSessionIdGetQueryKey(sessionId);
+
+ const queryFn: QueryFunction<
+ Awaited>
+ > = ({ signal }) =>
+ getSessionApiV1AssistantSessionsSessionIdGet(sessionId, signal);
+
+ return {
+ queryKey,
+ queryFn,
+ enabled: !!sessionId,
+ ...queryOptions,
+ } as UseQueryOptions<
+ Awaited>,
+ TError,
+ TData
+ > & { queryKey: QueryKey };
+};
+
+export type GetSessionApiV1AssistantSessionsSessionIdGetQueryResult =
+ NonNullable<
+ Awaited>
+ >;
+export type GetSessionApiV1AssistantSessionsSessionIdGetQueryError =
+ HTTPValidationError;
+
+/**
+ * @summary Get Session
+ */
+export const useGetSessionApiV1AssistantSessionsSessionIdGet = <
+ TData = Awaited<
+ ReturnType
+ >,
+ TError = HTTPValidationError,
+>(
+ sessionId: string,
+ options?: {
+ query?: Partial<
+ UseQueryOptions<
+ Awaited<
+ ReturnType
+ >,
+ TError,
+ TData
+ >
+ >;
+ },
+): UseQueryResult & { queryKey: QueryKey } => {
+ const queryOptions =
+ getGetSessionApiV1AssistantSessionsSessionIdGetQueryOptions(
+ sessionId,
+ options,
+ );
+
+ const query = useQuery(queryOptions) as UseQueryResult & {
+ queryKey: QueryKey;
+ };
+
+ query.queryKey = queryOptions.queryKey;
+
+ return query;
+};
+
+/**
+ * Delete an assistant session.
+ * @summary Delete Session
+ */
+export const deleteSessionApiV1AssistantSessionsSessionIdDelete = (
+ sessionId: string,
+) => {
+ return customInstance({
+ url: `/api/v1/assistant/sessions/${sessionId}`,
+ method: "DELETE",
+ });
+};
+
+export const getDeleteSessionApiV1AssistantSessionsSessionIdDeleteMutationOptions =
+ (options?: {
+ mutation?: UseMutationOptions<
+ Awaited<
+ ReturnType
+ >,
+ TError,
+ { sessionId: string },
+ TContext
+ >;
+ }): UseMutationOptions<
+ Awaited<
+ ReturnType
+ >,
+ TError,
+ { sessionId: string },
+ TContext
+ > => {
+ const { mutation: mutationOptions } = options ?? {};
+
+ const mutationFn: MutationFunction<
+ Awaited<
+ ReturnType
+ >,
+ { sessionId: string }
+ > = (props) => {
+ const { sessionId } = props ?? {};
+
+ return deleteSessionApiV1AssistantSessionsSessionIdDelete(sessionId);
+ };
+
+ return { mutationFn, ...mutationOptions };
+ };
+
+export type DeleteSessionApiV1AssistantSessionsSessionIdDeleteMutationResult =
+ NonNullable<
+ Awaited<
+ ReturnType
+ >
+ >;
+
+export type DeleteSessionApiV1AssistantSessionsSessionIdDeleteMutationError =
+ HTTPValidationError;
+
+/**
+ * @summary Delete Session
+ */
+export const useDeleteSessionApiV1AssistantSessionsSessionIdDelete = <
+ TError = HTTPValidationError,
+ TContext = unknown,
+>(options?: {
+ mutation?: UseMutationOptions<
+ Awaited<
+ ReturnType
+ >,
+ TError,
+ { sessionId: string },
+ TContext
+ >;
+}): UseMutationResult<
+ Awaited<
+ ReturnType
+ >,
+ TError,
+ { sessionId: string },
+ TContext
+> => {
+ const mutationOptions =
+ getDeleteSessionApiV1AssistantSessionsSessionIdDeleteMutationOptions(
+ options,
+ );
+
+ return useMutation(mutationOptions);
+};
+/**
+ * Send a message to the assistant.
+
+The response will be streamed via the /stream endpoint.
+ * @summary Send Message
+ */
+export const sendMessageApiV1AssistantSessionsSessionIdMessagesPost = (
+ sessionId: string,
+ dataingEntrypointsApiRoutesAssistantSendMessageRequest: DataingEntrypointsApiRoutesAssistantSendMessageRequest,
+) => {
+ return customInstance(
+ {
+ url: `/api/v1/assistant/sessions/${sessionId}/messages`,
+ method: "POST",
+ headers: { "Content-Type": "application/json" },
+ data: dataingEntrypointsApiRoutesAssistantSendMessageRequest,
+ },
+ );
+};
+
+export const getSendMessageApiV1AssistantSessionsSessionIdMessagesPostMutationOptions =
+ (options?: {
+ mutation?: UseMutationOptions<
+ Awaited<
+ ReturnType<
+ typeof sendMessageApiV1AssistantSessionsSessionIdMessagesPost
+ >
+ >,
+ TError,
+ {
+ sessionId: string;
+ data: DataingEntrypointsApiRoutesAssistantSendMessageRequest;
+ },
+ TContext
+ >;
+ }): UseMutationOptions<
+ Awaited<
+ ReturnType
+ >,
+ TError,
+ {
+ sessionId: string;
+ data: DataingEntrypointsApiRoutesAssistantSendMessageRequest;
+ },
+ TContext
+ > => {
+ const { mutation: mutationOptions } = options ?? {};
+
+ const mutationFn: MutationFunction<
+ Awaited<
+ ReturnType<
+ typeof sendMessageApiV1AssistantSessionsSessionIdMessagesPost
+ >
+ >,
+ {
+ sessionId: string;
+ data: DataingEntrypointsApiRoutesAssistantSendMessageRequest;
+ }
+ > = (props) => {
+ const { sessionId, data } = props ?? {};
+
+ return sendMessageApiV1AssistantSessionsSessionIdMessagesPost(
+ sessionId,
+ data,
+ );
+ };
+
+ return { mutationFn, ...mutationOptions };
+ };
+
+export type SendMessageApiV1AssistantSessionsSessionIdMessagesPostMutationResult =
+ NonNullable<
+ Awaited<
+ ReturnType
+ >
+ >;
+export type SendMessageApiV1AssistantSessionsSessionIdMessagesPostMutationBody =
+ DataingEntrypointsApiRoutesAssistantSendMessageRequest;
+export type SendMessageApiV1AssistantSessionsSessionIdMessagesPostMutationError =
+ HTTPValidationError;
+
+/**
+ * @summary Send Message
+ */
+export const useSendMessageApiV1AssistantSessionsSessionIdMessagesPost = <
+ TError = HTTPValidationError,
+ TContext = unknown,
+>(options?: {
+ mutation?: UseMutationOptions<
+ Awaited<
+ ReturnType
+ >,
+ TError,
+ {
+ sessionId: string;
+ data: DataingEntrypointsApiRoutesAssistantSendMessageRequest;
+ },
+ TContext
+ >;
+}): UseMutationResult<
+ Awaited<
+ ReturnType
+ >,
+ TError,
+ {
+ sessionId: string;
+ data: DataingEntrypointsApiRoutesAssistantSendMessageRequest;
+ },
+ TContext
+> => {
+ const mutationOptions =
+ getSendMessageApiV1AssistantSessionsSessionIdMessagesPostMutationOptions(
+ options,
+ );
+
+ return useMutation(mutationOptions);
+};
+/**
+ * Stream assistant responses via Server-Sent Events.
+
+Connect to this endpoint after sending a message to receive real-time
+updates including text chunks, tool calls, and completion status.
+ * @summary Stream Response
+ */
+export const streamResponseApiV1AssistantSessionsSessionIdStreamGet = (
+ sessionId: string,
+ params?: StreamResponseApiV1AssistantSessionsSessionIdStreamGetParams,
+ signal?: AbortSignal,
+) => {
+ return customInstance({
+ url: `/api/v1/assistant/sessions/${sessionId}/stream`,
+ method: "GET",
+ params,
+ signal,
+ });
+};
+
+export const getStreamResponseApiV1AssistantSessionsSessionIdStreamGetQueryKey =
+ (
+ sessionId: string,
+ params?: StreamResponseApiV1AssistantSessionsSessionIdStreamGetParams,
+ ) => {
+ return [
+ `/api/v1/assistant/sessions/${sessionId}/stream`,
+ ...(params ? [params] : []),
+ ] as const;
+ };
+
+export const getStreamResponseApiV1AssistantSessionsSessionIdStreamGetQueryOptions =
+ <
+ TData = Awaited<
+ ReturnType
+ >,
+ TError = HTTPValidationError,
+ >(
+ sessionId: string,
+ params?: StreamResponseApiV1AssistantSessionsSessionIdStreamGetParams,
+ options?: {
+ query?: Partial<
+ UseQueryOptions<
+ Awaited<
+ ReturnType<
+ typeof streamResponseApiV1AssistantSessionsSessionIdStreamGet
+ >
+ >,
+ TError,
+ TData
+ >
+ >;
+ },
+ ) => {
+ const { query: queryOptions } = options ?? {};
+
+ const queryKey =
+ queryOptions?.queryKey ??
+ getStreamResponseApiV1AssistantSessionsSessionIdStreamGetQueryKey(
+ sessionId,
+ params,
+ );
+
+ const queryFn: QueryFunction<
+ Awaited<
+ ReturnType<
+ typeof streamResponseApiV1AssistantSessionsSessionIdStreamGet
+ >
+ >
+ > = ({ signal }) =>
+ streamResponseApiV1AssistantSessionsSessionIdStreamGet(
+ sessionId,
+ params,
+ signal,
+ );
+
+ return {
+ queryKey,
+ queryFn,
+ enabled: !!sessionId,
+ ...queryOptions,
+ } as UseQueryOptions<
+ Awaited<
+ ReturnType<
+ typeof streamResponseApiV1AssistantSessionsSessionIdStreamGet
+ >
+ >,
+ TError,
+ TData
+ > & { queryKey: QueryKey };
+ };
+
+export type StreamResponseApiV1AssistantSessionsSessionIdStreamGetQueryResult =
+ NonNullable<
+ Awaited<
+ ReturnType
+ >
+ >;
+export type StreamResponseApiV1AssistantSessionsSessionIdStreamGetQueryError =
+ HTTPValidationError;
+
+/**
+ * @summary Stream Response
+ */
+export const useStreamResponseApiV1AssistantSessionsSessionIdStreamGet = <
+ TData = Awaited<
+ ReturnType
+ >,
+ TError = HTTPValidationError,
+>(
+ sessionId: string,
+ params?: StreamResponseApiV1AssistantSessionsSessionIdStreamGetParams,
+ options?: {
+ query?: Partial<
+ UseQueryOptions<
+ Awaited<
+ ReturnType<
+ typeof streamResponseApiV1AssistantSessionsSessionIdStreamGet
+ >
+ >,
+ TError,
+ TData
+ >
+ >;
+ },
+): UseQueryResult & { queryKey: QueryKey } => {
+ const queryOptions =
+ getStreamResponseApiV1AssistantSessionsSessionIdStreamGetQueryOptions(
+ sessionId,
+ params,
+ options,
+ );
+
+ const query = useQuery(queryOptions) as UseQueryResult & {
+ queryKey: QueryKey;
+ };
+
+ query.queryKey = queryOptions.queryKey;
+
+ return query;
+};
+
+/**
+ * Export a session as JSON or Markdown.
+ * @summary Export Session
+ */
+export const exportSessionApiV1AssistantSessionsSessionIdExportPost = (
+ sessionId: string,
+ params?: ExportSessionApiV1AssistantSessionsSessionIdExportPostParams,
+) => {
+ return customInstance(
+ {
+ url: `/api/v1/assistant/sessions/${sessionId}/export`,
+ method: "POST",
+ params,
+ },
+ );
+};
+
+export const getExportSessionApiV1AssistantSessionsSessionIdExportPostMutationOptions =
+ (options?: {
+ mutation?: UseMutationOptions<
+ Awaited<
+ ReturnType<
+ typeof exportSessionApiV1AssistantSessionsSessionIdExportPost
+ >
+ >,
+ TError,
+ {
+ sessionId: string;
+ params?: ExportSessionApiV1AssistantSessionsSessionIdExportPostParams;
+ },
+ TContext
+ >;
+ }): UseMutationOptions<
+ Awaited<
+ ReturnType
+ >,
+ TError,
+ {
+ sessionId: string;
+ params?: ExportSessionApiV1AssistantSessionsSessionIdExportPostParams;
+ },
+ TContext
+ > => {
+ const { mutation: mutationOptions } = options ?? {};
+
+ const mutationFn: MutationFunction<
+ Awaited<
+ ReturnType<
+ typeof exportSessionApiV1AssistantSessionsSessionIdExportPost
+ >
+ >,
+ {
+ sessionId: string;
+ params?: ExportSessionApiV1AssistantSessionsSessionIdExportPostParams;
+ }
+ > = (props) => {
+ const { sessionId, params } = props ?? {};
+
+ return exportSessionApiV1AssistantSessionsSessionIdExportPost(
+ sessionId,
+ params,
+ );
+ };
+
+ return { mutationFn, ...mutationOptions };
+ };
+
+export type ExportSessionApiV1AssistantSessionsSessionIdExportPostMutationResult =
+ NonNullable<
+ Awaited<
+ ReturnType
+ >
+ >;
+
+export type ExportSessionApiV1AssistantSessionsSessionIdExportPostMutationError =
+ HTTPValidationError;
+
+/**
+ * @summary Export Session
+ */
+export const useExportSessionApiV1AssistantSessionsSessionIdExportPost = <
+ TError = HTTPValidationError,
+ TContext = unknown,
+>(options?: {
+ mutation?: UseMutationOptions<
+ Awaited<
+ ReturnType
+ >,
+ TError,
+ {
+ sessionId: string;
+ params?: ExportSessionApiV1AssistantSessionsSessionIdExportPostParams;
+ },
+ TContext
+ >;
+}): UseMutationResult<
+ Awaited<
+ ReturnType
+ >,
+ TError,
+ {
+ sessionId: string;
+ params?: ExportSessionApiV1AssistantSessionsSessionIdExportPostParams;
+ },
+ TContext
+> => {
+ const mutationOptions =
+ getExportSessionApiV1AssistantSessionsSessionIdExportPostMutationOptions(
+ options,
+ );
+
+ return useMutation(mutationOptions);
+};
diff --git a/frontend/app/src/lib/api/generated/git/git.ts b/frontend/app/src/lib/api/generated/git/git.ts
new file mode 100644
index 000000000..44a42796d
--- /dev/null
+++ b/frontend/app/src/lib/api/generated/git/git.ts
@@ -0,0 +1,728 @@
+/**
+ * Generated by orval v6.31.0 🍺
+ * Do not edit manually.
+ * dataing
+ * Autonomous Data Quality Investigation
+ * OpenAPI spec version: 2.0.0
+ */
+import { useMutation, useQuery } from "@tanstack/react-query";
+import type {
+ MutationFunction,
+ QueryFunction,
+ QueryKey,
+ UseMutationOptions,
+ UseMutationResult,
+ UseQueryOptions,
+ UseQueryResult,
+} from "@tanstack/react-query";
+import type {
+ CodeChangeListResponse,
+ ConnectGitRepoRequest,
+ DeleteGitRepoApiV1GitReposRepoIdDelete200,
+ FindChangesByAssetApiV1GitChangesByAssetGetParams,
+ GitRepoListResponse,
+ GitRepoResponse,
+ HTTPValidationError,
+ ListGitReposApiV1GitReposGetParams,
+ ListRepoChangesApiV1GitReposRepoIdChangesGetParams,
+ SyncTriggerResponse,
+ UpdateGitRepoRequest,
+} from "../../model";
+import { customInstance } from "../../client";
+
+/**
+ * Connect a new git repository for pipeline change tracking.
+ * @summary Connect Git Repo
+ */
+export const connectGitRepoApiV1GitReposPost = (
+ connectGitRepoRequest: ConnectGitRepoRequest,
+) => {
+ return customInstance({
+ url: `/api/v1/git/repos`,
+ method: "POST",
+ headers: { "Content-Type": "application/json" },
+ data: connectGitRepoRequest,
+ });
+};
+
+export const getConnectGitRepoApiV1GitReposPostMutationOptions = <
+ TError = HTTPValidationError,
+ TContext = unknown,
+>(options?: {
+ mutation?: UseMutationOptions<
+ Awaited>,
+ TError,
+ { data: ConnectGitRepoRequest },
+ TContext
+ >;
+}): UseMutationOptions<
+ Awaited>,
+ TError,
+ { data: ConnectGitRepoRequest },
+ TContext
+> => {
+ const { mutation: mutationOptions } = options ?? {};
+
+ const mutationFn: MutationFunction<
+ Awaited>,
+ { data: ConnectGitRepoRequest }
+ > = (props) => {
+ const { data } = props ?? {};
+
+ return connectGitRepoApiV1GitReposPost(data);
+ };
+
+ return { mutationFn, ...mutationOptions };
+};
+
+export type ConnectGitRepoApiV1GitReposPostMutationResult = NonNullable<
+ Awaited>
+>;
+export type ConnectGitRepoApiV1GitReposPostMutationBody = ConnectGitRepoRequest;
+export type ConnectGitRepoApiV1GitReposPostMutationError = HTTPValidationError;
+
+/**
+ * @summary Connect Git Repo
+ */
+export const useConnectGitRepoApiV1GitReposPost = <
+ TError = HTTPValidationError,
+ TContext = unknown,
+>(options?: {
+ mutation?: UseMutationOptions<
+ Awaited>,
+ TError,
+ { data: ConnectGitRepoRequest },
+ TContext
+ >;
+}): UseMutationResult<
+ Awaited>,
+ TError,
+ { data: ConnectGitRepoRequest },
+ TContext
+> => {
+ const mutationOptions =
+ getConnectGitRepoApiV1GitReposPostMutationOptions(options);
+
+ return useMutation(mutationOptions);
+};
+/**
+ * List connected git repositories.
+ * @summary List Git Repos
+ */
+export const listGitReposApiV1GitReposGet = (
+ params?: ListGitReposApiV1GitReposGetParams,
+ signal?: AbortSignal,
+) => {
+ return customInstance({
+ url: `/api/v1/git/repos`,
+ method: "GET",
+ params,
+ signal,
+ });
+};
+
+export const getListGitReposApiV1GitReposGetQueryKey = (
+ params?: ListGitReposApiV1GitReposGetParams,
+) => {
+ return [`/api/v1/git/repos`, ...(params ? [params] : [])] as const;
+};
+
+export const getListGitReposApiV1GitReposGetQueryOptions = <
+ TData = Awaited>,
+ TError = HTTPValidationError,
+>(
+ params?: ListGitReposApiV1GitReposGetParams,
+ options?: {
+ query?: Partial<
+ UseQueryOptions<
+ Awaited>,
+ TError,
+ TData
+ >
+ >;
+ },
+) => {
+ const { query: queryOptions } = options ?? {};
+
+ const queryKey =
+ queryOptions?.queryKey ?? getListGitReposApiV1GitReposGetQueryKey(params);
+
+ const queryFn: QueryFunction<
+ Awaited>
+ > = ({ signal }) => listGitReposApiV1GitReposGet(params, signal);
+
+ return { queryKey, queryFn, ...queryOptions } as UseQueryOptions<
+ Awaited>,
+ TError,
+ TData
+ > & { queryKey: QueryKey };
+};
+
+export type ListGitReposApiV1GitReposGetQueryResult = NonNullable<
+ Awaited>
+>;
+export type ListGitReposApiV1GitReposGetQueryError = HTTPValidationError;
+
+/**
+ * @summary List Git Repos
+ */
+export const useListGitReposApiV1GitReposGet = <
+ TData = Awaited>,
+ TError = HTTPValidationError,
+>(
+ params?: ListGitReposApiV1GitReposGetParams,
+ options?: {
+ query?: Partial<
+ UseQueryOptions<
+ Awaited>,
+ TError,
+ TData
+ >
+ >;
+ },
+): UseQueryResult & { queryKey: QueryKey } => {
+ const queryOptions = getListGitReposApiV1GitReposGetQueryOptions(
+ params,
+ options,
+ );
+
+ const query = useQuery(queryOptions) as UseQueryResult & {
+ queryKey: QueryKey;
+ };
+
+ query.queryKey = queryOptions.queryKey;
+
+ return query;
+};
+
+/**
+ * Get a single git repository by ID.
+ * @summary Get Git Repo
+ */
+export const getGitRepoApiV1GitReposRepoIdGet = (
+ repoId: string,
+ signal?: AbortSignal,
+) => {
+ return customInstance({
+ url: `/api/v1/git/repos/${repoId}`,
+ method: "GET",
+ signal,
+ });
+};
+
+export const getGetGitRepoApiV1GitReposRepoIdGetQueryKey = (repoId: string) => {
+ return [`/api/v1/git/repos/${repoId}`] as const;
+};
+
+export const getGetGitRepoApiV1GitReposRepoIdGetQueryOptions = <
+ TData = Awaited>,
+ TError = HTTPValidationError,
+>(
+ repoId: string,
+ options?: {
+ query?: Partial<
+ UseQueryOptions<
+ Awaited>,
+ TError,
+ TData
+ >
+ >;
+ },
+) => {
+ const { query: queryOptions } = options ?? {};
+
+ const queryKey =
+ queryOptions?.queryKey ??
+ getGetGitRepoApiV1GitReposRepoIdGetQueryKey(repoId);
+
+ const queryFn: QueryFunction<
+ Awaited>
+ > = ({ signal }) => getGitRepoApiV1GitReposRepoIdGet(repoId, signal);
+
+ return {
+ queryKey,
+ queryFn,
+ enabled: !!repoId,
+ ...queryOptions,
+ } as UseQueryOptions<
+ Awaited>,
+ TError,
+ TData
+ > & { queryKey: QueryKey };
+};
+
+export type GetGitRepoApiV1GitReposRepoIdGetQueryResult = NonNullable<
+ Awaited>
+>;
+export type GetGitRepoApiV1GitReposRepoIdGetQueryError = HTTPValidationError;
+
+/**
+ * @summary Get Git Repo
+ */
+export const useGetGitRepoApiV1GitReposRepoIdGet = <
+ TData = Awaited>,
+ TError = HTTPValidationError,
+>(
+ repoId: string,
+ options?: {
+ query?: Partial<
+ UseQueryOptions<
+ Awaited>,
+ TError,
+ TData
+ >
+ >;
+ },
+): UseQueryResult & { queryKey: QueryKey } => {
+ const queryOptions = getGetGitRepoApiV1GitReposRepoIdGetQueryOptions(
+ repoId,
+ options,
+ );
+
+ const query = useQuery(queryOptions) as UseQueryResult & {
+ queryKey: QueryKey;
+ };
+
+ query.queryKey = queryOptions.queryKey;
+
+ return query;
+};
+
+/**
+ * Update a git repository's settings.
+ * @summary Update Git Repo
+ */
+export const updateGitRepoApiV1GitReposRepoIdPut = (
+ repoId: string,
+ updateGitRepoRequest: UpdateGitRepoRequest,
+) => {
+ return customInstance({
+ url: `/api/v1/git/repos/${repoId}`,
+ method: "PUT",
+ headers: { "Content-Type": "application/json" },
+ data: updateGitRepoRequest,
+ });
+};
+
+export const getUpdateGitRepoApiV1GitReposRepoIdPutMutationOptions = <
+ TError = HTTPValidationError,
+ TContext = unknown,
+>(options?: {
+ mutation?: UseMutationOptions<
+ Awaited>,
+ TError,
+ { repoId: string; data: UpdateGitRepoRequest },
+ TContext
+ >;
+}): UseMutationOptions<
+ Awaited>,
+ TError,
+ { repoId: string; data: UpdateGitRepoRequest },
+ TContext
+> => {
+ const { mutation: mutationOptions } = options ?? {};
+
+ const mutationFn: MutationFunction<
+ Awaited>,
+ { repoId: string; data: UpdateGitRepoRequest }
+ > = (props) => {
+ const { repoId, data } = props ?? {};
+
+ return updateGitRepoApiV1GitReposRepoIdPut(repoId, data);
+ };
+
+ return { mutationFn, ...mutationOptions };
+};
+
+export type UpdateGitRepoApiV1GitReposRepoIdPutMutationResult = NonNullable<
+ Awaited>
+>;
+export type UpdateGitRepoApiV1GitReposRepoIdPutMutationBody =
+ UpdateGitRepoRequest;
+export type UpdateGitRepoApiV1GitReposRepoIdPutMutationError =
+ HTTPValidationError;
+
+/**
+ * @summary Update Git Repo
+ */
+export const useUpdateGitRepoApiV1GitReposRepoIdPut = <
+ TError = HTTPValidationError,
+ TContext = unknown,
+>(options?: {
+ mutation?: UseMutationOptions<
+ Awaited>,
+ TError,
+ { repoId: string; data: UpdateGitRepoRequest },
+ TContext
+ >;
+}): UseMutationResult<
+ Awaited>,
+ TError,
+ { repoId: string; data: UpdateGitRepoRequest },
+ TContext
+> => {
+ const mutationOptions =
+ getUpdateGitRepoApiV1GitReposRepoIdPutMutationOptions(options);
+
+ return useMutation(mutationOptions);
+};
+/**
+ * Disconnect a git repository (cascades to code_changes).
+ * @summary Delete Git Repo
+ */
+export const deleteGitRepoApiV1GitReposRepoIdDelete = (repoId: string) => {
+ return customInstance({
+ url: `/api/v1/git/repos/${repoId}`,
+ method: "DELETE",
+ });
+};
+
+export const getDeleteGitRepoApiV1GitReposRepoIdDeleteMutationOptions = <
+ TError = HTTPValidationError,
+ TContext = unknown,
+>(options?: {
+ mutation?: UseMutationOptions<
+ Awaited>,
+ TError,
+ { repoId: string },
+ TContext
+ >;
+}): UseMutationOptions<
+ Awaited>,
+ TError,
+ { repoId: string },
+ TContext
+> => {
+ const { mutation: mutationOptions } = options ?? {};
+
+ const mutationFn: MutationFunction<
+ Awaited>,
+ { repoId: string }
+ > = (props) => {
+ const { repoId } = props ?? {};
+
+ return deleteGitRepoApiV1GitReposRepoIdDelete(repoId);
+ };
+
+ return { mutationFn, ...mutationOptions };
+};
+
+export type DeleteGitRepoApiV1GitReposRepoIdDeleteMutationResult = NonNullable<
+ Awaited>
+>;
+
+export type DeleteGitRepoApiV1GitReposRepoIdDeleteMutationError =
+ HTTPValidationError;
+
+/**
+ * @summary Delete Git Repo
+ */
+export const useDeleteGitRepoApiV1GitReposRepoIdDelete = <
+ TError = HTTPValidationError,
+ TContext = unknown,
+>(options?: {
+ mutation?: UseMutationOptions<
+ Awaited>,
+ TError,
+ { repoId: string },
+ TContext
+ >;
+}): UseMutationResult<
+ Awaited>,
+ TError,
+ { repoId: string },
+ TContext
+> => {
+ const mutationOptions =
+ getDeleteGitRepoApiV1GitReposRepoIdDeleteMutationOptions(options);
+
+ return useMutation(mutationOptions);
+};
+/**
+ * Trigger an immediate sync for a repository.
+
+Returns 202 Accepted; sync runs in the background.
+ * @summary Trigger Sync
+ */
+export const triggerSyncApiV1GitReposRepoIdSyncPost = (repoId: string) => {
+ return customInstance({
+ url: `/api/v1/git/repos/${repoId}/sync`,
+ method: "POST",
+ });
+};
+
+export const getTriggerSyncApiV1GitReposRepoIdSyncPostMutationOptions = <
+ TError = HTTPValidationError,
+ TContext = unknown,
+>(options?: {
+ mutation?: UseMutationOptions<
+ Awaited>,
+ TError,
+ { repoId: string },
+ TContext
+ >;
+}): UseMutationOptions<
+ Awaited>,
+ TError,
+ { repoId: string },
+ TContext
+> => {
+ const { mutation: mutationOptions } = options ?? {};
+
+ const mutationFn: MutationFunction<
+ Awaited>,
+ { repoId: string }
+ > = (props) => {
+ const { repoId } = props ?? {};
+
+ return triggerSyncApiV1GitReposRepoIdSyncPost(repoId);
+ };
+
+ return { mutationFn, ...mutationOptions };
+};
+
+export type TriggerSyncApiV1GitReposRepoIdSyncPostMutationResult = NonNullable<
+ Awaited>
+>;
+
+export type TriggerSyncApiV1GitReposRepoIdSyncPostMutationError =
+ HTTPValidationError;
+
+/**
+ * @summary Trigger Sync
+ */
+export const useTriggerSyncApiV1GitReposRepoIdSyncPost = <
+ TError = HTTPValidationError,
+ TContext = unknown,
+>(options?: {
+ mutation?: UseMutationOptions<
+ Awaited>,
+ TError,
+ { repoId: string },
+ TContext
+ >;
+}): UseMutationResult<
+ Awaited>,
+ TError,
+ { repoId: string },
+ TContext
+> => {
+ const mutationOptions =
+ getTriggerSyncApiV1GitReposRepoIdSyncPostMutationOptions(options);
+
+ return useMutation(mutationOptions);
+};
+/**
+ * List code changes for a repository with optional time range filter.
+ * @summary List Repo Changes
+ */
+export const listRepoChangesApiV1GitReposRepoIdChangesGet = (
+ repoId: string,
+ params?: ListRepoChangesApiV1GitReposRepoIdChangesGetParams,
+ signal?: AbortSignal,
+) => {
+ return customInstance({
+ url: `/api/v1/git/repos/${repoId}/changes`,
+ method: "GET",
+ params,
+ signal,
+ });
+};
+
+export const getListRepoChangesApiV1GitReposRepoIdChangesGetQueryKey = (
+ repoId: string,
+ params?: ListRepoChangesApiV1GitReposRepoIdChangesGetParams,
+) => {
+ return [
+ `/api/v1/git/repos/${repoId}/changes`,
+ ...(params ? [params] : []),
+ ] as const;
+};
+
+export const getListRepoChangesApiV1GitReposRepoIdChangesGetQueryOptions = <
+ TData = Awaited<
+ ReturnType
+ >,
+ TError = HTTPValidationError,
+>(
+ repoId: string,
+ params?: ListRepoChangesApiV1GitReposRepoIdChangesGetParams,
+ options?: {
+ query?: Partial<
+ UseQueryOptions<
+ Awaited<
+ ReturnType
+ >,
+ TError,
+ TData
+ >
+ >;
+ },
+) => {
+ const { query: queryOptions } = options ?? {};
+
+ const queryKey =
+ queryOptions?.queryKey ??
+ getListRepoChangesApiV1GitReposRepoIdChangesGetQueryKey(repoId, params);
+
+ const queryFn: QueryFunction<
+ Awaited>
+ > = ({ signal }) =>
+ listRepoChangesApiV1GitReposRepoIdChangesGet(repoId, params, signal);
+
+ return {
+ queryKey,
+ queryFn,
+ enabled: !!repoId,
+ ...queryOptions,
+ } as UseQueryOptions<
+ Awaited>,
+ TError,
+ TData
+ > & { queryKey: QueryKey };
+};
+
+export type ListRepoChangesApiV1GitReposRepoIdChangesGetQueryResult =
+ NonNullable<
+ Awaited>
+ >;
+export type ListRepoChangesApiV1GitReposRepoIdChangesGetQueryError =
+ HTTPValidationError;
+
+/**
+ * @summary List Repo Changes
+ */
+export const useListRepoChangesApiV1GitReposRepoIdChangesGet = <
+ TData = Awaited<
+ ReturnType
+ >,
+ TError = HTTPValidationError,
+>(
+ repoId: string,
+ params?: ListRepoChangesApiV1GitReposRepoIdChangesGetParams,
+ options?: {
+ query?: Partial<
+ UseQueryOptions<
+ Awaited<
+ ReturnType
+ >,
+ TError,
+ TData
+ >
+ >;
+ },
+): UseQueryResult & { queryKey: QueryKey } => {
+ const queryOptions =
+ getListRepoChangesApiV1GitReposRepoIdChangesGetQueryOptions(
+ repoId,
+ params,
+ options,
+ );
+
+ const query = useQuery(queryOptions) as UseQueryResult & {
+ queryKey: QueryKey;
+ };
+
+ query.queryKey = queryOptions.queryKey;
+
+ return query;
+};
+
+/**
+ * Find code changes affecting a given asset across all tenant repos.
+
+This endpoint is called by the investigation agent to correlate
+data anomalies with code changes.
+ * @summary Find Changes By Asset
+ */
+export const findChangesByAssetApiV1GitChangesByAssetGet = (
+ params: FindChangesByAssetApiV1GitChangesByAssetGetParams,
+ signal?: AbortSignal,
+) => {
+ return customInstance({
+ url: `/api/v1/git/changes/by-asset`,
+ method: "GET",
+ params,
+ signal,
+ });
+};
+
+export const getFindChangesByAssetApiV1GitChangesByAssetGetQueryKey = (
+ params: FindChangesByAssetApiV1GitChangesByAssetGetParams,
+) => {
+ return [`/api/v1/git/changes/by-asset`, ...(params ? [params] : [])] as const;
+};
+
+export const getFindChangesByAssetApiV1GitChangesByAssetGetQueryOptions = <
+ TData = Awaited<
+ ReturnType
+ >,
+ TError = HTTPValidationError,
+>(
+ params: FindChangesByAssetApiV1GitChangesByAssetGetParams,
+ options?: {
+ query?: Partial<
+ UseQueryOptions<
+ Awaited>,
+ TError,
+ TData
+ >
+ >;
+ },
+) => {
+ const { query: queryOptions } = options ?? {};
+
+ const queryKey =
+ queryOptions?.queryKey ??
+ getFindChangesByAssetApiV1GitChangesByAssetGetQueryKey(params);
+
+ const queryFn: QueryFunction<
+ Awaited>
+ > = ({ signal }) =>
+ findChangesByAssetApiV1GitChangesByAssetGet(params, signal);
+
+ return { queryKey, queryFn, ...queryOptions } as UseQueryOptions<
+ Awaited>,
+ TError,
+ TData
+ > & { queryKey: QueryKey };
+};
+
+export type FindChangesByAssetApiV1GitChangesByAssetGetQueryResult =
+ NonNullable<
+ Awaited>
+ >;
+export type FindChangesByAssetApiV1GitChangesByAssetGetQueryError =
+ HTTPValidationError;
+
+/**
+ * @summary Find Changes By Asset
+ */
+export const useFindChangesByAssetApiV1GitChangesByAssetGet = <
+ TData = Awaited<
+ ReturnType
+ >,
+ TError = HTTPValidationError,
+>(
+ params: FindChangesByAssetApiV1GitChangesByAssetGetParams,
+ options?: {
+ query?: Partial<
+ UseQueryOptions<
+ Awaited>,
+ TError,
+ TData
+ >
+ >;
+ },
+): UseQueryResult & { queryKey: QueryKey } => {
+ const queryOptions =
+ getFindChangesByAssetApiV1GitChangesByAssetGetQueryOptions(params, options);
+
+ const query = useQuery(queryOptions) as UseQueryResult & {
+ queryKey: QueryKey;
+ };
+
+ query.queryKey = queryOptions.queryKey;
+
+ return query;
+};
diff --git a/frontend/app/src/lib/api/generated/investigations/investigations.ts b/frontend/app/src/lib/api/generated/investigations/investigations.ts
index 28797ff54..3c28e1c3f 100644
--- a/frontend/app/src/lib/api/generated/investigations/investigations.ts
+++ b/frontend/app/src/lib/api/generated/investigations/investigations.ts
@@ -16,18 +16,32 @@ import type {
UseQueryResult,
} from "@tanstack/react-query";
import type {
+ AdoptTestApiV1InvestigationsTestsAdoptPost200,
+ BodyImportSnapshotArchiveApiV1InvestigationsImportPost,
CancelInvestigationResponse,
ChainVerificationResponse,
+ CodifyRequest,
+ CodifyResponse,
+ GetRecentCatchesApiV1InvestigationsTestsCatchesGetParams,
+ GetTestTrackingStatsApiV1InvestigationsTestsStatsGetParams,
HTTPValidationError,
+ ImportSnapshotResponse,
InvestigationListItem,
InvestigationStateResponse,
+ RecentCatchResponse,
+ RecordTestRunApiV1InvestigationsTestsRunPost200,
SendMessageRequest,
SendMessageResponse,
SendUserInputApiV1InvestigationsInvestigationIdInputPost200,
+ SnapshotCheckpointParam,
+ SnapshotListResponse,
StartInvestigationRequest,
StartInvestigationResponse,
StreamEventsApiV1InvestigationsInvestigationIdEventsGetParams,
TemporalStatusResponse,
+ TestAdoptionRequest,
+ TestRunResultRequest,
+ TestTrackingStatsResponse,
UserInputRequest,
} from "../../model";
import { customInstance } from "../../client";
@@ -602,53 +616,57 @@ export const useVerifyInvestigationApiV1InvestigationsInvestigationIdVerifyGet =
};
/**
- * Send a message to an investigation via Temporal signal.
+ * Generate regression tests from an investigation's synthesis.
+
+Extracts testable assertions from the investigation synthesis and renders
+them to the specified format (Great Expectations, dbt, Soda, or SQL).
Args:
investigation_id: UUID of the investigation.
- request: The message request.
+ request: Codify request with output format.
auth: Authentication context from API key/JWT.
+ db: Application database for test tracking.
temporal_client: Temporal client for durable execution.
Returns:
- SendMessageResponse with status.
+ CodifyResponse with rendered test content.
Raises:
- HTTPException: If failed to send message.
- * @summary Send Message
+ HTTPException: If investigation not found or no synthesis available.
+ * @summary Codify Investigation
*/
-export const sendMessageApiV1InvestigationsInvestigationIdMessagesPost = (
+export const codifyInvestigationApiV1InvestigationsInvestigationIdCodifyPost = (
investigationId: string,
- sendMessageRequest: SendMessageRequest,
+ codifyRequest: CodifyRequest,
) => {
- return customInstance({
- url: `/api/v1/investigations/${investigationId}/messages`,
+ return customInstance({
+ url: `/api/v1/investigations/${investigationId}/codify`,
method: "POST",
headers: { "Content-Type": "application/json" },
- data: sendMessageRequest,
+ data: codifyRequest,
});
};
-export const getSendMessageApiV1InvestigationsInvestigationIdMessagesPostMutationOptions =
+export const getCodifyInvestigationApiV1InvestigationsInvestigationIdCodifyPostMutationOptions =
(options?: {
mutation?: UseMutationOptions<
Awaited<
ReturnType<
- typeof sendMessageApiV1InvestigationsInvestigationIdMessagesPost
+ typeof codifyInvestigationApiV1InvestigationsInvestigationIdCodifyPost
>
>,
TError,
- { investigationId: string; data: SendMessageRequest },
+ { investigationId: string; data: CodifyRequest },
TContext
>;
}): UseMutationOptions<
Awaited<
ReturnType<
- typeof sendMessageApiV1InvestigationsInvestigationIdMessagesPost
+ typeof codifyInvestigationApiV1InvestigationsInvestigationIdCodifyPost
>
>,
TError,
- { investigationId: string; data: SendMessageRequest },
+ { investigationId: string; data: CodifyRequest },
TContext
> => {
const { mutation: mutationOptions } = options ?? {};
@@ -656,14 +674,14 @@ export const getSendMessageApiV1InvestigationsInvestigationIdMessagesPostMutatio
const mutationFn: MutationFunction<
Awaited<
ReturnType<
- typeof sendMessageApiV1InvestigationsInvestigationIdMessagesPost
+ typeof codifyInvestigationApiV1InvestigationsInvestigationIdCodifyPost
>
>,
- { investigationId: string; data: SendMessageRequest }
+ { investigationId: string; data: CodifyRequest }
> = (props) => {
const { investigationId, data } = props ?? {};
- return sendMessageApiV1InvestigationsInvestigationIdMessagesPost(
+ return codifyInvestigationApiV1InvestigationsInvestigationIdCodifyPost(
investigationId,
data,
);
@@ -672,95 +690,100 @@ export const getSendMessageApiV1InvestigationsInvestigationIdMessagesPostMutatio
return { mutationFn, ...mutationOptions };
};
-export type SendMessageApiV1InvestigationsInvestigationIdMessagesPostMutationResult =
+export type CodifyInvestigationApiV1InvestigationsInvestigationIdCodifyPostMutationResult =
NonNullable<
Awaited<
ReturnType<
- typeof sendMessageApiV1InvestigationsInvestigationIdMessagesPost
+ typeof codifyInvestigationApiV1InvestigationsInvestigationIdCodifyPost
>
>
>;
-export type SendMessageApiV1InvestigationsInvestigationIdMessagesPostMutationBody =
- SendMessageRequest;
-export type SendMessageApiV1InvestigationsInvestigationIdMessagesPostMutationError =
+export type CodifyInvestigationApiV1InvestigationsInvestigationIdCodifyPostMutationBody =
+ CodifyRequest;
+export type CodifyInvestigationApiV1InvestigationsInvestigationIdCodifyPostMutationError =
HTTPValidationError;
/**
- * @summary Send Message
+ * @summary Codify Investigation
*/
-export const useSendMessageApiV1InvestigationsInvestigationIdMessagesPost = <
- TError = HTTPValidationError,
- TContext = unknown,
->(options?: {
- mutation?: UseMutationOptions<
+export const useCodifyInvestigationApiV1InvestigationsInvestigationIdCodifyPost =
+ (options?: {
+ mutation?: UseMutationOptions<
+ Awaited<
+ ReturnType<
+ typeof codifyInvestigationApiV1InvestigationsInvestigationIdCodifyPost
+ >
+ >,
+ TError,
+ { investigationId: string; data: CodifyRequest },
+ TContext
+ >;
+ }): UseMutationResult<
Awaited<
ReturnType<
- typeof sendMessageApiV1InvestigationsInvestigationIdMessagesPost
+ typeof codifyInvestigationApiV1InvestigationsInvestigationIdCodifyPost
>
>,
TError,
- { investigationId: string; data: SendMessageRequest },
+ { investigationId: string; data: CodifyRequest },
TContext
- >;
-}): UseMutationResult<
- Awaited<
- ReturnType
- >,
- TError,
- { investigationId: string; data: SendMessageRequest },
- TContext
-> => {
- const mutationOptions =
- getSendMessageApiV1InvestigationsInvestigationIdMessagesPostMutationOptions(
- options,
- );
+ > => {
+ const mutationOptions =
+ getCodifyInvestigationApiV1InvestigationsInvestigationIdCodifyPostMutationOptions(
+ options,
+ );
- return useMutation(mutationOptions);
-};
+ return useMutation(mutationOptions);
+ };
/**
- * Get the status of an investigation.
+ * Get test tracking statistics.
-Queries the Temporal workflow for real-time progress.
+Returns metrics on tests generated, adopted, and issues caught.
Args:
- investigation_id: UUID of the investigation.
auth: Authentication context from API key/JWT.
- temporal_client: Temporal client for durable execution.
+ db: Application database.
+ days: Number of days to look back.
Returns:
- TemporalStatusResponse with current progress and state.
- * @summary Get Investigation Status
+ TestTrackingStatsResponse with statistics.
+ * @summary Get Test Tracking Stats
*/
-export const getInvestigationStatusApiV1InvestigationsInvestigationIdStatusGet =
- (investigationId: string, signal?: AbortSignal) => {
- return customInstance({
- url: `/api/v1/investigations/${investigationId}/status`,
- method: "GET",
- signal,
- });
- };
+export const getTestTrackingStatsApiV1InvestigationsTestsStatsGet = (
+ params?: GetTestTrackingStatsApiV1InvestigationsTestsStatsGetParams,
+ signal?: AbortSignal,
+) => {
+ return customInstance({
+ url: `/api/v1/investigations/tests/stats`,
+ method: "GET",
+ params,
+ signal,
+ });
+};
-export const getGetInvestigationStatusApiV1InvestigationsInvestigationIdStatusGetQueryKey =
- (investigationId: string) => {
- return [`/api/v1/investigations/${investigationId}/status`] as const;
- };
+export const getGetTestTrackingStatsApiV1InvestigationsTestsStatsGetQueryKey = (
+ params?: GetTestTrackingStatsApiV1InvestigationsTestsStatsGetParams,
+) => {
+ return [
+ `/api/v1/investigations/tests/stats`,
+ ...(params ? [params] : []),
+ ] as const;
+};
-export const getGetInvestigationStatusApiV1InvestigationsInvestigationIdStatusGetQueryOptions =
+export const getGetTestTrackingStatsApiV1InvestigationsTestsStatsGetQueryOptions =
<
TData = Awaited<
- ReturnType<
- typeof getInvestigationStatusApiV1InvestigationsInvestigationIdStatusGet
- >
+ ReturnType
>,
TError = HTTPValidationError,
>(
- investigationId: string,
+ params?: GetTestTrackingStatsApiV1InvestigationsTestsStatsGetParams,
options?: {
query?: Partial<
UseQueryOptions<
Awaited<
ReturnType<
- typeof getInvestigationStatusApiV1InvestigationsInvestigationIdStatusGet
+ typeof getTestTrackingStatsApiV1InvestigationsTestsStatsGet
>
>,
TError,
@@ -773,68 +796,121 @@ export const getGetInvestigationStatusApiV1InvestigationsInvestigationIdStatusGe
const queryKey =
queryOptions?.queryKey ??
- getGetInvestigationStatusApiV1InvestigationsInvestigationIdStatusGetQueryKey(
- investigationId,
- );
+ getGetTestTrackingStatsApiV1InvestigationsTestsStatsGetQueryKey(params);
const queryFn: QueryFunction<
Awaited<
- ReturnType<
- typeof getInvestigationStatusApiV1InvestigationsInvestigationIdStatusGet
- >
+ ReturnType
>
> = ({ signal }) =>
- getInvestigationStatusApiV1InvestigationsInvestigationIdStatusGet(
- investigationId,
- signal,
- );
+ getTestTrackingStatsApiV1InvestigationsTestsStatsGet(params, signal);
- return {
- queryKey,
- queryFn,
- enabled: !!investigationId,
- ...queryOptions,
- } as UseQueryOptions<
+ return { queryKey, queryFn, ...queryOptions } as UseQueryOptions<
Awaited<
- ReturnType<
- typeof getInvestigationStatusApiV1InvestigationsInvestigationIdStatusGet
- >
+ ReturnType
>,
TError,
TData
> & { queryKey: QueryKey };
};
-export type GetInvestigationStatusApiV1InvestigationsInvestigationIdStatusGetQueryResult =
+export type GetTestTrackingStatsApiV1InvestigationsTestsStatsGetQueryResult =
NonNullable<
Awaited<
- ReturnType<
- typeof getInvestigationStatusApiV1InvestigationsInvestigationIdStatusGet
- >
+ ReturnType
>
>;
-export type GetInvestigationStatusApiV1InvestigationsInvestigationIdStatusGetQueryError =
+export type GetTestTrackingStatsApiV1InvestigationsTestsStatsGetQueryError =
HTTPValidationError;
/**
- * @summary Get Investigation Status
+ * @summary Get Test Tracking Stats
*/
-export const useGetInvestigationStatusApiV1InvestigationsInvestigationIdStatusGet =
+export const useGetTestTrackingStatsApiV1InvestigationsTestsStatsGet = <
+ TData = Awaited<
+ ReturnType
+ >,
+ TError = HTTPValidationError,
+>(
+ params?: GetTestTrackingStatsApiV1InvestigationsTestsStatsGetParams,
+ options?: {
+ query?: Partial<
+ UseQueryOptions<
+ Awaited<
+ ReturnType<
+ typeof getTestTrackingStatsApiV1InvestigationsTestsStatsGet
+ >
+ >,
+ TError,
+ TData
+ >
+ >;
+ },
+): UseQueryResult & { queryKey: QueryKey } => {
+ const queryOptions =
+ getGetTestTrackingStatsApiV1InvestigationsTestsStatsGetQueryOptions(
+ params,
+ options,
+ );
+
+ const query = useQuery(queryOptions) as UseQueryResult & {
+ queryKey: QueryKey;
+ };
+
+ query.queryKey = queryOptions.queryKey;
+
+ return query;
+};
+
+/**
+ * Get recent tests that caught issues.
+
+Returns a list of recent test failures (issues caught).
+
+Args:
+ auth: Authentication context from API key/JWT.
+ db: Application database.
+ limit: Maximum number of results.
+
+Returns:
+ List of recent catches.
+ * @summary Get Recent Catches
+ */
+export const getRecentCatchesApiV1InvestigationsTestsCatchesGet = (
+ params?: GetRecentCatchesApiV1InvestigationsTestsCatchesGetParams,
+ signal?: AbortSignal,
+) => {
+ return customInstance({
+ url: `/api/v1/investigations/tests/catches`,
+ method: "GET",
+ params,
+ signal,
+ });
+};
+
+export const getGetRecentCatchesApiV1InvestigationsTestsCatchesGetQueryKey = (
+ params?: GetRecentCatchesApiV1InvestigationsTestsCatchesGetParams,
+) => {
+ return [
+ `/api/v1/investigations/tests/catches`,
+ ...(params ? [params] : []),
+ ] as const;
+};
+
+export const getGetRecentCatchesApiV1InvestigationsTestsCatchesGetQueryOptions =
<
TData = Awaited<
- ReturnType<
- typeof getInvestigationStatusApiV1InvestigationsInvestigationIdStatusGet
- >
+ ReturnType
>,
TError = HTTPValidationError,
>(
- investigationId: string,
+ params?: GetRecentCatchesApiV1InvestigationsTestsCatchesGetParams,
options?: {
query?: Partial<
UseQueryOptions<
Awaited<
ReturnType<
- typeof getInvestigationStatusApiV1InvestigationsInvestigationIdStatusGet
+ typeof getRecentCatchesApiV1InvestigationsTestsCatchesGet
>
>,
TError,
@@ -842,72 +918,299 @@ export const useGetInvestigationStatusApiV1InvestigationsInvestigationIdStatusGe
>
>;
},
- ): UseQueryResult & { queryKey: QueryKey } => {
- const queryOptions =
- getGetInvestigationStatusApiV1InvestigationsInvestigationIdStatusGetQueryOptions(
- investigationId,
- options,
- );
+ ) => {
+ const { query: queryOptions } = options ?? {};
- const query = useQuery(queryOptions) as UseQueryResult & {
- queryKey: QueryKey;
- };
+ const queryKey =
+ queryOptions?.queryKey ??
+ getGetRecentCatchesApiV1InvestigationsTestsCatchesGetQueryKey(params);
- query.queryKey = queryOptions.queryKey;
+ const queryFn: QueryFunction<
+ Awaited<
+ ReturnType
+ >
+ > = ({ signal }) =>
+ getRecentCatchesApiV1InvestigationsTestsCatchesGet(params, signal);
- return query;
+ return { queryKey, queryFn, ...queryOptions } as UseQueryOptions<
+ Awaited<
+ ReturnType
+ >,
+ TError,
+ TData
+ > & { queryKey: QueryKey };
};
+export type GetRecentCatchesApiV1InvestigationsTestsCatchesGetQueryResult =
+ NonNullable<
+ Awaited<
+ ReturnType
+ >
+ >;
+export type GetRecentCatchesApiV1InvestigationsTestsCatchesGetQueryError =
+ HTTPValidationError;
+
/**
- * Send user input to an investigation awaiting feedback.
+ * @summary Get Recent Catches
+ */
+export const useGetRecentCatchesApiV1InvestigationsTestsCatchesGet = <
+ TData = Awaited<
+ ReturnType
+ >,
+ TError = HTTPValidationError,
+>(
+ params?: GetRecentCatchesApiV1InvestigationsTestsCatchesGetParams,
+ options?: {
+ query?: Partial<
+ UseQueryOptions<
+ Awaited<
+ ReturnType
+ >,
+ TError,
+ TData
+ >
+ >;
+ },
+): UseQueryResult & { queryKey: QueryKey } => {
+ const queryOptions =
+ getGetRecentCatchesApiV1InvestigationsTestsCatchesGetQueryOptions(
+ params,
+ options,
+ );
-This endpoint sends a signal to the Temporal workflow when it's
-in AWAIT_USER state.
+ const query = useQuery(queryOptions) as UseQueryResult & {
+ queryKey: QueryKey;
+ };
+
+ query.queryKey = queryOptions.queryKey;
+
+ return query;
+};
+
+/**
+ * Mark a generated test as adopted.
+
+Call this when a test has been added to the user's project.
Args:
- investigation_id: UUID of the investigation.
- request: User input payload.
+ request: Adoption request with test ID.
auth: Authentication context from API key/JWT.
- temporal_client: Temporal client for durable execution.
+ db: Application database.
Returns:
- Confirmation message.
- * @summary Send User Input
+ Status message.
+ * @summary Adopt Test
*/
-export const sendUserInputApiV1InvestigationsInvestigationIdInputPost = (
- investigationId: string,
- userInputRequest: UserInputRequest,
+export const adoptTestApiV1InvestigationsTestsAdoptPost = (
+ testAdoptionRequest: TestAdoptionRequest,
) => {
- return customInstance(
- {
- url: `/api/v1/investigations/${investigationId}/input`,
- method: "POST",
- headers: { "Content-Type": "application/json" },
- data: userInputRequest,
- },
- );
+ return customInstance({
+ url: `/api/v1/investigations/tests/adopt`,
+ method: "POST",
+ headers: { "Content-Type": "application/json" },
+ data: testAdoptionRequest,
+ });
};
-export const getSendUserInputApiV1InvestigationsInvestigationIdInputPostMutationOptions =
+export const getAdoptTestApiV1InvestigationsTestsAdoptPostMutationOptions = <
+ TError = HTTPValidationError,
+ TContext = unknown,
+>(options?: {
+ mutation?: UseMutationOptions<
+ Awaited>,
+ TError,
+ { data: TestAdoptionRequest },
+ TContext
+ >;
+}): UseMutationOptions<
+ Awaited>,
+ TError,
+ { data: TestAdoptionRequest },
+ TContext
+> => {
+ const { mutation: mutationOptions } = options ?? {};
+
+ const mutationFn: MutationFunction<
+ Awaited>,
+ { data: TestAdoptionRequest }
+ > = (props) => {
+ const { data } = props ?? {};
+
+ return adoptTestApiV1InvestigationsTestsAdoptPost(data);
+ };
+
+ return { mutationFn, ...mutationOptions };
+};
+
+export type AdoptTestApiV1InvestigationsTestsAdoptPostMutationResult =
+ NonNullable<
+ Awaited>
+ >;
+export type AdoptTestApiV1InvestigationsTestsAdoptPostMutationBody =
+ TestAdoptionRequest;
+export type AdoptTestApiV1InvestigationsTestsAdoptPostMutationError =
+ HTTPValidationError;
+
+/**
+ * @summary Adopt Test
+ */
+export const useAdoptTestApiV1InvestigationsTestsAdoptPost = <
+ TError = HTTPValidationError,
+ TContext = unknown,
+>(options?: {
+ mutation?: UseMutationOptions<
+ Awaited>,
+ TError,
+ { data: TestAdoptionRequest },
+ TContext
+ >;
+}): UseMutationResult<
+ Awaited>,
+ TError,
+ { data: TestAdoptionRequest },
+ TContext
+> => {
+ const mutationOptions =
+ getAdoptTestApiV1InvestigationsTestsAdoptPostMutationOptions(options);
+
+ return useMutation(mutationOptions);
+};
+/**
+ * Record a test run result.
+
+Call this when a generated test has been executed.
+
+Args:
+ request: Test run result.
+ auth: Authentication context from API key/JWT.
+ db: Application database.
+
+Returns:
+ Status message.
+ * @summary Record Test Run
+ */
+export const recordTestRunApiV1InvestigationsTestsRunPost = (
+ testRunResultRequest: TestRunResultRequest,
+) => {
+ return customInstance({
+ url: `/api/v1/investigations/tests/run`,
+ method: "POST",
+ headers: { "Content-Type": "application/json" },
+ data: testRunResultRequest,
+ });
+};
+
+export const getRecordTestRunApiV1InvestigationsTestsRunPostMutationOptions = <
+ TError = HTTPValidationError,
+ TContext = unknown,
+>(options?: {
+ mutation?: UseMutationOptions<
+ Awaited>,
+ TError,
+ { data: TestRunResultRequest },
+ TContext
+ >;
+}): UseMutationOptions<
+ Awaited>,
+ TError,
+ { data: TestRunResultRequest },
+ TContext
+> => {
+ const { mutation: mutationOptions } = options ?? {};
+
+ const mutationFn: MutationFunction<
+ Awaited>,
+ { data: TestRunResultRequest }
+ > = (props) => {
+ const { data } = props ?? {};
+
+ return recordTestRunApiV1InvestigationsTestsRunPost(data);
+ };
+
+ return { mutationFn, ...mutationOptions };
+};
+
+export type RecordTestRunApiV1InvestigationsTestsRunPostMutationResult =
+ NonNullable<
+ Awaited>
+ >;
+export type RecordTestRunApiV1InvestigationsTestsRunPostMutationBody =
+ TestRunResultRequest;
+export type RecordTestRunApiV1InvestigationsTestsRunPostMutationError =
+ HTTPValidationError;
+
+/**
+ * @summary Record Test Run
+ */
+export const useRecordTestRunApiV1InvestigationsTestsRunPost = <
+ TError = HTTPValidationError,
+ TContext = unknown,
+>(options?: {
+ mutation?: UseMutationOptions<
+ Awaited>,
+ TError,
+ { data: TestRunResultRequest },
+ TContext
+ >;
+}): UseMutationResult<
+ Awaited>,
+ TError,
+ { data: TestRunResultRequest },
+ TContext
+> => {
+ const mutationOptions =
+ getRecordTestRunApiV1InvestigationsTestsRunPostMutationOptions(options);
+
+ return useMutation(mutationOptions);
+};
+/**
+ * Send a message to an investigation via Temporal signal.
+
+Args:
+ investigation_id: UUID of the investigation.
+ request: The message request.
+ auth: Authentication context from API key/JWT.
+ temporal_client: Temporal client for durable execution.
+
+Returns:
+ SendMessageResponse with status.
+
+Raises:
+ HTTPException: If failed to send message.
+ * @summary Send Message
+ */
+export const sendMessageApiV1InvestigationsInvestigationIdMessagesPost = (
+ investigationId: string,
+ sendMessageRequest: SendMessageRequest,
+) => {
+ return customInstance({
+ url: `/api/v1/investigations/${investigationId}/messages`,
+ method: "POST",
+ headers: { "Content-Type": "application/json" },
+ data: sendMessageRequest,
+ });
+};
+
+export const getSendMessageApiV1InvestigationsInvestigationIdMessagesPostMutationOptions =
(options?: {
mutation?: UseMutationOptions<
Awaited<
ReturnType<
- typeof sendUserInputApiV1InvestigationsInvestigationIdInputPost
+ typeof sendMessageApiV1InvestigationsInvestigationIdMessagesPost
>
>,
TError,
- { investigationId: string; data: UserInputRequest },
+ { investigationId: string; data: SendMessageRequest },
TContext
>;
}): UseMutationOptions<
Awaited<
ReturnType<
- typeof sendUserInputApiV1InvestigationsInvestigationIdInputPost
+ typeof sendMessageApiV1InvestigationsInvestigationIdMessagesPost
>
>,
TError,
- { investigationId: string; data: UserInputRequest },
+ { investigationId: string; data: SendMessageRequest },
TContext
> => {
const { mutation: mutationOptions } = options ?? {};
@@ -915,14 +1218,14 @@ export const getSendUserInputApiV1InvestigationsInvestigationIdInputPostMutation
const mutationFn: MutationFunction<
Awaited<
ReturnType<
- typeof sendUserInputApiV1InvestigationsInvestigationIdInputPost
+ typeof sendMessageApiV1InvestigationsInvestigationIdMessagesPost
>
>,
- { investigationId: string; data: UserInputRequest }
+ { investigationId: string; data: SendMessageRequest }
> = (props) => {
const { investigationId, data } = props ?? {};
- return sendUserInputApiV1InvestigationsInvestigationIdInputPost(
+ return sendMessageApiV1InvestigationsInvestigationIdMessagesPost(
investigationId,
data,
);
@@ -931,56 +1234,55 @@ export const getSendUserInputApiV1InvestigationsInvestigationIdInputPostMutation
return { mutationFn, ...mutationOptions };
};
-export type SendUserInputApiV1InvestigationsInvestigationIdInputPostMutationResult =
+export type SendMessageApiV1InvestigationsInvestigationIdMessagesPostMutationResult =
NonNullable<
Awaited<
ReturnType<
- typeof sendUserInputApiV1InvestigationsInvestigationIdInputPost
+ typeof sendMessageApiV1InvestigationsInvestigationIdMessagesPost
>
>
>;
-export type SendUserInputApiV1InvestigationsInvestigationIdInputPostMutationBody =
- UserInputRequest;
-export type SendUserInputApiV1InvestigationsInvestigationIdInputPostMutationError =
+export type SendMessageApiV1InvestigationsInvestigationIdMessagesPostMutationBody =
+ SendMessageRequest;
+export type SendMessageApiV1InvestigationsInvestigationIdMessagesPostMutationError =
HTTPValidationError;
/**
- * @summary Send User Input
+ * @summary Send Message
*/
-export const useSendUserInputApiV1InvestigationsInvestigationIdInputPost = <
+export const useSendMessageApiV1InvestigationsInvestigationIdMessagesPost = <
TError = HTTPValidationError,
TContext = unknown,
>(options?: {
mutation?: UseMutationOptions<
Awaited<
ReturnType<
- typeof sendUserInputApiV1InvestigationsInvestigationIdInputPost
+ typeof sendMessageApiV1InvestigationsInvestigationIdMessagesPost
>
>,
TError,
- { investigationId: string; data: UserInputRequest },
+ { investigationId: string; data: SendMessageRequest },
TContext
>;
}): UseMutationResult<
Awaited<
- ReturnType
+ ReturnType
>,
TError,
- { investigationId: string; data: UserInputRequest },
+ { investigationId: string; data: SendMessageRequest },
TContext
> => {
const mutationOptions =
- getSendUserInputApiV1InvestigationsInvestigationIdInputPostMutationOptions(
+ getSendMessageApiV1InvestigationsInvestigationIdMessagesPostMutationOptions(
options,
);
return useMutation(mutationOptions);
};
/**
- * Stream real-time updates via SSE.
+ * Get the status of an investigation.
-Returns a Server-Sent Events stream that pushes investigation
-updates as they occur by polling the Temporal workflow.
+Queries the Temporal workflow for real-time progress.
Args:
investigation_id: UUID of the investigation.
@@ -988,30 +1290,28 @@ Args:
temporal_client: Temporal client for durable execution.
Returns:
- EventSourceResponse with SSE stream.
- * @summary Stream Updates
+ TemporalStatusResponse with current progress and state.
+ * @summary Get Investigation Status
*/
-export const streamUpdatesApiV1InvestigationsInvestigationIdStreamGet = (
- investigationId: string,
- signal?: AbortSignal,
-) => {
- return customInstance({
- url: `/api/v1/investigations/${investigationId}/stream`,
- method: "GET",
- signal,
- });
-};
+export const getInvestigationStatusApiV1InvestigationsInvestigationIdStatusGet =
+ (investigationId: string, signal?: AbortSignal) => {
+ return customInstance({
+ url: `/api/v1/investigations/${investigationId}/status`,
+ method: "GET",
+ signal,
+ });
+ };
-export const getStreamUpdatesApiV1InvestigationsInvestigationIdStreamGetQueryKey =
+export const getGetInvestigationStatusApiV1InvestigationsInvestigationIdStatusGetQueryKey =
(investigationId: string) => {
- return [`/api/v1/investigations/${investigationId}/stream`] as const;
+ return [`/api/v1/investigations/${investigationId}/status`] as const;
};
-export const getStreamUpdatesApiV1InvestigationsInvestigationIdStreamGetQueryOptions =
+export const getGetInvestigationStatusApiV1InvestigationsInvestigationIdStatusGetQueryOptions =
<
TData = Awaited<
ReturnType<
- typeof streamUpdatesApiV1InvestigationsInvestigationIdStreamGet
+ typeof getInvestigationStatusApiV1InvestigationsInvestigationIdStatusGet
>
>,
TError = HTTPValidationError,
@@ -1022,7 +1322,7 @@ export const getStreamUpdatesApiV1InvestigationsInvestigationIdStreamGetQueryOpt
UseQueryOptions<
Awaited<
ReturnType<
- typeof streamUpdatesApiV1InvestigationsInvestigationIdStreamGet
+ typeof getInvestigationStatusApiV1InvestigationsInvestigationIdStatusGet
>
>,
TError,
@@ -1035,18 +1335,18 @@ export const getStreamUpdatesApiV1InvestigationsInvestigationIdStreamGetQueryOpt
const queryKey =
queryOptions?.queryKey ??
- getStreamUpdatesApiV1InvestigationsInvestigationIdStreamGetQueryKey(
+ getGetInvestigationStatusApiV1InvestigationsInvestigationIdStatusGetQueryKey(
investigationId,
);
const queryFn: QueryFunction<
Awaited<
ReturnType<
- typeof streamUpdatesApiV1InvestigationsInvestigationIdStreamGet
+ typeof getInvestigationStatusApiV1InvestigationsInvestigationIdStatusGet
>
>
> = ({ signal }) =>
- streamUpdatesApiV1InvestigationsInvestigationIdStreamGet(
+ getInvestigationStatusApiV1InvestigationsInvestigationIdStatusGet(
investigationId,
signal,
);
@@ -1059,7 +1359,7 @@ export const getStreamUpdatesApiV1InvestigationsInvestigationIdStreamGetQueryOpt
} as UseQueryOptions<
Awaited<
ReturnType<
- typeof streamUpdatesApiV1InvestigationsInvestigationIdStreamGet
+ typeof getInvestigationStatusApiV1InvestigationsInvestigationIdStatusGet
>
>,
TError,
@@ -1067,115 +1367,224 @@ export const getStreamUpdatesApiV1InvestigationsInvestigationIdStreamGetQueryOpt
> & { queryKey: QueryKey };
};
-export type StreamUpdatesApiV1InvestigationsInvestigationIdStreamGetQueryResult =
+export type GetInvestigationStatusApiV1InvestigationsInvestigationIdStatusGetQueryResult =
NonNullable<
Awaited<
ReturnType<
- typeof streamUpdatesApiV1InvestigationsInvestigationIdStreamGet
+ typeof getInvestigationStatusApiV1InvestigationsInvestigationIdStatusGet
>
>
>;
-export type StreamUpdatesApiV1InvestigationsInvestigationIdStreamGetQueryError =
+export type GetInvestigationStatusApiV1InvestigationsInvestigationIdStatusGetQueryError =
HTTPValidationError;
/**
- * @summary Stream Updates
+ * @summary Get Investigation Status
*/
-export const useStreamUpdatesApiV1InvestigationsInvestigationIdStreamGet = <
- TData = Awaited<
-    ReturnType<typeof streamUpdatesApiV1InvestigationsInvestigationIdStreamGet>
- >,
- TError = HTTPValidationError,
->(
- investigationId: string,
- options?: {
- query?: Partial<
- UseQueryOptions<
- Awaited<
- ReturnType<
- typeof streamUpdatesApiV1InvestigationsInvestigationIdStreamGet
- >
- >,
- TError,
- TData
+export const useGetInvestigationStatusApiV1InvestigationsInvestigationIdStatusGet =
+ <
+ TData = Awaited<
+ ReturnType<
+ typeof getInvestigationStatusApiV1InvestigationsInvestigationIdStatusGet
>
- >;
- },
-): UseQueryResult & { queryKey: QueryKey } => {
- const queryOptions =
- getStreamUpdatesApiV1InvestigationsInvestigationIdStreamGetQueryOptions(
- investigationId,
- options,
- );
+ >,
+ TError = HTTPValidationError,
+ >(
+ investigationId: string,
+ options?: {
+ query?: Partial<
+ UseQueryOptions<
+ Awaited<
+ ReturnType<
+ typeof getInvestigationStatusApiV1InvestigationsInvestigationIdStatusGet
+ >
+ >,
+ TError,
+ TData
+ >
+ >;
+ },
+ ): UseQueryResult & { queryKey: QueryKey } => {
+ const queryOptions =
+ getGetInvestigationStatusApiV1InvestigationsInvestigationIdStatusGetQueryOptions(
+ investigationId,
+ options,
+ );
- const query = useQuery(queryOptions) as UseQueryResult & {
- queryKey: QueryKey;
+ const query = useQuery(queryOptions) as UseQueryResult & {
+ queryKey: QueryKey;
+ };
+
+ query.queryKey = queryOptions.queryKey;
+
+ return query;
};
- query.queryKey = queryOptions.queryKey;
+/**
+ * Send user input to an investigation awaiting feedback.
- return query;
+This endpoint sends a signal to the Temporal workflow when it's
+in AWAIT_USER state.
+
+Args:
+ investigation_id: UUID of the investigation.
+ request: User input payload.
+ auth: Authentication context from API key/JWT.
+ temporal_client: Temporal client for durable execution.
+
+Returns:
+ Confirmation message.
+ * @summary Send User Input
+ */
+export const sendUserInputApiV1InvestigationsInvestigationIdInputPost = (
+ investigationId: string,
+ userInputRequest: UserInputRequest,
+) => {
+ return customInstance(
+ {
+ url: `/api/v1/investigations/${investigationId}/input`,
+ method: "POST",
+ headers: { "Content-Type": "application/json" },
+ data: userInputRequest,
+ },
+ );
};
+export const getSendUserInputApiV1InvestigationsInvestigationIdInputPostMutationOptions =
+  <TError = HTTPValidationError, TContext = unknown>(options?: {
+ mutation?: UseMutationOptions<
+ Awaited<
+ ReturnType<
+ typeof sendUserInputApiV1InvestigationsInvestigationIdInputPost
+ >
+ >,
+ TError,
+ { investigationId: string; data: UserInputRequest },
+ TContext
+ >;
+ }): UseMutationOptions<
+ Awaited<
+ ReturnType<
+ typeof sendUserInputApiV1InvestigationsInvestigationIdInputPost
+ >
+ >,
+ TError,
+ { investigationId: string; data: UserInputRequest },
+ TContext
+ > => {
+ const { mutation: mutationOptions } = options ?? {};
+
+ const mutationFn: MutationFunction<
+ Awaited<
+ ReturnType<
+ typeof sendUserInputApiV1InvestigationsInvestigationIdInputPost
+ >
+ >,
+ { investigationId: string; data: UserInputRequest }
+ > = (props) => {
+ const { investigationId, data } = props ?? {};
+
+ return sendUserInputApiV1InvestigationsInvestigationIdInputPost(
+ investigationId,
+ data,
+ );
+ };
+
+ return { mutationFn, ...mutationOptions };
+ };
+
+export type SendUserInputApiV1InvestigationsInvestigationIdInputPostMutationResult =
+ NonNullable<
+ Awaited<
+ ReturnType<
+ typeof sendUserInputApiV1InvestigationsInvestigationIdInputPost
+ >
+ >
+ >;
+export type SendUserInputApiV1InvestigationsInvestigationIdInputPostMutationBody =
+ UserInputRequest;
+export type SendUserInputApiV1InvestigationsInvestigationIdInputPostMutationError =
+ HTTPValidationError;
+
/**
- * Stream SSE events for an investigation.
+ * @summary Send User Input
+ */
+export const useSendUserInputApiV1InvestigationsInvestigationIdInputPost = <
+ TError = HTTPValidationError,
+ TContext = unknown,
+>(options?: {
+ mutation?: UseMutationOptions<
+ Awaited<
+ ReturnType<
+ typeof sendUserInputApiV1InvestigationsInvestigationIdInputPost
+ >
+ >,
+ TError,
+ { investigationId: string; data: UserInputRequest },
+ TContext
+ >;
+}): UseMutationResult<
+ Awaited<
+    ReturnType<typeof sendUserInputApiV1InvestigationsInvestigationIdInputPost>
+ >,
+ TError,
+ { investigationId: string; data: UserInputRequest },
+ TContext
+> => {
+ const mutationOptions =
+ getSendUserInputApiV1InvestigationsInvestigationIdInputPostMutationOptions(
+ options,
+ );
-Events have an integer `seq` field for resumption.
-Use `?seq=N` to resume from sequence N.
+ return useMutation(mutationOptions);
+};
+/**
+ * Stream real-time updates via SSE.
-Returns 410 Gone if the replay window has expired.
+Returns a Server-Sent Events stream that pushes investigation
+updates as they occur by polling the Temporal workflow.
Args:
- request: FastAPI request object.
investigation_id: UUID of the investigation.
auth: Authentication context from API key/JWT.
- db: Application database.
- temporal_client: Temporal client for status polling.
- last_event_id: Optional sequence number to resume from.
+ temporal_client: Temporal client for durable execution.
Returns:
EventSourceResponse with SSE stream.
- * @summary Stream Events
+ * @summary Stream Updates
*/
-export const streamEventsApiV1InvestigationsInvestigationIdEventsGet = (
+export const streamUpdatesApiV1InvestigationsInvestigationIdStreamGet = (
investigationId: string,
- params?: StreamEventsApiV1InvestigationsInvestigationIdEventsGetParams,
signal?: AbortSignal,
) => {
return customInstance({
- url: `/api/v1/investigations/${investigationId}/events`,
+ url: `/api/v1/investigations/${investigationId}/stream`,
method: "GET",
- params,
signal,
});
};
-export const getStreamEventsApiV1InvestigationsInvestigationIdEventsGetQueryKey =
- (
- investigationId: string,
- params?: StreamEventsApiV1InvestigationsInvestigationIdEventsGetParams,
- ) => {
- return [
- `/api/v1/investigations/${investigationId}/events`,
- ...(params ? [params] : []),
- ] as const;
+export const getStreamUpdatesApiV1InvestigationsInvestigationIdStreamGetQueryKey =
+ (investigationId: string) => {
+ return [`/api/v1/investigations/${investigationId}/stream`] as const;
};
-export const getStreamEventsApiV1InvestigationsInvestigationIdEventsGetQueryOptions =
+export const getStreamUpdatesApiV1InvestigationsInvestigationIdStreamGetQueryOptions =
<
TData = Awaited<
-    ReturnType<typeof streamEventsApiV1InvestigationsInvestigationIdEventsGet>
+ ReturnType<
+ typeof streamUpdatesApiV1InvestigationsInvestigationIdStreamGet
+ >
>,
TError = HTTPValidationError,
>(
investigationId: string,
- params?: StreamEventsApiV1InvestigationsInvestigationIdEventsGetParams,
options?: {
query?: Partial<
UseQueryOptions<
Awaited<
ReturnType<
- typeof streamEventsApiV1InvestigationsInvestigationIdEventsGet
+ typeof streamUpdatesApiV1InvestigationsInvestigationIdStreamGet
>
>,
TError,
@@ -1188,21 +1597,19 @@ export const getStreamEventsApiV1InvestigationsInvestigationIdEventsGetQueryOpti
const queryKey =
queryOptions?.queryKey ??
- getStreamEventsApiV1InvestigationsInvestigationIdEventsGetQueryKey(
+ getStreamUpdatesApiV1InvestigationsInvestigationIdStreamGetQueryKey(
investigationId,
- params,
);
const queryFn: QueryFunction<
Awaited<
ReturnType<
- typeof streamEventsApiV1InvestigationsInvestigationIdEventsGet
+ typeof streamUpdatesApiV1InvestigationsInvestigationIdStreamGet
>
>
> = ({ signal }) =>
- streamEventsApiV1InvestigationsInvestigationIdEventsGet(
+ streamUpdatesApiV1InvestigationsInvestigationIdStreamGet(
investigationId,
- params,
signal,
);
@@ -1214,7 +1621,7 @@ export const getStreamEventsApiV1InvestigationsInvestigationIdEventsGetQueryOpti
} as UseQueryOptions<
Awaited<
ReturnType<
- typeof streamEventsApiV1InvestigationsInvestigationIdEventsGet
+ typeof streamUpdatesApiV1InvestigationsInvestigationIdStreamGet
>
>,
TError,
@@ -1222,32 +1629,33 @@ export const getStreamEventsApiV1InvestigationsInvestigationIdEventsGetQueryOpti
> & { queryKey: QueryKey };
};
-export type StreamEventsApiV1InvestigationsInvestigationIdEventsGetQueryResult =
+export type StreamUpdatesApiV1InvestigationsInvestigationIdStreamGetQueryResult =
NonNullable<
Awaited<
-      ReturnType<typeof streamEventsApiV1InvestigationsInvestigationIdEventsGet>
+ ReturnType<
+ typeof streamUpdatesApiV1InvestigationsInvestigationIdStreamGet
+ >
>
>;
-export type StreamEventsApiV1InvestigationsInvestigationIdEventsGetQueryError =
+export type StreamUpdatesApiV1InvestigationsInvestigationIdStreamGetQueryError =
HTTPValidationError;
/**
- * @summary Stream Events
+ * @summary Stream Updates
*/
-export const useStreamEventsApiV1InvestigationsInvestigationIdEventsGet = <
+export const useStreamUpdatesApiV1InvestigationsInvestigationIdStreamGet = <
TData = Awaited<
-    ReturnType<typeof streamEventsApiV1InvestigationsInvestigationIdEventsGet>
+    ReturnType<typeof streamUpdatesApiV1InvestigationsInvestigationIdStreamGet>
>,
TError = HTTPValidationError,
>(
investigationId: string,
- params?: StreamEventsApiV1InvestigationsInvestigationIdEventsGetParams,
options?: {
query?: Partial<
UseQueryOptions<
Awaited<
ReturnType<
- typeof streamEventsApiV1InvestigationsInvestigationIdEventsGet
+ typeof streamUpdatesApiV1InvestigationsInvestigationIdStreamGet
>
>,
TError,
@@ -1257,9 +1665,8 @@ export const useStreamEventsApiV1InvestigationsInvestigationIdEventsGet = <
},
): UseQueryResult & { queryKey: QueryKey } => {
const queryOptions =
- getStreamEventsApiV1InvestigationsInvestigationIdEventsGetQueryOptions(
+ getStreamUpdatesApiV1InvestigationsInvestigationIdStreamGetQueryOptions(
investigationId,
- params,
options,
);
@@ -1271,3 +1678,715 @@ export const useStreamEventsApiV1InvestigationsInvestigationIdEventsGet = <
return query;
};
+
+/**
+ * Stream SSE events for an investigation.
+
+Events have an integer `seq` field for resumption.
+Use `?seq=N` to resume from sequence N.
+
+Returns 410 Gone if the replay window has expired.
+
+Args:
+ request: FastAPI request object.
+ investigation_id: UUID of the investigation.
+ auth: Authentication context from API key/JWT.
+ db: Application database.
+ temporal_client: Temporal client for status polling.
+ last_event_id: Optional sequence number to resume from.
+
+Returns:
+ EventSourceResponse with SSE stream.
+ * @summary Stream Events
+ */
+export const streamEventsApiV1InvestigationsInvestigationIdEventsGet = (
+ investigationId: string,
+ params?: StreamEventsApiV1InvestigationsInvestigationIdEventsGetParams,
+ signal?: AbortSignal,
+) => {
+ return customInstance({
+ url: `/api/v1/investigations/${investigationId}/events`,
+ method: "GET",
+ params,
+ signal,
+ });
+};
+
+export const getStreamEventsApiV1InvestigationsInvestigationIdEventsGetQueryKey =
+ (
+ investigationId: string,
+ params?: StreamEventsApiV1InvestigationsInvestigationIdEventsGetParams,
+ ) => {
+ return [
+ `/api/v1/investigations/${investigationId}/events`,
+ ...(params ? [params] : []),
+ ] as const;
+ };
+
+export const getStreamEventsApiV1InvestigationsInvestigationIdEventsGetQueryOptions =
+ <
+ TData = Awaited<
+      ReturnType<typeof streamEventsApiV1InvestigationsInvestigationIdEventsGet>
+ >,
+ TError = HTTPValidationError,
+ >(
+ investigationId: string,
+ params?: StreamEventsApiV1InvestigationsInvestigationIdEventsGetParams,
+ options?: {
+ query?: Partial<
+ UseQueryOptions<
+ Awaited<
+ ReturnType<
+ typeof streamEventsApiV1InvestigationsInvestigationIdEventsGet
+ >
+ >,
+ TError,
+ TData
+ >
+ >;
+ },
+ ) => {
+ const { query: queryOptions } = options ?? {};
+
+ const queryKey =
+ queryOptions?.queryKey ??
+ getStreamEventsApiV1InvestigationsInvestigationIdEventsGetQueryKey(
+ investigationId,
+ params,
+ );
+
+ const queryFn: QueryFunction<
+ Awaited<
+ ReturnType<
+ typeof streamEventsApiV1InvestigationsInvestigationIdEventsGet
+ >
+ >
+ > = ({ signal }) =>
+ streamEventsApiV1InvestigationsInvestigationIdEventsGet(
+ investigationId,
+ params,
+ signal,
+ );
+
+ return {
+ queryKey,
+ queryFn,
+ enabled: !!investigationId,
+ ...queryOptions,
+ } as UseQueryOptions<
+ Awaited<
+ ReturnType<
+ typeof streamEventsApiV1InvestigationsInvestigationIdEventsGet
+ >
+ >,
+ TError,
+ TData
+ > & { queryKey: QueryKey };
+ };
+
+export type StreamEventsApiV1InvestigationsInvestigationIdEventsGetQueryResult =
+ NonNullable<
+ Awaited<
+      ReturnType<typeof streamEventsApiV1InvestigationsInvestigationIdEventsGet>
+ >
+ >;
+export type StreamEventsApiV1InvestigationsInvestigationIdEventsGetQueryError =
+ HTTPValidationError;
+
+/**
+ * @summary Stream Events
+ */
+export const useStreamEventsApiV1InvestigationsInvestigationIdEventsGet = <
+ TData = Awaited<
+    ReturnType<typeof streamEventsApiV1InvestigationsInvestigationIdEventsGet>
+ >,
+ TError = HTTPValidationError,
+>(
+ investigationId: string,
+ params?: StreamEventsApiV1InvestigationsInvestigationIdEventsGetParams,
+ options?: {
+ query?: Partial<
+ UseQueryOptions<
+ Awaited<
+ ReturnType<
+ typeof streamEventsApiV1InvestigationsInvestigationIdEventsGet
+ >
+ >,
+ TError,
+ TData
+ >
+ >;
+ },
+): UseQueryResult & { queryKey: QueryKey } => {
+ const queryOptions =
+ getStreamEventsApiV1InvestigationsInvestigationIdEventsGetQueryOptions(
+ investigationId,
+ params,
+ options,
+ );
+
+ const query = useQuery(queryOptions) as UseQueryResult & {
+ queryKey: QueryKey;
+ };
+
+ query.queryKey = queryOptions.queryKey;
+
+ return query;
+};
+
+/**
+ * List available snapshots for an investigation.
+
+Args:
+ investigation_id: UUID of the investigation.
+ auth: Authentication context from API key/JWT.
+ db: Application database.
+
+Returns:
+ SnapshotListResponse with list of available snapshots.
+
+Raises:
+ HTTPException: If investigation not found or access denied.
+ * @summary List Snapshots
+ */
+export const listSnapshotsApiV1InvestigationsInvestigationIdSnapshotsGet = (
+ investigationId: string,
+ signal?: AbortSignal,
+) => {
+ return customInstance({
+ url: `/api/v1/investigations/${investigationId}/snapshots`,
+ method: "GET",
+ signal,
+ });
+};
+
+export const getListSnapshotsApiV1InvestigationsInvestigationIdSnapshotsGetQueryKey =
+ (investigationId: string) => {
+ return [`/api/v1/investigations/${investigationId}/snapshots`] as const;
+ };
+
+export const getListSnapshotsApiV1InvestigationsInvestigationIdSnapshotsGetQueryOptions =
+ <
+ TData = Awaited<
+ ReturnType<
+ typeof listSnapshotsApiV1InvestigationsInvestigationIdSnapshotsGet
+ >
+ >,
+ TError = HTTPValidationError,
+ >(
+ investigationId: string,
+ options?: {
+ query?: Partial<
+ UseQueryOptions<
+ Awaited<
+ ReturnType<
+ typeof listSnapshotsApiV1InvestigationsInvestigationIdSnapshotsGet
+ >
+ >,
+ TError,
+ TData
+ >
+ >;
+ },
+ ) => {
+ const { query: queryOptions } = options ?? {};
+
+ const queryKey =
+ queryOptions?.queryKey ??
+ getListSnapshotsApiV1InvestigationsInvestigationIdSnapshotsGetQueryKey(
+ investigationId,
+ );
+
+ const queryFn: QueryFunction<
+ Awaited<
+ ReturnType<
+ typeof listSnapshotsApiV1InvestigationsInvestigationIdSnapshotsGet
+ >
+ >
+ > = ({ signal }) =>
+ listSnapshotsApiV1InvestigationsInvestigationIdSnapshotsGet(
+ investigationId,
+ signal,
+ );
+
+ return {
+ queryKey,
+ queryFn,
+ enabled: !!investigationId,
+ ...queryOptions,
+ } as UseQueryOptions<
+ Awaited<
+ ReturnType<
+ typeof listSnapshotsApiV1InvestigationsInvestigationIdSnapshotsGet
+ >
+ >,
+ TError,
+ TData
+ > & { queryKey: QueryKey };
+ };
+
+export type ListSnapshotsApiV1InvestigationsInvestigationIdSnapshotsGetQueryResult =
+ NonNullable<
+ Awaited<
+ ReturnType<
+ typeof listSnapshotsApiV1InvestigationsInvestigationIdSnapshotsGet
+ >
+ >
+ >;
+export type ListSnapshotsApiV1InvestigationsInvestigationIdSnapshotsGetQueryError =
+ HTTPValidationError;
+
+/**
+ * @summary List Snapshots
+ */
+export const useListSnapshotsApiV1InvestigationsInvestigationIdSnapshotsGet = <
+ TData = Awaited<
+ ReturnType<
+ typeof listSnapshotsApiV1InvestigationsInvestigationIdSnapshotsGet
+ >
+ >,
+ TError = HTTPValidationError,
+>(
+ investigationId: string,
+ options?: {
+ query?: Partial<
+ UseQueryOptions<
+ Awaited<
+ ReturnType<
+ typeof listSnapshotsApiV1InvestigationsInvestigationIdSnapshotsGet
+ >
+ >,
+ TError,
+ TData
+ >
+ >;
+ },
+): UseQueryResult & { queryKey: QueryKey } => {
+ const queryOptions =
+ getListSnapshotsApiV1InvestigationsInvestigationIdSnapshotsGetQueryOptions(
+ investigationId,
+ options,
+ );
+
+ const query = useQuery(queryOptions) as UseQueryResult & {
+ queryKey: QueryKey;
+ };
+
+ query.queryKey = queryOptions.queryKey;
+
+ return query;
+};
+
+/**
+ * Download a snapshot for local hydration.
+
+Supports streaming response for large snapshots and optional gzip compression.
+
+Args:
+ investigation_id: UUID of the investigation.
+ checkpoint: The checkpoint to download (start, hypothesis_generated, etc).
+ auth: Authentication context from API key/JWT.
+ db: Application database.
+ snapshot_store: Snapshot storage backend.
+ accept_encoding: Accept-Encoding header for compression.
+
+Returns:
+ StreamingResponse with snapshot data.
+
+Raises:
+ HTTPException: If investigation not found, access denied, or snapshot missing.
+ * @summary Download Snapshot
+ */
+export const downloadSnapshotApiV1InvestigationsInvestigationIdSnapshotsCheckpointGet =
+ (
+ investigationId: string,
+ checkpoint: SnapshotCheckpointParam,
+ signal?: AbortSignal,
+ ) => {
+ return customInstance({
+ url: `/api/v1/investigations/${investigationId}/snapshots/${checkpoint}`,
+ method: "GET",
+ signal,
+ });
+ };
+
+export const getDownloadSnapshotApiV1InvestigationsInvestigationIdSnapshotsCheckpointGetQueryKey =
+ (investigationId: string, checkpoint: SnapshotCheckpointParam) => {
+ return [
+ `/api/v1/investigations/${investigationId}/snapshots/${checkpoint}`,
+ ] as const;
+ };
+
+export const getDownloadSnapshotApiV1InvestigationsInvestigationIdSnapshotsCheckpointGetQueryOptions =
+ <
+ TData = Awaited<
+ ReturnType<
+ typeof downloadSnapshotApiV1InvestigationsInvestigationIdSnapshotsCheckpointGet
+ >
+ >,
+ TError = HTTPValidationError,
+ >(
+ investigationId: string,
+ checkpoint: SnapshotCheckpointParam,
+ options?: {
+ query?: Partial<
+ UseQueryOptions<
+ Awaited<
+ ReturnType<
+ typeof downloadSnapshotApiV1InvestigationsInvestigationIdSnapshotsCheckpointGet
+ >
+ >,
+ TError,
+ TData
+ >
+ >;
+ },
+ ) => {
+ const { query: queryOptions } = options ?? {};
+
+ const queryKey =
+ queryOptions?.queryKey ??
+ getDownloadSnapshotApiV1InvestigationsInvestigationIdSnapshotsCheckpointGetQueryKey(
+ investigationId,
+ checkpoint,
+ );
+
+ const queryFn: QueryFunction<
+ Awaited<
+ ReturnType<
+ typeof downloadSnapshotApiV1InvestigationsInvestigationIdSnapshotsCheckpointGet
+ >
+ >
+ > = ({ signal }) =>
+ downloadSnapshotApiV1InvestigationsInvestigationIdSnapshotsCheckpointGet(
+ investigationId,
+ checkpoint,
+ signal,
+ );
+
+ return {
+ queryKey,
+ queryFn,
+ enabled: !!(investigationId && checkpoint),
+ ...queryOptions,
+ } as UseQueryOptions<
+ Awaited<
+ ReturnType<
+ typeof downloadSnapshotApiV1InvestigationsInvestigationIdSnapshotsCheckpointGet
+ >
+ >,
+ TError,
+ TData
+ > & { queryKey: QueryKey };
+ };
+
+export type DownloadSnapshotApiV1InvestigationsInvestigationIdSnapshotsCheckpointGetQueryResult =
+ NonNullable<
+ Awaited<
+ ReturnType<
+ typeof downloadSnapshotApiV1InvestigationsInvestigationIdSnapshotsCheckpointGet
+ >
+ >
+ >;
+export type DownloadSnapshotApiV1InvestigationsInvestigationIdSnapshotsCheckpointGetQueryError =
+ HTTPValidationError;
+
+/**
+ * @summary Download Snapshot
+ */
+export const useDownloadSnapshotApiV1InvestigationsInvestigationIdSnapshotsCheckpointGet =
+ <
+ TData = Awaited<
+ ReturnType<
+ typeof downloadSnapshotApiV1InvestigationsInvestigationIdSnapshotsCheckpointGet
+ >
+ >,
+ TError = HTTPValidationError,
+ >(
+ investigationId: string,
+ checkpoint: SnapshotCheckpointParam,
+ options?: {
+ query?: Partial<
+ UseQueryOptions<
+ Awaited<
+ ReturnType<
+ typeof downloadSnapshotApiV1InvestigationsInvestigationIdSnapshotsCheckpointGet
+ >
+ >,
+ TError,
+ TData
+ >
+ >;
+ },
+ ): UseQueryResult & { queryKey: QueryKey } => {
+ const queryOptions =
+ getDownloadSnapshotApiV1InvestigationsInvestigationIdSnapshotsCheckpointGetQueryOptions(
+ investigationId,
+ checkpoint,
+ options,
+ );
+
+ const query = useQuery(queryOptions) as UseQueryResult & {
+ queryKey: QueryKey;
+ };
+
+ query.queryKey = queryOptions.queryKey;
+
+ return query;
+ };
+
+/**
+ * Download investigation as a snapshot tar.gz archive.
+
+Generates a compressed archive containing all evidence, lineage,
+and metadata needed to replay the investigation.
+
+Args:
+ investigation_id: UUID of the investigation.
+ auth: Authentication context from API key/JWT.
+ db: Application database.
+ temporal_client: Temporal client for durable execution.
+
+Returns:
+ StreamingResponse with tar.gz archive.
+
+Raises:
+ HTTPException: If investigation not found or not complete.
+ * @summary Export Snapshot Archive
+ */
+export const exportSnapshotArchiveApiV1InvestigationsInvestigationIdSnapshotGet =
+ (investigationId: string, signal?: AbortSignal) => {
+ return customInstance({
+ url: `/api/v1/investigations/${investigationId}/snapshot`,
+ method: "GET",
+ signal,
+ });
+ };
+
+export const getExportSnapshotArchiveApiV1InvestigationsInvestigationIdSnapshotGetQueryKey =
+ (investigationId: string) => {
+ return [`/api/v1/investigations/${investigationId}/snapshot`] as const;
+ };
+
+export const getExportSnapshotArchiveApiV1InvestigationsInvestigationIdSnapshotGetQueryOptions =
+ <
+ TData = Awaited<
+ ReturnType<
+ typeof exportSnapshotArchiveApiV1InvestigationsInvestigationIdSnapshotGet
+ >
+ >,
+ TError = HTTPValidationError,
+ >(
+ investigationId: string,
+ options?: {
+ query?: Partial<
+ UseQueryOptions<
+ Awaited<
+ ReturnType<
+ typeof exportSnapshotArchiveApiV1InvestigationsInvestigationIdSnapshotGet
+ >
+ >,
+ TError,
+ TData
+ >
+ >;
+ },
+ ) => {
+ const { query: queryOptions } = options ?? {};
+
+ const queryKey =
+ queryOptions?.queryKey ??
+ getExportSnapshotArchiveApiV1InvestigationsInvestigationIdSnapshotGetQueryKey(
+ investigationId,
+ );
+
+ const queryFn: QueryFunction<
+ Awaited<
+ ReturnType<
+ typeof exportSnapshotArchiveApiV1InvestigationsInvestigationIdSnapshotGet
+ >
+ >
+ > = ({ signal }) =>
+ exportSnapshotArchiveApiV1InvestigationsInvestigationIdSnapshotGet(
+ investigationId,
+ signal,
+ );
+
+ return {
+ queryKey,
+ queryFn,
+ enabled: !!investigationId,
+ ...queryOptions,
+ } as UseQueryOptions<
+ Awaited<
+ ReturnType<
+ typeof exportSnapshotArchiveApiV1InvestigationsInvestigationIdSnapshotGet
+ >
+ >,
+ TError,
+ TData
+ > & { queryKey: QueryKey };
+ };
+
+export type ExportSnapshotArchiveApiV1InvestigationsInvestigationIdSnapshotGetQueryResult =
+ NonNullable<
+ Awaited<
+ ReturnType<
+ typeof exportSnapshotArchiveApiV1InvestigationsInvestigationIdSnapshotGet
+ >
+ >
+ >;
+export type ExportSnapshotArchiveApiV1InvestigationsInvestigationIdSnapshotGetQueryError =
+ HTTPValidationError;
+
+/**
+ * @summary Export Snapshot Archive
+ */
+export const useExportSnapshotArchiveApiV1InvestigationsInvestigationIdSnapshotGet =
+ <
+ TData = Awaited<
+ ReturnType<
+ typeof exportSnapshotArchiveApiV1InvestigationsInvestigationIdSnapshotGet
+ >
+ >,
+ TError = HTTPValidationError,
+ >(
+ investigationId: string,
+ options?: {
+ query?: Partial<
+ UseQueryOptions<
+ Awaited<
+ ReturnType<
+ typeof exportSnapshotArchiveApiV1InvestigationsInvestigationIdSnapshotGet
+ >
+ >,
+ TError,
+ TData
+ >
+ >;
+ },
+ ): UseQueryResult & { queryKey: QueryKey } => {
+ const queryOptions =
+ getExportSnapshotArchiveApiV1InvestigationsInvestigationIdSnapshotGetQueryOptions(
+ investigationId,
+ options,
+ );
+
+ const query = useQuery(queryOptions) as UseQueryResult & {
+ queryKey: QueryKey;
+ };
+
+ query.queryKey = queryOptions.queryKey;
+
+ return query;
+ };
+
+/**
+ * Import a snapshot archive as a replayed investigation.
+
+Validates the archive and creates a new investigation marked as a replay.
+
+Args:
+ auth: Authentication context from API key/JWT.
+ db: Application database.
+ file: The uploaded tar.gz file.
+
+Returns:
+ ImportSnapshotResponse with new investigation ID.
+
+Raises:
+ HTTPException: If file is invalid or too large.
+ * @summary Import Snapshot Archive
+ */
+export const importSnapshotArchiveApiV1InvestigationsImportPost = (
+ bodyImportSnapshotArchiveApiV1InvestigationsImportPost: BodyImportSnapshotArchiveApiV1InvestigationsImportPost,
+) => {
+ const formData = new FormData();
+ formData.append(
+ "file",
+ bodyImportSnapshotArchiveApiV1InvestigationsImportPost.file,
+ );
+
+ return customInstance({
+ url: `/api/v1/investigations/import`,
+ method: "POST",
+ headers: { "Content-Type": "multipart/form-data" },
+ data: formData,
+ });
+};
+
+export const getImportSnapshotArchiveApiV1InvestigationsImportPostMutationOptions =
+  <TError = HTTPValidationError, TContext = unknown>(options?: {
+ mutation?: UseMutationOptions<
+ Awaited<
+        ReturnType<typeof importSnapshotArchiveApiV1InvestigationsImportPost>
+ >,
+ TError,
+ { data: BodyImportSnapshotArchiveApiV1InvestigationsImportPost },
+ TContext
+ >;
+ }): UseMutationOptions<
+ Awaited<
+      ReturnType<typeof importSnapshotArchiveApiV1InvestigationsImportPost>
+ >,
+ TError,
+ { data: BodyImportSnapshotArchiveApiV1InvestigationsImportPost },
+ TContext
+ > => {
+ const { mutation: mutationOptions } = options ?? {};
+
+ const mutationFn: MutationFunction<
+ Awaited<
+        ReturnType<typeof importSnapshotArchiveApiV1InvestigationsImportPost>
+ >,
+ { data: BodyImportSnapshotArchiveApiV1InvestigationsImportPost }
+ > = (props) => {
+ const { data } = props ?? {};
+
+ return importSnapshotArchiveApiV1InvestigationsImportPost(data);
+ };
+
+ return { mutationFn, ...mutationOptions };
+ };
+
+export type ImportSnapshotArchiveApiV1InvestigationsImportPostMutationResult =
+ NonNullable<
+ Awaited<
+      ReturnType<typeof importSnapshotArchiveApiV1InvestigationsImportPost>
+ >
+ >;
+export type ImportSnapshotArchiveApiV1InvestigationsImportPostMutationBody =
+ BodyImportSnapshotArchiveApiV1InvestigationsImportPost;
+export type ImportSnapshotArchiveApiV1InvestigationsImportPostMutationError =
+ HTTPValidationError;
+
+/**
+ * @summary Import Snapshot Archive
+ */
+export const useImportSnapshotArchiveApiV1InvestigationsImportPost = <
+ TError = HTTPValidationError,
+ TContext = unknown,
+>(options?: {
+ mutation?: UseMutationOptions<
+ Awaited<
+      ReturnType<typeof importSnapshotArchiveApiV1InvestigationsImportPost>
+ >,
+ TError,
+ { data: BodyImportSnapshotArchiveApiV1InvestigationsImportPost },
+ TContext
+ >;
+}): UseMutationResult<
+ Awaited<
+    ReturnType<typeof importSnapshotArchiveApiV1InvestigationsImportPost>
+ >,
+ TError,
+ { data: BodyImportSnapshotArchiveApiV1InvestigationsImportPost },
+ TContext
+> => {
+ const mutationOptions =
+ getImportSnapshotArchiveApiV1InvestigationsImportPostMutationOptions(
+ options,
+ );
+
+ return useMutation(mutationOptions);
+};
diff --git a/frontend/app/src/lib/api/generated/lineage/lineage.ts b/frontend/app/src/lib/api/generated/lineage/lineage.ts
index 94d57a042..74372c076 100644
--- a/frontend/app/src/lib/api/generated/lineage/lineage.ts
+++ b/frontend/app/src/lib/api/generated/lineage/lineage.ts
@@ -14,6 +14,7 @@ import type {
} from "@tanstack/react-query";
import type {
ColumnLineageListResponse,
+ DataingEntrypointsApiRoutesLineageLineageGraphResponse,
DatasetResponse,
DownstreamResponse,
GetColumnLineageApiV1LineageColumnLineageGetParams,
@@ -26,7 +27,6 @@ import type {
HTTPValidationError,
JobResponse,
JobRunsResponse,
- LineageGraphResponse,
LineageProvidersResponse,
ListDatasetsApiV1LineageDatasetsGetParams,
SearchDatasetsApiV1LineageSearchGetParams,
@@ -312,12 +312,9 @@ export const getLineageGraphApiV1LineageGraphGet = (
params: GetLineageGraphApiV1LineageGraphGetParams,
signal?: AbortSignal,
) => {
- return customInstance({
- url: `/api/v1/lineage/graph`,
- method: "GET",
- params,
- signal,
- });
+ return customInstance(
+ { url: `/api/v1/lineage/graph`, method: "GET", params, signal },
+ );
};
export const getGetLineageGraphApiV1LineageGraphGetQueryKey = (
diff --git a/frontend/app/src/lib/api/investigation-feedback.ts b/frontend/app/src/lib/api/investigation-feedback.ts
index 0cc4e7188..eb09d905d 100644
--- a/frontend/app/src/lib/api/investigation-feedback.ts
+++ b/frontend/app/src/lib/api/investigation-feedback.ts
@@ -8,12 +8,13 @@ export type TargetType =
| "evidence"
| "synthesis"
| "investigation"
- | "recommendation";
+ | "recommendation"
+ | "assistant_message";
export interface FeedbackCreate {
target_type: TargetType;
target_id: string;
- investigation_id: string;
+ investigation_id?: string; // Optional for assistant messages
rating: 1 | -1;
reason?: string;
comment?: string;
@@ -74,3 +75,13 @@ export function useSubmitInvestigationFeedback(investigationId: string) {
},
});
}
+
+/**
+ * Generic feedback submission hook (not tied to a specific investigation).
+ * Used for assistant message feedback.
+ */
+export function useSubmitFeedback() {
+ return useMutation({
+ mutationFn: submitInvestigationFeedback,
+ });
+}
diff --git a/frontend/app/src/lib/api/model/adoptTestApiV1InvestigationsTestsAdoptPost200.ts b/frontend/app/src/lib/api/model/adoptTestApiV1InvestigationsTestsAdoptPost200.ts
new file mode 100644
index 000000000..3d7abd8ea
--- /dev/null
+++ b/frontend/app/src/lib/api/model/adoptTestApiV1InvestigationsTestsAdoptPost200.ts
@@ -0,0 +1,11 @@
+/**
+ * Generated by orval v6.31.0 🍺
+ * Do not edit manually.
+ * dataing
+ * Autonomous Data Quality Investigation
+ * OpenAPI spec version: 2.0.0
+ */
+
+export type AdoptTestApiV1InvestigationsTestsAdoptPost200 = {
+ [key: string]: string;
+};
diff --git a/frontend/app/src/lib/api/model/bodyImportSnapshotArchiveApiV1InvestigationsImportPost.ts b/frontend/app/src/lib/api/model/bodyImportSnapshotArchiveApiV1InvestigationsImportPost.ts
new file mode 100644
index 000000000..49742238c
--- /dev/null
+++ b/frontend/app/src/lib/api/model/bodyImportSnapshotArchiveApiV1InvestigationsImportPost.ts
@@ -0,0 +1,11 @@
+/**
+ * Generated by orval v6.31.0 🍺
+ * Do not edit manually.
+ * dataing
+ * Autonomous Data Quality Investigation
+ * OpenAPI spec version: 2.0.0
+ */
+
+export interface BodyImportSnapshotArchiveApiV1InvestigationsImportPost {
+ file: Blob;
+}
diff --git a/frontend/app/src/lib/api/model/codeChangeListResponse.ts b/frontend/app/src/lib/api/model/codeChangeListResponse.ts
new file mode 100644
index 000000000..8283f0565
--- /dev/null
+++ b/frontend/app/src/lib/api/model/codeChangeListResponse.ts
@@ -0,0 +1,16 @@
+/**
+ * Generated by orval v6.31.0 🍺
+ * Do not edit manually.
+ * dataing
+ * Autonomous Data Quality Investigation
+ * OpenAPI spec version: 2.0.0
+ */
+import type { CodeChangeResponse } from "./codeChangeResponse";
+
+/**
+ * Response for a list of code changes.
+ */
+export interface CodeChangeListResponse {
+ items: CodeChangeResponse[];
+ total: number;
+}
diff --git a/frontend/app/src/lib/api/model/codeChangeResponse.ts b/frontend/app/src/lib/api/model/codeChangeResponse.ts
new file mode 100644
index 000000000..a8d37d641
--- /dev/null
+++ b/frontend/app/src/lib/api/model/codeChangeResponse.ts
@@ -0,0 +1,29 @@
+/**
+ * Generated by orval v6.31.0 🍺
+ * Do not edit manually.
+ * dataing
+ * Autonomous Data Quality Investigation
+ * OpenAPI spec version: 2.0.0
+ */
+import type { CodeChangeResponseAffectedAssetsItem } from "./codeChangeResponseAffectedAssetsItem";
+import type { CodeChangeResponseAuthorEmail } from "./codeChangeResponseAuthorEmail";
+import type { CodeChangeResponseAuthorName } from "./codeChangeResponseAuthorName";
+import type { CodeChangeResponseCommittedAt } from "./codeChangeResponseCommittedAt";
+import type { CodeChangeResponseFilesChanged } from "./codeChangeResponseFilesChanged";
+import type { CodeChangeResponseMessage } from "./codeChangeResponseMessage";
+
+/**
+ * Response for a code change (commit).
+ */
+export interface CodeChangeResponse {
+ affected_assets: CodeChangeResponseAffectedAssetsItem[];
+ author_email: CodeChangeResponseAuthorEmail;
+ author_name: CodeChangeResponseAuthorName;
+ commit_hash: string;
+ committed_at: CodeChangeResponseCommittedAt;
+ created_at: string;
+ files_changed: CodeChangeResponseFilesChanged;
+ id: string;
+ message: CodeChangeResponseMessage;
+ repo_id: string;
+}
diff --git a/frontend/app/src/lib/api/model/codeChangeResponseAffectedAssetsItem.ts b/frontend/app/src/lib/api/model/codeChangeResponseAffectedAssetsItem.ts
new file mode 100644
index 000000000..51cb4547c
--- /dev/null
+++ b/frontend/app/src/lib/api/model/codeChangeResponseAffectedAssetsItem.ts
@@ -0,0 +1,9 @@
+/**
+ * Generated by orval v6.31.0 🍺
+ * Do not edit manually.
+ * dataing
+ * Autonomous Data Quality Investigation
+ * OpenAPI spec version: 2.0.0
+ */
+
+export type CodeChangeResponseAffectedAssetsItem = { [key: string]: unknown };
diff --git a/frontend/app/src/lib/api/model/codeChangeResponseAuthorEmail.ts b/frontend/app/src/lib/api/model/codeChangeResponseAuthorEmail.ts
new file mode 100644
index 000000000..5db60e76f
--- /dev/null
+++ b/frontend/app/src/lib/api/model/codeChangeResponseAuthorEmail.ts
@@ -0,0 +1,9 @@
+/**
+ * Generated by orval v6.31.0 🍺
+ * Do not edit manually.
+ * dataing
+ * Autonomous Data Quality Investigation
+ * OpenAPI spec version: 2.0.0
+ */
+
+export type CodeChangeResponseAuthorEmail = string | null;
diff --git a/frontend/app/src/lib/api/model/codeChangeResponseAuthorName.ts b/frontend/app/src/lib/api/model/codeChangeResponseAuthorName.ts
new file mode 100644
index 000000000..670abd57a
--- /dev/null
+++ b/frontend/app/src/lib/api/model/codeChangeResponseAuthorName.ts
@@ -0,0 +1,9 @@
+/**
+ * Generated by orval v6.31.0 🍺
+ * Do not edit manually.
+ * dataing
+ * Autonomous Data Quality Investigation
+ * OpenAPI spec version: 2.0.0
+ */
+
+export type CodeChangeResponseAuthorName = string | null;
diff --git a/frontend/app/src/lib/api/model/codeChangeResponseCommittedAt.ts b/frontend/app/src/lib/api/model/codeChangeResponseCommittedAt.ts
new file mode 100644
index 000000000..8be56799e
--- /dev/null
+++ b/frontend/app/src/lib/api/model/codeChangeResponseCommittedAt.ts
@@ -0,0 +1,9 @@
+/**
+ * Generated by orval v6.31.0 🍺
+ * Do not edit manually.
+ * dataing
+ * Autonomous Data Quality Investigation
+ * OpenAPI spec version: 2.0.0
+ */
+
+export type CodeChangeResponseCommittedAt = string | null;
diff --git a/frontend/app/src/lib/api/model/codeChangeResponseFilesChanged.ts b/frontend/app/src/lib/api/model/codeChangeResponseFilesChanged.ts
new file mode 100644
index 000000000..05fbbd727
--- /dev/null
+++ b/frontend/app/src/lib/api/model/codeChangeResponseFilesChanged.ts
@@ -0,0 +1,9 @@
+/**
+ * Generated by orval v6.31.0 🍺
+ * Do not edit manually.
+ * dataing
+ * Autonomous Data Quality Investigation
+ * OpenAPI spec version: 2.0.0
+ */
+
+export type CodeChangeResponseFilesChanged = string[] | null;
diff --git a/frontend/app/src/lib/api/model/codeChangeResponseMessage.ts b/frontend/app/src/lib/api/model/codeChangeResponseMessage.ts
new file mode 100644
index 000000000..353702498
--- /dev/null
+++ b/frontend/app/src/lib/api/model/codeChangeResponseMessage.ts
@@ -0,0 +1,9 @@
+/**
+ * Generated by orval v6.31.0 🍺
+ * Do not edit manually.
+ * dataing
+ * Autonomous Data Quality Investigation
+ * OpenAPI spec version: 2.0.0
+ */
+
+export type CodeChangeResponseMessage = string | null;
diff --git a/frontend/app/src/lib/api/model/codifyFormat.ts b/frontend/app/src/lib/api/model/codifyFormat.ts
new file mode 100644
index 000000000..24f53128c
--- /dev/null
+++ b/frontend/app/src/lib/api/model/codifyFormat.ts
@@ -0,0 +1,20 @@
+/**
+ * Generated by orval v6.31.0 🍺
+ * Do not edit manually.
+ * dataing
+ * Autonomous Data Quality Investigation
+ * OpenAPI spec version: 2.0.0
+ */
+
+/**
+ * Output format for test generation.
+ */
+export type CodifyFormat = (typeof CodifyFormat)[keyof typeof CodifyFormat];
+
+// eslint-disable-next-line @typescript-eslint/no-redeclare
+export const CodifyFormat = {
+ gx: "gx",
+ dbt: "dbt",
+ soda: "soda",
+ sql: "sql",
+} as const;
diff --git a/frontend/app/src/lib/api/model/codifyRequest.ts b/frontend/app/src/lib/api/model/codifyRequest.ts
new file mode 100644
index 000000000..49831b4da
--- /dev/null
+++ b/frontend/app/src/lib/api/model/codifyRequest.ts
@@ -0,0 +1,15 @@
+/**
+ * Generated by orval v6.31.0 🍺
+ * Do not edit manually.
+ * dataing
+ * Autonomous Data Quality Investigation
+ * OpenAPI spec version: 2.0.0
+ */
+import type { CodifyFormat } from "./codifyFormat";
+
+/**
+ * Request body for codifying an investigation.
+ */
+export interface CodifyRequest {
+ format?: CodifyFormat;
+}
diff --git a/frontend/app/src/lib/api/model/codifyResponse.ts b/frontend/app/src/lib/api/model/codifyResponse.ts
new file mode 100644
index 000000000..6353170a0
--- /dev/null
+++ b/frontend/app/src/lib/api/model/codifyResponse.ts
@@ -0,0 +1,19 @@
+/**
+ * Generated by orval v6.31.0 🍺
+ * Do not edit manually.
+ * dataing
+ * Autonomous Data Quality Investigation
+ * OpenAPI spec version: 2.0.0
+ */
+import type { CodifyTestResponse } from "./codifyTestResponse";
+
+/**
+ * Response for codifying an investigation.
+ */
+export interface CodifyResponse {
+ confidence: number;
+ content: string;
+ format: string;
+ investigation_id: string;
+ tests: CodifyTestResponse[];
+}
diff --git a/frontend/app/src/lib/api/model/codifyTestResponse.ts b/frontend/app/src/lib/api/model/codifyTestResponse.ts
new file mode 100644
index 000000000..582a5dfe7
--- /dev/null
+++ b/frontend/app/src/lib/api/model/codifyTestResponse.ts
@@ -0,0 +1,18 @@
+/**
+ * Generated by orval v6.31.0 🍺
+ * Do not edit manually.
+ * dataing
+ * Autonomous Data Quality Investigation
+ * OpenAPI spec version: 2.0.0
+ */
+import type { CodifyTestResponseColumn } from "./codifyTestResponseColumn";
+
+/**
+ * A single test extracted from the investigation.
+ */
+export interface CodifyTestResponse {
+ column?: CodifyTestResponseColumn;
+ description: string;
+ table: string;
+ test_type: string;
+}
diff --git a/frontend/app/src/lib/api/model/codifyTestResponseColumn.ts b/frontend/app/src/lib/api/model/codifyTestResponseColumn.ts
new file mode 100644
index 000000000..fd99c3faf
--- /dev/null
+++ b/frontend/app/src/lib/api/model/codifyTestResponseColumn.ts
@@ -0,0 +1,9 @@
+/**
+ * Generated by orval v6.31.0 🍺
+ * Do not edit manually.
+ * dataing
+ * Autonomous Data Quality Investigation
+ * OpenAPI spec version: 2.0.0
+ */
+
+export type CodifyTestResponseColumn = string | null;
diff --git a/frontend/app/src/lib/api/model/connectGitRepoRequest.ts b/frontend/app/src/lib/api/model/connectGitRepoRequest.ts
new file mode 100644
index 000000000..92bc0921e
--- /dev/null
+++ b/frontend/app/src/lib/api/model/connectGitRepoRequest.ts
@@ -0,0 +1,30 @@
+/**
+ * Generated by orval v6.31.0 🍺
+ * Do not edit manually.
+ * dataing
+ * Autonomous Data Quality Investigation
+ * OpenAPI spec version: 2.0.0
+ */
+import type { ConnectGitRepoRequestAccessToken } from "./connectGitRepoRequestAccessToken";
+import type { ConnectGitRepoRequestTrackedPaths } from "./connectGitRepoRequestTrackedPaths";
+
+/**
+ * Request to connect a git repository.
+ */
+export interface ConnectGitRepoRequest {
+ access_token?: ConnectGitRepoRequestAccessToken;
+ default_branch?: string;
+ /**
+ * @minLength 1
+ * @maxLength 200
+ */
+ name: string;
+ /** @pattern ^(github|gitlab|bitbucket)$ */
+ provider: string;
+ tracked_paths?: ConnectGitRepoRequestTrackedPaths;
+ /**
+ * @minLength 1
+ * @maxLength 500
+ */
+ url: string;
+}
diff --git a/frontend/app/src/lib/api/model/connectGitRepoRequestAccessToken.ts b/frontend/app/src/lib/api/model/connectGitRepoRequestAccessToken.ts
new file mode 100644
index 000000000..1c5c6f139
--- /dev/null
+++ b/frontend/app/src/lib/api/model/connectGitRepoRequestAccessToken.ts
@@ -0,0 +1,9 @@
+/**
+ * Generated by orval v6.31.0 🍺
+ * Do not edit manually.
+ * dataing
+ * Autonomous Data Quality Investigation
+ * OpenAPI spec version: 2.0.0
+ */
+
+export type ConnectGitRepoRequestAccessToken = string | null;
diff --git a/frontend/app/src/lib/api/model/connectGitRepoRequestTrackedPaths.ts b/frontend/app/src/lib/api/model/connectGitRepoRequestTrackedPaths.ts
new file mode 100644
index 000000000..31cf7001b
--- /dev/null
+++ b/frontend/app/src/lib/api/model/connectGitRepoRequestTrackedPaths.ts
@@ -0,0 +1,9 @@
+/**
+ * Generated by orval v6.31.0 🍺
+ * Do not edit manually.
+ * dataing
+ * Autonomous Data Quality Investigation
+ * OpenAPI spec version: 2.0.0
+ */
+
+export type ConnectGitRepoRequestTrackedPaths = string[] | null;
diff --git a/frontend/app/src/lib/api/model/contextBundleResponseLineage.ts b/frontend/app/src/lib/api/model/contextBundleResponseLineage.ts
index e899efeea..8b03bb62f 100644
--- a/frontend/app/src/lib/api/model/contextBundleResponseLineage.ts
+++ b/frontend/app/src/lib/api/model/contextBundleResponseLineage.ts
@@ -5,7 +5,6 @@
* Autonomous Data Quality Investigation
* OpenAPI spec version: 2.0.0
*/
-import type { DataingEntrypointsApiRoutesBundlesLineageGraphResponse } from "./dataingEntrypointsApiRoutesBundlesLineageGraphResponse";
+import type { LineageGraphResponse } from "./lineageGraphResponse";
-export type ContextBundleResponseLineage =
- DataingEntrypointsApiRoutesBundlesLineageGraphResponse | null;
+export type ContextBundleResponseLineage = LineageGraphResponse | null;
diff --git a/frontend/app/src/lib/api/model/createSessionRequest.ts b/frontend/app/src/lib/api/model/createSessionRequest.ts
new file mode 100644
index 000000000..cc47fd42b
--- /dev/null
+++ b/frontend/app/src/lib/api/model/createSessionRequest.ts
@@ -0,0 +1,21 @@
+/**
+ * Generated by orval v6.31.0 🍺
+ * Do not edit manually.
+ * dataing
+ * Autonomous Data Quality Investigation
+ * OpenAPI spec version: 2.0.0
+ */
+import type { CreateSessionRequestMetadata } from "./createSessionRequestMetadata";
+import type { CreateSessionRequestParentInvestigationId } from "./createSessionRequestParentInvestigationId";
+import type { CreateSessionRequestTitle } from "./createSessionRequestTitle";
+
+/**
+ * Request to create a new assistant session.
+ */
+export interface CreateSessionRequest {
+ metadata?: CreateSessionRequestMetadata;
+ /** Optional parent investigation to link to */
+ parent_investigation_id?: CreateSessionRequestParentInvestigationId;
+ /** Optional session title */
+ title?: CreateSessionRequestTitle;
+}
diff --git a/frontend/app/src/lib/api/model/createSessionRequestMetadata.ts b/frontend/app/src/lib/api/model/createSessionRequestMetadata.ts
new file mode 100644
index 000000000..2e2fb297b
--- /dev/null
+++ b/frontend/app/src/lib/api/model/createSessionRequestMetadata.ts
@@ -0,0 +1,9 @@
+/**
+ * Generated by orval v6.31.0 🍺
+ * Do not edit manually.
+ * dataing
+ * Autonomous Data Quality Investigation
+ * OpenAPI spec version: 2.0.0
+ */
+
+export type CreateSessionRequestMetadata = { [key: string]: unknown };
diff --git a/frontend/app/src/lib/api/model/createSessionRequestParentInvestigationId.ts b/frontend/app/src/lib/api/model/createSessionRequestParentInvestigationId.ts
new file mode 100644
index 000000000..39a86861a
--- /dev/null
+++ b/frontend/app/src/lib/api/model/createSessionRequestParentInvestigationId.ts
@@ -0,0 +1,12 @@
+/**
+ * Generated by orval v6.31.0 🍺
+ * Do not edit manually.
+ * dataing
+ * Autonomous Data Quality Investigation
+ * OpenAPI spec version: 2.0.0
+ */
+
+/**
+ * Optional parent investigation to link to
+ */
+export type CreateSessionRequestParentInvestigationId = string | null;
diff --git a/frontend/app/src/lib/api/model/createSessionRequestTitle.ts b/frontend/app/src/lib/api/model/createSessionRequestTitle.ts
new file mode 100644
index 000000000..9cdd4f345
--- /dev/null
+++ b/frontend/app/src/lib/api/model/createSessionRequestTitle.ts
@@ -0,0 +1,12 @@
+/**
+ * Generated by orval v6.31.0 🍺
+ * Do not edit manually.
+ * dataing
+ * Autonomous Data Quality Investigation
+ * OpenAPI spec version: 2.0.0
+ */
+
+/**
+ * Optional session title
+ */
+export type CreateSessionRequestTitle = string | null;
diff --git a/frontend/app/src/lib/api/model/createSessionResponse.ts b/frontend/app/src/lib/api/model/createSessionResponse.ts
new file mode 100644
index 000000000..2f619ce8f
--- /dev/null
+++ b/frontend/app/src/lib/api/model/createSessionResponse.ts
@@ -0,0 +1,16 @@
+/**
+ * Generated by orval v6.31.0 🍺
+ * Do not edit manually.
+ * dataing
+ * Autonomous Data Quality Investigation
+ * OpenAPI spec version: 2.0.0
+ */
+
+/**
+ * Response from creating a session.
+ */
+export interface CreateSessionResponse {
+ created_at: string;
+ investigation_id: string;
+ session_id: string;
+}
diff --git a/frontend/app/src/lib/api/model/dataingEntrypointsApiRoutesAssistantSendMessageRequest.ts b/frontend/app/src/lib/api/model/dataingEntrypointsApiRoutesAssistantSendMessageRequest.ts
new file mode 100644
index 000000000..f75a40510
--- /dev/null
+++ b/frontend/app/src/lib/api/model/dataingEntrypointsApiRoutesAssistantSendMessageRequest.ts
@@ -0,0 +1,20 @@
+/**
+ * Generated by orval v6.31.0 🍺
+ * Do not edit manually.
+ * dataing
+ * Autonomous Data Quality Investigation
+ * OpenAPI spec version: 2.0.0
+ */
+import type { DataingEntrypointsApiRoutesAssistantSendMessageRequestPageContext } from "./dataingEntrypointsApiRoutesAssistantSendMessageRequestPageContext";
+
+/**
+ * Request to send a message.
+ */
+export interface DataingEntrypointsApiRoutesAssistantSendMessageRequest {
+ /**
+ * @minLength 1
+ * @maxLength 32000
+ */
+ content: string;
+ page_context?: DataingEntrypointsApiRoutesAssistantSendMessageRequestPageContext;
+}
diff --git a/frontend/app/src/lib/api/model/dataingEntrypointsApiRoutesAssistantSendMessageRequestPageContext.ts b/frontend/app/src/lib/api/model/dataingEntrypointsApiRoutesAssistantSendMessageRequestPageContext.ts
new file mode 100644
index 000000000..a0e7ee8cb
--- /dev/null
+++ b/frontend/app/src/lib/api/model/dataingEntrypointsApiRoutesAssistantSendMessageRequestPageContext.ts
@@ -0,0 +1,11 @@
+/**
+ * Generated by orval v6.31.0 🍺
+ * Do not edit manually.
+ * dataing
+ * Autonomous Data Quality Investigation
+ * OpenAPI spec version: 2.0.0
+ */
+import type { PageContext } from "./pageContext";
+
+export type DataingEntrypointsApiRoutesAssistantSendMessageRequestPageContext =
+ PageContext | null;
diff --git a/frontend/app/src/lib/api/model/dataingEntrypointsApiRoutesAssistantSendMessageResponse.ts b/frontend/app/src/lib/api/model/dataingEntrypointsApiRoutesAssistantSendMessageResponse.ts
new file mode 100644
index 000000000..0f07a2d01
--- /dev/null
+++ b/frontend/app/src/lib/api/model/dataingEntrypointsApiRoutesAssistantSendMessageResponse.ts
@@ -0,0 +1,15 @@
+/**
+ * Generated by orval v6.31.0 🍺
+ * Do not edit manually.
+ * dataing
+ * Autonomous Data Quality Investigation
+ * OpenAPI spec version: 2.0.0
+ */
+
+/**
+ * Response from sending a message.
+ */
+export interface DataingEntrypointsApiRoutesAssistantSendMessageResponse {
+ message_id: string;
+ status?: string;
+}
diff --git a/frontend/app/src/lib/api/model/dataingEntrypointsApiRoutesInvestigationsSendMessageRequest.ts b/frontend/app/src/lib/api/model/dataingEntrypointsApiRoutesInvestigationsSendMessageRequest.ts
new file mode 100644
index 000000000..4a59f1c75
--- /dev/null
+++ b/frontend/app/src/lib/api/model/dataingEntrypointsApiRoutesInvestigationsSendMessageRequest.ts
@@ -0,0 +1,14 @@
+/**
+ * Generated by orval v6.31.0 🍺
+ * Do not edit manually.
+ * dataing
+ * Autonomous Data Quality Investigation
+ * OpenAPI spec version: 2.0.0
+ */
+
+/**
+ * Request body for sending a message.
+ */
+export interface DataingEntrypointsApiRoutesInvestigationsSendMessageRequest {
+ message: string;
+}
diff --git a/frontend/app/src/lib/api/model/dataingEntrypointsApiRoutesInvestigationsSendMessageResponse.ts b/frontend/app/src/lib/api/model/dataingEntrypointsApiRoutesInvestigationsSendMessageResponse.ts
new file mode 100644
index 000000000..73e1d0b12
--- /dev/null
+++ b/frontend/app/src/lib/api/model/dataingEntrypointsApiRoutesInvestigationsSendMessageResponse.ts
@@ -0,0 +1,15 @@
+/**
+ * Generated by orval v6.31.0 🍺
+ * Do not edit manually.
+ * dataing
+ * Autonomous Data Quality Investigation
+ * OpenAPI spec version: 2.0.0
+ */
+
+/**
+ * Response for sending a message.
+ */
+export interface DataingEntrypointsApiRoutesInvestigationsSendMessageResponse {
+ investigation_id: string;
+ status: string;
+}
diff --git a/frontend/app/src/lib/api/model/deleteGitRepoApiV1GitReposRepoIdDelete200.ts b/frontend/app/src/lib/api/model/deleteGitRepoApiV1GitReposRepoIdDelete200.ts
new file mode 100644
index 000000000..982a56477
--- /dev/null
+++ b/frontend/app/src/lib/api/model/deleteGitRepoApiV1GitReposRepoIdDelete200.ts
@@ -0,0 +1,11 @@
+/**
+ * Generated by orval v6.31.0 🍺
+ * Do not edit manually.
+ * dataing
+ * Autonomous Data Quality Investigation
+ * OpenAPI spec version: 2.0.0
+ */
+
+export type DeleteGitRepoApiV1GitReposRepoIdDelete200 = {
+ [key: string]: boolean;
+};
diff --git a/frontend/app/src/lib/api/model/deleteSessionApiV1AssistantSessionsSessionIdDelete200.ts b/frontend/app/src/lib/api/model/deleteSessionApiV1AssistantSessionsSessionIdDelete200.ts
new file mode 100644
index 000000000..5243fa91f
--- /dev/null
+++ b/frontend/app/src/lib/api/model/deleteSessionApiV1AssistantSessionsSessionIdDelete200.ts
@@ -0,0 +1,11 @@
+/**
+ * Generated by orval v6.31.0 🍺
+ * Do not edit manually.
+ * dataing
+ * Autonomous Data Quality Investigation
+ * OpenAPI spec version: 2.0.0
+ */
+
+export type DeleteSessionApiV1AssistantSessionsSessionIdDelete200 = {
+ [key: string]: string;
+};
diff --git a/frontend/app/src/lib/api/model/exportFormat.ts b/frontend/app/src/lib/api/model/exportFormat.ts
new file mode 100644
index 000000000..52fd4572e
--- /dev/null
+++ b/frontend/app/src/lib/api/model/exportFormat.ts
@@ -0,0 +1,18 @@
+/**
+ * Generated by orval v6.31.0 🍺
+ * Do not edit manually.
+ * dataing
+ * Autonomous Data Quality Investigation
+ * OpenAPI spec version: 2.0.0
+ */
+
+/**
+ * Export format options.
+ */
+export type ExportFormat = (typeof ExportFormat)[keyof typeof ExportFormat];
+
+// eslint-disable-next-line @typescript-eslint/no-redeclare
+export const ExportFormat = {
+ json: "json",
+ markdown: "markdown",
+} as const;
diff --git a/frontend/app/src/lib/api/model/exportSessionApiV1AssistantSessionsSessionIdExportPost200.ts b/frontend/app/src/lib/api/model/exportSessionApiV1AssistantSessionsSessionIdExportPost200.ts
new file mode 100644
index 000000000..5e9d5b70a
--- /dev/null
+++ b/frontend/app/src/lib/api/model/exportSessionApiV1AssistantSessionsSessionIdExportPost200.ts
@@ -0,0 +1,11 @@
+/**
+ * Generated by orval v6.31.0 🍺
+ * Do not edit manually.
+ * dataing
+ * Autonomous Data Quality Investigation
+ * OpenAPI spec version: 2.0.0
+ */
+
+export type ExportSessionApiV1AssistantSessionsSessionIdExportPost200 = {
+ [key: string]: unknown;
+};
diff --git a/frontend/app/src/lib/api/model/exportSessionApiV1AssistantSessionsSessionIdExportPostParams.ts b/frontend/app/src/lib/api/model/exportSessionApiV1AssistantSessionsSessionIdExportPostParams.ts
new file mode 100644
index 000000000..f396aa1f7
--- /dev/null
+++ b/frontend/app/src/lib/api/model/exportSessionApiV1AssistantSessionsSessionIdExportPostParams.ts
@@ -0,0 +1,12 @@
+/**
+ * Generated by orval v6.31.0 🍺
+ * Do not edit manually.
+ * dataing
+ * Autonomous Data Quality Investigation
+ * OpenAPI spec version: 2.0.0
+ */
+import type { ExportFormat } from "./exportFormat";
+
+export type ExportSessionApiV1AssistantSessionsSessionIdExportPostParams = {
+ format?: ExportFormat;
+};
diff --git a/frontend/app/src/lib/api/model/feedbackCreate.ts b/frontend/app/src/lib/api/model/feedbackCreate.ts
index de7f4914b..48ddd86de 100644
--- a/frontend/app/src/lib/api/model/feedbackCreate.ts
+++ b/frontend/app/src/lib/api/model/feedbackCreate.ts
@@ -6,6 +6,7 @@
* OpenAPI spec version: 2.0.0
*/
import type { FeedbackCreateComment } from "./feedbackCreateComment";
+import type { FeedbackCreateInvestigationId } from "./feedbackCreateInvestigationId";
import type { FeedbackCreateRating } from "./feedbackCreateRating";
import type { FeedbackCreateReason } from "./feedbackCreateReason";
import type { FeedbackCreateTargetType } from "./feedbackCreateTargetType";
@@ -15,7 +16,7 @@ import type { FeedbackCreateTargetType } from "./feedbackCreateTargetType";
*/
export interface FeedbackCreate {
comment?: FeedbackCreateComment;
- investigation_id: string;
+ investigation_id?: FeedbackCreateInvestigationId;
rating: FeedbackCreateRating;
reason?: FeedbackCreateReason;
target_id: string;
diff --git a/frontend/app/src/lib/api/model/feedbackCreateInvestigationId.ts b/frontend/app/src/lib/api/model/feedbackCreateInvestigationId.ts
new file mode 100644
index 000000000..f9d560799
--- /dev/null
+++ b/frontend/app/src/lib/api/model/feedbackCreateInvestigationId.ts
@@ -0,0 +1,9 @@
+/**
+ * Generated by orval v6.31.0 🍺
+ * Do not edit manually.
+ * dataing
+ * Autonomous Data Quality Investigation
+ * OpenAPI spec version: 2.0.0
+ */
+
+export type FeedbackCreateInvestigationId = string | null;
diff --git a/frontend/app/src/lib/api/model/feedbackCreateTargetType.ts b/frontend/app/src/lib/api/model/feedbackCreateTargetType.ts
index 4479ab81e..f718b956c 100644
--- a/frontend/app/src/lib/api/model/feedbackCreateTargetType.ts
+++ b/frontend/app/src/lib/api/model/feedbackCreateTargetType.ts
@@ -17,4 +17,5 @@ export const FeedbackCreateTargetType = {
synthesis: "synthesis",
investigation: "investigation",
recommendation: "recommendation",
+ assistant_message: "assistant_message",
} as const;
diff --git a/frontend/app/src/lib/api/model/findChangesByAssetApiV1GitChangesByAssetGetParams.ts b/frontend/app/src/lib/api/model/findChangesByAssetApiV1GitChangesByAssetGetParams.ts
new file mode 100644
index 000000000..7ccef74d3
--- /dev/null
+++ b/frontend/app/src/lib/api/model/findChangesByAssetApiV1GitChangesByAssetGetParams.ts
@@ -0,0 +1,14 @@
+/**
+ * Generated by orval v6.31.0 🍺
+ * Do not edit manually.
+ * dataing
+ * Autonomous Data Quality Investigation
+ * OpenAPI spec version: 2.0.0
+ */
+
+export type FindChangesByAssetApiV1GitChangesByAssetGetParams = {
+ asset_name: string;
+ since?: string | null;
+ until?: string | null;
+ limit?: number;
+};
diff --git a/frontend/app/src/lib/api/model/getRecentCatchesApiV1InvestigationsTestsCatchesGetParams.ts b/frontend/app/src/lib/api/model/getRecentCatchesApiV1InvestigationsTestsCatchesGetParams.ts
new file mode 100644
index 000000000..1d6dd096e
--- /dev/null
+++ b/frontend/app/src/lib/api/model/getRecentCatchesApiV1InvestigationsTestsCatchesGetParams.ts
@@ -0,0 +1,14 @@
+/**
+ * Generated by orval v6.31.0 🍺
+ * Do not edit manually.
+ * dataing
+ * Autonomous Data Quality Investigation
+ * OpenAPI spec version: 2.0.0
+ */
+
+export type GetRecentCatchesApiV1InvestigationsTestsCatchesGetParams = {
+ /**
+ * Maximum results
+ */
+ limit?: number;
+};
diff --git a/frontend/app/src/lib/api/model/getTestTrackingStatsApiV1InvestigationsTestsStatsGetParams.ts b/frontend/app/src/lib/api/model/getTestTrackingStatsApiV1InvestigationsTestsStatsGetParams.ts
new file mode 100644
index 000000000..aa009a37c
--- /dev/null
+++ b/frontend/app/src/lib/api/model/getTestTrackingStatsApiV1InvestigationsTestsStatsGetParams.ts
@@ -0,0 +1,14 @@
+/**
+ * Generated by orval v6.31.0 🍺
+ * Do not edit manually.
+ * dataing
+ * Autonomous Data Quality Investigation
+ * OpenAPI spec version: 2.0.0
+ */
+
+export type GetTestTrackingStatsApiV1InvestigationsTestsStatsGetParams = {
+ /**
+ * Days to look back
+ */
+ days?: number;
+};
diff --git a/frontend/app/src/lib/api/model/gitRepoListResponse.ts b/frontend/app/src/lib/api/model/gitRepoListResponse.ts
new file mode 100644
index 000000000..e1378b35c
--- /dev/null
+++ b/frontend/app/src/lib/api/model/gitRepoListResponse.ts
@@ -0,0 +1,16 @@
+/**
+ * Generated by orval v6.31.0 🍺
+ * Do not edit manually.
+ * dataing
+ * Autonomous Data Quality Investigation
+ * OpenAPI spec version: 2.0.0
+ */
+import type { GitRepoResponse } from "./gitRepoResponse";
+
+/**
+ * Response for a list of git repositories.
+ */
+export interface GitRepoListResponse {
+ items: GitRepoResponse[];
+ total: number;
+}
diff --git a/frontend/app/src/lib/api/model/gitRepoResponse.ts b/frontend/app/src/lib/api/model/gitRepoResponse.ts
new file mode 100644
index 000000000..614fc174c
--- /dev/null
+++ b/frontend/app/src/lib/api/model/gitRepoResponse.ts
@@ -0,0 +1,28 @@
+/**
+ * Generated by orval v6.31.0 🍺
+ * Do not edit manually.
+ * dataing
+ * Autonomous Data Quality Investigation
+ * OpenAPI spec version: 2.0.0
+ */
+import type { GitRepoResponseLastSyncAt } from "./gitRepoResponseLastSyncAt";
+import type { GitRepoResponseSyncError } from "./gitRepoResponseSyncError";
+import type { GitRepoResponseTrackedPaths } from "./gitRepoResponseTrackedPaths";
+import type { GitRepoResponseUpdatedAt } from "./gitRepoResponseUpdatedAt";
+
+/**
+ * Response for a git repository.
+ */
+export interface GitRepoResponse {
+ created_at: string;
+ default_branch: string;
+ id: string;
+ last_sync_at: GitRepoResponseLastSyncAt;
+ name: string;
+ provider: string;
+ sync_error: GitRepoResponseSyncError;
+ sync_status: string;
+ tracked_paths: GitRepoResponseTrackedPaths;
+ updated_at: GitRepoResponseUpdatedAt;
+ url: string;
+}
diff --git a/frontend/app/src/lib/api/model/gitRepoResponseLastSyncAt.ts b/frontend/app/src/lib/api/model/gitRepoResponseLastSyncAt.ts
new file mode 100644
index 000000000..82c16464b
--- /dev/null
+++ b/frontend/app/src/lib/api/model/gitRepoResponseLastSyncAt.ts
@@ -0,0 +1,9 @@
+/**
+ * Generated by orval v6.31.0 🍺
+ * Do not edit manually.
+ * dataing
+ * Autonomous Data Quality Investigation
+ * OpenAPI spec version: 2.0.0
+ */
+
+export type GitRepoResponseLastSyncAt = string | null;
diff --git a/frontend/app/src/lib/api/model/gitRepoResponseSyncError.ts b/frontend/app/src/lib/api/model/gitRepoResponseSyncError.ts
new file mode 100644
index 000000000..b8389a664
--- /dev/null
+++ b/frontend/app/src/lib/api/model/gitRepoResponseSyncError.ts
@@ -0,0 +1,9 @@
+/**
+ * Generated by orval v6.31.0 🍺
+ * Do not edit manually.
+ * dataing
+ * Autonomous Data Quality Investigation
+ * OpenAPI spec version: 2.0.0
+ */
+
+export type GitRepoResponseSyncError = string | null;
diff --git a/frontend/app/src/lib/api/model/gitRepoResponseTrackedPaths.ts b/frontend/app/src/lib/api/model/gitRepoResponseTrackedPaths.ts
new file mode 100644
index 000000000..14489c645
--- /dev/null
+++ b/frontend/app/src/lib/api/model/gitRepoResponseTrackedPaths.ts
@@ -0,0 +1,9 @@
+/**
+ * Generated by orval v6.31.0 🍺
+ * Do not edit manually.
+ * dataing
+ * Autonomous Data Quality Investigation
+ * OpenAPI spec version: 2.0.0
+ */
+
+export type GitRepoResponseTrackedPaths = string[] | null;
diff --git a/frontend/app/src/lib/api/model/gitRepoResponseUpdatedAt.ts b/frontend/app/src/lib/api/model/gitRepoResponseUpdatedAt.ts
new file mode 100644
index 000000000..c860fbb8c
--- /dev/null
+++ b/frontend/app/src/lib/api/model/gitRepoResponseUpdatedAt.ts
@@ -0,0 +1,9 @@
+/**
+ * Generated by orval v6.31.0 🍺
+ * Do not edit manually.
+ * dataing
+ * Autonomous Data Quality Investigation
+ * OpenAPI spec version: 2.0.0
+ */
+
+export type GitRepoResponseUpdatedAt = string | null;
diff --git a/frontend/app/src/lib/api/model/importSnapshotResponse.ts b/frontend/app/src/lib/api/model/importSnapshotResponse.ts
new file mode 100644
index 000000000..2aa467b0e
--- /dev/null
+++ b/frontend/app/src/lib/api/model/importSnapshotResponse.ts
@@ -0,0 +1,18 @@
+/**
+ * Generated by orval v6.31.0 🍺
+ * Do not edit manually.
+ * dataing
+ * Autonomous Data Quality Investigation
+ * OpenAPI spec version: 2.0.0
+ */
+
+/**
+ * Response for importing a snapshot archive.
+ */
+export interface ImportSnapshotResponse {
+ evidence_count: number;
+ investigation_id: string;
+ is_replay?: boolean;
+ original_investigation_id: string;
+ status?: string;
+}
diff --git a/frontend/app/src/lib/api/model/index.ts b/frontend/app/src/lib/api/model/index.ts
index c72344818..5b59d1462 100644
--- a/frontend/app/src/lib/api/model/index.ts
+++ b/frontend/app/src/lib/api/model/index.ts
@@ -15,6 +15,7 @@ export * from "./activationStatusResponseFirstIssueAt";
export * from "./addInvestigationTagApiV1InvestigationsInvestigationIdTagsPost201";
export * from "./addTeamMemberApiV1TeamsTeamIdMembersPost201";
export * from "./addTeamMemberApiV1TeamsTeamsTeamIdMembersPost201";
+export * from "./adoptTestApiV1InvestigationsTestsAdoptPost200";
export * from "./ambiguousAssetCandidate";
export * from "./ambiguousAssetCandidateConnectionInfo";
export * from "./ambiguousAssetError";
@@ -65,6 +66,7 @@ export * from "./auditLogResponseResourceName";
export * from "./auditLogResponseResourceType";
export * from "./auditLogResponseStatusCode";
export * from "./bodyImportDbtManifestApiV1DatasetRepoMappingsImportDbtManifestPost";
+export * from "./bodyImportSnapshotArchiveApiV1InvestigationsImportPost";
export * from "./branchStateResponse";
export * from "./branchStateResponseEvidenceItem";
export * from "./branchStateResponseParentBranchId";
@@ -83,10 +85,26 @@ export * from "./chainVerificationResponseError";
export * from "./chainVerificationResponseFirstBrokenSeq";
export * from "./chainVerificationResponseRootHash";
export * from "./chainVerificationResponseRootHashMatches";
+export * from "./codeChangeListResponse";
+export * from "./codeChangeResponse";
+export * from "./codeChangeResponseAffectedAssetsItem";
+export * from "./codeChangeResponseAuthorEmail";
+export * from "./codeChangeResponseAuthorName";
+export * from "./codeChangeResponseCommittedAt";
+export * from "./codeChangeResponseFilesChanged";
+export * from "./codeChangeResponseMessage";
+export * from "./codifyFormat";
+export * from "./codifyRequest";
+export * from "./codifyResponse";
+export * from "./codifyTestResponse";
+export * from "./codifyTestResponseColumn";
export * from "./columnLineageListResponse";
export * from "./columnLineageResponse";
export * from "./columnLineageResponseTransformation";
export * from "./confirmPasswordResetApiV1AuthPasswordResetConfirmPost200";
+export * from "./connectGitRepoRequest";
+export * from "./connectGitRepoRequestAccessToken";
+export * from "./connectGitRepoRequestTrackedPaths";
export * from "./contextBundleResponse";
export * from "./contextBundleResponseAnomalies";
export * from "./contextBundleResponseDefaultDatasourceId";
@@ -116,6 +134,11 @@ export * from "./createRepoMappingRequestMetadataAnyOf";
export * from "./createRunRequest";
export * from "./createRunRequestBundle";
export * from "./createRunRequestBundleId";
+export * from "./createSessionRequest";
+export * from "./createSessionRequestMetadata";
+export * from "./createSessionRequestParentInvestigationId";
+export * from "./createSessionRequestTitle";
+export * from "./createSessionResponse";
export * from "./createUserApiV1ScimV2UsersPost201";
export * from "./createUserRequest";
export * from "./createUserRequestName";
@@ -130,6 +153,7 @@ export * from "./dashboardStats";
export * from "./dataSourceListResponse";
export * from "./dataSourceResponse";
export * from "./dataSourceResponseLastHealthCheckAt";
+export * from "./dataingEntrypointsApiRoutesAssistantSendMessageRequest";
export * from "./dataingEntrypointsApiRoutesBundlesAssetRefRequest";
export * from "./dataingEntrypointsApiRoutesBundlesAssetRefRequestDatasourceId";
export * from "./dataingEntrypointsApiRoutesBundlesLineageGraphResponse";
@@ -141,6 +165,8 @@ export * from "./dataingEntrypointsApiRoutesCredentialsTestConnectionResponseTab
export * from "./dataingEntrypointsApiRoutesDatasourcesTestConnectionResponse";
export * from "./dataingEntrypointsApiRoutesDatasourcesTestConnectionResponseLatencyMs";
export * from "./dataingEntrypointsApiRoutesDatasourcesTestConnectionResponseServerVersion";
+export * from "./dataingEntrypointsApiRoutesInvestigationsSendMessageRequest";
+export * from "./dataingEntrypointsApiRoutesInvestigationsSendMessageResponse";
export * from "./dataingEntrypointsApiRoutesLineageLineageGraphResponse";
export * from "./dataingEntrypointsApiRoutesLineageLineageGraphResponseDatasets";
export * from "./dataingEntrypointsApiRoutesLineageLineageGraphResponseJobs";
@@ -174,7 +200,9 @@ export * from "./datasetSummarySchemaName";
export * from "./datasourceDatasetsResponse";
export * from "./dbtManifestImportResponse";
export * from "./deleteCredentialsResponse";
+export * from "./deleteGitRepoApiV1GitReposRepoIdDelete200";
export * from "./deleteRepoMappingApiV1DatasetRepoMappingsMappingIdDelete200";
+export * from "./deleteSessionApiV1AssistantSessionsSessionIdDelete200";
export * from "./diffRequest";
export * from "./diffRequestDatasourceId";
export * from "./diffResponse";
@@ -192,8 +220,12 @@ export * from "./explainRequest";
export * from "./explainRequestFocus";
export * from "./explainResponse";
export * from "./exportAuditLogsApiV1AuditLogsExportGetParams";
+export * from "./exportFormat";
+export * from "./exportSessionApiV1AssistantSessionsSessionIdExportPost200";
+export * from "./exportSessionApiV1AssistantSessionsSessionIdExportPostParams";
export * from "./feedbackCreate";
export * from "./feedbackCreateComment";
+export * from "./feedbackCreateInvestigationId";
export * from "./feedbackCreateRating";
export * from "./feedbackCreateReason";
export * from "./feedbackCreateTargetType";
@@ -201,6 +233,7 @@ export * from "./feedbackItem";
export * from "./feedbackItemComment";
export * from "./feedbackItemReason";
export * from "./feedbackResponse";
+export * from "./findChangesByAssetApiV1GitChangesByAssetGetParams";
export * from "./getActivationFunnelApiV1AnalyticsActivationFunnelGetParams";
export * from "./getColumnLineageApiV1LineageColumnLineageGetParams";
export * from "./getCurrentUserApiV1AuthMeGet200";
@@ -216,14 +249,23 @@ export * from "./getGroupApiV1ScimV2GroupsGroupIdGet200";
export * from "./getJobApiV1LineageJobJobIdGetParams";
export * from "./getJobRunsApiV1LineageJobJobIdRunsGetParams";
export * from "./getLineageGraphApiV1LineageGraphGetParams";
+export * from "./getRecentCatchesApiV1InvestigationsTestsCatchesGetParams";
export * from "./getRunApiV1RunsRunIdGet200";
+export * from "./getTestTrackingStatsApiV1InvestigationsTestsStatsGetParams";
export * from "./getUpstreamApiV1LineageUpstreamGetParams";
export * from "./getUserApiV1ScimV2UsersUserIdGet200";
export * from "./getUserOrgsApiV1AuthMeOrgsGet200Item";
export * from "./getWeeklyUsageApiV1AnalyticsWeeklyUsageGetParams";
+export * from "./gitRepoListResponse";
+export * from "./gitRepoResponse";
+export * from "./gitRepoResponseLastSyncAt";
+export * from "./gitRepoResponseSyncError";
+export * from "./gitRepoResponseTrackedPaths";
+export * from "./gitRepoResponseUpdatedAt";
export * from "./hTTPValidationError";
export * from "./healthCheckHealthGet200";
export * from "./importDbtManifestApiV1DatasetRepoMappingsImportDbtManifestPostParams";
+export * from "./importSnapshotResponse";
export * from "./inlineBundleRequest";
export * from "./inlineBundleRequestWindow";
export * from "./investigationListItem";
@@ -324,6 +366,7 @@ export * from "./listAuditLogsApiV1AuditLogsGetParams";
export * from "./listDatasetsApiV1LineageDatasetsGetParams";
export * from "./listDatasourceDatasetsApiV1DatasourcesDatasourceIdDatasetsGetParams";
export * from "./listDatasourceDatasetsApiV1V2DatasourcesDatasourceIdDatasetsGetParams";
+export * from "./listGitReposApiV1GitReposGetParams";
export * from "./listGroupsApiV1ScimV2GroupsGet200";
export * from "./listGroupsApiV1ScimV2GroupsGetParams";
export * from "./listInvestigationsApiV1InvestigationsGet200Item";
@@ -331,8 +374,12 @@ export * from "./listIssueEventsApiV1IssuesIssueIdEventsGetParams";
export * from "./listIssuesApiV1IssuesGetParams";
export * from "./listNotificationsApiV1NotificationsGetParams";
export * from "./listOrgMembersApiV1UsersOrgMembersGetParams";
+export * from "./listRepoChangesApiV1GitReposRepoIdChangesGetParams";
export * from "./listRepoMappingsApiV1DatasetRepoMappingsGetParams";
export * from "./listSchemaCommentsApiV1DatasetsDatasetIdSchemaCommentsGetParams";
+export * from "./listSessionsApiV1AssistantSessionsGetParams";
+export * from "./listSessionsForInvestigationApiV1AssistantInvestigationsInvestigationIdSessionsGetParams";
+export * from "./listSessionsResponse";
export * from "./listSlaPoliciesApiV1SlaPoliciesGetParams";
export * from "./listSuggestionsApiV1DatasetRepoMappingsSuggestionsGetParams";
export * from "./listUsersApiV1ScimV2UsersGet200";
@@ -342,6 +389,11 @@ export * from "./markAllReadResponse";
export * from "./markAllReadResponseCursor";
export * from "./matchedPatternResponse";
export * from "./matchedPatternResponseDescription";
+export * from "./messageResponse";
+export * from "./messageResponseTokenCount";
+export * from "./messageResponseToolCalls";
+export * from "./messageResponseToolCallsAnyOfItem";
+export * from "./messageRole";
export * from "./metricSpecRequest";
export * from "./metricSpecRequestSourceUrl";
export * from "./modifyRequest";
@@ -381,8 +433,12 @@ export * from "./queryResponse";
export * from "./queryResponseColumnsItem";
export * from "./queryResponseExecutionTimeMs";
export * from "./queryResponseRowsItem";
+export * from "./recentCatchResponse";
+export * from "./recentCatchResponseColumn";
+export * from "./recentCatchResponseFailureMessage";
export * from "./recentInvestigation";
export * from "./recentInvestigationSeverity";
+export * from "./recordTestRunApiV1InvestigationsTestsRunPost200";
export * from "./recoveryMethodResponse";
export * from "./recoveryMethodResponseActionUrl";
export * from "./recoveryMethodResponseAdminEmail";
@@ -475,10 +531,19 @@ export * from "./searchResultsResponse";
export * from "./sendMessageRequest";
export * from "./sendMessageResponse";
export * from "./sendUserInputApiV1InvestigationsInvestigationIdInputPost200";
+export * from "./sessionDetailResponse";
+export * from "./sessionDetailResponseParentInvestigationId";
+export * from "./sessionDetailResponseTitle";
+export * from "./sessionSummary";
+export * from "./sessionSummaryTitle";
export * from "./severityOverride";
export * from "./severityOverrideTimeToAcknowledge";
export * from "./severityOverrideTimeToProgress";
export * from "./severityOverrideTimeToResolve";
+export * from "./snapshotCheckpointParam";
+export * from "./snapshotListItem";
+export * from "./snapshotListItemSizeBytes";
+export * from "./snapshotListResponse";
export * from "./sourceTypeResponse";
export * from "./sourceTypeResponseCapabilities";
export * from "./sourceTypeResponseConfigSchema";
@@ -499,7 +564,9 @@ export * from "./storeEvidenceRequestContent";
export * from "./streamEventsApiV1InvestigationsInvestigationIdEventsGetParams";
export * from "./streamEventsApiV1RunsRunIdEventsGetParams";
export * from "./streamIssueEventsApiV1IssuesIssueIdStreamGetParams";
+export * from "./streamResponseApiV1AssistantSessionsSessionIdStreamGetParams";
export * from "./syncResponse";
+export * from "./syncTriggerResponse";
export * from "./tagCreate";
export * from "./tagListResponse";
export * from "./tagResponse";
@@ -558,6 +625,8 @@ export * from "./temporalStatusResponseProgress";
export * from "./tenantSettings";
export * from "./tenantSettingsNotificationEmail";
export * from "./tenantSettingsSlackChannel";
+export * from "./testAdoptionRequest";
+export * from "./testAdoptionRequestAdoptedBy";
export * from "./testConnectionRequest";
export * from "./testConnectionRequestConfig";
export * from "./testConnectionResponse";
@@ -565,6 +634,9 @@ export * from "./testConnectionResponseError";
export * from "./testConnectionResponseLatencyMs";
export * from "./testConnectionResponseServerVersion";
export * from "./testConnectionResponseTablesAccessible";
+export * from "./testRunResultRequest";
+export * from "./testRunResultRequestFailureMessage";
+export * from "./testTrackingStatsResponse";
export * from "./tokenResponse";
export * from "./tokenResponseOrg";
export * from "./tokenResponseOrgAnyOf";
@@ -573,6 +645,10 @@ export * from "./tokenResponseRole";
export * from "./tokenResponseUser";
export * from "./tokenResponseUserAnyOf";
export * from "./unreadCountResponse";
+export * from "./updateGitRepoRequest";
+export * from "./updateGitRepoRequestDefaultBranch";
+export * from "./updateGitRepoRequestName";
+export * from "./updateGitRepoRequestTrackedPaths";
export * from "./updateMemberRoleApiV1UsersUserIdRolePatch200";
export * from "./updateMemberRoleApiV1UsersUserIdRolePatchParams";
export * from "./updateRepoMappingRequest";
@@ -622,3 +698,12 @@ export * from "./webhookResponseLastStatus";
export * from "./webhookResponseLastTriggeredAt";
export * from "./weeklyUsageListResponse";
export * from "./weeklyUsageResponse";
+export * from "./dataingEntrypointsApiRoutesAssistantSendMessageRequestPageContext";
+export * from "./dataingEntrypointsApiRoutesAssistantSendMessageResponse";
+export * from "./pageContext";
+export * from "./pageContextError";
+export * from "./pageContextErrorStackPreview";
+export * from "./pageContextErrorStatus";
+export * from "./pageContextErrorUrl";
+export * from "./pageContextPageData";
+export * from "./pageContextRouteParams";
diff --git a/frontend/app/src/lib/api/model/lineageGraphResponse.ts b/frontend/app/src/lib/api/model/lineageGraphResponse.ts
index 54819f2c2..b7eb1d852 100644
--- a/frontend/app/src/lib/api/model/lineageGraphResponse.ts
+++ b/frontend/app/src/lib/api/model/lineageGraphResponse.ts
@@ -6,15 +6,14 @@
* OpenAPI spec version: 2.0.0
*/
import type { LineageGraphResponseDatasets } from "./lineageGraphResponseDatasets";
-import type { LineageEdgeResponse } from "./lineageEdgeResponse";
-import type { LineageGraphResponseJobs } from "./lineageGraphResponseJobs";
+import type { LineageEdge } from "./lineageEdge";
+import type { LineageGraphResponseRoot } from "./lineageGraphResponseRoot";
/**
- * Response for a lineage graph.
+ * Lineage graph response.
*/
export interface LineageGraphResponse {
- datasets: LineageGraphResponseDatasets;
- edges: LineageEdgeResponse[];
- jobs: LineageGraphResponseJobs;
- root: string;
+ datasets?: LineageGraphResponseDatasets;
+ edges?: LineageEdge[];
+ root?: LineageGraphResponseRoot;
}
diff --git a/frontend/app/src/lib/api/model/lineageGraphResponseDatasets.ts b/frontend/app/src/lib/api/model/lineageGraphResponseDatasets.ts
index a5fcdf557..ad14e5e00 100644
--- a/frontend/app/src/lib/api/model/lineageGraphResponseDatasets.ts
+++ b/frontend/app/src/lib/api/model/lineageGraphResponseDatasets.ts
@@ -5,6 +5,7 @@
* Autonomous Data Quality Investigation
* OpenAPI spec version: 2.0.0
*/
-import type { DatasetResponse } from "./datasetResponse";
-export type LineageGraphResponseDatasets = { [key: string]: DatasetResponse };
+export type LineageGraphResponseDatasets = {
+ [key: string]: { [key: string]: unknown };
+};
diff --git a/frontend/app/src/lib/api/model/listGitReposApiV1GitReposGetParams.ts b/frontend/app/src/lib/api/model/listGitReposApiV1GitReposGetParams.ts
new file mode 100644
index 000000000..383affa79
--- /dev/null
+++ b/frontend/app/src/lib/api/model/listGitReposApiV1GitReposGetParams.ts
@@ -0,0 +1,13 @@
+/**
+ * Generated by orval v6.31.0 🍺
+ * Do not edit manually.
+ * dataing
+ * Autonomous Data Quality Investigation
+ * OpenAPI spec version: 2.0.0
+ */
+
+export type ListGitReposApiV1GitReposGetParams = {
+ provider?: string | null;
+ limit?: number;
+ offset?: number;
+};
diff --git a/frontend/app/src/lib/api/model/listRepoChangesApiV1GitReposRepoIdChangesGetParams.ts b/frontend/app/src/lib/api/model/listRepoChangesApiV1GitReposRepoIdChangesGetParams.ts
new file mode 100644
index 000000000..e2dd17e10
--- /dev/null
+++ b/frontend/app/src/lib/api/model/listRepoChangesApiV1GitReposRepoIdChangesGetParams.ts
@@ -0,0 +1,14 @@
+/**
+ * Generated by orval v6.31.0 🍺
+ * Do not edit manually.
+ * dataing
+ * Autonomous Data Quality Investigation
+ * OpenAPI spec version: 2.0.0
+ */
+
+export type ListRepoChangesApiV1GitReposRepoIdChangesGetParams = {
+ since?: string | null;
+ until?: string | null;
+ limit?: number;
+ offset?: number;
+};
diff --git a/frontend/app/src/lib/api/model/listSessionsApiV1AssistantSessionsGetParams.ts b/frontend/app/src/lib/api/model/listSessionsApiV1AssistantSessionsGetParams.ts
new file mode 100644
index 000000000..e87d431d7
--- /dev/null
+++ b/frontend/app/src/lib/api/model/listSessionsApiV1AssistantSessionsGetParams.ts
@@ -0,0 +1,12 @@
+/**
+ * Generated by orval v6.31.0 🍺
+ * Do not edit manually.
+ * dataing
+ * Autonomous Data Quality Investigation
+ * OpenAPI spec version: 2.0.0
+ */
+
+export type ListSessionsApiV1AssistantSessionsGetParams = {
+ limit?: number;
+ offset?: number;
+};
diff --git a/frontend/app/src/lib/api/model/listSessionsForInvestigationApiV1AssistantInvestigationsInvestigationIdSessionsGetParams.ts b/frontend/app/src/lib/api/model/listSessionsForInvestigationApiV1AssistantInvestigationsInvestigationIdSessionsGetParams.ts
new file mode 100644
index 000000000..eac18cd61
--- /dev/null
+++ b/frontend/app/src/lib/api/model/listSessionsForInvestigationApiV1AssistantInvestigationsInvestigationIdSessionsGetParams.ts
@@ -0,0 +1,13 @@
+/**
+ * Generated by orval v6.31.0 🍺
+ * Do not edit manually.
+ * dataing
+ * Autonomous Data Quality Investigation
+ * OpenAPI spec version: 2.0.0
+ */
+
+export type ListSessionsForInvestigationApiV1AssistantInvestigationsInvestigationIdSessionsGetParams =
+ {
+ limit?: number;
+ offset?: number;
+ };
diff --git a/frontend/app/src/lib/api/model/listSessionsResponse.ts b/frontend/app/src/lib/api/model/listSessionsResponse.ts
new file mode 100644
index 000000000..77172c919
--- /dev/null
+++ b/frontend/app/src/lib/api/model/listSessionsResponse.ts
@@ -0,0 +1,15 @@
+/**
+ * Generated by orval v6.31.0 🍺
+ * Do not edit manually.
+ * dataing
+ * Autonomous Data Quality Investigation
+ * OpenAPI spec version: 2.0.0
+ */
+import type { SessionSummary } from "./sessionSummary";
+
+/**
+ * Response from listing sessions.
+ */
+export interface ListSessionsResponse {
+ sessions: SessionSummary[];
+}
diff --git a/frontend/app/src/lib/api/model/messageResponse.ts b/frontend/app/src/lib/api/model/messageResponse.ts
new file mode 100644
index 000000000..8710d51ec
--- /dev/null
+++ b/frontend/app/src/lib/api/model/messageResponse.ts
@@ -0,0 +1,22 @@
+/**
+ * Generated by orval v6.31.0 🍺
+ * Do not edit manually.
+ * dataing
+ * Autonomous Data Quality Investigation
+ * OpenAPI spec version: 2.0.0
+ */
+import type { MessageRole } from "./messageRole";
+import type { MessageResponseTokenCount } from "./messageResponseTokenCount";
+import type { MessageResponseToolCalls } from "./messageResponseToolCalls";
+
+/**
+ * A message in a session.
+ */
+export interface MessageResponse {
+ content: string;
+ created_at: string;
+ id: string;
+ role: MessageRole;
+ token_count?: MessageResponseTokenCount;
+ tool_calls?: MessageResponseToolCalls;
+}
diff --git a/frontend/app/src/lib/api/model/messageResponseTokenCount.ts b/frontend/app/src/lib/api/model/messageResponseTokenCount.ts
new file mode 100644
index 000000000..4af5f8f38
--- /dev/null
+++ b/frontend/app/src/lib/api/model/messageResponseTokenCount.ts
@@ -0,0 +1,9 @@
+/**
+ * Generated by orval v6.31.0 🍺
+ * Do not edit manually.
+ * dataing
+ * Autonomous Data Quality Investigation
+ * OpenAPI spec version: 2.0.0
+ */
+
+export type MessageResponseTokenCount = number | null;
diff --git a/frontend/app/src/lib/api/model/messageResponseToolCalls.ts b/frontend/app/src/lib/api/model/messageResponseToolCalls.ts
new file mode 100644
index 000000000..80581cd54
--- /dev/null
+++ b/frontend/app/src/lib/api/model/messageResponseToolCalls.ts
@@ -0,0 +1,12 @@
+/**
+ * Generated by orval v6.31.0 🍺
+ * Do not edit manually.
+ * dataing
+ * Autonomous Data Quality Investigation
+ * OpenAPI spec version: 2.0.0
+ */
+import type { MessageResponseToolCallsAnyOfItem } from "./messageResponseToolCallsAnyOfItem";
+
+export type MessageResponseToolCalls =
+ | MessageResponseToolCallsAnyOfItem[]
+ | null;
diff --git a/frontend/app/src/lib/api/model/messageResponseToolCallsAnyOfItem.ts b/frontend/app/src/lib/api/model/messageResponseToolCallsAnyOfItem.ts
new file mode 100644
index 000000000..7ed485b42
--- /dev/null
+++ b/frontend/app/src/lib/api/model/messageResponseToolCallsAnyOfItem.ts
@@ -0,0 +1,9 @@
+/**
+ * Generated by orval v6.31.0 🍺
+ * Do not edit manually.
+ * dataing
+ * Autonomous Data Quality Investigation
+ * OpenAPI spec version: 2.0.0
+ */
+
+export type MessageResponseToolCallsAnyOfItem = { [key: string]: unknown };
diff --git a/frontend/app/src/lib/api/model/messageRole.ts b/frontend/app/src/lib/api/model/messageRole.ts
new file mode 100644
index 000000000..875412e6a
--- /dev/null
+++ b/frontend/app/src/lib/api/model/messageRole.ts
@@ -0,0 +1,20 @@
+/**
+ * Generated by orval v6.31.0 🍺
+ * Do not edit manually.
+ * dataing
+ * Autonomous Data Quality Investigation
+ * OpenAPI spec version: 2.0.0
+ */
+
+/**
+ * Message role types.
+ */
+export type MessageRole = (typeof MessageRole)[keyof typeof MessageRole];
+
+// eslint-disable-next-line @typescript-eslint/no-redeclare
+export const MessageRole = {
+ user: "user",
+ assistant: "assistant",
+ system: "system",
+ tool: "tool",
+} as const;
diff --git a/frontend/app/src/lib/api/model/pageContext.ts b/frontend/app/src/lib/api/model/pageContext.ts
new file mode 100644
index 000000000..5c548a02f
--- /dev/null
+++ b/frontend/app/src/lib/api/model/pageContext.ts
@@ -0,0 +1,23 @@
+/**
+ * Generated by orval v6.31.0 🍺
+ * Do not edit manually.
+ * dataing
+ * Autonomous Data Quality Investigation
+ * OpenAPI spec version: 2.0.0
+ */
+import type { PageContextError } from "./pageContextError";
+import type { PageContextPageData } from "./pageContextPageData";
+import type { PageContextRouteParams } from "./pageContextRouteParams";
+
+/**
+ * Context about the page the user is currently viewing.
+ */
+export interface PageContext {
+ errors?: PageContextError[];
+ page_data?: PageContextPageData;
+ page_title: string;
+ page_type: string;
+ route: string;
+ route_params?: PageContextRouteParams;
+ route_pattern: string;
+}
diff --git a/frontend/app/src/lib/api/model/pageContextError.ts b/frontend/app/src/lib/api/model/pageContextError.ts
new file mode 100644
index 000000000..d7b56c2a3
--- /dev/null
+++ b/frontend/app/src/lib/api/model/pageContextError.ts
@@ -0,0 +1,23 @@
+/**
+ * Generated by orval v6.31.0 🍺
+ * Do not edit manually.
+ * dataing
+ * Autonomous Data Quality Investigation
+ * OpenAPI spec version: 2.0.0
+ */
+import type { PageContextErrorStackPreview } from "./pageContextErrorStackPreview";
+import type { PageContextErrorStatus } from "./pageContextErrorStatus";
+import type { PageContextErrorUrl } from "./pageContextErrorUrl";
+
+/**
+ * A frontend error captured by the error bus.
+ */
+export interface PageContextError {
+ message: string;
+ stack_preview?: PageContextErrorStackPreview;
+ status?: PageContextErrorStatus;
+ timestamp: number;
+ /** Error type: api, react, or console */
+ type: string;
+ url?: PageContextErrorUrl;
+}
diff --git a/frontend/app/src/lib/api/model/pageContextErrorStackPreview.ts b/frontend/app/src/lib/api/model/pageContextErrorStackPreview.ts
new file mode 100644
index 000000000..bbc49a4e5
--- /dev/null
+++ b/frontend/app/src/lib/api/model/pageContextErrorStackPreview.ts
@@ -0,0 +1,9 @@
+/**
+ * Generated by orval v6.31.0 🍺
+ * Do not edit manually.
+ * dataing
+ * Autonomous Data Quality Investigation
+ * OpenAPI spec version: 2.0.0
+ */
+
+export type PageContextErrorStackPreview = string | null;
diff --git a/frontend/app/src/lib/api/model/pageContextErrorStatus.ts b/frontend/app/src/lib/api/model/pageContextErrorStatus.ts
new file mode 100644
index 000000000..edeae12ad
--- /dev/null
+++ b/frontend/app/src/lib/api/model/pageContextErrorStatus.ts
@@ -0,0 +1,9 @@
+/**
+ * Generated by orval v6.31.0 🍺
+ * Do not edit manually.
+ * dataing
+ * Autonomous Data Quality Investigation
+ * OpenAPI spec version: 2.0.0
+ */
+
+export type PageContextErrorStatus = number | null;
diff --git a/frontend/app/src/lib/api/model/pageContextErrorUrl.ts b/frontend/app/src/lib/api/model/pageContextErrorUrl.ts
new file mode 100644
index 000000000..601211aa1
--- /dev/null
+++ b/frontend/app/src/lib/api/model/pageContextErrorUrl.ts
@@ -0,0 +1,9 @@
+/**
+ * Generated by orval v6.31.0 🍺
+ * Do not edit manually.
+ * dataing
+ * Autonomous Data Quality Investigation
+ * OpenAPI spec version: 2.0.0
+ */
+
+export type PageContextErrorUrl = string | null;
diff --git a/frontend/app/src/lib/api/model/pageContextPageData.ts b/frontend/app/src/lib/api/model/pageContextPageData.ts
new file mode 100644
index 000000000..512642a36
--- /dev/null
+++ b/frontend/app/src/lib/api/model/pageContextPageData.ts
@@ -0,0 +1,9 @@
+/**
+ * Generated by orval v6.31.0 🍺
+ * Do not edit manually.
+ * dataing
+ * Autonomous Data Quality Investigation
+ * OpenAPI spec version: 2.0.0
+ */
+
+export type PageContextPageData = { [key: string]: unknown };
diff --git a/frontend/app/src/lib/api/model/pageContextRouteParams.ts b/frontend/app/src/lib/api/model/pageContextRouteParams.ts
new file mode 100644
index 000000000..13c1e324e
--- /dev/null
+++ b/frontend/app/src/lib/api/model/pageContextRouteParams.ts
@@ -0,0 +1,9 @@
+/**
+ * Generated by orval v6.31.0 🍺
+ * Do not edit manually.
+ * dataing
+ * Autonomous Data Quality Investigation
+ * OpenAPI spec version: 2.0.0
+ */
+
+export type PageContextRouteParams = { [key: string]: string };
diff --git a/frontend/app/src/lib/api/model/recentCatchResponse.ts b/frontend/app/src/lib/api/model/recentCatchResponse.ts
new file mode 100644
index 000000000..64a724759
--- /dev/null
+++ b/frontend/app/src/lib/api/model/recentCatchResponse.ts
@@ -0,0 +1,22 @@
+/**
+ * Generated by orval v6.31.0 🍺
+ * Do not edit manually.
+ * dataing
+ * Autonomous Data Quality Investigation
+ * OpenAPI spec version: 2.0.0
+ */
+import type { RecentCatchResponseColumn } from "./recentCatchResponseColumn";
+import type { RecentCatchResponseFailureMessage } from "./recentCatchResponseFailureMessage";
+
+/**
+ * A recent test failure catch.
+ */
+export interface RecentCatchResponse {
+ column: RecentCatchResponseColumn;
+ failure_message: RecentCatchResponseFailureMessage;
+ investigation_id: string;
+ run_at: string;
+ table: string;
+ test_id: string;
+ test_type: string;
+}
diff --git a/frontend/app/src/lib/api/model/recentCatchResponseColumn.ts b/frontend/app/src/lib/api/model/recentCatchResponseColumn.ts
new file mode 100644
index 000000000..8890d26a0
--- /dev/null
+++ b/frontend/app/src/lib/api/model/recentCatchResponseColumn.ts
@@ -0,0 +1,9 @@
+/**
+ * Generated by orval v6.31.0 🍺
+ * Do not edit manually.
+ * dataing
+ * Autonomous Data Quality Investigation
+ * OpenAPI spec version: 2.0.0
+ */
+
+export type RecentCatchResponseColumn = string | null;
diff --git a/frontend/app/src/lib/api/model/recentCatchResponseFailureMessage.ts b/frontend/app/src/lib/api/model/recentCatchResponseFailureMessage.ts
new file mode 100644
index 000000000..95d0cef58
--- /dev/null
+++ b/frontend/app/src/lib/api/model/recentCatchResponseFailureMessage.ts
@@ -0,0 +1,9 @@
+/**
+ * Generated by orval v6.31.0 🍺
+ * Do not edit manually.
+ * dataing
+ * Autonomous Data Quality Investigation
+ * OpenAPI spec version: 2.0.0
+ */
+
+export type RecentCatchResponseFailureMessage = string | null;
diff --git a/frontend/app/src/lib/api/model/recordTestRunApiV1InvestigationsTestsRunPost200.ts b/frontend/app/src/lib/api/model/recordTestRunApiV1InvestigationsTestsRunPost200.ts
new file mode 100644
index 000000000..ab67843b4
--- /dev/null
+++ b/frontend/app/src/lib/api/model/recordTestRunApiV1InvestigationsTestsRunPost200.ts
@@ -0,0 +1,11 @@
+/**
+ * Generated by orval v6.31.0 🍺
+ * Do not edit manually.
+ * dataing
+ * Autonomous Data Quality Investigation
+ * OpenAPI spec version: 2.0.0
+ */
+
+export type RecordTestRunApiV1InvestigationsTestsRunPost200 = {
+ [key: string]: string;
+};
diff --git a/frontend/app/src/lib/api/model/sessionDetailResponse.ts b/frontend/app/src/lib/api/model/sessionDetailResponse.ts
new file mode 100644
index 000000000..a1a26f855
--- /dev/null
+++ b/frontend/app/src/lib/api/model/sessionDetailResponse.ts
@@ -0,0 +1,24 @@
+/**
+ * Generated by orval v6.31.0 🍺
+ * Do not edit manually.
+ * dataing
+ * Autonomous Data Quality Investigation
+ * OpenAPI spec version: 2.0.0
+ */
+import type { MessageResponse } from "./messageResponse";
+import type { SessionDetailResponseParentInvestigationId } from "./sessionDetailResponseParentInvestigationId";
+import type { SessionDetailResponseTitle } from "./sessionDetailResponseTitle";
+
+/**
+ * Full session details with messages.
+ */
+export interface SessionDetailResponse {
+ created_at: string;
+ id: string;
+ investigation_id: string;
+ last_activity: string;
+ messages: MessageResponse[];
+ parent_investigation_id?: SessionDetailResponseParentInvestigationId;
+ title: SessionDetailResponseTitle;
+ token_count: number;
+}
diff --git a/frontend/app/src/lib/api/model/sessionDetailResponseParentInvestigationId.ts b/frontend/app/src/lib/api/model/sessionDetailResponseParentInvestigationId.ts
new file mode 100644
index 000000000..44071bb23
--- /dev/null
+++ b/frontend/app/src/lib/api/model/sessionDetailResponseParentInvestigationId.ts
@@ -0,0 +1,9 @@
+/**
+ * Generated by orval v6.31.0 🍺
+ * Do not edit manually.
+ * dataing
+ * Autonomous Data Quality Investigation
+ * OpenAPI spec version: 2.0.0
+ */
+
+export type SessionDetailResponseParentInvestigationId = string | null;
diff --git a/frontend/app/src/lib/api/model/sessionDetailResponseTitle.ts b/frontend/app/src/lib/api/model/sessionDetailResponseTitle.ts
new file mode 100644
index 000000000..d94f557ca
--- /dev/null
+++ b/frontend/app/src/lib/api/model/sessionDetailResponseTitle.ts
@@ -0,0 +1,9 @@
+/**
+ * Generated by orval v6.31.0 🍺
+ * Do not edit manually.
+ * dataing
+ * Autonomous Data Quality Investigation
+ * OpenAPI spec version: 2.0.0
+ */
+
+export type SessionDetailResponseTitle = string | null;
diff --git a/frontend/app/src/lib/api/model/sessionSummary.ts b/frontend/app/src/lib/api/model/sessionSummary.ts
new file mode 100644
index 000000000..b6a1816d7
--- /dev/null
+++ b/frontend/app/src/lib/api/model/sessionSummary.ts
@@ -0,0 +1,20 @@
+/**
+ * Generated by orval v6.31.0 🍺
+ * Do not edit manually.
+ * dataing
+ * Autonomous Data Quality Investigation
+ * OpenAPI spec version: 2.0.0
+ */
+import type { SessionSummaryTitle } from "./sessionSummaryTitle";
+
+/**
+ * Summary of a session for listing.
+ */
+export interface SessionSummary {
+ created_at: string;
+ id: string;
+ last_activity: string;
+ message_count: number;
+ title: SessionSummaryTitle;
+ token_count: number;
+}
diff --git a/frontend/app/src/lib/api/model/sessionSummaryTitle.ts b/frontend/app/src/lib/api/model/sessionSummaryTitle.ts
new file mode 100644
index 000000000..8da5ad6cf
--- /dev/null
+++ b/frontend/app/src/lib/api/model/sessionSummaryTitle.ts
@@ -0,0 +1,9 @@
+/**
+ * Generated by orval v6.31.0 🍺
+ * Do not edit manually.
+ * dataing
+ * Autonomous Data Quality Investigation
+ * OpenAPI spec version: 2.0.0
+ */
+
+export type SessionSummaryTitle = string | null;
diff --git a/frontend/app/src/lib/api/model/snapshotCheckpointParam.ts b/frontend/app/src/lib/api/model/snapshotCheckpointParam.ts
new file mode 100644
index 000000000..4fc2c75f0
--- /dev/null
+++ b/frontend/app/src/lib/api/model/snapshotCheckpointParam.ts
@@ -0,0 +1,22 @@
+/**
+ * Generated by orval v6.31.0 🍺
+ * Do not edit manually.
+ * dataing
+ * Autonomous Data Quality Investigation
+ * OpenAPI spec version: 2.0.0
+ */
+
+/**
+ * Valid checkpoint values for snapshot download.
+ */
+export type SnapshotCheckpointParam =
+ (typeof SnapshotCheckpointParam)[keyof typeof SnapshotCheckpointParam];
+
+// eslint-disable-next-line @typescript-eslint/no-redeclare
+export const SnapshotCheckpointParam = {
+ start: "start",
+ hypothesis_generated: "hypothesis_generated",
+ evidence_collected: "evidence_collected",
+ complete: "complete",
+ failed: "failed",
+} as const;
diff --git a/frontend/app/src/lib/api/model/snapshotListItem.ts b/frontend/app/src/lib/api/model/snapshotListItem.ts
new file mode 100644
index 000000000..00ee86653
--- /dev/null
+++ b/frontend/app/src/lib/api/model/snapshotListItem.ts
@@ -0,0 +1,18 @@
+/**
+ * Generated by orval v6.31.0 🍺
+ * Do not edit manually.
+ * dataing
+ * Autonomous Data Quality Investigation
+ * OpenAPI spec version: 2.0.0
+ */
+import type { SnapshotListItemSizeBytes } from "./snapshotListItemSizeBytes";
+
+/**
+ * Snapshot metadata for listing.
+ */
+export interface SnapshotListItem {
+ captured_at: string;
+ checkpoint: string;
+ size_bytes?: SnapshotListItemSizeBytes;
+ storage_path: string;
+}
diff --git a/frontend/app/src/lib/api/model/snapshotListItemSizeBytes.ts b/frontend/app/src/lib/api/model/snapshotListItemSizeBytes.ts
new file mode 100644
index 000000000..12768083b
--- /dev/null
+++ b/frontend/app/src/lib/api/model/snapshotListItemSizeBytes.ts
@@ -0,0 +1,9 @@
+/**
+ * Generated by orval v6.31.0 🍺
+ * Do not edit manually.
+ * dataing
+ * Autonomous Data Quality Investigation
+ * OpenAPI spec version: 2.0.0
+ */
+
+export type SnapshotListItemSizeBytes = number | null;
diff --git a/frontend/app/src/lib/api/model/snapshotListResponse.ts b/frontend/app/src/lib/api/model/snapshotListResponse.ts
new file mode 100644
index 000000000..60687c4bd
--- /dev/null
+++ b/frontend/app/src/lib/api/model/snapshotListResponse.ts
@@ -0,0 +1,16 @@
+/**
+ * Generated by orval v6.31.0 🍺
+ * Do not edit manually.
+ * dataing
+ * Autonomous Data Quality Investigation
+ * OpenAPI spec version: 2.0.0
+ */
+import type { SnapshotListItem } from "./snapshotListItem";
+
+/**
+ * Response for listing available snapshots.
+ */
+export interface SnapshotListResponse {
+ investigation_id: string;
+ snapshots: SnapshotListItem[];
+}
diff --git a/frontend/app/src/lib/api/model/streamResponseApiV1AssistantSessionsSessionIdStreamGetParams.ts b/frontend/app/src/lib/api/model/streamResponseApiV1AssistantSessionsSessionIdStreamGetParams.ts
new file mode 100644
index 000000000..f040c71f4
--- /dev/null
+++ b/frontend/app/src/lib/api/model/streamResponseApiV1AssistantSessionsSessionIdStreamGetParams.ts
@@ -0,0 +1,14 @@
+/**
+ * Generated by orval v6.31.0 🍺
+ * Do not edit manually.
+ * dataing
+ * Autonomous Data Quality Investigation
+ * OpenAPI spec version: 2.0.0
+ */
+
+export type StreamResponseApiV1AssistantSessionsSessionIdStreamGetParams = {
+ /**
+ * Resume from event ID
+ */
+ last_event_id?: number | null;
+};
diff --git a/frontend/app/src/lib/api/model/syncTriggerResponse.ts b/frontend/app/src/lib/api/model/syncTriggerResponse.ts
new file mode 100644
index 000000000..498745b40
--- /dev/null
+++ b/frontend/app/src/lib/api/model/syncTriggerResponse.ts
@@ -0,0 +1,15 @@
+/**
+ * Generated by orval v6.31.0 🍺
+ * Do not edit manually.
+ * dataing
+ * Autonomous Data Quality Investigation
+ * OpenAPI spec version: 2.0.0
+ */
+
+/**
+ * Response for sync trigger.
+ */
+export interface SyncTriggerResponse {
+ message: string;
+ sync_status: string;
+}
diff --git a/frontend/app/src/lib/api/model/testAdoptionRequest.ts b/frontend/app/src/lib/api/model/testAdoptionRequest.ts
new file mode 100644
index 000000000..3e78de670
--- /dev/null
+++ b/frontend/app/src/lib/api/model/testAdoptionRequest.ts
@@ -0,0 +1,16 @@
+/**
+ * Generated by orval v6.31.0 🍺
+ * Do not edit manually.
+ * dataing
+ * Autonomous Data Quality Investigation
+ * OpenAPI spec version: 2.0.0
+ */
+import type { TestAdoptionRequestAdoptedBy } from "./testAdoptionRequestAdoptedBy";
+
+/**
+ * Request to mark a test as adopted.
+ */
+export interface TestAdoptionRequest {
+ adopted_by?: TestAdoptionRequestAdoptedBy;
+ test_id: string;
+}
diff --git a/frontend/app/src/lib/api/model/testAdoptionRequestAdoptedBy.ts b/frontend/app/src/lib/api/model/testAdoptionRequestAdoptedBy.ts
new file mode 100644
index 000000000..a11015979
--- /dev/null
+++ b/frontend/app/src/lib/api/model/testAdoptionRequestAdoptedBy.ts
@@ -0,0 +1,9 @@
+/**
+ * Generated by orval v6.31.0 🍺
+ * Do not edit manually.
+ * dataing
+ * Autonomous Data Quality Investigation
+ * OpenAPI spec version: 2.0.0
+ */
+
+export type TestAdoptionRequestAdoptedBy = string | null;
diff --git a/frontend/app/src/lib/api/model/testRunResultRequest.ts b/frontend/app/src/lib/api/model/testRunResultRequest.ts
new file mode 100644
index 000000000..ca1e04dd6
--- /dev/null
+++ b/frontend/app/src/lib/api/model/testRunResultRequest.ts
@@ -0,0 +1,17 @@
+/**
+ * Generated by orval v6.31.0 🍺
+ * Do not edit manually.
+ * dataing
+ * Autonomous Data Quality Investigation
+ * OpenAPI spec version: 2.0.0
+ */
+import type { TestRunResultRequestFailureMessage } from "./testRunResultRequestFailureMessage";
+
+/**
+ * Request to record a test run result.
+ */
+export interface TestRunResultRequest {
+  failure_message?: TestRunResultRequestFailureMessage; // optional nullable failure detail
+  passed: boolean; // whether the test run passed
+  test_id: string; // ID of the test that was run
+}
diff --git a/frontend/app/src/lib/api/model/testRunResultRequestFailureMessage.ts b/frontend/app/src/lib/api/model/testRunResultRequestFailureMessage.ts
new file mode 100644
index 000000000..fa822f1d8
--- /dev/null
+++ b/frontend/app/src/lib/api/model/testRunResultRequestFailureMessage.ts
@@ -0,0 +1,9 @@
+/**
+ * Generated by orval v6.31.0 🍺
+ * Do not edit manually.
+ * dataing
+ * Autonomous Data Quality Investigation
+ * OpenAPI spec version: 2.0.0
+ */
+
+export type TestRunResultRequestFailureMessage = string | null; // nullable-string wrapper emitted by orval for an optional field
diff --git a/frontend/app/src/lib/api/model/testTrackingStatsResponse.ts b/frontend/app/src/lib/api/model/testTrackingStatsResponse.ts
new file mode 100644
index 000000000..e10fbbb0e
--- /dev/null
+++ b/frontend/app/src/lib/api/model/testTrackingStatsResponse.ts
@@ -0,0 +1,19 @@
+/**
+ * Generated by orval v6.31.0 🍺
+ * Do not edit manually.
+ * dataing
+ * Autonomous Data Quality Investigation
+ * OpenAPI spec version: 2.0.0
+ */
+
+/**
+ * Test tracking statistics response.
+ */
+export interface TestTrackingStatsResponse {
+  adoption_rate: number; // presumably tests_adopted / tests_generated — TODO confirm
+  effectiveness_rate: number; // presumably issues_caught relative to adopted or run tests — TODO confirm
+  issues_caught: number; // count of issues caught by generated tests
+  tests_adopted: number; // count of adopted tests
+  tests_generated: number; // count of generated tests
+  tests_run: number; // count of test executions recorded
+}
diff --git a/frontend/app/src/lib/api/model/updateGitRepoRequest.ts b/frontend/app/src/lib/api/model/updateGitRepoRequest.ts
new file mode 100644
index 000000000..4ccace2d4
--- /dev/null
+++ b/frontend/app/src/lib/api/model/updateGitRepoRequest.ts
@@ -0,0 +1,19 @@
+/**
+ * Generated by orval v6.31.0 🍺
+ * Do not edit manually.
+ * dataing
+ * Autonomous Data Quality Investigation
+ * OpenAPI spec version: 2.0.0
+ */
+import type { UpdateGitRepoRequestDefaultBranch } from "./updateGitRepoRequestDefaultBranch";
+import type { UpdateGitRepoRequestName } from "./updateGitRepoRequestName";
+import type { UpdateGitRepoRequestTrackedPaths } from "./updateGitRepoRequestTrackedPaths";
+
+/**
+ * Request to update a git repository.
+ */
+export interface UpdateGitRepoRequest {
+  default_branch?: UpdateGitRepoRequestDefaultBranch; // all fields optional — looks like a partial update; TODO confirm omitted-field semantics
+  name?: UpdateGitRepoRequestName; // new display name, or null
+  tracked_paths?: UpdateGitRepoRequestTrackedPaths; // new tracked paths list, or null
+}
diff --git a/frontend/app/src/lib/api/model/updateGitRepoRequestDefaultBranch.ts b/frontend/app/src/lib/api/model/updateGitRepoRequestDefaultBranch.ts
new file mode 100644
index 000000000..4c7e5cb80
--- /dev/null
+++ b/frontend/app/src/lib/api/model/updateGitRepoRequestDefaultBranch.ts
@@ -0,0 +1,9 @@
+/**
+ * Generated by orval v6.31.0 🍺
+ * Do not edit manually.
+ * dataing
+ * Autonomous Data Quality Investigation
+ * OpenAPI spec version: 2.0.0
+ */
+
+export type UpdateGitRepoRequestDefaultBranch = string | null; // nullable-string wrapper emitted by orval for an optional field
diff --git a/frontend/app/src/lib/api/model/updateGitRepoRequestName.ts b/frontend/app/src/lib/api/model/updateGitRepoRequestName.ts
new file mode 100644
index 000000000..fbd1be797
--- /dev/null
+++ b/frontend/app/src/lib/api/model/updateGitRepoRequestName.ts
@@ -0,0 +1,9 @@
+/**
+ * Generated by orval v6.31.0 🍺
+ * Do not edit manually.
+ * dataing
+ * Autonomous Data Quality Investigation
+ * OpenAPI spec version: 2.0.0
+ */
+
+export type UpdateGitRepoRequestName = string | null; // nullable-string wrapper emitted by orval for an optional field
diff --git a/frontend/app/src/lib/api/model/updateGitRepoRequestTrackedPaths.ts b/frontend/app/src/lib/api/model/updateGitRepoRequestTrackedPaths.ts
new file mode 100644
index 000000000..d3e0e5193
--- /dev/null
+++ b/frontend/app/src/lib/api/model/updateGitRepoRequestTrackedPaths.ts
@@ -0,0 +1,9 @@
+/**
+ * Generated by orval v6.31.0 🍺
+ * Do not edit manually.
+ * dataing
+ * Autonomous Data Quality Investigation
+ * OpenAPI spec version: 2.0.0
+ */
+
+export type UpdateGitRepoRequestTrackedPaths = string[] | null; // nullable string-array wrapper emitted by orval for an optional field
diff --git a/frontend/app/src/lib/assistant/error-bus.ts b/frontend/app/src/lib/assistant/error-bus.ts
new file mode 100644
index 000000000..09111621b
--- /dev/null
+++ b/frontend/app/src/lib/assistant/error-bus.ts
@@ -0,0 +1,66 @@
+/**
+ * Error bus for capturing frontend errors and exposing them to the assistant.
+ *
+ * Simple pub/sub so non-React code (HTTP client) can emit errors
+ * that the PageContextProvider consumes.
+ */
+
+export interface PageError {
+  type: "api" | "react" | "console";
+  message: string;
+  status?: number;
+  url?: string;
+  timestamp: number;
+  stackPreview?: string;
+}
+
+type ErrorSubscriber = (error: PageError) => void;
+
+const subscribers = new Set<ErrorSubscriber>(); // typed so emit/subscribe stay in sync (generic was missing)
+
+export function emitApiError(error: { // publish an HTTP-layer error to all subscribers
+  message: string;
+  status?: number;
+  url?: string;
+}): void {
+  const pageError: PageError = {
+    type: "api",
+    message: error.message,
+    status: error.status,
+    url: error.url,
+    timestamp: Date.now(),
+  };
+  for (const fn of subscribers) {
+    fn(pageError);
+  }
+}
+
+export function emitReactError(error: Error, componentStack?: string): void { // publish a React render/boundary error
+  const pageError: PageError = {
+    type: "react",
+    message: error.message,
+    timestamp: Date.now(),
+    stackPreview: componentStack?.slice(0, 300), // cap stack size to keep payloads small
+  };
+  for (const fn of subscribers) {
+    fn(pageError);
+  }
+}
+
+export function emitConsoleError(message: string): void { // publish a captured console.error message
+  const pageError: PageError = {
+    type: "console",
+    message,
+    timestamp: Date.now(),
+  };
+  for (const fn of subscribers) {
+    fn(pageError);
+  }
+}
+
+export function subscribeToErrors(fn: ErrorSubscriber): () => void { // returns an unsubscribe function
+  subscribers.add(fn);
+  return () => {
+    subscribers.delete(fn);
+  };
+}
diff --git a/frontend/app/src/lib/assistant/page-context.tsx b/frontend/app/src/lib/assistant/page-context.tsx
new file mode 100644
index 000000000..3197818a9
--- /dev/null
+++ b/frontend/app/src/lib/assistant/page-context.tsx
@@ -0,0 +1,203 @@
+/**
+ * Page context provider for the assistant.
+ *
+ * Tracks current route, page-specific data, and recent frontend errors
+ * so the assistant always knows what the user is looking at.
+ */
+
+import {
+  createContext,
+  useContext,
+  useState,
+  useEffect,
+  useCallback,
+  useRef,
+  type ReactNode,
+} from "react";
+import { useLocation } from "react-router-dom";
+import { subscribeToErrors, emitConsoleError, type PageError } from "./error-bus";
+
+const MAX_ERRORS = 10;
+
+export interface PageContextData {
+  route: string;
+  routePattern: string;
+  routeParams: Record<string, string>;
+  pageType: string;
+  pageTitle: string;
+  pageData: Record<string, unknown>;
+  errors: PageError[];
+}
+
+interface PageContextValue {
+  context: PageContextData;
+  registerPageContext: (data: {
+    pageType: string;
+    pageTitle: string;
+    pageData?: Record<string, unknown>;
+  }) => void;
+}
+
+const PageContext = createContext<PageContextValue | null>(null);
+
+/** Route patterns we recognize, in order of specificity. */
+const ROUTE_PATTERNS = [
+  { pattern: "/investigations/:id", pageType: "investigation_detail" },
+  { pattern: "/investigations/new", pageType: "investigation_new" },
+  { pattern: "/investigations", pageType: "investigation_list" },
+  { pattern: "/datasources/:datasourceId/datasets", pageType: "dataset_list" },
+  { pattern: "/datasources", pageType: "datasource_list" },
+  { pattern: "/datasets/:datasetId", pageType: "dataset_detail" },
+  { pattern: "/issues/:id", pageType: "issue_detail" },
+  { pattern: "/issues/new", pageType: "issue_create" },
+  { pattern: "/issues", pageType: "issue_list" },
+  { pattern: "/settings", pageType: "settings" },
+  { pattern: "/usage", pageType: "usage" },
+  { pattern: "/notifications", pageType: "notifications" },
+  { pattern: "/admin", pageType: "admin" },
+  { pattern: "/", pageType: "dashboard" },
+] as const;
+
+function useMatchedRoute(): { pattern: string; params: Record<string, string> } {
+  const location = useLocation();
+
+  // Try each pattern — useMatch can't be called conditionally,
+  // so we match manually against the pathname.
+  for (const route of ROUTE_PATTERNS) {
+    const regex = new RegExp(
+      "^" + route.pattern.replace(/:(\w+)/g, "(?<$1>[^/]+)") + "$",
+    );
+    const match = location.pathname.match(regex);
+    if (match) {
+      return {
+        pattern: route.pattern,
+        params: (match.groups ?? {}) as Record<string, string>,
+      };
+    }
+  }
+
+  return { pattern: location.pathname, params: {} };
+}
+
+export function PageContextProvider({ children }: { children: ReactNode }) {
+  const location = useLocation();
+  const { pattern: routePattern, params: routeParams } = useMatchedRoute();
+
+  const [pageInfo, setPageInfo] = useState<{
+    pageType: string;
+    pageTitle: string;
+    pageData: Record<string, unknown>;
+  }>({
+    pageType: "unknown",
+    pageTitle: "",
+    pageData: {},
+  });
+
+  const [errors, setErrors] = useState<PageError[]>([]);
+  const consoleGuardRef = useRef(false);
+
+  // Subscribe to error bus
+  useEffect(() => {
+    return subscribeToErrors((err) => {
+      setErrors((prev) => [...prev.slice(-(MAX_ERRORS - 1)), err]);
+    });
+  }, []);
+
+  // Intercept console.error
+  useEffect(() => {
+    const original = console.error;
+    console.error = (...args: unknown[]) => {
+      original.apply(console, args);
+      // Recursion guard
+      if (consoleGuardRef.current) return;
+      consoleGuardRef.current = true;
+      try {
+        const message = args
+          .map((a) => (typeof a === "string" ? a : String(a)))
+          .join(" ")
+          .slice(0, 300);
+        // Skip React internal errors that are already captured by ErrorBoundary
+        if (!message.startsWith("Error caught by boundary:")) {
+          emitConsoleError(message);
+        }
+      } finally {
+        consoleGuardRef.current = false;
+      }
+    };
+    return () => {
+      console.error = original;
+    };
+  }, []);
+
+  // Reset page info on route change
+  useEffect(() => {
+    const matched = ROUTE_PATTERNS.find((r) => r.pattern === routePattern);
+    setPageInfo({
+      pageType: matched?.pageType ?? "unknown",
+      pageTitle: "",
+      pageData: {},
+    });
+  }, [routePattern]);
+
+  const registerPageContext = useCallback(
+    (data: {
+      pageType: string;
+      pageTitle: string;
+      pageData?: Record<string, unknown>;
+    }) => {
+      setPageInfo({
+        pageType: data.pageType,
+        pageTitle: data.pageTitle,
+        pageData: data.pageData ?? {},
+      });
+    },
+    [],
+  );
+
+  const contextValue: PageContextValue = {
+    context: {
+      route: location.pathname,
+      routePattern,
+      routeParams,
+      pageType: pageInfo.pageType,
+      pageTitle: pageInfo.pageTitle,
+      pageData: pageInfo.pageData,
+      errors,
+    },
+    registerPageContext,
+  };
+
+  return (
+    <PageContext.Provider value={contextValue}>{children}</PageContext.Provider>
+  );
+}
+
+/** Get the current page context (consumed by useAssistant). */
+export function usePageContext(): PageContextData {
+  const ctx = useContext(PageContext);
+  if (!ctx) {
+    throw new Error("usePageContext must be used within PageContextProvider");
+  }
+  return ctx.context;
+}
+
+/**
+ * Register page-specific context from a page component.
+ *
+ * Call this in a useEffect or at the top-level of your page component
+ * to advertise what the user is currently viewing.
+ */
+export function useRegisterPageContext(data: {
+  pageType: string;
+  pageTitle: string;
+  pageData?: Record<string, unknown>;
+}): void {
+  const ctx = useContext(PageContext);
+  useEffect(() => {
+    if (ctx) {
+      ctx.registerPageContext(data);
+    }
+    // Only re-register when the serialized data changes
+    // eslint-disable-next-line react-hooks/exhaustive-deps
+  }, [ctx, JSON.stringify(data)]);
+}
diff --git a/python-packages/dataing/migrations/035_dataing_assistant.sql b/python-packages/dataing/migrations/035_dataing_assistant.sql
new file mode 100644
index 000000000..2036b49be
--- /dev/null
+++ b/python-packages/dataing/migrations/035_dataing_assistant.sql
@@ -0,0 +1,115 @@
+-- Migration: 035_dataing_assistant.sql
+-- Dataing Assistant - Chat sessions, messages, and audit logging
+-- Epic fn-56: Dataing Assistant
+
+-- =============================================================================
+-- Session Table
+-- =============================================================================
+
+-- Assistant sessions - each session is linked to an investigation
+CREATE TABLE assistant_sessions (
+    id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
+    investigation_id UUID NOT NULL REFERENCES investigations(id) ON DELETE CASCADE,
+    tenant_id UUID NOT NULL REFERENCES tenants(id) ON DELETE CASCADE,
+    user_id UUID NOT NULL REFERENCES users(id) ON DELETE CASCADE,
+
+    -- Parent/child investigation linking
+    parent_investigation_id UUID REFERENCES investigations(id) ON DELETE SET NULL, -- NOTE(review): FK has no covering index; add one if parent lookups become hot
+    is_parent BOOLEAN DEFAULT false,
+
+    -- Session state
+    title TEXT, -- User-provided or auto-generated title
+    token_count INTEGER DEFAULT 0,
+    last_activity TIMESTAMPTZ DEFAULT NOW(), -- kept current by trigger at bottom of this migration
+
+    -- Metadata (user preferences, panel size, etc.)
+    metadata JSONB DEFAULT '{}'::jsonb,
+
+    -- Timestamps
+    created_at TIMESTAMPTZ DEFAULT NOW() -- NOTE(review): consider NOT NULL on timestamp columns with defaults
+);
+
+CREATE INDEX idx_assistant_sessions_tenant ON assistant_sessions(tenant_id);
+CREATE INDEX idx_assistant_sessions_user ON assistant_sessions(user_id);
+CREATE INDEX idx_assistant_sessions_investigation ON assistant_sessions(investigation_id);
+CREATE INDEX idx_assistant_sessions_activity ON assistant_sessions(tenant_id, last_activity DESC);
+
+COMMENT ON TABLE assistant_sessions IS 'Chat sessions for the Dataing Assistant';
+COMMENT ON COLUMN assistant_sessions.investigation_id IS 'Each session creates its own investigation';
+COMMENT ON COLUMN assistant_sessions.parent_investigation_id IS 'Optional link to parent investigation for context';
+COMMENT ON COLUMN assistant_sessions.is_parent IS 'Whether this session is the parent of linked investigations';
+
+-- =============================================================================
+-- Message Table
+-- =============================================================================
+
+-- Messages within sessions
+CREATE TABLE assistant_messages (
+    id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
+    session_id UUID NOT NULL REFERENCES assistant_sessions(id) ON DELETE CASCADE,
+
+    -- Message content
+    role TEXT NOT NULL CHECK (role IN ('user', 'assistant', 'system', 'tool')),
+    content TEXT NOT NULL,
+
+    -- Tool tracking (for assistant/tool messages)
+    tool_calls JSONB, -- Array of {name, arguments, result}
+
+    -- Token usage
+    token_count INTEGER,
+
+    -- Timestamps
+    created_at TIMESTAMPTZ DEFAULT NOW()
+);
+
+CREATE INDEX idx_assistant_messages_session ON assistant_messages(session_id, created_at);
+
+COMMENT ON TABLE assistant_messages IS 'Messages in Dataing Assistant sessions';
+COMMENT ON COLUMN assistant_messages.tool_calls IS 'Tool calls made by assistant: [{name, arguments, result}]';
+
+-- =============================================================================
+-- Audit Log Table
+-- =============================================================================
+
+-- Audit log for security and debugging; no tenant_id column, so tenant scoping is indirect via session_id
+CREATE TABLE assistant_audit_log (
+    id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
+    session_id UUID NOT NULL REFERENCES assistant_sessions(id) ON DELETE CASCADE,
+
+    -- Action details
+    action TEXT NOT NULL, -- 'file_read', 'search', 'query', 'docker_status', 'git_read'
+    target TEXT NOT NULL, -- File path, query, container name, etc.
+    result_summary TEXT, -- Brief summary of result or error
+
+    -- Metadata
+    metadata JSONB DEFAULT '{}'::jsonb, -- Extra details (bytes read, lines returned, etc.)
+
+    -- Timestamps
+    created_at TIMESTAMPTZ DEFAULT NOW()
+);
+
+CREATE INDEX idx_assistant_audit_session ON assistant_audit_log(session_id, created_at);
+CREATE INDEX idx_assistant_audit_action ON assistant_audit_log(action, created_at DESC);
+
+COMMENT ON TABLE assistant_audit_log IS 'Audit log of tool usage in Dataing Assistant';
+COMMENT ON COLUMN assistant_audit_log.action IS 'Tool action: file_read, search, query, docker_status, git_read';
+COMMENT ON COLUMN assistant_audit_log.target IS 'Target of action: file path, SQL query, container name, etc.';
+
+-- =============================================================================
+-- Trigger for last_activity update
+-- =============================================================================
+
+-- Auto-update last_activity when messages are added (costs one extra UPDATE per inserted message)
+CREATE OR REPLACE FUNCTION update_assistant_session_activity()
+RETURNS TRIGGER AS $$
+BEGIN
+    UPDATE assistant_sessions
+    SET last_activity = NOW()
+    WHERE id = NEW.session_id;
+    RETURN NEW;
+END;
+$$ LANGUAGE plpgsql;
+
+CREATE TRIGGER assistant_message_activity_trigger
+    AFTER INSERT ON assistant_messages
+    FOR EACH ROW EXECUTE FUNCTION update_assistant_session_activity();
diff --git a/python-packages/dataing/openapi.json b/python-packages/dataing/openapi.json
index a7dedc09a..ba4fcf628 100644
--- a/python-packages/dataing/openapi.json
+++ b/python-packages/dataing/openapi.json
@@ -327,14 +327,14 @@
}
}
},
- "/api/v1/asset-instances/search": {
- "get": {
+ "/api/v1/assistant/sessions": {
+ "post": {
"tags": [
- "asset-instances"
+ "assistant"
],
- "summary": "Search Asset Instances",
- "description": "Search for asset instances (tables/views) across all tenant datasources.\n\nReturns fully qualified asset instances with datasource context,\nsuitable for binding to a notebook context.\n\nResults are ranked by match quality:\n- name_prefix: Query matches start of table name\n- path_match: Query matches in native path\n- fuzzy: Query appears anywhere in name or path",
- "operationId": "search_asset_instances_api_v1_asset_instances_search_get",
+ "summary": "Create Session",
+ "description": "Create a new assistant session.\n\nEach session is linked to an investigation for tracking and context.",
+ "operationId": "create_session_api_v1_assistant_sessions_post",
"security": [
{
"APIKeyHeader": []
@@ -343,79 +343,23 @@
"HTTPBearer": []
}
],
- "parameters": [
- {
- "name": "q",
- "in": "query",
- "required": true,
- "schema": {
- "type": "string",
- "minLength": 1,
- "maxLength": 200,
- "description": "Search query (min 1 char)",
- "title": "Q"
- },
- "description": "Search query (min 1 char)"
- },
- {
- "name": "limit",
- "in": "query",
- "required": false,
- "schema": {
- "type": "integer",
- "maximum": 100,
- "minimum": 1,
- "description": "Max results (default 10, max 100)",
- "default": 10,
- "title": "Limit"
- },
- "description": "Max results (default 10, max 100)"
- },
- {
- "name": "cursor",
- "in": "query",
- "required": false,
- "schema": {
- "anyOf": [
- {
- "type": "string"
- },
- {
- "type": "null"
- }
- ],
- "description": "Opaque pagination cursor",
- "title": "Cursor"
- },
- "description": "Opaque pagination cursor"
- },
- {
- "name": "datasource_id",
- "in": "query",
- "required": false,
- "schema": {
- "anyOf": [
- {
- "type": "string",
- "format": "uuid"
- },
- {
- "type": "null"
- }
- ],
- "description": "Filter to single datasource",
- "title": "Datasource Id"
- },
- "description": "Filter to single datasource"
+ "requestBody": {
+ "required": true,
+ "content": {
+ "application/json": {
+ "schema": {
+ "$ref": "#/components/schemas/CreateSessionRequest"
+ }
+ }
}
- ],
+ },
"responses": {
"200": {
"description": "Successful Response",
"content": {
"application/json": {
"schema": {
- "$ref": "#/components/schemas/AssetInstanceSearchResponse"
+ "$ref": "#/components/schemas/CreateSessionResponse"
}
}
}
@@ -431,32 +375,14 @@
}
}
}
- }
- },
- "/api/v1/investigations": {
+ },
"get": {
"tags": [
- "investigations"
+ "assistant"
],
- "summary": "List Investigations",
- "description": "List all investigations for the tenant.\n\nArgs:\n auth: Authentication context from API key/JWT.\n db: Application database.\n\nReturns:\n List of investigations.",
- "operationId": "list_investigations_api_v1_investigations_get",
- "responses": {
- "200": {
- "description": "Successful Response",
- "content": {
- "application/json": {
- "schema": {
- "items": {
- "$ref": "#/components/schemas/InvestigationListItem"
- },
- "type": "array",
- "title": "Response List Investigations Api V1 Investigations Get"
- }
- }
- }
- }
- },
+ "summary": "List Sessions",
+ "description": "List the user's assistant sessions.",
+ "operationId": "list_sessions_api_v1_assistant_sessions_get",
"security": [
{
"APIKeyHeader": []
@@ -464,32 +390,39 @@
{
"HTTPBearer": []
}
- ]
- },
- "post": {
- "tags": [
- "investigations"
],
- "summary": "Start Investigation",
- "description": "Start a new investigation for an alert.\n\nCreates a new investigation with Temporal workflow for durable execution.\n\nArgs:\n http_request: The HTTP request for accessing app state.\n request: The investigation request containing alert data.\n auth: Authentication context from API key/JWT.\n db: Application database.\n temporal_client: Temporal client for durable execution.\n\nReturns:\n StartInvestigationResponse with investigation and branch IDs.",
- "operationId": "start_investigation_api_v1_investigations_post",
- "requestBody": {
- "content": {
- "application/json": {
- "schema": {
- "$ref": "#/components/schemas/StartInvestigationRequest"
- }
+ "parameters": [
+ {
+ "name": "limit",
+ "in": "query",
+ "required": false,
+ "schema": {
+ "type": "integer",
+ "maximum": 100,
+ "minimum": 1,
+ "default": 20,
+ "title": "Limit"
}
},
- "required": true
- },
+ {
+ "name": "offset",
+ "in": "query",
+ "required": false,
+ "schema": {
+ "type": "integer",
+ "minimum": 0,
+ "default": 0,
+ "title": "Offset"
+ }
+ }
+ ],
"responses": {
"200": {
"description": "Successful Response",
"content": {
"application/json": {
"schema": {
- "$ref": "#/components/schemas/StartInvestigationResponse"
+ "$ref": "#/components/schemas/ListSessionsResponse"
}
}
}
@@ -504,25 +437,17 @@
}
}
}
- },
- "security": [
- {
- "APIKeyHeader": []
- },
- {
- "HTTPBearer": []
- }
- ]
+ }
}
},
- "/api/v1/investigations/{investigation_id}/cancel": {
- "post": {
+ "/api/v1/assistant/investigations/{investigation_id}/sessions": {
+ "get": {
"tags": [
- "investigations"
+ "assistant"
],
- "summary": "Cancel Investigation",
- "description": "Cancel an investigation and all its child workflows.\n\nArgs:\n investigation_id: UUID of the investigation to cancel.\n auth: Authentication context from API key/JWT.\n temporal_client: Temporal client for durable execution.\n\nReturns:\n CancelInvestigationResponse with cancellation status.\n\nRaises:\n HTTPException: If investigation not found or already complete.",
- "operationId": "cancel_investigation_api_v1_investigations__investigation_id__cancel_post",
+ "summary": "List Sessions For Investigation",
+ "description": "List assistant sessions linked to an investigation.\n\nReturns sessions where the investigation is the parent.",
+ "operationId": "list_sessions_for_investigation_api_v1_assistant_investigations__investigation_id__sessions_get",
"security": [
{
"APIKeyHeader": []
@@ -541,6 +466,29 @@
"format": "uuid",
"title": "Investigation Id"
}
+ },
+ {
+ "name": "limit",
+ "in": "query",
+ "required": false,
+ "schema": {
+ "type": "integer",
+ "maximum": 100,
+ "minimum": 1,
+ "default": 20,
+ "title": "Limit"
+ }
+ },
+ {
+ "name": "offset",
+ "in": "query",
+ "required": false,
+ "schema": {
+ "type": "integer",
+ "minimum": 0,
+ "default": 0,
+ "title": "Offset"
+ }
}
],
"responses": {
@@ -549,7 +497,7 @@
"content": {
"application/json": {
"schema": {
- "$ref": "#/components/schemas/CancelInvestigationResponse"
+ "$ref": "#/components/schemas/ListSessionsResponse"
}
}
}
@@ -567,14 +515,14 @@
}
}
},
- "/api/v1/investigations/{investigation_id}": {
+ "/api/v1/assistant/sessions/{session_id}": {
"get": {
"tags": [
- "investigations"
+ "assistant"
],
- "summary": "Get Investigation",
- "description": "Get investigation state from Temporal workflow.\n\nReturns the current state of the investigation including progress\nand any available results.\n\nArgs:\n investigation_id: UUID of the investigation.\n auth: Authentication context from API key/JWT.\n temporal_client: Temporal client for durable execution.\n\nReturns:\n InvestigationStateResponse with main branch state.\n\nRaises:\n HTTPException: If investigation not found.",
- "operationId": "get_investigation_api_v1_investigations__investigation_id__get",
+ "summary": "Get Session",
+ "description": "Get full session details with messages.",
+ "operationId": "get_session_api_v1_assistant_sessions__session_id__get",
"security": [
{
"APIKeyHeader": []
@@ -585,13 +533,13 @@
],
"parameters": [
{
- "name": "investigation_id",
+ "name": "session_id",
"in": "path",
"required": true,
"schema": {
"type": "string",
"format": "uuid",
- "title": "Investigation Id"
+ "title": "Session Id"
}
}
],
@@ -601,7 +549,7 @@
"content": {
"application/json": {
"schema": {
- "$ref": "#/components/schemas/InvestigationStateResponse"
+ "$ref": "#/components/schemas/SessionDetailResponse"
}
}
}
@@ -617,16 +565,14 @@
}
}
}
- }
- },
- "/api/v1/investigations/{investigation_id}/verify": {
- "get": {
+ },
+ "delete": {
"tags": [
- "investigations"
+ "assistant"
],
- "summary": "Verify Investigation",
- "description": "Verify the integrity of an investigation's evidence hash chain.\n\nValidates that evidence items have not been tampered with by checking\ncontent hashes and chain linkage.\n\nArgs:\n investigation_id: UUID of the investigation.\n auth: Authentication context from API key/JWT.\n db: Application database.\n\nReturns:\n ChainVerificationResponse with verification result.\n\nRaises:\n HTTPException: If investigation not found.",
- "operationId": "verify_investigation_api_v1_investigations__investigation_id__verify_get",
+ "summary": "Delete Session",
+ "description": "Delete an assistant session.",
+ "operationId": "delete_session_api_v1_assistant_sessions__session_id__delete",
"security": [
{
"APIKeyHeader": []
@@ -637,13 +583,13 @@
],
"parameters": [
{
- "name": "investigation_id",
+ "name": "session_id",
"in": "path",
"required": true,
"schema": {
"type": "string",
"format": "uuid",
- "title": "Investigation Id"
+ "title": "Session Id"
}
}
],
@@ -653,7 +599,11 @@
"content": {
"application/json": {
"schema": {
- "$ref": "#/components/schemas/ChainVerificationResponse"
+ "type": "object",
+ "additionalProperties": {
+ "type": "string"
+ },
+ "title": "Response Delete Session Api V1 Assistant Sessions Session Id Delete"
}
}
}
@@ -671,14 +621,14 @@
}
}
},
- "/api/v1/investigations/{investigation_id}/messages": {
+ "/api/v1/assistant/sessions/{session_id}/messages": {
"post": {
"tags": [
- "investigations"
+ "assistant"
],
"summary": "Send Message",
- "description": "Send a message to an investigation via Temporal signal.\n\nArgs:\n investigation_id: UUID of the investigation.\n request: The message request.\n auth: Authentication context from API key/JWT.\n temporal_client: Temporal client for durable execution.\n\nReturns:\n SendMessageResponse with status.\n\nRaises:\n HTTPException: If failed to send message.",
- "operationId": "send_message_api_v1_investigations__investigation_id__messages_post",
+ "description": "Send a message to the assistant.\n\nThe response will be streamed via the /stream endpoint.",
+ "operationId": "send_message_api_v1_assistant_sessions__session_id__messages_post",
"security": [
{
"APIKeyHeader": []
@@ -689,13 +639,13 @@
],
"parameters": [
{
- "name": "investigation_id",
+ "name": "session_id",
"in": "path",
"required": true,
"schema": {
"type": "string",
"format": "uuid",
- "title": "Investigation Id"
+ "title": "Session Id"
}
}
],
@@ -704,7 +654,7 @@
"content": {
"application/json": {
"schema": {
- "$ref": "#/components/schemas/SendMessageRequest"
+ "$ref": "#/components/schemas/dataing__entrypoints__api__routes__assistant__SendMessageRequest"
}
}
}
@@ -715,7 +665,7 @@
"content": {
"application/json": {
"schema": {
- "$ref": "#/components/schemas/SendMessageResponse"
+ "$ref": "#/components/schemas/dataing__entrypoints__api__routes__assistant__SendMessageResponse"
}
}
}
@@ -733,14 +683,14 @@
}
}
},
- "/api/v1/investigations/{investigation_id}/status": {
+ "/api/v1/assistant/sessions/{session_id}/stream": {
"get": {
"tags": [
- "investigations"
+ "assistant"
],
- "summary": "Get Investigation Status",
- "description": "Get the status of an investigation.\n\nQueries the Temporal workflow for real-time progress.\n\nArgs:\n investigation_id: UUID of the investigation.\n auth: Authentication context from API key/JWT.\n temporal_client: Temporal client for durable execution.\n\nReturns:\n TemporalStatusResponse with current progress and state.",
- "operationId": "get_investigation_status_api_v1_investigations__investigation_id__status_get",
+ "summary": "Stream Response",
+ "description": "Stream assistant responses via Server-Sent Events.\n\nConnect to this endpoint after sending a message to receive real-time\nupdates including text chunks, tool calls, and completion status.",
+ "operationId": "stream_response_api_v1_assistant_sessions__session_id__stream_get",
"security": [
{
"APIKeyHeader": []
@@ -751,14 +701,32 @@
],
"parameters": [
{
- "name": "investigation_id",
+ "name": "session_id",
"in": "path",
"required": true,
"schema": {
"type": "string",
"format": "uuid",
- "title": "Investigation Id"
+ "title": "Session Id"
}
+ },
+ {
+ "name": "last_event_id",
+ "in": "query",
+ "required": false,
+ "schema": {
+ "anyOf": [
+ {
+ "type": "integer"
+ },
+ {
+ "type": "null"
+ }
+ ],
+ "description": "Resume from event ID",
+ "title": "Last Event Id"
+ },
+ "description": "Resume from event ID"
}
],
"responses": {
@@ -766,9 +734,7 @@
"description": "Successful Response",
"content": {
"application/json": {
- "schema": {
- "$ref": "#/components/schemas/TemporalStatusResponse"
- }
+ "schema": {}
}
}
},
@@ -785,14 +751,14 @@
}
}
},
- "/api/v1/investigations/{investigation_id}/input": {
+ "/api/v1/assistant/sessions/{session_id}/export": {
"post": {
"tags": [
- "investigations"
+ "assistant"
],
- "summary": "Send User Input",
- "description": "Send user input to an investigation awaiting feedback.\n\nThis endpoint sends a signal to the Temporal workflow when it's\nin AWAIT_USER state.\n\nArgs:\n investigation_id: UUID of the investigation.\n request: User input payload.\n auth: Authentication context from API key/JWT.\n temporal_client: Temporal client for durable execution.\n\nReturns:\n Confirmation message.",
- "operationId": "send_user_input_api_v1_investigations__investigation_id__input_post",
+ "summary": "Export Session",
+ "description": "Export a session as JSON or Markdown.",
+ "operationId": "export_session_api_v1_assistant_sessions__session_id__export_post",
"security": [
{
"APIKeyHeader": []
@@ -803,26 +769,25 @@
],
"parameters": [
{
- "name": "investigation_id",
+ "name": "session_id",
"in": "path",
"required": true,
"schema": {
"type": "string",
"format": "uuid",
- "title": "Investigation Id"
+ "title": "Session Id"
}
- }
- ],
- "requestBody": {
- "required": true,
- "content": {
- "application/json": {
- "schema": {
- "$ref": "#/components/schemas/UserInputRequest"
- }
+ },
+ {
+ "name": "format",
+ "in": "query",
+ "required": false,
+ "schema": {
+ "$ref": "#/components/schemas/ExportFormat",
+ "default": "markdown"
}
}
- },
+ ],
"responses": {
"200": {
"description": "Successful Response",
@@ -830,10 +795,8 @@
"application/json": {
"schema": {
"type": "object",
- "additionalProperties": {
- "type": "string"
- },
- "title": "Response Send User Input Api V1 Investigations Investigation Id Input Post"
+ "additionalProperties": true,
+ "title": "Response Export Session Api V1 Assistant Sessions Session Id Export Post"
}
}
}
@@ -851,14 +814,202 @@
}
}
},
- "/api/v1/investigations/{investigation_id}/stream": {
+ "/api/v1/asset-instances/search": {
+ "get": {
+ "tags": [
+ "asset-instances"
+ ],
+ "summary": "Search Asset Instances",
+ "description": "Search for asset instances (tables/views) across all tenant datasources.\n\nReturns fully qualified asset instances with datasource context,\nsuitable for binding to a notebook context.\n\nResults are ranked by match quality:\n- name_prefix: Query matches start of table name\n- path_match: Query matches in native path\n- fuzzy: Query appears anywhere in name or path",
+ "operationId": "search_asset_instances_api_v1_asset_instances_search_get",
+ "security": [
+ {
+ "APIKeyHeader": []
+ },
+ {
+ "HTTPBearer": []
+ }
+ ],
+ "parameters": [
+ {
+ "name": "q",
+ "in": "query",
+ "required": true,
+ "schema": {
+ "type": "string",
+ "minLength": 1,
+ "maxLength": 200,
+ "description": "Search query (min 1 char)",
+ "title": "Q"
+ },
+ "description": "Search query (min 1 char)"
+ },
+ {
+ "name": "limit",
+ "in": "query",
+ "required": false,
+ "schema": {
+ "type": "integer",
+ "maximum": 100,
+ "minimum": 1,
+ "description": "Max results (default 10, max 100)",
+ "default": 10,
+ "title": "Limit"
+ },
+ "description": "Max results (default 10, max 100)"
+ },
+ {
+ "name": "cursor",
+ "in": "query",
+ "required": false,
+ "schema": {
+ "anyOf": [
+ {
+ "type": "string"
+ },
+ {
+ "type": "null"
+ }
+ ],
+ "description": "Opaque pagination cursor",
+ "title": "Cursor"
+ },
+ "description": "Opaque pagination cursor"
+ },
+ {
+ "name": "datasource_id",
+ "in": "query",
+ "required": false,
+ "schema": {
+ "anyOf": [
+ {
+ "type": "string",
+ "format": "uuid"
+ },
+ {
+ "type": "null"
+ }
+ ],
+ "description": "Filter to single datasource",
+ "title": "Datasource Id"
+ },
+ "description": "Filter to single datasource"
+ }
+ ],
+ "responses": {
+ "200": {
+ "description": "Successful Response",
+ "content": {
+ "application/json": {
+ "schema": {
+ "$ref": "#/components/schemas/AssetInstanceSearchResponse"
+ }
+ }
+ }
+ },
+ "422": {
+ "description": "Validation Error",
+ "content": {
+ "application/json": {
+ "schema": {
+ "$ref": "#/components/schemas/HTTPValidationError"
+ }
+ }
+ }
+ }
+ }
+ }
+ },
+ "/api/v1/investigations": {
"get": {
"tags": [
"investigations"
],
- "summary": "Stream Updates",
- "description": "Stream real-time updates via SSE.\n\nReturns a Server-Sent Events stream that pushes investigation\nupdates as they occur by polling the Temporal workflow.\n\nArgs:\n investigation_id: UUID of the investigation.\n auth: Authentication context from API key/JWT.\n temporal_client: Temporal client for durable execution.\n\nReturns:\n EventSourceResponse with SSE stream.",
- "operationId": "stream_updates_api_v1_investigations__investigation_id__stream_get",
+ "summary": "List Investigations",
+ "description": "List all investigations for the tenant.\n\nArgs:\n auth: Authentication context from API key/JWT.\n db: Application database.\n\nReturns:\n List of investigations.",
+ "operationId": "list_investigations_api_v1_investigations_get",
+ "responses": {
+ "200": {
+ "description": "Successful Response",
+ "content": {
+ "application/json": {
+ "schema": {
+ "items": {
+ "$ref": "#/components/schemas/InvestigationListItem"
+ },
+ "type": "array",
+ "title": "Response List Investigations Api V1 Investigations Get"
+ }
+ }
+ }
+ }
+ },
+ "security": [
+ {
+ "APIKeyHeader": []
+ },
+ {
+ "HTTPBearer": []
+ }
+ ]
+ },
+ "post": {
+ "tags": [
+ "investigations"
+ ],
+ "summary": "Start Investigation",
+ "description": "Start a new investigation for an alert.\n\nCreates a new investigation with Temporal workflow for durable execution.\n\nArgs:\n http_request: The HTTP request for accessing app state.\n request: The investigation request containing alert data.\n auth: Authentication context from API key/JWT.\n db: Application database.\n temporal_client: Temporal client for durable execution.\n\nReturns:\n StartInvestigationResponse with investigation and branch IDs.",
+ "operationId": "start_investigation_api_v1_investigations_post",
+ "requestBody": {
+ "content": {
+ "application/json": {
+ "schema": {
+ "$ref": "#/components/schemas/StartInvestigationRequest"
+ }
+ }
+ },
+ "required": true
+ },
+ "responses": {
+ "200": {
+ "description": "Successful Response",
+ "content": {
+ "application/json": {
+ "schema": {
+ "$ref": "#/components/schemas/StartInvestigationResponse"
+ }
+ }
+ }
+ },
+ "422": {
+ "description": "Validation Error",
+ "content": {
+ "application/json": {
+ "schema": {
+ "$ref": "#/components/schemas/HTTPValidationError"
+ }
+ }
+ }
+ }
+ },
+ "security": [
+ {
+ "APIKeyHeader": []
+ },
+ {
+ "HTTPBearer": []
+ }
+ ]
+ }
+ },
+ "/api/v1/investigations/{investigation_id}/cancel": {
+ "post": {
+ "tags": [
+ "investigations"
+ ],
+ "summary": "Cancel Investigation",
+ "description": "Cancel an investigation and all its child workflows.\n\nArgs:\n investigation_id: UUID of the investigation to cancel.\n auth: Authentication context from API key/JWT.\n temporal_client: Temporal client for durable execution.\n\nReturns:\n CancelInvestigationResponse with cancellation status.\n\nRaises:\n HTTPException: If investigation not found or already complete.",
+ "operationId": "cancel_investigation_api_v1_investigations__investigation_id__cancel_post",
"security": [
{
"APIKeyHeader": []
@@ -884,7 +1035,9 @@
"description": "Successful Response",
"content": {
"application/json": {
- "schema": {}
+ "schema": {
+ "$ref": "#/components/schemas/CancelInvestigationResponse"
+ }
}
}
},
@@ -901,14 +1054,14 @@
}
}
},
- "/api/v1/investigations/{investigation_id}/events": {
+ "/api/v1/investigations/{investigation_id}": {
"get": {
"tags": [
"investigations"
],
- "summary": "Stream Events",
- "description": "Stream SSE events for an investigation.\n\nEvents have an integer `seq` field for resumption.\nUse `?seq=N` to resume from sequence N.\n\nReturns 410 Gone if the replay window has expired.\n\nArgs:\n request: FastAPI request object.\n investigation_id: UUID of the investigation.\n auth: Authentication context from API key/JWT.\n db: Application database.\n temporal_client: Temporal client for status polling.\n last_event_id: Optional sequence number to resume from.\n\nReturns:\n EventSourceResponse with SSE stream.",
- "operationId": "stream_events_api_v1_investigations__investigation_id__events_get",
+ "summary": "Get Investigation",
+ "description": "Get investigation state from Temporal workflow.\n\nReturns the current state of the investigation including progress\nand any available results.\n\nArgs:\n investigation_id: UUID of the investigation.\n auth: Authentication context from API key/JWT.\n temporal_client: Temporal client for durable execution.\n\nReturns:\n InvestigationStateResponse with main branch state.\n\nRaises:\n HTTPException: If investigation not found.",
+ "operationId": "get_investigation_api_v1_investigations__investigation_id__get",
"security": [
{
"APIKeyHeader": []
@@ -927,24 +1080,6 @@
"format": "uuid",
"title": "Investigation Id"
}
- },
- {
- "name": "seq",
- "in": "query",
- "required": false,
- "schema": {
- "anyOf": [
- {
- "type": "integer"
- },
- {
- "type": "null"
- }
- ],
- "description": "Resume from this sequence number",
- "title": "Seq"
- },
- "description": "Resume from this sequence number"
}
],
"responses": {
@@ -952,7 +1087,9 @@
"description": "Successful Response",
"content": {
"application/json": {
- "schema": {}
+ "schema": {
+ "$ref": "#/components/schemas/InvestigationStateResponse"
+ }
}
}
},
@@ -969,14 +1106,14 @@
}
}
},
- "/api/v1/issues": {
+ "/api/v1/investigations/{investigation_id}/verify": {
"get": {
"tags": [
- "issues"
+ "investigations"
],
- "summary": "List Issues",
- "description": "List issues with filters and cursor-based pagination.\n\nUses cursor-based pagination with base64(updated_at|id) format.\nReturns issues ordered by updated_at descending.",
- "operationId": "list_issues_api_v1_issues_get",
+ "summary": "Verify Investigation",
+ "description": "Verify the integrity of an investigation's evidence hash chain.\n\nValidates that evidence items have not been tampered with by checking\ncontent hashes and chain linkage.\n\nArgs:\n investigation_id: UUID of the investigation.\n auth: Authentication context from API key/JWT.\n db: Application database.\n\nReturns:\n ChainVerificationResponse with verification result.\n\nRaises:\n HTTPException: If investigation not found.",
+ "operationId": "verify_investigation_api_v1_investigations__investigation_id__verify_get",
"security": [
{
"APIKeyHeader": []
@@ -987,127 +1124,14 @@
],
"parameters": [
{
- "name": "status",
- "in": "query",
- "required": false,
- "schema": {
- "anyOf": [
- {
- "type": "string"
- },
- {
- "type": "null"
- }
- ],
- "description": "Filter by status",
- "title": "Status"
- },
- "description": "Filter by status"
- },
- {
- "name": "priority",
- "in": "query",
- "required": false,
- "schema": {
- "anyOf": [
- {
- "type": "string"
- },
- {
- "type": "null"
- }
- ],
- "description": "Filter by priority",
- "title": "Priority"
- },
- "description": "Filter by priority"
- },
- {
- "name": "severity",
- "in": "query",
- "required": false,
- "schema": {
- "anyOf": [
- {
- "type": "string"
- },
- {
- "type": "null"
- }
- ],
- "description": "Filter by severity",
- "title": "Severity"
- },
- "description": "Filter by severity"
- },
- {
- "name": "assignee",
- "in": "query",
- "required": false,
- "schema": {
- "anyOf": [
- {
- "type": "string",
- "format": "uuid"
- },
- {
- "type": "null"
- }
- ],
- "description": "Filter by assignee",
- "title": "Assignee"
- },
- "description": "Filter by assignee"
- },
- {
- "name": "search",
- "in": "query",
- "required": false,
- "schema": {
- "anyOf": [
- {
- "type": "string"
- },
- {
- "type": "null"
- }
- ],
- "description": "Full-text search",
- "title": "Search"
- },
- "description": "Full-text search"
- },
- {
- "name": "cursor",
- "in": "query",
- "required": false,
- "schema": {
- "anyOf": [
- {
- "type": "string"
- },
- {
- "type": "null"
- }
- ],
- "description": "Pagination cursor",
- "title": "Cursor"
- },
- "description": "Pagination cursor"
- },
- {
- "name": "limit",
- "in": "query",
- "required": false,
+ "name": "investigation_id",
+ "in": "path",
+ "required": true,
"schema": {
- "type": "integer",
- "maximum": 100,
- "minimum": 1,
- "description": "Max issues",
- "default": 50,
- "title": "Limit"
- },
- "description": "Max issues"
+ "type": "string",
+ "format": "uuid",
+ "title": "Investigation Id"
+ }
}
],
"responses": {
@@ -1116,7 +1140,7 @@
"content": {
"application/json": {
"schema": {
- "$ref": "#/components/schemas/IssueListResponse"
+ "$ref": "#/components/schemas/ChainVerificationResponse"
}
}
}
@@ -1132,14 +1156,16 @@
}
}
}
- },
+ }
+ },
+ "/api/v1/investigations/{investigation_id}/codify": {
"post": {
"tags": [
- "issues"
+ "investigations"
],
- "summary": "Create Issue",
- "description": "Create a new issue.\n\nIssues are created in OPEN status. Number is auto-assigned per-tenant.",
- "operationId": "create_issue_api_v1_issues_post",
+ "summary": "Codify Investigation",
+ "description": "Generate regression tests from an investigation's synthesis.\n\nExtracts testable assertions from the investigation synthesis and renders\nthem to the specified format (Great Expectations, dbt, Soda, or SQL).\n\nArgs:\n investigation_id: UUID of the investigation.\n request: Codify request with output format.\n auth: Authentication context from API key/JWT.\n db: Application database for test tracking.\n temporal_client: Temporal client for durable execution.\n\nReturns:\n CodifyResponse with rendered test content.\n\nRaises:\n HTTPException: If investigation not found or no synthesis available.",
+ "operationId": "codify_investigation_api_v1_investigations__investigation_id__codify_post",
"security": [
{
"APIKeyHeader": []
@@ -1148,23 +1174,35 @@
"HTTPBearer": []
}
],
+ "parameters": [
+ {
+ "name": "investigation_id",
+ "in": "path",
+ "required": true,
+ "schema": {
+ "type": "string",
+ "format": "uuid",
+ "title": "Investigation Id"
+ }
+ }
+ ],
"requestBody": {
"required": true,
"content": {
"application/json": {
"schema": {
- "$ref": "#/components/schemas/IssueCreate"
+ "$ref": "#/components/schemas/CodifyRequest"
}
}
}
},
"responses": {
- "201": {
+ "200": {
"description": "Successful Response",
"content": {
"application/json": {
"schema": {
- "$ref": "#/components/schemas/IssueResponse"
+ "$ref": "#/components/schemas/CodifyResponse"
}
}
}
@@ -1182,14 +1220,14 @@
}
}
},
- "/api/v1/issues/{issue_id}": {
+ "/api/v1/investigations/tests/stats": {
"get": {
"tags": [
- "issues"
+ "investigations"
],
- "summary": "Get Issue",
- "description": "Get issue by ID.\n\nReturns the full issue if user has access, 404 if not found.",
- "operationId": "get_issue_api_v1_issues__issue_id__get",
+ "summary": "Get Test Tracking Stats",
+ "description": "Get test tracking statistics.\n\nReturns metrics on tests generated, adopted, and issues caught.\n\nArgs:\n auth: Authentication context from API key/JWT.\n db: Application database.\n days: Number of days to look back.\n\nReturns:\n TestTrackingStatsResponse with statistics.",
+ "operationId": "get_test_tracking_stats_api_v1_investigations_tests_stats_get",
"security": [
{
"APIKeyHeader": []
@@ -1200,14 +1238,18 @@
],
"parameters": [
{
- "name": "issue_id",
- "in": "path",
- "required": true,
+ "name": "days",
+ "in": "query",
+ "required": false,
"schema": {
- "type": "string",
- "format": "uuid",
- "title": "Issue Id"
- }
+ "type": "integer",
+ "maximum": 365,
+ "minimum": 1,
+ "description": "Days to look back",
+ "default": 30,
+ "title": "Days"
+ },
+ "description": "Days to look back"
}
],
"responses": {
@@ -1216,7 +1258,7 @@
"content": {
"application/json": {
"schema": {
- "$ref": "#/components/schemas/IssueResponse"
+ "$ref": "#/components/schemas/TestTrackingStatsResponse"
}
}
}
@@ -1232,14 +1274,16 @@
}
}
}
- },
- "patch": {
+ }
+ },
+ "/api/v1/investigations/tests/catches": {
+ "get": {
"tags": [
- "issues"
+ "investigations"
],
- "summary": "Update Issue",
- "description": "Update issue fields.\n\nEnforces state machine transitions when status is changed.",
- "operationId": "update_issue_api_v1_issues__issue_id__patch",
+ "summary": "Get Recent Catches",
+ "description": "Get recent tests that caught issues.\n\nReturns a list of recent test failures (issues caught).\n\nArgs:\n auth: Authentication context from API key/JWT.\n db: Application database.\n limit: Maximum number of results.\n\nReturns:\n List of recent catches.",
+ "operationId": "get_recent_catches_api_v1_investigations_tests_catches_get",
"security": [
{
"APIKeyHeader": []
@@ -1250,25 +1294,65 @@
],
"parameters": [
{
- "name": "issue_id",
- "in": "path",
- "required": true,
+ "name": "limit",
+ "in": "query",
+ "required": false,
"schema": {
- "type": "string",
- "format": "uuid",
- "title": "Issue Id"
+ "type": "integer",
+ "maximum": 100,
+ "minimum": 1,
+ "description": "Maximum results",
+ "default": 10,
+ "title": "Limit"
+ },
+ "description": "Maximum results"
+ }
+ ],
+ "responses": {
+ "200": {
+ "description": "Successful Response",
+ "content": {
+ "application/json": {
+ "schema": {
+ "type": "array",
+ "items": {
+ "$ref": "#/components/schemas/RecentCatchResponse"
+ },
+ "title": "Response Get Recent Catches Api V1 Investigations Tests Catches Get"
+ }
+ }
+ }
+ },
+ "422": {
+ "description": "Validation Error",
+ "content": {
+ "application/json": {
+ "schema": {
+ "$ref": "#/components/schemas/HTTPValidationError"
+ }
+ }
}
}
+ }
+ }
+ },
+ "/api/v1/investigations/tests/adopt": {
+ "post": {
+ "tags": [
+ "investigations"
],
+ "summary": "Adopt Test",
+ "description": "Mark a generated test as adopted.\n\nCall this when a test has been added to the user's project.\n\nArgs:\n request: Adoption request with test ID.\n auth: Authentication context from API key/JWT.\n db: Application database.\n\nReturns:\n Status message.",
+ "operationId": "adopt_test_api_v1_investigations_tests_adopt_post",
"requestBody": {
- "required": true,
"content": {
"application/json": {
"schema": {
- "$ref": "#/components/schemas/IssueUpdate"
+ "$ref": "#/components/schemas/TestAdoptionRequest"
}
}
- }
+ },
+ "required": true
},
"responses": {
"200": {
@@ -1276,7 +1360,11 @@
"content": {
"application/json": {
"schema": {
- "$ref": "#/components/schemas/IssueResponse"
+ "additionalProperties": {
+ "type": "string"
+ },
+ "type": "object",
+ "title": "Response Adopt Test Api V1 Investigations Tests Adopt Post"
}
}
}
@@ -1291,17 +1379,7 @@
}
}
}
- }
- }
- },
- "/api/v1/issues/{issue_id}/comments": {
- "get": {
- "tags": [
- "issues"
- ],
- "summary": "List Issue Comments",
- "description": "List comments for an issue.",
- "operationId": "list_issue_comments_api_v1_issues__issue_id__comments_get",
+ },
"security": [
{
"APIKeyHeader": []
@@ -1309,26 +1387,38 @@
{
"HTTPBearer": []
}
+ ]
+ }
+ },
+ "/api/v1/investigations/tests/run": {
+ "post": {
+ "tags": [
+ "investigations"
],
- "parameters": [
- {
- "name": "issue_id",
- "in": "path",
- "required": true,
- "schema": {
- "type": "string",
- "format": "uuid",
- "title": "Issue Id"
- }
- }
- ],
+ "summary": "Record Test Run",
+ "description": "Record a test run result.\n\nCall this when a generated test has been executed.\n\nArgs:\n request: Test run result.\n auth: Authentication context from API key/JWT.\n db: Application database.\n\nReturns:\n Status message.",
+ "operationId": "record_test_run_api_v1_investigations_tests_run_post",
+ "requestBody": {
+ "content": {
+ "application/json": {
+ "schema": {
+ "$ref": "#/components/schemas/TestRunResultRequest"
+ }
+ }
+ },
+ "required": true
+ },
"responses": {
"200": {
"description": "Successful Response",
"content": {
"application/json": {
"schema": {
- "$ref": "#/components/schemas/IssueCommentListResponse"
+ "additionalProperties": {
+ "type": "string"
+ },
+ "type": "object",
+ "title": "Response Record Test Run Api V1 Investigations Tests Run Post"
}
}
}
@@ -1343,15 +1433,25 @@
}
}
}
- }
- },
+ },
+ "security": [
+ {
+ "APIKeyHeader": []
+ },
+ {
+ "HTTPBearer": []
+ }
+ ]
+ }
+ },
+ "/api/v1/investigations/{investigation_id}/messages": {
"post": {
"tags": [
- "issues"
+ "investigations"
],
- "summary": "Create Issue Comment",
- "description": "Add a comment to an issue.\n\nRequires user identity (JWT auth or user-scoped API key).",
- "operationId": "create_issue_comment_api_v1_issues__issue_id__comments_post",
+ "summary": "Send Message",
+ "description": "Send a message to an investigation via Temporal signal.\n\nArgs:\n investigation_id: UUID of the investigation.\n request: The message request.\n auth: Authentication context from API key/JWT.\n temporal_client: Temporal client for durable execution.\n\nReturns:\n SendMessageResponse with status.\n\nRaises:\n HTTPException: If failed to send message.",
+ "operationId": "send_message_api_v1_investigations__investigation_id__messages_post",
"security": [
{
"APIKeyHeader": []
@@ -1362,13 +1462,13 @@
],
"parameters": [
{
- "name": "issue_id",
+ "name": "investigation_id",
"in": "path",
"required": true,
"schema": {
"type": "string",
"format": "uuid",
- "title": "Issue Id"
+ "title": "Investigation Id"
}
}
],
@@ -1377,18 +1477,18 @@
"content": {
"application/json": {
"schema": {
- "$ref": "#/components/schemas/IssueCommentCreate"
+ "$ref": "#/components/schemas/SendMessageRequest"
}
}
}
},
"responses": {
- "201": {
+ "200": {
"description": "Successful Response",
"content": {
"application/json": {
"schema": {
- "$ref": "#/components/schemas/IssueCommentResponse"
+ "$ref": "#/components/schemas/SendMessageResponse"
}
}
}
@@ -1406,14 +1506,14 @@
}
}
},
- "/api/v1/issues/{issue_id}/watchers": {
+ "/api/v1/investigations/{investigation_id}/status": {
"get": {
"tags": [
- "issues"
+ "investigations"
],
- "summary": "List Issue Watchers",
- "description": "List watchers for an issue.",
- "operationId": "list_issue_watchers_api_v1_issues__issue_id__watchers_get",
+ "summary": "Get Investigation Status",
+ "description": "Get the status of an investigation.\n\nQueries the Temporal workflow for real-time progress.\n\nArgs:\n investigation_id: UUID of the investigation.\n auth: Authentication context from API key/JWT.\n temporal_client: Temporal client for durable execution.\n\nReturns:\n TemporalStatusResponse with current progress and state.",
+ "operationId": "get_investigation_status_api_v1_investigations__investigation_id__status_get",
"security": [
{
"APIKeyHeader": []
@@ -1424,13 +1524,13 @@
],
"parameters": [
{
- "name": "issue_id",
+ "name": "investigation_id",
"in": "path",
"required": true,
"schema": {
"type": "string",
"format": "uuid",
- "title": "Issue Id"
+ "title": "Investigation Id"
}
}
],
@@ -1440,7 +1540,7 @@
"content": {
"application/json": {
"schema": {
- "$ref": "#/components/schemas/WatcherListResponse"
+ "$ref": "#/components/schemas/TemporalStatusResponse"
}
}
}
@@ -1458,14 +1558,14 @@
}
}
},
- "/api/v1/issues/{issue_id}/watch": {
+ "/api/v1/investigations/{investigation_id}/input": {
"post": {
"tags": [
- "issues"
+ "investigations"
],
- "summary": "Add Issue Watcher",
- "description": "Subscribe the current user as a watcher.\n\nIdempotent - returns 204 even if already watching.\nRequires user identity (JWT auth or user-scoped API key).",
- "operationId": "add_issue_watcher_api_v1_issues__issue_id__watch_post",
+ "summary": "Send User Input",
+ "description": "Send user input to an investigation awaiting feedback.\n\nThis endpoint sends a signal to the Temporal workflow when it's\nin AWAIT_USER state.\n\nArgs:\n investigation_id: UUID of the investigation.\n request: User input payload.\n auth: Authentication context from API key/JWT.\n temporal_client: Temporal client for durable execution.\n\nReturns:\n Confirmation message.",
+ "operationId": "send_user_input_api_v1_investigations__investigation_id__input_post",
"security": [
{
"APIKeyHeader": []
@@ -1476,19 +1576,40 @@
],
"parameters": [
{
- "name": "issue_id",
+ "name": "investigation_id",
"in": "path",
"required": true,
"schema": {
"type": "string",
"format": "uuid",
- "title": "Issue Id"
+ "title": "Investigation Id"
}
}
],
+ "requestBody": {
+ "required": true,
+ "content": {
+ "application/json": {
+ "schema": {
+ "$ref": "#/components/schemas/UserInputRequest"
+ }
+ }
+ }
+ },
"responses": {
- "204": {
- "description": "Successful Response"
+ "200": {
+ "description": "Successful Response",
+ "content": {
+ "application/json": {
+ "schema": {
+ "type": "object",
+ "additionalProperties": {
+ "type": "string"
+ },
+ "title": "Response Send User Input Api V1 Investigations Investigation Id Input Post"
+ }
+ }
+ }
},
"422": {
"description": "Validation Error",
@@ -1501,14 +1622,16 @@
}
}
}
- },
- "delete": {
+ }
+ },
+ "/api/v1/investigations/{investigation_id}/stream": {
+ "get": {
"tags": [
- "issues"
+ "investigations"
],
- "summary": "Remove Issue Watcher",
- "description": "Unsubscribe the current user as a watcher.\n\nIdempotent - returns 204 even if not watching.\nRequires user identity (JWT auth or user-scoped API key).",
- "operationId": "remove_issue_watcher_api_v1_issues__issue_id__watch_delete",
+ "summary": "Stream Updates",
+ "description": "Stream real-time updates via SSE.\n\nReturns a Server-Sent Events stream that pushes investigation\nupdates as they occur by polling the Temporal workflow.\n\nArgs:\n investigation_id: UUID of the investigation.\n auth: Authentication context from API key/JWT.\n temporal_client: Temporal client for durable execution.\n\nReturns:\n EventSourceResponse with SSE stream.",
+ "operationId": "stream_updates_api_v1_investigations__investigation_id__stream_get",
"security": [
{
"APIKeyHeader": []
@@ -1519,19 +1642,24 @@
],
"parameters": [
{
- "name": "issue_id",
+ "name": "investigation_id",
"in": "path",
"required": true,
"schema": {
"type": "string",
"format": "uuid",
- "title": "Issue Id"
+ "title": "Investigation Id"
}
}
],
"responses": {
- "204": {
- "description": "Successful Response"
+ "200": {
+ "description": "Successful Response",
+ "content": {
+ "application/json": {
+ "schema": {}
+ }
+ }
},
"422": {
"description": "Validation Error",
@@ -1546,14 +1674,14 @@
}
}
},
- "/api/v1/issues/{issue_id}/investigation-runs": {
+ "/api/v1/investigations/{investigation_id}/events": {
"get": {
"tags": [
- "issues"
+ "investigations"
],
- "summary": "List Investigation Runs",
- "description": "List investigation runs for an issue.",
- "operationId": "list_investigation_runs_api_v1_issues__issue_id__investigation_runs_get",
+ "summary": "Stream Events",
+ "description": "Stream SSE events for an investigation.\n\nEvents have an integer `seq` field for resumption.\nUse `?seq=N` to resume from sequence N.\n\nReturns 410 Gone if the replay window has expired.\n\nArgs:\n request: FastAPI request object.\n investigation_id: UUID of the investigation.\n auth: Authentication context from API key/JWT.\n db: Application database.\n temporal_client: Temporal client for status polling.\n last_event_id: Optional sequence number to resume from.\n\nReturns:\n EventSourceResponse with SSE stream.",
+ "operationId": "stream_events_api_v1_investigations__investigation_id__events_get",
"security": [
{
"APIKeyHeader": []
@@ -1564,14 +1692,32 @@
],
"parameters": [
{
- "name": "issue_id",
+ "name": "investigation_id",
"in": "path",
"required": true,
"schema": {
"type": "string",
"format": "uuid",
- "title": "Issue Id"
+ "title": "Investigation Id"
}
+ },
+ {
+ "name": "seq",
+ "in": "query",
+ "required": false,
+ "schema": {
+ "anyOf": [
+ {
+ "type": "integer"
+ },
+ {
+ "type": "null"
+ }
+ ],
+ "description": "Resume from this sequence number",
+ "title": "Seq"
+ },
+ "description": "Resume from this sequence number"
}
],
"responses": {
@@ -1579,9 +1725,7 @@
"description": "Successful Response",
"content": {
"application/json": {
- "schema": {
- "$ref": "#/components/schemas/InvestigationRunListResponse"
- }
+ "schema": {}
}
}
},
@@ -1596,14 +1740,16 @@
}
}
}
- },
- "post": {
+ }
+ },
+ "/api/v1/investigations/{investigation_id}/snapshots": {
+ "get": {
"tags": [
- "issues"
+ "investigations"
],
- "summary": "Spawn Investigation",
- "description": "Spawn an investigation from an issue.\n\nCreates a new investigation linked to this issue. The focus_prompt\nguides the investigation direction.\n\nRequires user identity (JWT auth or user-scoped API key).\nDeep profile may require approval depending on tenant settings.",
- "operationId": "spawn_investigation_api_v1_issues__issue_id__investigation_runs_post",
+ "summary": "List Snapshots",
+ "description": "List available snapshots for an investigation.\n\nArgs:\n investigation_id: UUID of the investigation.\n auth: Authentication context from API key/JWT.\n db: Application database.\n\nReturns:\n SnapshotListResponse with list of available snapshots.\n\nRaises:\n HTTPException: If investigation not found or access denied.",
+ "operationId": "list_snapshots_api_v1_investigations__investigation_id__snapshots_get",
"security": [
{
"APIKeyHeader": []
@@ -1614,33 +1760,23 @@
],
"parameters": [
{
- "name": "issue_id",
+ "name": "investigation_id",
"in": "path",
"required": true,
"schema": {
"type": "string",
"format": "uuid",
- "title": "Issue Id"
+ "title": "Investigation Id"
}
}
],
- "requestBody": {
- "required": true,
- "content": {
- "application/json": {
- "schema": {
- "$ref": "#/components/schemas/InvestigationRunCreate"
- }
- }
- }
- },
"responses": {
- "201": {
+ "200": {
"description": "Successful Response",
"content": {
"application/json": {
"schema": {
- "$ref": "#/components/schemas/InvestigationRunResponse"
+ "$ref": "#/components/schemas/SnapshotListResponse"
}
}
}
@@ -1658,14 +1794,14 @@
}
}
},
- "/api/v1/issues/{issue_id}/events": {
+ "/api/v1/investigations/{investigation_id}/snapshots/{checkpoint}": {
"get": {
"tags": [
- "issues"
+ "investigations"
],
- "summary": "List Issue Events",
- "description": "List events for an issue (activity timeline).\n\nReturns events in reverse chronological order (newest first).\nSupports cursor-based pagination.",
- "operationId": "list_issue_events_api_v1_issues__issue_id__events_get",
+ "summary": "Download Snapshot",
+ "description": "Download a snapshot for local hydration.\n\nSupports streaming response for large snapshots and optional gzip compression.\n\nArgs:\n investigation_id: UUID of the investigation.\n checkpoint: The checkpoint to download (start, hypothesis_generated, etc).\n auth: Authentication context from API key/JWT.\n db: Application database.\n snapshot_store: Snapshot storage backend.\n accept_encoding: Accept-Encoding header for compression.\n\nReturns:\n StreamingResponse with snapshot data.\n\nRaises:\n HTTPException: If investigation not found, access denied, or snapshot missing.",
+ "operationId": "download_snapshot_api_v1_investigations__investigation_id__snapshots__checkpoint__get",
"security": [
{
"APIKeyHeader": []
@@ -1676,30 +1812,26 @@
],
"parameters": [
{
- "name": "issue_id",
+ "name": "investigation_id",
"in": "path",
"required": true,
"schema": {
"type": "string",
"format": "uuid",
- "title": "Issue Id"
+ "title": "Investigation Id"
}
},
{
- "name": "limit",
- "in": "query",
- "required": false,
+ "name": "checkpoint",
+ "in": "path",
+ "required": true,
"schema": {
- "type": "integer",
- "maximum": 100,
- "minimum": 1,
- "default": 50,
- "title": "Limit"
+ "$ref": "#/components/schemas/SnapshotCheckpointParam"
}
},
{
- "name": "cursor",
- "in": "query",
+ "name": "Accept-Encoding",
+ "in": "header",
"required": false,
"schema": {
"anyOf": [
@@ -1710,7 +1842,7 @@
"type": "null"
}
],
- "title": "Cursor"
+ "title": "Accept-Encoding"
}
}
],
@@ -1719,9 +1851,7 @@
"description": "Successful Response",
"content": {
"application/json": {
- "schema": {
- "$ref": "#/components/schemas/IssueEventListResponse"
- }
+ "schema": {}
}
}
},
@@ -1738,14 +1868,14 @@
}
}
},
- "/api/v1/issues/{issue_id}/stream": {
+ "/api/v1/investigations/{investigation_id}/snapshot": {
"get": {
"tags": [
- "issues"
+ "investigations"
],
- "summary": "Stream Issue Events",
- "description": "Stream real-time issue updates via Server-Sent Events.\n\nDelivers events as they occur:\n- status_changed, assigned, comment_added, label_added/removed\n- investigation_spawned, investigation_completed\n\nThe `after` parameter accepts an event ID to resume from.\nSends heartbeat every 30 seconds to prevent connection timeout.",
- "operationId": "stream_issue_events_api_v1_issues__issue_id__stream_get",
+ "summary": "Export Snapshot Archive",
+ "description": "Download investigation as a snapshot tar.gz archive.\n\nGenerates a compressed archive containing all evidence, lineage,\nand metadata needed to replay the investigation.\n\nArgs:\n investigation_id: UUID of the investigation.\n auth: Authentication context from API key/JWT.\n db: Application database.\n temporal_client: Temporal client for durable execution.\n\nReturns:\n StreamingResponse with tar.gz archive.\n\nRaises:\n HTTPException: If investigation not found or not complete.",
+ "operationId": "export_snapshot_archive_api_v1_investigations__investigation_id__snapshot_get",
"security": [
{
"APIKeyHeader": []
@@ -1756,39 +1886,23 @@
],
"parameters": [
{
- "name": "issue_id",
+ "name": "investigation_id",
"in": "path",
"required": true,
"schema": {
"type": "string",
"format": "uuid",
- "title": "Issue Id"
- }
- },
- {
- "name": "after",
- "in": "query",
- "required": false,
- "schema": {
- "anyOf": [
- {
- "type": "string"
- },
- {
- "type": "null"
- }
- ],
- "title": "After"
- }
- }
- ],
- "responses": {
- "200": {
- "description": "Successful Response",
- "content": {
- "application/json": {
- "schema": {}
- }
+ "title": "Investigation Id"
+ }
+ }
+ ],
+ "responses": {
+ "200": {
+ "description": "Successful Response",
+ "content": {
+ "application/json": {
+ "schema": {}
+ }
}
},
"422": {
@@ -1804,41 +1918,19 @@
}
}
},
- "/api/v1/datasources/types": {
- "get": {
- "tags": [
- "datasources"
- ],
- "summary": "List Source Types",
- "description": "List all supported data source types.\n\nReturns the configuration schema for each type, which can be used\nto dynamically generate connection forms in the frontend.",
- "operationId": "list_source_types_api_v1_datasources_types_get",
- "responses": {
- "200": {
- "description": "Successful Response",
- "content": {
- "application/json": {
- "schema": {
- "$ref": "#/components/schemas/SourceTypesResponse"
- }
- }
- }
- }
- }
- }
- },
- "/api/v1/datasources/test": {
+ "/api/v1/investigations/import": {
"post": {
"tags": [
- "datasources"
+ "investigations"
],
- "summary": "Test Connection",
- "description": "Test a connection without saving it.\n\nUse this endpoint to validate connection settings before creating\na data source.",
- "operationId": "test_connection_api_v1_datasources_test_post",
+ "summary": "Import Snapshot Archive",
+ "description": "Import a snapshot archive as a replayed investigation.\n\nValidates the archive and creates a new investigation marked as a replay.\n\nArgs:\n auth: Authentication context from API key/JWT.\n db: Application database.\n file: The uploaded tar.gz file.\n\nReturns:\n ImportSnapshotResponse with new investigation ID.\n\nRaises:\n HTTPException: If file is invalid or too large.",
+ "operationId": "import_snapshot_archive_api_v1_investigations_import_post",
"requestBody": {
"content": {
- "application/json": {
+ "multipart/form-data": {
"schema": {
- "$ref": "#/components/schemas/TestConnectionRequest"
+ "$ref": "#/components/schemas/Body_import_snapshot_archive_api_v1_investigations_import_post"
}
}
},
@@ -1850,7 +1942,7 @@
"content": {
"application/json": {
"schema": {
- "$ref": "#/components/schemas/TestConnectionResponse"
+ "$ref": "#/components/schemas/ImportSnapshotResponse"
}
}
}
@@ -1865,29 +1957,188 @@
}
}
}
- }
+ },
+ "security": [
+ {
+ "APIKeyHeader": []
+ },
+ {
+ "HTTPBearer": []
+ }
+ ]
}
},
- "/api/v1/datasources": {
+ "/api/v1/issues": {
"get": {
"tags": [
- "datasources"
+ "issues"
+ ],
+ "summary": "List Issues",
+ "description": "List issues with filters and cursor-based pagination.\n\nUses cursor-based pagination with base64(updated_at|id) format.\nReturns issues ordered by updated_at descending.",
+ "operationId": "list_issues_api_v1_issues_get",
+ "security": [
+ {
+ "APIKeyHeader": []
+ },
+ {
+ "HTTPBearer": []
+ }
+ ],
+ "parameters": [
+ {
+ "name": "status",
+ "in": "query",
+ "required": false,
+ "schema": {
+ "anyOf": [
+ {
+ "type": "string"
+ },
+ {
+ "type": "null"
+ }
+ ],
+ "description": "Filter by status",
+ "title": "Status"
+ },
+ "description": "Filter by status"
+ },
+ {
+ "name": "priority",
+ "in": "query",
+ "required": false,
+ "schema": {
+ "anyOf": [
+ {
+ "type": "string"
+ },
+ {
+ "type": "null"
+ }
+ ],
+ "description": "Filter by priority",
+ "title": "Priority"
+ },
+ "description": "Filter by priority"
+ },
+ {
+ "name": "severity",
+ "in": "query",
+ "required": false,
+ "schema": {
+ "anyOf": [
+ {
+ "type": "string"
+ },
+ {
+ "type": "null"
+ }
+ ],
+ "description": "Filter by severity",
+ "title": "Severity"
+ },
+ "description": "Filter by severity"
+ },
+ {
+ "name": "assignee",
+ "in": "query",
+ "required": false,
+ "schema": {
+ "anyOf": [
+ {
+ "type": "string",
+ "format": "uuid"
+ },
+ {
+ "type": "null"
+ }
+ ],
+ "description": "Filter by assignee",
+ "title": "Assignee"
+ },
+ "description": "Filter by assignee"
+ },
+ {
+ "name": "search",
+ "in": "query",
+ "required": false,
+ "schema": {
+ "anyOf": [
+ {
+ "type": "string"
+ },
+ {
+ "type": "null"
+ }
+ ],
+ "description": "Full-text search",
+ "title": "Search"
+ },
+ "description": "Full-text search"
+ },
+ {
+ "name": "cursor",
+ "in": "query",
+ "required": false,
+ "schema": {
+ "anyOf": [
+ {
+ "type": "string"
+ },
+ {
+ "type": "null"
+ }
+ ],
+ "description": "Pagination cursor",
+ "title": "Cursor"
+ },
+ "description": "Pagination cursor"
+ },
+ {
+ "name": "limit",
+ "in": "query",
+ "required": false,
+ "schema": {
+ "type": "integer",
+ "maximum": 100,
+ "minimum": 1,
+ "description": "Max issues",
+ "default": 50,
+ "title": "Limit"
+ },
+ "description": "Max issues"
+ }
],
- "summary": "List Datasources",
- "description": "List all data sources for the current tenant.",
- "operationId": "list_datasources_api_v1_datasources_get",
"responses": {
"200": {
"description": "Successful Response",
"content": {
"application/json": {
"schema": {
- "$ref": "#/components/schemas/DataSourceListResponse"
+ "$ref": "#/components/schemas/IssueListResponse"
+ }
+ }
+ }
+ },
+ "422": {
+ "description": "Validation Error",
+ "content": {
+ "application/json": {
+ "schema": {
+ "$ref": "#/components/schemas/HTTPValidationError"
}
}
}
}
- },
+ }
+ },
+ "post": {
+ "tags": [
+ "issues"
+ ],
+ "summary": "Create Issue",
+ "description": "Create a new issue.\n\nIssues are created in OPEN status. Number is auto-assigned per-tenant.",
+ "operationId": "create_issue_api_v1_issues_post",
"security": [
{
"APIKeyHeader": []
@@ -1895,24 +2146,16 @@
{
"HTTPBearer": []
}
- ]
- },
- "post": {
- "tags": [
- "datasources"
],
- "summary": "Create Datasource",
- "description": "Create a new data source.\n\nTests the connection before saving. Returns 400 if connection test fails.",
- "operationId": "create_datasource_api_v1_datasources_post",
"requestBody": {
+ "required": true,
"content": {
"application/json": {
"schema": {
- "$ref": "#/components/schemas/CreateDataSourceRequest"
+ "$ref": "#/components/schemas/IssueCreate"
}
}
- },
- "required": true
+ }
},
"responses": {
"201": {
@@ -1920,7 +2163,7 @@
"content": {
"application/json": {
"schema": {
- "$ref": "#/components/schemas/DataSourceResponse"
+ "$ref": "#/components/schemas/IssueResponse"
}
}
}
@@ -1935,25 +2178,17 @@
}
}
}
- },
- "security": [
- {
- "APIKeyHeader": []
- },
- {
- "HTTPBearer": []
- }
- ]
+ }
}
},
- "/api/v1/datasources/{datasource_id}": {
+ "/api/v1/issues/{issue_id}": {
"get": {
"tags": [
- "datasources"
+ "issues"
],
- "summary": "Get Datasource",
- "description": "Get a specific data source.",
- "operationId": "get_datasource_api_v1_datasources__datasource_id__get",
+ "summary": "Get Issue",
+ "description": "Get issue by ID.\n\nReturns the full issue if user has access, 404 if not found.",
+ "operationId": "get_issue_api_v1_issues__issue_id__get",
"security": [
{
"APIKeyHeader": []
@@ -1964,13 +2199,13 @@
],
"parameters": [
{
- "name": "datasource_id",
+ "name": "issue_id",
"in": "path",
"required": true,
"schema": {
"type": "string",
"format": "uuid",
- "title": "Datasource Id"
+ "title": "Issue Id"
}
}
],
@@ -1980,7 +2215,7 @@
"content": {
"application/json": {
"schema": {
- "$ref": "#/components/schemas/DataSourceResponse"
+ "$ref": "#/components/schemas/IssueResponse"
}
}
}
@@ -1997,13 +2232,13 @@
}
}
},
- "delete": {
+ "patch": {
"tags": [
- "datasources"
+ "issues"
],
- "summary": "Delete Datasource",
- "description": "Delete a data source (soft delete).",
- "operationId": "delete_datasource_api_v1_datasources__datasource_id__delete",
+ "summary": "Update Issue",
+ "description": "Update issue fields.\n\nEnforces state machine transitions when status is changed.",
+ "operationId": "update_issue_api_v1_issues__issue_id__patch",
"security": [
{
"APIKeyHeader": []
@@ -2014,19 +2249,36 @@
],
"parameters": [
{
- "name": "datasource_id",
+ "name": "issue_id",
"in": "path",
"required": true,
"schema": {
"type": "string",
"format": "uuid",
- "title": "Datasource Id"
+ "title": "Issue Id"
}
}
],
+ "requestBody": {
+ "required": true,
+ "content": {
+ "application/json": {
+ "schema": {
+ "$ref": "#/components/schemas/IssueUpdate"
+ }
+ }
+ }
+ },
"responses": {
- "204": {
- "description": "Successful Response"
+ "200": {
+ "description": "Successful Response",
+ "content": {
+ "application/json": {
+ "schema": {
+ "$ref": "#/components/schemas/IssueResponse"
+ }
+ }
+ }
},
"422": {
"description": "Validation Error",
@@ -2041,14 +2293,14 @@
}
}
},
- "/api/v1/datasources/{datasource_id}/test": {
- "post": {
+ "/api/v1/issues/{issue_id}/comments": {
+ "get": {
"tags": [
- "datasources"
+ "issues"
],
- "summary": "Test Datasource Connection",
- "description": "Test connectivity for an existing data source.",
- "operationId": "test_datasource_connection_api_v1_datasources__datasource_id__test_post",
+ "summary": "List Issue Comments",
+ "description": "List comments for an issue.",
+ "operationId": "list_issue_comments_api_v1_issues__issue_id__comments_get",
"security": [
{
"APIKeyHeader": []
@@ -2059,13 +2311,13 @@
],
"parameters": [
{
- "name": "datasource_id",
+ "name": "issue_id",
"in": "path",
"required": true,
"schema": {
"type": "string",
"format": "uuid",
- "title": "Datasource Id"
+ "title": "Issue Id"
}
}
],
@@ -2075,7 +2327,7 @@
"content": {
"application/json": {
"schema": {
- "$ref": "#/components/schemas/TestConnectionResponse"
+ "$ref": "#/components/schemas/IssueCommentListResponse"
}
}
}
@@ -2091,16 +2343,14 @@
}
}
}
- }
- },
- "/api/v1/datasources/{datasource_id}/schema": {
- "get": {
+ },
+ "post": {
"tags": [
- "datasources"
+ "issues"
],
- "summary": "Get Datasource Schema",
- "description": "Get schema from a data source.\n\nReturns unified schema with catalogs, schemas, and tables.",
- "operationId": "get_datasource_schema_api_v1_datasources__datasource_id__schema_get",
+ "summary": "Create Issue Comment",
+ "description": "Add a comment to an issue.\n\nRequires user identity (JWT auth or user-scoped API key).",
+ "operationId": "create_issue_comment_api_v1_issues__issue_id__comments_post",
"security": [
{
"APIKeyHeader": []
@@ -2111,59 +2361,33 @@
],
"parameters": [
{
- "name": "datasource_id",
+ "name": "issue_id",
"in": "path",
"required": true,
"schema": {
"type": "string",
"format": "uuid",
- "title": "Datasource Id"
- }
- },
- {
- "name": "table_pattern",
- "in": "query",
- "required": false,
- "schema": {
- "anyOf": [
- {
- "type": "string"
- },
- {
- "type": "null"
- }
- ],
- "title": "Table Pattern"
- }
- },
- {
- "name": "include_views",
- "in": "query",
- "required": false,
- "schema": {
- "type": "boolean",
- "default": true,
- "title": "Include Views"
- }
- },
- {
- "name": "max_tables",
- "in": "query",
- "required": false,
- "schema": {
- "type": "integer",
- "default": 1000,
- "title": "Max Tables"
+ "title": "Issue Id"
}
}
],
+ "requestBody": {
+ "required": true,
+ "content": {
+ "application/json": {
+ "schema": {
+ "$ref": "#/components/schemas/IssueCommentCreate"
+ }
+ }
+ }
+ },
"responses": {
- "200": {
+ "201": {
"description": "Successful Response",
"content": {
"application/json": {
"schema": {
- "$ref": "#/components/schemas/SchemaResponseModel"
+ "$ref": "#/components/schemas/IssueCommentResponse"
}
}
}
@@ -2181,14 +2405,14 @@
}
}
},
- "/api/v1/datasources/{datasource_id}/query": {
- "post": {
+ "/api/v1/issues/{issue_id}/watchers": {
+ "get": {
"tags": [
- "datasources"
+ "issues"
],
- "summary": "Execute Query",
- "description": "Execute a query against a data source.\n\nOnly works for sources that support SQL or similar query languages.",
- "operationId": "execute_query_api_v1_datasources__datasource_id__query_post",
+ "summary": "List Issue Watchers",
+ "description": "List watchers for an issue.",
+ "operationId": "list_issue_watchers_api_v1_issues__issue_id__watchers_get",
"security": [
{
"APIKeyHeader": []
@@ -2199,33 +2423,23 @@
],
"parameters": [
{
- "name": "datasource_id",
+ "name": "issue_id",
"in": "path",
"required": true,
"schema": {
"type": "string",
"format": "uuid",
- "title": "Datasource Id"
+ "title": "Issue Id"
}
}
],
- "requestBody": {
- "required": true,
- "content": {
- "application/json": {
- "schema": {
- "$ref": "#/components/schemas/QueryRequest"
- }
- }
- }
- },
"responses": {
"200": {
"description": "Successful Response",
"content": {
"application/json": {
"schema": {
- "$ref": "#/components/schemas/QueryResponse"
+ "$ref": "#/components/schemas/WatcherListResponse"
}
}
}
@@ -2243,14 +2457,14 @@
}
}
},
- "/api/v1/datasources/{datasource_id}/stats": {
+ "/api/v1/issues/{issue_id}/watch": {
"post": {
"tags": [
- "datasources"
+ "issues"
],
- "summary": "Get Column Stats",
- "description": "Get statistics for columns in a table.\n\nOnly works for sources that support column statistics.",
- "operationId": "get_column_stats_api_v1_datasources__datasource_id__stats_post",
+ "summary": "Add Issue Watcher",
+ "description": "Subscribe the current user as a watcher.\n\nIdempotent - returns 204 even if already watching.\nRequires user identity (JWT auth or user-scoped API key).",
+ "operationId": "add_issue_watcher_api_v1_issues__issue_id__watch_post",
"security": [
{
"APIKeyHeader": []
@@ -2261,36 +2475,19 @@
],
"parameters": [
{
- "name": "datasource_id",
+ "name": "issue_id",
"in": "path",
"required": true,
"schema": {
"type": "string",
"format": "uuid",
- "title": "Datasource Id"
+ "title": "Issue Id"
}
}
],
- "requestBody": {
- "required": true,
- "content": {
- "application/json": {
- "schema": {
- "$ref": "#/components/schemas/StatsRequest"
- }
- }
- }
- },
"responses": {
- "200": {
- "description": "Successful Response",
- "content": {
- "application/json": {
- "schema": {
- "$ref": "#/components/schemas/StatsResponse"
- }
- }
- }
+ "204": {
+ "description": "Successful Response"
},
"422": {
"description": "Validation Error",
@@ -2303,16 +2500,14 @@
}
}
}
- }
- },
- "/api/v1/datasources/{datasource_id}/sync": {
- "post": {
+ },
+ "delete": {
"tags": [
- "datasources"
+ "issues"
],
- "summary": "Sync Datasource Schema",
- "description": "Sync schema and register/update datasets.\n\nDiscovers all tables from the data source and upserts them\ninto the datasets table. Soft-deletes datasets that no longer exist.",
- "operationId": "sync_datasource_schema_api_v1_datasources__datasource_id__sync_post",
+ "summary": "Remove Issue Watcher",
+ "description": "Unsubscribe the current user as a watcher.\n\nIdempotent - returns 204 even if not watching.\nRequires user identity (JWT auth or user-scoped API key).",
+ "operationId": "remove_issue_watcher_api_v1_issues__issue_id__watch_delete",
"security": [
{
"APIKeyHeader": []
@@ -2323,26 +2518,19 @@
],
"parameters": [
{
- "name": "datasource_id",
+ "name": "issue_id",
"in": "path",
"required": true,
"schema": {
"type": "string",
"format": "uuid",
- "title": "Datasource Id"
+ "title": "Issue Id"
}
}
],
"responses": {
- "200": {
- "description": "Successful Response",
- "content": {
- "application/json": {
- "schema": {
- "$ref": "#/components/schemas/SyncResponse"
- }
- }
- }
+ "204": {
+ "description": "Successful Response"
},
"422": {
"description": "Validation Error",
@@ -2357,14 +2545,14 @@
}
}
},
- "/api/v1/datasources/{datasource_id}/datasets": {
+ "/api/v1/issues/{issue_id}/investigation-runs": {
"get": {
"tags": [
- "datasources"
+ "issues"
],
- "summary": "List Datasource Datasets",
- "description": "List datasets for a datasource.",
- "operationId": "list_datasource_datasets_api_v1_datasources__datasource_id__datasets_get",
+ "summary": "List Investigation Runs",
+ "description": "List investigation runs for an issue.",
+ "operationId": "list_investigation_runs_api_v1_issues__issue_id__investigation_runs_get",
"security": [
{
"APIKeyHeader": []
@@ -2375,33 +2563,141 @@
],
"parameters": [
{
- "name": "datasource_id",
+ "name": "issue_id",
"in": "path",
"required": true,
"schema": {
"type": "string",
"format": "uuid",
- "title": "Datasource Id"
+ "title": "Issue Id"
+ }
+ }
+ ],
+ "responses": {
+ "200": {
+ "description": "Successful Response",
+ "content": {
+ "application/json": {
+ "schema": {
+ "$ref": "#/components/schemas/InvestigationRunListResponse"
+ }
+ }
}
},
+ "422": {
+ "description": "Validation Error",
+ "content": {
+ "application/json": {
+ "schema": {
+ "$ref": "#/components/schemas/HTTPValidationError"
+ }
+ }
+ }
+ }
+ }
+ },
+ "post": {
+ "tags": [
+ "issues"
+ ],
+ "summary": "Spawn Investigation",
+ "description": "Spawn an investigation from an issue.\n\nCreates a new investigation linked to this issue. The focus_prompt\nguides the investigation direction.\n\nRequires user identity (JWT auth or user-scoped API key).\nDeep profile may require approval depending on tenant settings.",
+ "operationId": "spawn_investigation_api_v1_issues__issue_id__investigation_runs_post",
+ "security": [
{
- "name": "table_type",
+ "APIKeyHeader": []
+ },
+ {
+ "HTTPBearer": []
+ }
+ ],
+ "parameters": [
+ {
+ "name": "issue_id",
+ "in": "path",
+ "required": true,
+ "schema": {
+ "type": "string",
+ "format": "uuid",
+ "title": "Issue Id"
+ }
+ }
+ ],
+ "requestBody": {
+ "required": true,
+ "content": {
+ "application/json": {
+ "schema": {
+ "$ref": "#/components/schemas/InvestigationRunCreate"
+ }
+ }
+ }
+ },
+ "responses": {
+ "201": {
+ "description": "Successful Response",
+ "content": {
+ "application/json": {
+ "schema": {
+ "$ref": "#/components/schemas/InvestigationRunResponse"
+ }
+ }
+ }
+ },
+ "422": {
+ "description": "Validation Error",
+ "content": {
+ "application/json": {
+ "schema": {
+ "$ref": "#/components/schemas/HTTPValidationError"
+ }
+ }
+ }
+ }
+ }
+ }
+ },
+ "/api/v1/issues/{issue_id}/events": {
+ "get": {
+ "tags": [
+ "issues"
+ ],
+ "summary": "List Issue Events",
+ "description": "List events for an issue (activity timeline).\n\nReturns events in reverse chronological order (newest first).\nSupports cursor-based pagination.",
+ "operationId": "list_issue_events_api_v1_issues__issue_id__events_get",
+ "security": [
+ {
+ "APIKeyHeader": []
+ },
+ {
+ "HTTPBearer": []
+ }
+ ],
+ "parameters": [
+ {
+ "name": "issue_id",
+ "in": "path",
+ "required": true,
+ "schema": {
+ "type": "string",
+ "format": "uuid",
+ "title": "Issue Id"
+ }
+ },
+ {
+ "name": "limit",
"in": "query",
"required": false,
"schema": {
- "anyOf": [
- {
- "type": "string"
- },
- {
- "type": "null"
- }
- ],
- "title": "Table Type"
+ "type": "integer",
+ "maximum": 100,
+ "minimum": 1,
+ "default": 50,
+ "title": "Limit"
}
},
{
- "name": "search",
+ "name": "cursor",
"in": "query",
"required": false,
"schema": {
@@ -2413,30 +2709,75 @@
"type": "null"
}
],
- "title": "Search"
+ "title": "Cursor"
+ }
+ }
+ ],
+ "responses": {
+ "200": {
+ "description": "Successful Response",
+ "content": {
+ "application/json": {
+ "schema": {
+ "$ref": "#/components/schemas/IssueEventListResponse"
+ }
+ }
+ }
+ },
+ "422": {
+ "description": "Validation Error",
+ "content": {
+ "application/json": {
+ "schema": {
+ "$ref": "#/components/schemas/HTTPValidationError"
+ }
+ }
}
+ }
+ }
+ }
+ },
+ "/api/v1/issues/{issue_id}/stream": {
+ "get": {
+ "tags": [
+ "issues"
+ ],
+ "summary": "Stream Issue Events",
+ "description": "Stream real-time issue updates via Server-Sent Events.\n\nDelivers events as they occur:\n- status_changed, assigned, comment_added, label_added/removed\n- investigation_spawned, investigation_completed\n\nThe `after` parameter accepts an event ID to resume from.\nSends heartbeat every 30 seconds to prevent connection timeout.",
+ "operationId": "stream_issue_events_api_v1_issues__issue_id__stream_get",
+ "security": [
+ {
+ "APIKeyHeader": []
},
{
- "name": "limit",
- "in": "query",
- "required": false,
+ "HTTPBearer": []
+ }
+ ],
+ "parameters": [
+ {
+ "name": "issue_id",
+ "in": "path",
+ "required": true,
"schema": {
- "type": "integer",
- "maximum": 10000,
- "minimum": 1,
- "default": 1000,
- "title": "Limit"
+ "type": "string",
+ "format": "uuid",
+ "title": "Issue Id"
}
},
{
- "name": "offset",
+ "name": "after",
"in": "query",
"required": false,
"schema": {
- "type": "integer",
- "minimum": 0,
- "default": 0,
- "title": "Offset"
+ "anyOf": [
+ {
+ "type": "string"
+ },
+ {
+ "type": "null"
+ }
+ ],
+ "title": "After"
}
}
],
@@ -2445,9 +2786,7 @@
"description": "Successful Response",
"content": {
"application/json": {
- "schema": {
- "$ref": "#/components/schemas/DatasourceDatasetsResponse"
- }
+ "schema": {}
}
}
},
@@ -2464,14 +2803,14 @@
}
}
},
- "/api/v1/v2/datasources/types": {
+ "/api/v1/datasources/types": {
"get": {
"tags": [
"datasources"
],
"summary": "List Source Types",
"description": "List all supported data source types.\n\nReturns the configuration schema for each type, which can be used\nto dynamically generate connection forms in the frontend.",
- "operationId": "list_source_types_api_v1_v2_datasources_types_get",
+ "operationId": "list_source_types_api_v1_datasources_types_get",
"responses": {
"200": {
"description": "Successful Response",
@@ -2486,14 +2825,14 @@
}
}
},
- "/api/v1/v2/datasources/test": {
+ "/api/v1/datasources/test": {
"post": {
"tags": [
"datasources"
],
"summary": "Test Connection",
"description": "Test a connection without saving it.\n\nUse this endpoint to validate connection settings before creating\na data source.",
- "operationId": "test_connection_api_v1_v2_datasources_test_post",
+ "operationId": "test_connection_api_v1_datasources_test_post",
"requestBody": {
"content": {
"application/json": {
@@ -2528,14 +2867,14 @@
}
}
},
- "/api/v1/v2/datasources": {
+ "/api/v1/datasources": {
"get": {
"tags": [
"datasources"
],
"summary": "List Datasources",
"description": "List all data sources for the current tenant.",
- "operationId": "list_datasources_api_v1_v2_datasources_get",
+ "operationId": "list_datasources_api_v1_datasources_get",
"responses": {
"200": {
"description": "Successful Response",
@@ -2563,7 +2902,7 @@
],
"summary": "Create Datasource",
"description": "Create a new data source.\n\nTests the connection before saving. Returns 400 if connection test fails.",
- "operationId": "create_datasource_api_v1_v2_datasources_post",
+ "operationId": "create_datasource_api_v1_datasources_post",
"requestBody": {
"content": {
"application/json": {
@@ -2606,14 +2945,14 @@
]
}
},
- "/api/v1/v2/datasources/{datasource_id}": {
+ "/api/v1/datasources/{datasource_id}": {
"get": {
"tags": [
"datasources"
],
"summary": "Get Datasource",
"description": "Get a specific data source.",
- "operationId": "get_datasource_api_v1_v2_datasources__datasource_id__get",
+ "operationId": "get_datasource_api_v1_datasources__datasource_id__get",
"security": [
{
"APIKeyHeader": []
@@ -2663,7 +3002,7 @@
],
"summary": "Delete Datasource",
"description": "Delete a data source (soft delete).",
- "operationId": "delete_datasource_api_v1_v2_datasources__datasource_id__delete",
+ "operationId": "delete_datasource_api_v1_datasources__datasource_id__delete",
"security": [
{
"APIKeyHeader": []
@@ -2701,14 +3040,14 @@
}
}
},
- "/api/v1/v2/datasources/{datasource_id}/test": {
+ "/api/v1/datasources/{datasource_id}/test": {
"post": {
"tags": [
"datasources"
],
"summary": "Test Datasource Connection",
"description": "Test connectivity for an existing data source.",
- "operationId": "test_datasource_connection_api_v1_v2_datasources__datasource_id__test_post",
+ "operationId": "test_datasource_connection_api_v1_datasources__datasource_id__test_post",
"security": [
{
"APIKeyHeader": []
@@ -2753,14 +3092,14 @@
}
}
},
- "/api/v1/v2/datasources/{datasource_id}/schema": {
+ "/api/v1/datasources/{datasource_id}/schema": {
"get": {
"tags": [
"datasources"
],
"summary": "Get Datasource Schema",
"description": "Get schema from a data source.\n\nReturns unified schema with catalogs, schemas, and tables.",
- "operationId": "get_datasource_schema_api_v1_v2_datasources__datasource_id__schema_get",
+ "operationId": "get_datasource_schema_api_v1_datasources__datasource_id__schema_get",
"security": [
{
"APIKeyHeader": []
@@ -2841,14 +3180,14 @@
}
}
},
- "/api/v1/v2/datasources/{datasource_id}/query": {
+ "/api/v1/datasources/{datasource_id}/query": {
"post": {
"tags": [
"datasources"
],
"summary": "Execute Query",
"description": "Execute a query against a data source.\n\nOnly works for sources that support SQL or similar query languages.",
- "operationId": "execute_query_api_v1_v2_datasources__datasource_id__query_post",
+ "operationId": "execute_query_api_v1_datasources__datasource_id__query_post",
"security": [
{
"APIKeyHeader": []
@@ -2903,14 +3242,14 @@
}
}
},
- "/api/v1/v2/datasources/{datasource_id}/stats": {
+ "/api/v1/datasources/{datasource_id}/stats": {
"post": {
"tags": [
"datasources"
],
"summary": "Get Column Stats",
"description": "Get statistics for columns in a table.\n\nOnly works for sources that support column statistics.",
- "operationId": "get_column_stats_api_v1_v2_datasources__datasource_id__stats_post",
+ "operationId": "get_column_stats_api_v1_datasources__datasource_id__stats_post",
"security": [
{
"APIKeyHeader": []
@@ -2965,14 +3304,14 @@
}
}
},
- "/api/v1/v2/datasources/{datasource_id}/sync": {
+ "/api/v1/datasources/{datasource_id}/sync": {
"post": {
"tags": [
"datasources"
],
"summary": "Sync Datasource Schema",
"description": "Sync schema and register/update datasets.\n\nDiscovers all tables from the data source and upserts them\ninto the datasets table. Soft-deletes datasets that no longer exist.",
- "operationId": "sync_datasource_schema_api_v1_v2_datasources__datasource_id__sync_post",
+ "operationId": "sync_datasource_schema_api_v1_datasources__datasource_id__sync_post",
"security": [
{
"APIKeyHeader": []
@@ -3017,14 +3356,14 @@
}
}
},
- "/api/v1/v2/datasources/{datasource_id}/datasets": {
+ "/api/v1/datasources/{datasource_id}/datasets": {
"get": {
"tags": [
"datasources"
],
"summary": "List Datasource Datasets",
"description": "List datasets for a datasource.",
- "operationId": "list_datasource_datasets_api_v1_v2_datasources__datasource_id__datasets_get",
+ "operationId": "list_datasource_datasets_api_v1_datasources__datasource_id__datasets_get",
"security": [
{
"APIKeyHeader": []
@@ -3124,51 +3463,53 @@
}
}
},
- "/api/v1/datasources/{datasource_id}/credentials": {
- "post": {
+ "/api/v1/v2/datasources/types": {
+ "get": {
"tags": [
- "credentials"
- ],
- "summary": "Save Credentials",
- "description": "Save or update credentials for a datasource.\n\nUsers can store their own database credentials which will be used\nfor query execution. The database enforces permissions, not Dataing.",
- "operationId": "save_credentials_api_v1_datasources__datasource_id__credentials_post",
- "security": [
- {
- "APIKeyHeader": []
- },
- {
- "HTTPBearer": []
- }
+ "datasources"
],
- "parameters": [
- {
- "name": "datasource_id",
- "in": "path",
- "required": true,
- "schema": {
- "type": "string",
- "format": "uuid",
- "title": "Datasource Id"
+ "summary": "List Source Types",
+ "description": "List all supported data source types.\n\nReturns the configuration schema for each type, which can be used\nto dynamically generate connection forms in the frontend.",
+ "operationId": "list_source_types_api_v1_v2_datasources_types_get",
+ "responses": {
+ "200": {
+ "description": "Successful Response",
+ "content": {
+ "application/json": {
+ "schema": {
+ "$ref": "#/components/schemas/SourceTypesResponse"
+ }
+ }
}
}
+ }
+ }
+ },
+ "/api/v1/v2/datasources/test": {
+ "post": {
+ "tags": [
+ "datasources"
],
+ "summary": "Test Connection",
+ "description": "Test a connection without saving it.\n\nUse this endpoint to validate connection settings before creating\na data source.",
+ "operationId": "test_connection_api_v1_v2_datasources_test_post",
"requestBody": {
- "required": true,
"content": {
"application/json": {
"schema": {
- "$ref": "#/components/schemas/SaveCredentialsRequest"
+ "$ref": "#/components/schemas/TestConnectionRequest"
}
}
- }
+ },
+ "required": true
},
"responses": {
- "201": {
+ "200": {
"description": "Successful Response",
"content": {
"application/json": {
"schema": {
- "$ref": "#/components/schemas/CredentialsStatusResponse"
+ "$ref": "#/components/schemas/TestConnectionResponse"
}
}
}
@@ -3184,14 +3525,28 @@
}
}
}
- },
+ }
+ },
+ "/api/v1/v2/datasources": {
"get": {
"tags": [
- "credentials"
+ "datasources"
],
- "summary": "Get Credentials Status",
- "description": "Check if credentials are configured for a datasource.\n\nReturns configuration status without exposing the actual credentials.",
- "operationId": "get_credentials_status_api_v1_datasources__datasource_id__credentials_get",
+ "summary": "List Datasources",
+ "description": "List all data sources for the current tenant.",
+ "operationId": "list_datasources_api_v1_v2_datasources_get",
+ "responses": {
+ "200": {
+ "description": "Successful Response",
+ "content": {
+ "application/json": {
+ "schema": {
+ "$ref": "#/components/schemas/DataSourceListResponse"
+ }
+ }
+ }
+ }
+ },
"security": [
{
"APIKeyHeader": []
@@ -3199,26 +3554,32 @@
{
"HTTPBearer": []
}
+ ]
+ },
+ "post": {
+ "tags": [
+ "datasources"
],
- "parameters": [
- {
- "name": "datasource_id",
- "in": "path",
- "required": true,
- "schema": {
- "type": "string",
- "format": "uuid",
- "title": "Datasource Id"
+ "summary": "Create Datasource",
+ "description": "Create a new data source.\n\nTests the connection before saving. Returns 400 if connection test fails.",
+ "operationId": "create_datasource_api_v1_v2_datasources_post",
+ "requestBody": {
+ "content": {
+ "application/json": {
+ "schema": {
+ "$ref": "#/components/schemas/CreateDataSourceRequest"
+ }
}
- }
- ],
+ },
+ "required": true
+ },
"responses": {
- "200": {
+ "201": {
"description": "Successful Response",
"content": {
"application/json": {
"schema": {
- "$ref": "#/components/schemas/CredentialsStatusResponse"
+ "$ref": "#/components/schemas/DataSourceResponse"
}
}
}
@@ -3233,15 +3594,25 @@
}
}
}
- }
- },
- "delete": {
+ },
+ "security": [
+ {
+ "APIKeyHeader": []
+ },
+ {
+ "HTTPBearer": []
+ }
+ ]
+ }
+ },
+ "/api/v1/v2/datasources/{datasource_id}": {
+ "get": {
"tags": [
- "credentials"
+ "datasources"
],
- "summary": "Delete Credentials",
- "description": "Remove credentials for a datasource.\n\nAfter deletion, the user will need to reconfigure credentials\nbefore executing queries.",
- "operationId": "delete_credentials_api_v1_datasources__datasource_id__credentials_delete",
+ "summary": "Get Datasource",
+ "description": "Get a specific data source.",
+ "operationId": "get_datasource_api_v1_v2_datasources__datasource_id__get",
"security": [
{
"APIKeyHeader": []
@@ -3268,7 +3639,7 @@
"content": {
"application/json": {
"schema": {
- "$ref": "#/components/schemas/DeleteCredentialsResponse"
+ "$ref": "#/components/schemas/DataSourceResponse"
}
}
}
@@ -3284,16 +3655,14 @@
}
}
}
- }
- },
- "/api/v1/datasources/{datasource_id}/credentials/test": {
- "post": {
+ },
+ "delete": {
"tags": [
- "credentials"
+ "datasources"
],
- "summary": "Test Credentials",
- "description": "Test credentials without saving them.\n\nValidates that the provided credentials can connect to the\ndatabase and access tables.",
- "operationId": "test_credentials_api_v1_datasources__datasource_id__credentials_test_post",
+ "summary": "Delete Datasource",
+ "description": "Delete a data source (soft delete).",
+ "operationId": "delete_datasource_api_v1_v2_datasources__datasource_id__delete",
"security": [
{
"APIKeyHeader": []
@@ -3314,26 +3683,9 @@
}
}
],
- "requestBody": {
- "required": true,
- "content": {
- "application/json": {
- "schema": {
- "$ref": "#/components/schemas/SaveCredentialsRequest"
- }
- }
- }
- },
"responses": {
- "200": {
- "description": "Successful Response",
- "content": {
- "application/json": {
- "schema": {
- "$ref": "#/components/schemas/dataing__entrypoints__api__routes__credentials__TestConnectionResponse"
- }
- }
- }
+ "204": {
+ "description": "Successful Response"
},
"422": {
"description": "Validation Error",
@@ -3348,14 +3700,14 @@
}
}
},
- "/api/v1/datasets/{dataset_id}": {
- "get": {
+ "/api/v1/v2/datasources/{datasource_id}/test": {
+ "post": {
"tags": [
- "datasets"
+ "datasources"
],
- "summary": "Get Dataset",
- "description": "Get a dataset by ID with column information.",
- "operationId": "get_dataset_api_v1_datasets__dataset_id__get",
+ "summary": "Test Datasource Connection",
+ "description": "Test connectivity for an existing data source.",
+ "operationId": "test_datasource_connection_api_v1_v2_datasources__datasource_id__test_post",
"security": [
{
"APIKeyHeader": []
@@ -3366,13 +3718,13 @@
],
"parameters": [
{
- "name": "dataset_id",
+ "name": "datasource_id",
"in": "path",
"required": true,
"schema": {
"type": "string",
"format": "uuid",
- "title": "Dataset Id"
+ "title": "Datasource Id"
}
}
],
@@ -3382,7 +3734,7 @@
"content": {
"application/json": {
"schema": {
- "$ref": "#/components/schemas/DatasetDetailResponse"
+ "$ref": "#/components/schemas/TestConnectionResponse"
}
}
}
@@ -3400,14 +3752,14 @@
}
}
},
- "/api/v1/datasets/{dataset_id}/investigations": {
+ "/api/v1/v2/datasources/{datasource_id}/schema": {
"get": {
"tags": [
- "datasets"
+ "datasources"
],
- "summary": "Get Dataset Investigations",
- "description": "Get investigations for a dataset.",
- "operationId": "get_dataset_investigations_api_v1_datasets__dataset_id__investigations_get",
+ "summary": "Get Datasource Schema",
+ "description": "Get schema from a data source.\n\nReturns unified schema with catalogs, schemas, and tables.",
+ "operationId": "get_datasource_schema_api_v1_v2_datasources__datasource_id__schema_get",
"security": [
{
"APIKeyHeader": []
@@ -3418,25 +3770,49 @@
],
"parameters": [
{
- "name": "dataset_id",
+ "name": "datasource_id",
"in": "path",
"required": true,
"schema": {
"type": "string",
"format": "uuid",
- "title": "Dataset Id"
+ "title": "Datasource Id"
}
},
{
- "name": "limit",
+ "name": "table_pattern",
+ "in": "query",
+ "required": false,
+ "schema": {
+ "anyOf": [
+ {
+ "type": "string"
+ },
+ {
+ "type": "null"
+ }
+ ],
+ "title": "Table Pattern"
+ }
+ },
+ {
+ "name": "include_views",
+ "in": "query",
+ "required": false,
+ "schema": {
+ "type": "boolean",
+ "default": true,
+ "title": "Include Views"
+ }
+ },
+ {
+ "name": "max_tables",
"in": "query",
"required": false,
"schema": {
"type": "integer",
- "maximum": 100,
- "minimum": 1,
- "default": 50,
- "title": "Limit"
+ "default": 1000,
+ "title": "Max Tables"
}
}
],
@@ -3446,7 +3822,7 @@
"content": {
"application/json": {
"schema": {
- "$ref": "#/components/schemas/DatasetInvestigationsResponse"
+ "$ref": "#/components/schemas/SchemaResponseModel"
}
}
}
@@ -3464,44 +3840,14 @@
}
}
},
- "/api/v1/approvals/pending": {
- "get": {
+ "/api/v1/v2/datasources/{datasource_id}/query": {
+ "post": {
"tags": [
- "approvals"
+ "datasources"
],
- "summary": "List Pending Approvals",
- "description": "List all pending approval requests for this tenant.",
- "operationId": "list_pending_approvals_api_v1_approvals_pending_get",
- "responses": {
- "200": {
- "description": "Successful Response",
- "content": {
- "application/json": {
- "schema": {
- "$ref": "#/components/schemas/PendingApprovalsResponse"
- }
- }
- }
- }
- },
- "security": [
- {
- "APIKeyHeader": []
- },
- {
- "HTTPBearer": []
- }
- ]
- }
- },
- "/api/v1/approvals/{approval_id}": {
- "get": {
- "tags": [
- "approvals"
- ],
- "summary": "Get Approval Request",
- "description": "Get approval request details including context to review.",
- "operationId": "get_approval_request_api_v1_approvals__approval_id__get",
+ "summary": "Execute Query",
+ "description": "Execute a query against a data source.\n\nOnly works for sources that support SQL or similar query languages.",
+ "operationId": "execute_query_api_v1_v2_datasources__datasource_id__query_post",
"security": [
{
"APIKeyHeader": []
@@ -3512,23 +3858,33 @@
],
"parameters": [
{
- "name": "approval_id",
+ "name": "datasource_id",
"in": "path",
"required": true,
"schema": {
"type": "string",
"format": "uuid",
- "title": "Approval Id"
+ "title": "Datasource Id"
}
}
],
+ "requestBody": {
+ "required": true,
+ "content": {
+ "application/json": {
+ "schema": {
+ "$ref": "#/components/schemas/QueryRequest"
+ }
+ }
+ }
+ },
"responses": {
"200": {
"description": "Successful Response",
"content": {
"application/json": {
"schema": {
- "$ref": "#/components/schemas/ApprovalRequestResponse"
+ "$ref": "#/components/schemas/QueryResponse"
}
}
}
@@ -3546,14 +3902,14 @@
}
}
},
- "/api/v1/approvals/{approval_id}/approve": {
+ "/api/v1/v2/datasources/{datasource_id}/stats": {
"post": {
"tags": [
- "approvals"
+ "datasources"
],
- "summary": "Approve Request",
- "description": "Approve an investigation to proceed.",
- "operationId": "approve_request_api_v1_approvals__approval_id__approve_post",
+ "summary": "Get Column Stats",
+ "description": "Get statistics for columns in a table.\n\nOnly works for sources that support column statistics.",
+ "operationId": "get_column_stats_api_v1_v2_datasources__datasource_id__stats_post",
"security": [
{
"APIKeyHeader": []
@@ -3564,13 +3920,13 @@
],
"parameters": [
{
- "name": "approval_id",
+ "name": "datasource_id",
"in": "path",
"required": true,
"schema": {
"type": "string",
"format": "uuid",
- "title": "Approval Id"
+ "title": "Datasource Id"
}
}
],
@@ -3579,7 +3935,7 @@
"content": {
"application/json": {
"schema": {
- "$ref": "#/components/schemas/ApproveRequest"
+ "$ref": "#/components/schemas/StatsRequest"
}
}
}
@@ -3590,7 +3946,7 @@
"content": {
"application/json": {
"schema": {
- "$ref": "#/components/schemas/ApprovalDecisionResponse"
+ "$ref": "#/components/schemas/StatsResponse"
}
}
}
@@ -3608,14 +3964,14 @@
}
}
},
- "/api/v1/approvals/{approval_id}/reject": {
+ "/api/v1/v2/datasources/{datasource_id}/sync": {
"post": {
"tags": [
- "approvals"
+ "datasources"
],
- "summary": "Reject Request",
- "description": "Reject an investigation.",
- "operationId": "reject_request_api_v1_approvals__approval_id__reject_post",
+ "summary": "Sync Datasource Schema",
+ "description": "Sync schema and register/update datasets.\n\nDiscovers all tables from the data source and upserts them\ninto the datasets table. Soft-deletes datasets that no longer exist.",
+ "operationId": "sync_datasource_schema_api_v1_v2_datasources__datasource_id__sync_post",
"security": [
{
"APIKeyHeader": []
@@ -3626,33 +3982,23 @@
],
"parameters": [
{
- "name": "approval_id",
+ "name": "datasource_id",
"in": "path",
"required": true,
"schema": {
"type": "string",
"format": "uuid",
- "title": "Approval Id"
+ "title": "Datasource Id"
}
}
],
- "requestBody": {
- "required": true,
- "content": {
- "application/json": {
- "schema": {
- "$ref": "#/components/schemas/RejectRequest"
- }
- }
- }
- },
"responses": {
"200": {
"description": "Successful Response",
"content": {
"application/json": {
"schema": {
- "$ref": "#/components/schemas/ApprovalDecisionResponse"
+ "$ref": "#/components/schemas/SyncResponse"
}
}
}
@@ -3670,14 +4016,14 @@
}
}
},
- "/api/v1/approvals/{approval_id}/modify": {
- "post": {
+ "/api/v1/v2/datasources/{datasource_id}/datasets": {
+ "get": {
"tags": [
- "approvals"
+ "datasources"
],
- "summary": "Modify And Approve",
- "description": "Approve with modifications.\n\nThis allows reviewers to modify the investigation context before approving.\nFor example, they can adjust which tables are included, modify query limits, etc.",
- "operationId": "modify_and_approve_api_v1_approvals__approval_id__modify_post",
+ "summary": "List Datasource Datasets",
+ "description": "List datasets for a datasource.",
+ "operationId": "list_datasource_datasets_api_v1_v2_datasources__datasource_id__datasets_get",
"security": [
{
"APIKeyHeader": []
@@ -3688,33 +4034,78 @@
],
"parameters": [
{
- "name": "approval_id",
+ "name": "datasource_id",
"in": "path",
"required": true,
"schema": {
"type": "string",
"format": "uuid",
- "title": "Approval Id"
+ "title": "Datasource Id"
}
- }
- ],
- "requestBody": {
- "required": true,
- "content": {
- "application/json": {
- "schema": {
- "$ref": "#/components/schemas/ModifyRequest"
- }
+ },
+ {
+ "name": "table_type",
+ "in": "query",
+ "required": false,
+ "schema": {
+ "anyOf": [
+ {
+ "type": "string"
+ },
+ {
+ "type": "null"
+ }
+ ],
+ "title": "Table Type"
+ }
+ },
+ {
+ "name": "search",
+ "in": "query",
+ "required": false,
+ "schema": {
+ "anyOf": [
+ {
+ "type": "string"
+ },
+ {
+ "type": "null"
+ }
+ ],
+ "title": "Search"
+ }
+ },
+ {
+ "name": "limit",
+ "in": "query",
+ "required": false,
+ "schema": {
+ "type": "integer",
+ "maximum": 10000,
+ "minimum": 1,
+ "default": 1000,
+ "title": "Limit"
+ }
+ },
+ {
+ "name": "offset",
+ "in": "query",
+ "required": false,
+ "schema": {
+ "type": "integer",
+ "minimum": 0,
+ "default": 0,
+ "title": "Offset"
}
}
- },
+ ],
"responses": {
"200": {
"description": "Successful Response",
"content": {
"application/json": {
"schema": {
- "$ref": "#/components/schemas/ApprovalDecisionResponse"
+ "$ref": "#/components/schemas/DatasourceDatasetsResponse"
}
}
}
@@ -3732,23 +4123,43 @@
}
}
},
- "/api/v1/approvals/": {
+ "/api/v1/datasources/{datasource_id}/credentials": {
"post": {
"tags": [
- "approvals"
+ "credentials"
+ ],
+ "summary": "Save Credentials",
+ "description": "Save or update credentials for a datasource.\n\nUsers can store their own database credentials which will be used\nfor query execution. The database enforces permissions, not Dataing.",
+ "operationId": "save_credentials_api_v1_datasources__datasource_id__credentials_post",
+ "security": [
+ {
+ "APIKeyHeader": []
+ },
+ {
+ "HTTPBearer": []
+ }
+ ],
+ "parameters": [
+ {
+ "name": "datasource_id",
+ "in": "path",
+ "required": true,
+ "schema": {
+ "type": "string",
+ "format": "uuid",
+ "title": "Datasource Id"
+ }
+ }
],
- "summary": "Create Approval Request",
- "description": "Create a new approval request.\n\nThis is typically called by the system when an investigation reaches\na point requiring human review (e.g., context review before executing queries).",
- "operationId": "create_approval_request_api_v1_approvals__post",
"requestBody": {
+ "required": true,
"content": {
"application/json": {
"schema": {
- "$ref": "#/components/schemas/CreateApprovalRequest"
+ "$ref": "#/components/schemas/SaveCredentialsRequest"
}
}
- },
- "required": true
+ }
},
"responses": {
"201": {
@@ -3756,7 +4167,7 @@
"content": {
"application/json": {
"schema": {
- "$ref": "#/components/schemas/ApprovalRequestResponse"
+ "$ref": "#/components/schemas/CredentialsStatusResponse"
}
}
}
@@ -3771,25 +4182,15 @@
}
}
}
- },
- "security": [
- {
- "APIKeyHeader": []
- },
- {
- "HTTPBearer": []
- }
- ]
- }
- },
- "/api/v1/approvals/investigation/{investigation_id}": {
+ }
+ },
"get": {
"tags": [
- "approvals"
+ "credentials"
],
- "summary": "Get Investigation Approvals",
- "description": "Get all approval requests for a specific investigation.",
- "operationId": "get_investigation_approvals_api_v1_approvals_investigation__investigation_id__get",
+ "summary": "Get Credentials Status",
+ "description": "Check if credentials are configured for a datasource.\n\nReturns configuration status without exposing the actual credentials.",
+ "operationId": "get_credentials_status_api_v1_datasources__datasource_id__credentials_get",
"security": [
{
"APIKeyHeader": []
@@ -3800,13 +4201,13 @@
],
"parameters": [
{
- "name": "investigation_id",
+ "name": "datasource_id",
"in": "path",
"required": true,
"schema": {
"type": "string",
"format": "uuid",
- "title": "Investigation Id"
+ "title": "Datasource Id"
}
}
],
@@ -3816,11 +4217,7 @@
"content": {
"application/json": {
"schema": {
- "type": "array",
- "items": {
- "$ref": "#/components/schemas/ApprovalRequestResponse"
- },
- "title": "Response Get Investigation Approvals Api V1 Approvals Investigation Investigation Id Get"
+ "$ref": "#/components/schemas/CredentialsStatusResponse"
}
}
}
@@ -3836,28 +4233,14 @@
}
}
}
- }
- },
- "/api/v1/users/": {
- "get": {
+ },
+ "delete": {
"tags": [
- "users"
+ "credentials"
],
- "summary": "List Users",
- "description": "List all users for the tenant.",
- "operationId": "list_users_api_v1_users__get",
- "responses": {
- "200": {
- "description": "Successful Response",
- "content": {
- "application/json": {
- "schema": {
- "$ref": "#/components/schemas/UserListResponse"
- }
- }
- }
- }
- },
+ "summary": "Delete Credentials",
+ "description": "Remove credentials for a datasource.\n\nAfter deletion, the user will need to reconfigure credentials\nbefore executing queries.",
+ "operationId": "delete_credentials_api_v1_datasources__datasource_id__credentials_delete",
"security": [
{
"APIKeyHeader": []
@@ -3865,32 +4248,26 @@
{
"HTTPBearer": []
}
- ]
- },
- "post": {
- "tags": [
- "users"
],
- "summary": "Create User",
- "description": "Create a new user.\n\nRequires admin scope.",
- "operationId": "create_user_api_v1_users__post",
- "requestBody": {
- "content": {
- "application/json": {
- "schema": {
- "$ref": "#/components/schemas/CreateUserRequest"
- }
+ "parameters": [
+ {
+ "name": "datasource_id",
+ "in": "path",
+ "required": true,
+ "schema": {
+ "type": "string",
+ "format": "uuid",
+ "title": "Datasource Id"
}
- },
- "required": true
- },
+ }
+ ],
"responses": {
- "201": {
+ "200": {
"description": "Successful Response",
"content": {
"application/json": {
"schema": {
- "$ref": "#/components/schemas/UserResponse"
+ "$ref": "#/components/schemas/DeleteCredentialsResponse"
}
}
}
@@ -3905,37 +4282,17 @@
}
}
}
- },
- "security": [
- {
- "APIKeyHeader": []
- },
- {
- "HTTPBearer": []
- }
- ]
+ }
}
},
- "/api/v1/users/me": {
- "get": {
+ "/api/v1/datasources/{datasource_id}/credentials/test": {
+ "post": {
"tags": [
- "users"
+ "credentials"
],
- "summary": "Get Current User",
- "description": "Get the current authenticated user's profile.",
- "operationId": "get_current_user_api_v1_users_me_get",
- "responses": {
- "200": {
- "description": "Successful Response",
- "content": {
- "application/json": {
- "schema": {
- "$ref": "#/components/schemas/UserResponse"
- }
- }
- }
- }
- },
+ "summary": "Test Credentials",
+ "description": "Test credentials without saving them.\n\nValidates that the provided credentials can connect to the\ndatabase and access tables.",
+ "operationId": "test_credentials_api_v1_datasources__datasource_id__credentials_test_post",
"security": [
{
"APIKeyHeader": []
@@ -3943,69 +4300,36 @@
{
"HTTPBearer": []
}
- ]
- }
- },
- "/api/v1/users/org-members": {
- "get": {
- "tags": [
- "users"
],
- "summary": "List Org Members",
- "description": "List all members of the current organization (JWT auth).",
- "operationId": "list_org_members_api_v1_users_org_members_get",
- "responses": {
- "200": {
- "description": "Successful Response",
- "content": {
- "application/json": {
- "schema": {
- "items": {
- "$ref": "#/components/schemas/OrgMemberResponse"
- },
- "type": "array",
- "title": "Response List Org Members Api V1 Users Org Members Get"
- }
- }
- }
- }
- },
- "security": [
+ "parameters": [
{
- "HTTPBearer": []
+ "name": "datasource_id",
+ "in": "path",
+ "required": true,
+ "schema": {
+ "type": "string",
+ "format": "uuid",
+ "title": "Datasource Id"
+ }
}
- ]
- }
- },
- "/api/v1/users/invite": {
- "post": {
- "tags": [
- "users"
],
- "summary": "Invite User",
- "description": "Invite a user to the organization (admin only).\n\nIf user exists, adds them to the org. If not, creates a new user.",
- "operationId": "invite_user_api_v1_users_invite_post",
"requestBody": {
+ "required": true,
"content": {
"application/json": {
"schema": {
- "$ref": "#/components/schemas/InviteUserRequest"
+ "$ref": "#/components/schemas/SaveCredentialsRequest"
}
}
- },
- "required": true
+ }
},
"responses": {
- "201": {
+ "200": {
"description": "Successful Response",
"content": {
"application/json": {
"schema": {
- "additionalProperties": {
- "type": "string"
- },
- "type": "object",
- "title": "Response Invite User Api V1 Users Invite Post"
+ "$ref": "#/components/schemas/dataing__entrypoints__api__routes__credentials__TestConnectionResponse"
}
}
}
@@ -4020,22 +4344,17 @@
}
}
}
- },
- "security": [
- {
- "HTTPBearer": []
- }
- ]
+ }
}
},
- "/api/v1/users/{user_id}": {
+ "/api/v1/datasets/{dataset_id}": {
"get": {
"tags": [
- "users"
+ "datasets"
],
- "summary": "Get User",
- "description": "Get a specific user.",
- "operationId": "get_user_api_v1_users__user_id__get",
+ "summary": "Get Dataset",
+ "description": "Get a dataset by ID with column information.",
+ "operationId": "get_dataset_api_v1_datasets__dataset_id__get",
"security": [
{
"APIKeyHeader": []
@@ -4046,13 +4365,13 @@
],
"parameters": [
{
- "name": "user_id",
+ "name": "dataset_id",
"in": "path",
"required": true,
"schema": {
"type": "string",
"format": "uuid",
- "title": "User Id"
+ "title": "Dataset Id"
}
}
],
@@ -4062,7 +4381,7 @@
"content": {
"application/json": {
"schema": {
- "$ref": "#/components/schemas/UserResponse"
+ "$ref": "#/components/schemas/DatasetDetailResponse"
}
}
}
@@ -4078,14 +4397,16 @@
}
}
}
- },
- "patch": {
+ }
+ },
+ "/api/v1/datasets/{dataset_id}/investigations": {
+ "get": {
"tags": [
- "users"
+ "datasets"
],
- "summary": "Update User",
- "description": "Update a user.\n\nRequires admin scope.",
- "operationId": "update_user_api_v1_users__user_id__patch",
+ "summary": "Get Dataset Investigations",
+ "description": "Get investigations for a dataset.",
+ "operationId": "get_dataset_investigations_api_v1_datasets__dataset_id__investigations_get",
"security": [
{
"APIKeyHeader": []
@@ -4096,33 +4417,35 @@
],
"parameters": [
{
- "name": "user_id",
+ "name": "dataset_id",
"in": "path",
"required": true,
"schema": {
"type": "string",
"format": "uuid",
- "title": "User Id"
+ "title": "Dataset Id"
}
- }
- ],
- "requestBody": {
- "required": true,
- "content": {
- "application/json": {
- "schema": {
- "$ref": "#/components/schemas/UpdateUserRequest"
- }
+ },
+ {
+ "name": "limit",
+ "in": "query",
+ "required": false,
+ "schema": {
+ "type": "integer",
+ "maximum": 100,
+ "minimum": 1,
+ "default": 50,
+ "title": "Limit"
}
}
- },
+ ],
"responses": {
"200": {
"description": "Successful Response",
"content": {
"application/json": {
"schema": {
- "$ref": "#/components/schemas/UserResponse"
+ "$ref": "#/components/schemas/DatasetInvestigationsResponse"
}
}
}
@@ -4138,14 +4461,46 @@
}
}
}
- },
- "delete": {
+ }
+ },
+ "/api/v1/approvals/pending": {
+ "get": {
"tags": [
- "users"
+ "approvals"
],
- "summary": "Deactivate User",
- "description": "Deactivate a user (soft delete).\n\nRequires admin scope. Users cannot delete themselves.",
- "operationId": "deactivate_user_api_v1_users__user_id__delete",
+ "summary": "List Pending Approvals",
+ "description": "List all pending approval requests for this tenant.",
+ "operationId": "list_pending_approvals_api_v1_approvals_pending_get",
+ "responses": {
+ "200": {
+ "description": "Successful Response",
+ "content": {
+ "application/json": {
+ "schema": {
+ "$ref": "#/components/schemas/PendingApprovalsResponse"
+ }
+ }
+ }
+ }
+ },
+ "security": [
+ {
+ "APIKeyHeader": []
+ },
+ {
+ "HTTPBearer": []
+ }
+ ]
+ }
+ },
+ "/api/v1/approvals/{approval_id}": {
+ "get": {
+ "tags": [
+ "approvals"
+ ],
+ "summary": "Get Approval Request",
+ "description": "Get approval request details including context to review.",
+ "operationId": "get_approval_request_api_v1_approvals__approval_id__get",
"security": [
{
"APIKeyHeader": []
@@ -4156,19 +4511,26 @@
],
"parameters": [
{
- "name": "user_id",
+ "name": "approval_id",
"in": "path",
"required": true,
"schema": {
"type": "string",
"format": "uuid",
- "title": "User Id"
+ "title": "Approval Id"
}
}
],
"responses": {
- "204": {
- "description": "Successful Response"
+ "200": {
+ "description": "Successful Response",
+ "content": {
+ "application/json": {
+ "schema": {
+ "$ref": "#/components/schemas/ApprovalRequestResponse"
+ }
+ }
+ }
},
"422": {
"description": "Validation Error",
@@ -4183,28 +4545,31 @@
}
}
},
- "/api/v1/users/{user_id}/role": {
- "patch": {
+ "/api/v1/approvals/{approval_id}/approve": {
+ "post": {
"tags": [
- "users"
+ "approvals"
],
- "summary": "Update Member Role",
- "description": "Update a member's role in the organization (admin only).",
- "operationId": "update_member_role_api_v1_users__user_id__role_patch",
- "security": [
+ "summary": "Approve Request",
+ "description": "Approve an investigation to proceed.",
+ "operationId": "approve_request_api_v1_approvals__approval_id__approve_post",
+ "security": [
+ {
+ "APIKeyHeader": []
+ },
{
"HTTPBearer": []
}
],
"parameters": [
{
- "name": "user_id",
+ "name": "approval_id",
"in": "path",
"required": true,
"schema": {
"type": "string",
"format": "uuid",
- "title": "User Id"
+ "title": "Approval Id"
}
}
],
@@ -4213,7 +4578,7 @@
"content": {
"application/json": {
"schema": {
- "$ref": "#/components/schemas/UpdateRoleRequest"
+ "$ref": "#/components/schemas/ApproveRequest"
}
}
}
@@ -4224,11 +4589,7 @@
"content": {
"application/json": {
"schema": {
- "type": "object",
- "additionalProperties": {
- "type": "string"
- },
- "title": "Response Update Member Role Api V1 Users User Id Role Patch"
+ "$ref": "#/components/schemas/ApprovalDecisionResponse"
}
}
}
@@ -4246,42 +4607,51 @@
}
}
},
- "/api/v1/users/{user_id}/remove": {
+ "/api/v1/approvals/{approval_id}/reject": {
"post": {
"tags": [
- "users"
+ "approvals"
],
- "summary": "Remove Org Member",
- "description": "Remove a member from the organization (admin only).",
- "operationId": "remove_org_member_api_v1_users__user_id__remove_post",
+ "summary": "Reject Request",
+ "description": "Reject an investigation.",
+ "operationId": "reject_request_api_v1_approvals__approval_id__reject_post",
"security": [
+ {
+ "APIKeyHeader": []
+ },
{
"HTTPBearer": []
}
],
"parameters": [
{
- "name": "user_id",
+ "name": "approval_id",
"in": "path",
"required": true,
"schema": {
"type": "string",
"format": "uuid",
- "title": "User Id"
+ "title": "Approval Id"
}
}
],
+ "requestBody": {
+ "required": true,
+ "content": {
+ "application/json": {
+ "schema": {
+ "$ref": "#/components/schemas/RejectRequest"
+ }
+ }
+ }
+ },
"responses": {
"200": {
"description": "Successful Response",
"content": {
"application/json": {
"schema": {
- "type": "object",
- "additionalProperties": {
- "type": "string"
- },
- "title": "Response Remove Org Member Api V1 Users User Id Remove Post"
+ "$ref": "#/components/schemas/ApprovalDecisionResponse"
}
}
}
@@ -4299,26 +4669,14 @@
}
}
},
- "/api/v1/dashboard/": {
- "get": {
+ "/api/v1/approvals/{approval_id}/modify": {
+ "post": {
"tags": [
- "dashboard"
+ "approvals"
],
- "summary": "Get Dashboard",
- "description": "Get dashboard overview for the current tenant.",
- "operationId": "get_dashboard_api_v1_dashboard__get",
- "responses": {
- "200": {
- "description": "Successful Response",
- "content": {
- "application/json": {
- "schema": {
- "$ref": "#/components/schemas/DashboardResponse"
- }
- }
- }
- }
- },
+ "summary": "Modify And Approve",
+ "description": "Approve with modifications.\n\nThis allows reviewers to modify the investigation context before approving.\nFor example, they can adjust which tables are included, modify query limits, etc.",
+ "operationId": "modify_and_approve_api_v1_approvals__approval_id__modify_post",
"security": [
{
"APIKeyHeader": []
@@ -4326,54 +4684,88 @@
{
"HTTPBearer": []
}
- ]
- }
- },
- "/api/v1/dashboard/stats": {
- "get": {
- "tags": [
- "dashboard"
],
- "summary": "Get Stats",
- "description": "Get just the dashboard statistics.",
- "operationId": "get_stats_api_v1_dashboard_stats_get",
+ "parameters": [
+ {
+ "name": "approval_id",
+ "in": "path",
+ "required": true,
+ "schema": {
+ "type": "string",
+ "format": "uuid",
+ "title": "Approval Id"
+ }
+ }
+ ],
+ "requestBody": {
+ "required": true,
+ "content": {
+ "application/json": {
+ "schema": {
+ "$ref": "#/components/schemas/ModifyRequest"
+ }
+ }
+ }
+ },
"responses": {
"200": {
"description": "Successful Response",
"content": {
"application/json": {
"schema": {
- "$ref": "#/components/schemas/DashboardStats"
+ "$ref": "#/components/schemas/ApprovalDecisionResponse"
}
}
}
- }
- },
- "security": [
- {
- "APIKeyHeader": []
},
- {
- "HTTPBearer": []
+ "422": {
+ "description": "Validation Error",
+ "content": {
+ "application/json": {
+ "schema": {
+ "$ref": "#/components/schemas/HTTPValidationError"
+ }
+ }
+ }
}
- ]
+ }
}
},
- "/api/v1/usage/metrics": {
- "get": {
+ "/api/v1/approvals/": {
+ "post": {
"tags": [
- "usage"
+ "approvals"
],
- "summary": "Get Usage Metrics",
- "description": "Get current usage metrics for tenant.",
- "operationId": "get_usage_metrics_api_v1_usage_metrics_get",
+ "summary": "Create Approval Request",
+ "description": "Create a new approval request.\n\nThis is typically called by the system when an investigation reaches\na point requiring human review (e.g., context review before executing queries).",
+ "operationId": "create_approval_request_api_v1_approvals__post",
+ "requestBody": {
+ "content": {
+ "application/json": {
+ "schema": {
+ "$ref": "#/components/schemas/CreateApprovalRequest"
+ }
+ }
+ },
+ "required": true
+ },
"responses": {
- "200": {
+ "201": {
"description": "Successful Response",
"content": {
"application/json": {
"schema": {
- "$ref": "#/components/schemas/UsageMetricsResponse"
+ "$ref": "#/components/schemas/ApprovalRequestResponse"
+ }
+ }
+ }
+ },
+ "422": {
+ "description": "Validation Error",
+ "content": {
+ "application/json": {
+ "schema": {
+ "$ref": "#/components/schemas/HTTPValidationError"
}
}
}
@@ -4389,14 +4781,14 @@
]
}
},
- "/api/v1/analytics/weekly-usage": {
+ "/api/v1/approvals/investigation/{investigation_id}": {
"get": {
"tags": [
- "analytics"
+ "approvals"
],
- "summary": "Get Weekly Usage",
- "description": "Get weekly usage statistics for the current tenant.\n\nReturns aggregated metrics per week including:\n- Issues created\n- Investigations started and completed\n- Issues resolved\n- Active teams count\n- Issue resolution rate",
- "operationId": "get_weekly_usage_api_v1_analytics_weekly_usage_get",
+ "summary": "Get Investigation Approvals",
+ "description": "Get all approval requests for a specific investigation.",
+ "operationId": "get_investigation_approvals_api_v1_approvals_investigation__investigation_id__get",
"security": [
{
"APIKeyHeader": []
@@ -4407,36 +4799,14 @@
],
"parameters": [
{
- "name": "weeks",
- "in": "query",
- "required": false,
- "schema": {
- "type": "integer",
- "maximum": 52,
- "minimum": 1,
- "description": "Number of weeks to return",
- "default": 4,
- "title": "Weeks"
- },
- "description": "Number of weeks to return"
- },
- {
- "name": "team_id",
- "in": "query",
- "required": false,
+ "name": "investigation_id",
+ "in": "path",
+ "required": true,
"schema": {
- "anyOf": [
- {
- "type": "string"
- },
- {
- "type": "null"
- }
- ],
- "description": "Filter by team ID",
- "title": "Team Id"
- },
- "description": "Filter by team ID"
+ "type": "string",
+ "format": "uuid",
+ "title": "Investigation Id"
+ }
}
],
"responses": {
@@ -4445,7 +4815,11 @@
"content": {
"application/json": {
"schema": {
- "$ref": "#/components/schemas/WeeklyUsageListResponse"
+ "type": "array",
+ "items": {
+ "$ref": "#/components/schemas/ApprovalRequestResponse"
+ },
+ "title": "Response Get Investigation Approvals Api V1 Approvals Investigation Investigation Id Get"
}
}
}
@@ -4463,21 +4837,21 @@
}
}
},
- "/api/v1/analytics/activation": {
+ "/api/v1/users/": {
"get": {
"tags": [
- "analytics"
+ "users"
],
- "summary": "Get Activation Status",
- "description": "Get activation status for the current tenant.\n\nActivation is defined as having both:\n- Created at least one issue\n- Completed at least one investigation\n\nWithin the first 7 days of account creation.",
- "operationId": "get_activation_status_api_v1_analytics_activation_get",
+ "summary": "List Users",
+ "description": "List all users for the tenant.",
+ "operationId": "list_users_api_v1_users__get",
"responses": {
"200": {
"description": "Successful Response",
"content": {
"application/json": {
"schema": {
- "$ref": "#/components/schemas/ActivationStatusResponse"
+ "$ref": "#/components/schemas/UserListResponse"
}
}
}
@@ -4491,47 +4865,31 @@
"HTTPBearer": []
}
]
- }
- },
- "/api/v1/analytics/activation/funnel": {
- "get": {
+ },
+ "post": {
"tags": [
- "analytics"
+ "users"
],
- "summary": "Get Activation Funnel",
- "description": "Get activation funnel statistics.\n\nReturns aggregated funnel metrics showing:\n- Total tenants created in period\n- Tenants that created at least one issue\n- Tenants that completed at least one investigation\n- Tenants that achieved activation\n\nNote: This endpoint requires admin permissions in production.",
- "operationId": "get_activation_funnel_api_v1_analytics_activation_funnel_get",
- "security": [
- {
- "APIKeyHeader": []
+ "summary": "Create User",
+ "description": "Create a new user.\n\nRequires admin scope.",
+ "operationId": "create_user_api_v1_users__post",
+ "requestBody": {
+ "content": {
+ "application/json": {
+ "schema": {
+ "$ref": "#/components/schemas/CreateUserRequest"
+ }
+ }
},
- {
- "HTTPBearer": []
- }
- ],
- "parameters": [
- {
- "name": "days",
- "in": "query",
- "required": false,
- "schema": {
- "type": "integer",
- "maximum": 365,
- "minimum": 1,
- "description": "Look back period in days",
- "default": 90,
- "title": "Days"
- },
- "description": "Look back period in days"
- }
- ],
+ "required": true
+ },
"responses": {
- "200": {
+ "201": {
"description": "Successful Response",
"content": {
"application/json": {
"schema": {
- "$ref": "#/components/schemas/ActivationFunnelResponse"
+ "$ref": "#/components/schemas/UserResponse"
}
}
}
@@ -4546,20 +4904,35 @@
}
}
}
- }
+ },
+ "security": [
+ {
+ "APIKeyHeader": []
+ },
+ {
+ "HTTPBearer": []
+ }
+ ]
}
},
- "/api/v1/analytics/refresh": {
- "post": {
+ "/api/v1/users/me": {
+ "get": {
"tags": [
- "analytics"
+ "users"
],
- "summary": "Refresh Weekly Stats",
- "description": "Refresh the weekly usage statistics materialized view.\n\nThis is typically called by a scheduled job but can be triggered manually.",
- "operationId": "refresh_weekly_stats_api_v1_analytics_refresh_post",
+ "summary": "Get Current User",
+ "description": "Get the current authenticated user's profile.",
+ "operationId": "get_current_user_api_v1_users_me_get",
"responses": {
- "204": {
- "description": "Successful Response"
+ "200": {
+ "description": "Successful Response",
+ "content": {
+ "application/json": {
+ "schema": {
+ "$ref": "#/components/schemas/UserResponse"
+ }
+ }
+ }
}
},
"security": [
@@ -4572,36 +4945,96 @@
]
}
},
- "/api/v1/lineage/providers": {
+ "/api/v1/users/org-members": {
"get": {
"tags": [
- "lineage"
+ "users"
],
- "summary": "List Providers",
- "description": "List all available lineage providers.\n\nReturns the configuration schema for each provider, which can be used\nto dynamically generate connection forms in the frontend.",
- "operationId": "list_providers_api_v1_lineage_providers_get",
+ "summary": "List Org Members",
+ "description": "List all members of the current organization (JWT auth).",
+ "operationId": "list_org_members_api_v1_users_org_members_get",
"responses": {
"200": {
"description": "Successful Response",
"content": {
"application/json": {
"schema": {
- "$ref": "#/components/schemas/LineageProvidersResponse"
+ "items": {
+ "$ref": "#/components/schemas/OrgMemberResponse"
+ },
+ "type": "array",
+ "title": "Response List Org Members Api V1 Users Org Members Get"
}
}
}
}
- }
+ },
+ "security": [
+ {
+ "HTTPBearer": []
+ }
+ ]
}
},
- "/api/v1/lineage/upstream": {
+ "/api/v1/users/invite": {
+ "post": {
+ "tags": [
+ "users"
+ ],
+ "summary": "Invite User",
+ "description": "Invite a user to the organization (admin only).\n\nIf user exists, adds them to the org. If not, creates a new user.",
+ "operationId": "invite_user_api_v1_users_invite_post",
+ "requestBody": {
+ "content": {
+ "application/json": {
+ "schema": {
+ "$ref": "#/components/schemas/InviteUserRequest"
+ }
+ }
+ },
+ "required": true
+ },
+ "responses": {
+ "201": {
+ "description": "Successful Response",
+ "content": {
+ "application/json": {
+ "schema": {
+ "additionalProperties": {
+ "type": "string"
+ },
+ "type": "object",
+ "title": "Response Invite User Api V1 Users Invite Post"
+ }
+ }
+ }
+ },
+ "422": {
+ "description": "Validation Error",
+ "content": {
+ "application/json": {
+ "schema": {
+ "$ref": "#/components/schemas/HTTPValidationError"
+ }
+ }
+ }
+ }
+ },
+ "security": [
+ {
+ "HTTPBearer": []
+ }
+ ]
+ }
+ },
+ "/api/v1/users/{user_id}": {
"get": {
"tags": [
- "lineage"
+ "users"
],
- "summary": "Get Upstream",
- "description": "Get upstream (parent) datasets.\n\nReturns datasets that feed into the specified dataset.",
- "operationId": "get_upstream_api_v1_lineage_upstream_get",
+ "summary": "Get User",
+ "description": "Get a specific user.",
+ "operationId": "get_user_api_v1_users__user_id__get",
"security": [
{
"APIKeyHeader": []
@@ -4612,77 +5045,14 @@
],
"parameters": [
{
- "name": "dataset",
- "in": "query",
+ "name": "user_id",
+ "in": "path",
"required": true,
"schema": {
"type": "string",
- "description": "Dataset identifier (platform://name)",
- "title": "Dataset"
- },
- "description": "Dataset identifier (platform://name)"
- },
- {
- "name": "depth",
- "in": "query",
- "required": false,
- "schema": {
- "type": "integer",
- "maximum": 10,
- "minimum": 1,
- "description": "Depth of lineage traversal",
- "default": 1,
- "title": "Depth"
- },
- "description": "Depth of lineage traversal"
- },
- {
- "name": "provider",
- "in": "query",
- "required": false,
- "schema": {
- "type": "string",
- "description": "Lineage provider to use",
- "default": "dbt",
- "title": "Provider"
- },
- "description": "Lineage provider to use"
- },
- {
- "name": "manifest_path",
- "in": "query",
- "required": false,
- "schema": {
- "anyOf": [
- {
- "type": "string"
- },
- {
- "type": "null"
- }
- ],
- "description": "Path to dbt manifest.json",
- "title": "Manifest Path"
- },
- "description": "Path to dbt manifest.json"
- },
- {
- "name": "base_url",
- "in": "query",
- "required": false,
- "schema": {
- "anyOf": [
- {
- "type": "string"
- },
- {
- "type": "null"
- }
- ],
- "description": "Base URL for API-based providers",
- "title": "Base Url"
- },
- "description": "Base URL for API-based providers"
+ "format": "uuid",
+ "title": "User Id"
+ }
}
],
"responses": {
@@ -4691,7 +5061,7 @@
"content": {
"application/json": {
"schema": {
- "$ref": "#/components/schemas/UpstreamResponse"
+ "$ref": "#/components/schemas/UserResponse"
}
}
}
@@ -4707,16 +5077,14 @@
}
}
}
- }
- },
- "/api/v1/lineage/downstream": {
- "get": {
+ },
+ "patch": {
"tags": [
- "lineage"
+ "users"
],
- "summary": "Get Downstream",
- "description": "Get downstream (child) datasets.\n\nReturns datasets that depend on the specified dataset.",
- "operationId": "get_downstream_api_v1_lineage_downstream_get",
+ "summary": "Update User",
+ "description": "Update a user.\n\nRequires admin scope.",
+ "operationId": "update_user_api_v1_users__user_id__patch",
"security": [
{
"APIKeyHeader": []
@@ -4727,86 +5095,33 @@
],
"parameters": [
{
- "name": "dataset",
- "in": "query",
+ "name": "user_id",
+ "in": "path",
"required": true,
"schema": {
"type": "string",
- "description": "Dataset identifier (platform://name)",
- "title": "Dataset"
- },
- "description": "Dataset identifier (platform://name)"
- },
- {
- "name": "depth",
- "in": "query",
- "required": false,
- "schema": {
- "type": "integer",
- "maximum": 10,
- "minimum": 1,
- "description": "Depth of lineage traversal",
- "default": 1,
- "title": "Depth"
- },
- "description": "Depth of lineage traversal"
- },
- {
- "name": "provider",
- "in": "query",
- "required": false,
- "schema": {
- "type": "string",
- "description": "Lineage provider to use",
- "default": "dbt",
- "title": "Provider"
- },
- "description": "Lineage provider to use"
- },
- {
- "name": "manifest_path",
- "in": "query",
- "required": false,
- "schema": {
- "anyOf": [
- {
- "type": "string"
- },
- {
- "type": "null"
- }
- ],
- "description": "Path to dbt manifest.json",
- "title": "Manifest Path"
- },
- "description": "Path to dbt manifest.json"
- },
- {
- "name": "base_url",
- "in": "query",
- "required": false,
- "schema": {
- "anyOf": [
- {
- "type": "string"
- },
- {
- "type": "null"
- }
- ],
- "description": "Base URL for API-based providers",
- "title": "Base Url"
- },
- "description": "Base URL for API-based providers"
+ "format": "uuid",
+ "title": "User Id"
+ }
}
],
+ "requestBody": {
+ "required": true,
+ "content": {
+ "application/json": {
+ "schema": {
+ "$ref": "#/components/schemas/UpdateUserRequest"
+ }
+ }
+ }
+ },
"responses": {
"200": {
"description": "Successful Response",
"content": {
"application/json": {
"schema": {
- "$ref": "#/components/schemas/DownstreamResponse"
+ "$ref": "#/components/schemas/UserResponse"
}
}
}
@@ -4822,16 +5137,14 @@
}
}
}
- }
- },
- "/api/v1/lineage/graph": {
- "get": {
+ },
+ "delete": {
"tags": [
- "lineage"
+ "users"
],
- "summary": "Get Lineage Graph",
- "description": "Get full lineage graph around a dataset.\n\nReturns a graph structure with datasets, edges, and jobs.",
- "operationId": "get_lineage_graph_api_v1_lineage_graph_get",
+ "summary": "Deactivate User",
+ "description": "Deactivate a user (soft delete).\n\nRequires admin scope. Users cannot delete themselves.",
+ "operationId": "deactivate_user_api_v1_users__user_id__delete",
"security": [
{
"APIKeyHeader": []
@@ -4842,100 +5155,79 @@
],
"parameters": [
{
- "name": "dataset",
- "in": "query",
+ "name": "user_id",
+ "in": "path",
"required": true,
"schema": {
"type": "string",
- "description": "Dataset identifier (platform://name)",
- "title": "Dataset"
- },
- "description": "Dataset identifier (platform://name)"
- },
- {
- "name": "upstream_depth",
- "in": "query",
- "required": false,
- "schema": {
- "type": "integer",
- "maximum": 10,
- "minimum": 0,
- "description": "Upstream traversal depth",
- "default": 3,
- "title": "Upstream Depth"
- },
- "description": "Upstream traversal depth"
+ "format": "uuid",
+ "title": "User Id"
+ }
+ }
+ ],
+ "responses": {
+ "204": {
+ "description": "Successful Response"
},
+ "422": {
+ "description": "Validation Error",
+ "content": {
+ "application/json": {
+ "schema": {
+ "$ref": "#/components/schemas/HTTPValidationError"
+ }
+ }
+ }
+ }
+ }
+ }
+ },
+ "/api/v1/users/{user_id}/role": {
+ "patch": {
+ "tags": [
+ "users"
+ ],
+ "summary": "Update Member Role",
+ "description": "Update a member's role in the organization (admin only).",
+ "operationId": "update_member_role_api_v1_users__user_id__role_patch",
+ "security": [
{
- "name": "downstream_depth",
- "in": "query",
- "required": false,
- "schema": {
- "type": "integer",
- "maximum": 10,
- "minimum": 0,
- "description": "Downstream traversal depth",
- "default": 3,
- "title": "Downstream Depth"
- },
- "description": "Downstream traversal depth"
- },
+ "HTTPBearer": []
+ }
+ ],
+ "parameters": [
{
- "name": "provider",
- "in": "query",
- "required": false,
+ "name": "user_id",
+ "in": "path",
+ "required": true,
"schema": {
"type": "string",
- "description": "Lineage provider to use",
- "default": "dbt",
- "title": "Provider"
- },
- "description": "Lineage provider to use"
- },
- {
- "name": "manifest_path",
- "in": "query",
- "required": false,
- "schema": {
- "anyOf": [
- {
- "type": "string"
- },
- {
- "type": "null"
- }
- ],
- "description": "Path to dbt manifest.json",
- "title": "Manifest Path"
- },
- "description": "Path to dbt manifest.json"
- },
- {
- "name": "base_url",
- "in": "query",
- "required": false,
- "schema": {
- "anyOf": [
- {
- "type": "string"
- },
- {
- "type": "null"
- }
- ],
- "description": "Base URL for API-based providers",
- "title": "Base Url"
- },
- "description": "Base URL for API-based providers"
+ "format": "uuid",
+ "title": "User Id"
+ }
}
],
+ "requestBody": {
+ "required": true,
+ "content": {
+ "application/json": {
+ "schema": {
+ "$ref": "#/components/schemas/UpdateRoleRequest"
+ }
+ }
+ }
+ },
"responses": {
"200": {
"description": "Successful Response",
"content": {
"application/json": {
"schema": {
- "$ref": "#/components/schemas/LineageGraphResponse"
+ "type": "object",
+ "additionalProperties": {
+ "type": "string"
+ },
+ "title": "Response Update Member Role Api V1 Users User Id Role Patch"
}
}
}
@@ -4953,126 +5245,139 @@
}
}
},
- "/api/v1/lineage/column-lineage": {
- "get": {
+ "/api/v1/users/{user_id}/remove": {
+ "post": {
"tags": [
- "lineage"
+ "users"
],
- "summary": "Get Column Lineage",
- "description": "Get column-level lineage.\n\nReturns the source columns that feed into the specified column.\nNot all providers support column lineage.",
- "operationId": "get_column_lineage_api_v1_lineage_column_lineage_get",
+ "summary": "Remove Org Member",
+ "description": "Remove a member from the organization (admin only).",
+ "operationId": "remove_org_member_api_v1_users__user_id__remove_post",
"security": [
- {
- "APIKeyHeader": []
- },
{
"HTTPBearer": []
}
],
"parameters": [
{
- "name": "dataset",
- "in": "query",
+ "name": "user_id",
+ "in": "path",
"required": true,
"schema": {
"type": "string",
- "description": "Dataset identifier (platform://name)",
- "title": "Dataset"
- },
- "description": "Dataset identifier (platform://name)"
+ "format": "uuid",
+ "title": "User Id"
+ }
+ }
+ ],
+ "responses": {
+ "200": {
+ "description": "Successful Response",
+ "content": {
+ "application/json": {
+ "schema": {
+ "type": "object",
+ "additionalProperties": {
+ "type": "string"
+ },
+ "title": "Response Remove Org Member Api V1 Users User Id Remove Post"
+ }
+ }
+ }
},
- {
- "name": "column",
- "in": "query",
- "required": true,
- "schema": {
- "type": "string",
- "description": "Column name to trace",
- "title": "Column"
- },
- "description": "Column name to trace"
- },
- {
- "name": "provider",
- "in": "query",
- "required": false,
- "schema": {
- "type": "string",
- "description": "Lineage provider to use",
- "default": "dbt",
- "title": "Provider"
- },
- "description": "Lineage provider to use"
- },
- {
- "name": "manifest_path",
- "in": "query",
- "required": false,
- "schema": {
- "anyOf": [
- {
- "type": "string"
- },
- {
- "type": "null"
- }
- ],
- "description": "Path to dbt manifest.json",
- "title": "Manifest Path"
- },
- "description": "Path to dbt manifest.json"
- },
- {
- "name": "base_url",
- "in": "query",
- "required": false,
- "schema": {
- "anyOf": [
- {
- "type": "string"
- },
- {
- "type": "null"
+ "422": {
+ "description": "Validation Error",
+ "content": {
+ "application/json": {
+ "schema": {
+ "$ref": "#/components/schemas/HTTPValidationError"
}
- ],
- "description": "Base URL for API-based providers",
- "title": "Base Url"
- },
- "description": "Base URL for API-based providers"
+ }
+ }
}
+ }
+ }
+ },
+ "/api/v1/dashboard/": {
+ "get": {
+ "tags": [
+ "dashboard"
],
+ "summary": "Get Dashboard",
+ "description": "Get dashboard overview for the current tenant.",
+ "operationId": "get_dashboard_api_v1_dashboard__get",
"responses": {
"200": {
"description": "Successful Response",
"content": {
"application/json": {
"schema": {
- "$ref": "#/components/schemas/ColumnLineageListResponse"
+ "$ref": "#/components/schemas/DashboardResponse"
}
}
}
+ }
+ },
+ "security": [
+ {
+ "APIKeyHeader": []
},
- "422": {
- "description": "Validation Error",
+ {
+ "HTTPBearer": []
+ }
+ ]
+ }
+ },
+ "/api/v1/dashboard/stats": {
+ "get": {
+ "tags": [
+ "dashboard"
+ ],
+ "summary": "Get Stats",
+ "description": "Get just the dashboard statistics.",
+ "operationId": "get_stats_api_v1_dashboard_stats_get",
+ "responses": {
+ "200": {
+ "description": "Successful Response",
"content": {
"application/json": {
"schema": {
- "$ref": "#/components/schemas/HTTPValidationError"
+ "$ref": "#/components/schemas/DashboardStats"
}
}
}
}
- }
+ },
+ "security": [
+ {
+ "APIKeyHeader": []
+ },
+ {
+ "HTTPBearer": []
+ }
+ ]
}
},
- "/api/v1/lineage/job/{job_id}": {
+ "/api/v1/usage/metrics": {
"get": {
"tags": [
- "lineage"
+ "usage"
],
- "summary": "Get Job",
- "description": "Get job details.\n\nReturns information about a job that produces or consumes datasets.",
- "operationId": "get_job_api_v1_lineage_job__job_id__get",
+ "summary": "Get Usage Metrics",
+ "description": "Get current usage metrics for tenant.",
+ "operationId": "get_usage_metrics_api_v1_usage_metrics_get",
+ "responses": {
+ "200": {
+ "description": "Successful Response",
+ "content": {
+ "application/json": {
+ "schema": {
+ "$ref": "#/components/schemas/UsageMetricsResponse"
+ }
+ }
+ }
+ }
+ },
"security": [
{
"APIKeyHeader": []
@@ -5080,49 +5385,42 @@
{
"HTTPBearer": []
}
+ ]
+ }
+ },
+ "/api/v1/analytics/weekly-usage": {
+ "get": {
+ "tags": [
+ "analytics"
],
- "parameters": [
+ "summary": "Get Weekly Usage",
+ "description": "Get weekly usage statistics for the current tenant.\n\nReturns aggregated metrics per week including:\n- Issues created\n- Investigations started and completed\n- Issues resolved\n- Active teams count\n- Issue resolution rate",
+ "operationId": "get_weekly_usage_api_v1_analytics_weekly_usage_get",
+ "security": [
{
- "name": "job_id",
- "in": "path",
- "required": true,
- "schema": {
- "type": "string",
- "title": "Job Id"
- }
+ "APIKeyHeader": []
},
{
- "name": "provider",
- "in": "query",
- "required": false,
- "schema": {
- "type": "string",
- "description": "Lineage provider to use",
- "default": "dbt",
- "title": "Provider"
- },
- "description": "Lineage provider to use"
- },
+ "HTTPBearer": []
+ }
+ ],
+ "parameters": [
{
- "name": "manifest_path",
+ "name": "weeks",
"in": "query",
"required": false,
"schema": {
- "anyOf": [
- {
- "type": "string"
- },
- {
- "type": "null"
- }
- ],
- "description": "Path to dbt manifest.json",
- "title": "Manifest Path"
+ "type": "integer",
+ "maximum": 52,
+ "minimum": 1,
+ "description": "Number of weeks to return",
+ "default": 4,
+ "title": "Weeks"
},
- "description": "Path to dbt manifest.json"
+ "description": "Number of weeks to return"
},
{
- "name": "base_url",
+ "name": "team_id",
"in": "query",
"required": false,
"schema": {
@@ -5134,10 +5432,10 @@
"type": "null"
}
],
- "description": "Base URL for API-based providers",
- "title": "Base Url"
+ "description": "Filter by team ID",
+ "title": "Team Id"
},
- "description": "Base URL for API-based providers"
+ "description": "Filter by team ID"
}
],
"responses": {
@@ -5146,7 +5444,7 @@
"content": {
"application/json": {
"schema": {
- "$ref": "#/components/schemas/JobResponse"
+ "$ref": "#/components/schemas/WeeklyUsageListResponse"
}
}
}
@@ -5164,14 +5462,26 @@
}
}
},
- "/api/v1/lineage/job/{job_id}/runs": {
+ "/api/v1/analytics/activation": {
"get": {
"tags": [
- "lineage"
+ "analytics"
],
- "summary": "Get Job Runs",
- "description": "Get recent runs of a job.\n\nReturns execution history for the specified job.",
- "operationId": "get_job_runs_api_v1_lineage_job__job_id__runs_get",
+ "summary": "Get Activation Status",
+ "description": "Get activation status for the current tenant.\n\nActivation is defined as having both:\n- Created at least one issue\n- Completed at least one investigation\n\nWithin the first 7 days of account creation.",
+ "operationId": "get_activation_status_api_v1_analytics_activation_get",
+ "responses": {
+ "200": {
+ "description": "Successful Response",
+ "content": {
+ "application/json": {
+ "schema": {
+ "$ref": "#/components/schemas/ActivationStatusResponse"
+ }
+ }
+ }
+ }
+ },
"security": [
{
"APIKeyHeader": []
@@ -5179,37 +5489,158 @@
{
"HTTPBearer": []
}
+ ]
+ }
+ },
+ "/api/v1/analytics/activation/funnel": {
+ "get": {
+ "tags": [
+ "analytics"
],
- "parameters": [
+ "summary": "Get Activation Funnel",
+ "description": "Get activation funnel statistics.\n\nReturns aggregated funnel metrics showing:\n- Total tenants created in period\n- Tenants that created at least one issue\n- Tenants that completed at least one investigation\n- Tenants that achieved activation\n\nNote: This endpoint requires admin permissions in production.",
+ "operationId": "get_activation_funnel_api_v1_analytics_activation_funnel_get",
+ "security": [
{
- "name": "job_id",
- "in": "path",
- "required": true,
- "schema": {
- "type": "string",
- "title": "Job Id"
- }
+ "APIKeyHeader": []
},
{
- "name": "limit",
+ "HTTPBearer": []
+ }
+ ],
+ "parameters": [
+ {
+ "name": "days",
"in": "query",
"required": false,
"schema": {
"type": "integer",
- "maximum": 100,
+ "maximum": 365,
"minimum": 1,
- "description": "Maximum runs to return",
- "default": 10,
- "title": "Limit"
+ "description": "Look back period in days",
+ "default": 90,
+ "title": "Days"
},
- "description": "Maximum runs to return"
- },
- {
- "name": "provider",
- "in": "query",
- "required": false,
- "schema": {
- "type": "string",
+ "description": "Look back period in days"
+ }
+ ],
+ "responses": {
+ "200": {
+ "description": "Successful Response",
+ "content": {
+ "application/json": {
+ "schema": {
+ "$ref": "#/components/schemas/ActivationFunnelResponse"
+ }
+ }
+ }
+ },
+ "422": {
+ "description": "Validation Error",
+ "content": {
+ "application/json": {
+ "schema": {
+ "$ref": "#/components/schemas/HTTPValidationError"
+ }
+ }
+ }
+ }
+ }
+ }
+ },
+ "/api/v1/analytics/refresh": {
+ "post": {
+ "tags": [
+ "analytics"
+ ],
+ "summary": "Refresh Weekly Stats",
+ "description": "Refresh the weekly usage statistics materialized view.\n\nThis is typically called by a scheduled job but can be triggered manually.",
+ "operationId": "refresh_weekly_stats_api_v1_analytics_refresh_post",
+ "responses": {
+ "204": {
+ "description": "Successful Response"
+ }
+ },
+ "security": [
+ {
+ "APIKeyHeader": []
+ },
+ {
+ "HTTPBearer": []
+ }
+ ]
+ }
+ },
+ "/api/v1/lineage/providers": {
+ "get": {
+ "tags": [
+ "lineage"
+ ],
+ "summary": "List Providers",
+ "description": "List all available lineage providers.\n\nReturns the configuration schema for each provider, which can be used\nto dynamically generate connection forms in the frontend.",
+ "operationId": "list_providers_api_v1_lineage_providers_get",
+ "responses": {
+ "200": {
+ "description": "Successful Response",
+ "content": {
+ "application/json": {
+ "schema": {
+ "$ref": "#/components/schemas/LineageProvidersResponse"
+ }
+ }
+ }
+ }
+ }
+ }
+ },
+ "/api/v1/lineage/upstream": {
+ "get": {
+ "tags": [
+ "lineage"
+ ],
+ "summary": "Get Upstream",
+ "description": "Get upstream (parent) datasets.\n\nReturns datasets that feed into the specified dataset.",
+ "operationId": "get_upstream_api_v1_lineage_upstream_get",
+ "security": [
+ {
+ "APIKeyHeader": []
+ },
+ {
+ "HTTPBearer": []
+ }
+ ],
+ "parameters": [
+ {
+ "name": "dataset",
+ "in": "query",
+ "required": true,
+ "schema": {
+ "type": "string",
+ "description": "Dataset identifier (platform://name)",
+ "title": "Dataset"
+ },
+ "description": "Dataset identifier (platform://name)"
+ },
+ {
+ "name": "depth",
+ "in": "query",
+ "required": false,
+ "schema": {
+ "type": "integer",
+ "maximum": 10,
+ "minimum": 1,
+ "description": "Depth of lineage traversal",
+ "default": 1,
+ "title": "Depth"
+ },
+ "description": "Depth of lineage traversal"
+ },
+ {
+ "name": "provider",
+ "in": "query",
+ "required": false,
+ "schema": {
+ "type": "string",
"description": "Lineage provider to use",
"default": "dbt",
"title": "Provider"
@@ -5259,7 +5690,7 @@
"content": {
"application/json": {
"schema": {
- "$ref": "#/components/schemas/JobRunsResponse"
+ "$ref": "#/components/schemas/UpstreamResponse"
}
}
}
@@ -5277,14 +5708,14 @@
}
}
},
- "/api/v1/lineage/search": {
+ "/api/v1/lineage/downstream": {
"get": {
"tags": [
"lineage"
],
- "summary": "Search Datasets",
- "description": "Search for datasets by name or description.\n\nReturns datasets matching the search query.",
- "operationId": "search_datasets_api_v1_lineage_search_get",
+ "summary": "Get Downstream",
+ "description": "Get downstream (child) datasets.\n\nReturns datasets that depend on the specified dataset.",
+ "operationId": "get_downstream_api_v1_lineage_downstream_get",
"security": [
{
"APIKeyHeader": []
@@ -5295,30 +5726,29 @@
],
"parameters": [
{
- "name": "q",
+ "name": "dataset",
"in": "query",
"required": true,
"schema": {
"type": "string",
- "minLength": 1,
- "description": "Search query",
- "title": "Q"
+ "description": "Dataset identifier (platform://name)",
+ "title": "Dataset"
},
- "description": "Search query"
+ "description": "Dataset identifier (platform://name)"
},
{
- "name": "limit",
+ "name": "depth",
"in": "query",
"required": false,
"schema": {
"type": "integer",
- "maximum": 100,
+ "maximum": 10,
"minimum": 1,
- "description": "Maximum results",
- "default": 20,
- "title": "Limit"
+ "description": "Depth of lineage traversal",
+ "default": 1,
+ "title": "Depth"
},
- "description": "Maximum results"
+ "description": "Depth of lineage traversal"
},
{
"name": "provider",
@@ -5375,7 +5805,7 @@
"content": {
"application/json": {
"schema": {
- "$ref": "#/components/schemas/SearchResultsResponse"
+ "$ref": "#/components/schemas/DownstreamResponse"
}
}
}
@@ -5393,14 +5823,14 @@
}
}
},
- "/api/v1/lineage/datasets": {
+ "/api/v1/lineage/graph": {
"get": {
"tags": [
"lineage"
],
- "summary": "List Datasets",
- "description": "List datasets with optional filters.\n\nReturns datasets from the lineage provider.",
- "operationId": "list_datasets_api_v1_lineage_datasets_get",
+ "summary": "Get Lineage Graph",
+ "description": "Get full lineage graph around a dataset.\n\nReturns a graph structure with datasets, edges, and jobs.",
+ "operationId": "get_lineage_graph_api_v1_lineage_graph_get",
"security": [
{
"APIKeyHeader": []
@@ -5411,72 +5841,43 @@
],
"parameters": [
{
- "name": "platform",
- "in": "query",
- "required": false,
- "schema": {
- "anyOf": [
- {
- "type": "string"
- },
- {
- "type": "null"
- }
- ],
- "description": "Filter by platform",
- "title": "Platform"
- },
- "description": "Filter by platform"
- },
- {
- "name": "database",
+ "name": "dataset",
"in": "query",
- "required": false,
+ "required": true,
"schema": {
- "anyOf": [
- {
- "type": "string"
- },
- {
- "type": "null"
- }
- ],
- "description": "Filter by database",
- "title": "Database"
+ "type": "string",
+ "description": "Dataset identifier (platform://name)",
+ "title": "Dataset"
},
- "description": "Filter by database"
+ "description": "Dataset identifier (platform://name)"
},
{
- "name": "schema",
+ "name": "upstream_depth",
"in": "query",
"required": false,
"schema": {
- "anyOf": [
- {
- "type": "string"
- },
- {
- "type": "null"
- }
- ],
- "description": "Filter by schema",
- "title": "Schema"
+ "type": "integer",
+ "maximum": 10,
+ "minimum": 0,
+ "description": "Upstream traversal depth",
+ "default": 3,
+ "title": "Upstream Depth"
},
- "description": "Filter by schema"
+ "description": "Upstream traversal depth"
},
{
- "name": "limit",
+ "name": "downstream_depth",
"in": "query",
"required": false,
"schema": {
"type": "integer",
- "maximum": 1000,
- "minimum": 1,
- "description": "Maximum results",
- "default": 100,
- "title": "Limit"
+ "maximum": 10,
+ "minimum": 0,
+ "description": "Downstream traversal depth",
+ "default": 3,
+ "title": "Downstream Depth"
},
- "description": "Maximum results"
+ "description": "Downstream traversal depth"
},
{
"name": "provider",
@@ -5533,7 +5934,1270 @@
"content": {
"application/json": {
"schema": {
- "$ref": "#/components/schemas/SearchResultsResponse"
+ "$ref": "#/components/schemas/dataing__entrypoints__api__routes__lineage__LineageGraphResponse"
+ }
+ }
+ }
+ },
+ "422": {
+ "description": "Validation Error",
+ "content": {
+ "application/json": {
+ "schema": {
+ "$ref": "#/components/schemas/HTTPValidationError"
+ }
+ }
+ }
+ }
+ }
+ }
+ },
+ "/api/v1/lineage/column-lineage": {
+ "get": {
+ "tags": [
+ "lineage"
+ ],
+ "summary": "Get Column Lineage",
+ "description": "Get column-level lineage.\n\nReturns the source columns that feed into the specified column.\nNot all providers support column lineage.",
+ "operationId": "get_column_lineage_api_v1_lineage_column_lineage_get",
+ "security": [
+ {
+ "APIKeyHeader": []
+ },
+ {
+ "HTTPBearer": []
+ }
+ ],
+ "parameters": [
+ {
+ "name": "dataset",
+ "in": "query",
+ "required": true,
+ "schema": {
+ "type": "string",
+ "description": "Dataset identifier (platform://name)",
+ "title": "Dataset"
+ },
+ "description": "Dataset identifier (platform://name)"
+ },
+ {
+ "name": "column",
+ "in": "query",
+ "required": true,
+ "schema": {
+ "type": "string",
+ "description": "Column name to trace",
+ "title": "Column"
+ },
+ "description": "Column name to trace"
+ },
+ {
+ "name": "provider",
+ "in": "query",
+ "required": false,
+ "schema": {
+ "type": "string",
+ "description": "Lineage provider to use",
+ "default": "dbt",
+ "title": "Provider"
+ },
+ "description": "Lineage provider to use"
+ },
+ {
+ "name": "manifest_path",
+ "in": "query",
+ "required": false,
+ "schema": {
+ "anyOf": [
+ {
+ "type": "string"
+ },
+ {
+ "type": "null"
+ }
+ ],
+ "description": "Path to dbt manifest.json",
+ "title": "Manifest Path"
+ },
+ "description": "Path to dbt manifest.json"
+ },
+ {
+ "name": "base_url",
+ "in": "query",
+ "required": false,
+ "schema": {
+ "anyOf": [
+ {
+ "type": "string"
+ },
+ {
+ "type": "null"
+ }
+ ],
+ "description": "Base URL for API-based providers",
+ "title": "Base Url"
+ },
+ "description": "Base URL for API-based providers"
+ }
+ ],
+ "responses": {
+ "200": {
+ "description": "Successful Response",
+ "content": {
+ "application/json": {
+ "schema": {
+ "$ref": "#/components/schemas/ColumnLineageListResponse"
+ }
+ }
+ }
+ },
+ "422": {
+ "description": "Validation Error",
+ "content": {
+ "application/json": {
+ "schema": {
+ "$ref": "#/components/schemas/HTTPValidationError"
+ }
+ }
+ }
+ }
+ }
+ }
+ },
+ "/api/v1/lineage/job/{job_id}": {
+ "get": {
+ "tags": [
+ "lineage"
+ ],
+ "summary": "Get Job",
+ "description": "Get job details.\n\nReturns information about a job that produces or consumes datasets.",
+ "operationId": "get_job_api_v1_lineage_job__job_id__get",
+ "security": [
+ {
+ "APIKeyHeader": []
+ },
+ {
+ "HTTPBearer": []
+ }
+ ],
+ "parameters": [
+ {
+ "name": "job_id",
+ "in": "path",
+ "required": true,
+ "schema": {
+ "type": "string",
+ "title": "Job Id"
+ }
+ },
+ {
+ "name": "provider",
+ "in": "query",
+ "required": false,
+ "schema": {
+ "type": "string",
+ "description": "Lineage provider to use",
+ "default": "dbt",
+ "title": "Provider"
+ },
+ "description": "Lineage provider to use"
+ },
+ {
+ "name": "manifest_path",
+ "in": "query",
+ "required": false,
+ "schema": {
+ "anyOf": [
+ {
+ "type": "string"
+ },
+ {
+ "type": "null"
+ }
+ ],
+ "description": "Path to dbt manifest.json",
+ "title": "Manifest Path"
+ },
+ "description": "Path to dbt manifest.json"
+ },
+ {
+ "name": "base_url",
+ "in": "query",
+ "required": false,
+ "schema": {
+ "anyOf": [
+ {
+ "type": "string"
+ },
+ {
+ "type": "null"
+ }
+ ],
+ "description": "Base URL for API-based providers",
+ "title": "Base Url"
+ },
+ "description": "Base URL for API-based providers"
+ }
+ ],
+ "responses": {
+ "200": {
+ "description": "Successful Response",
+ "content": {
+ "application/json": {
+ "schema": {
+ "$ref": "#/components/schemas/JobResponse"
+ }
+ }
+ }
+ },
+ "422": {
+ "description": "Validation Error",
+ "content": {
+ "application/json": {
+ "schema": {
+ "$ref": "#/components/schemas/HTTPValidationError"
+ }
+ }
+ }
+ }
+ }
+ }
+ },
+ "/api/v1/lineage/job/{job_id}/runs": {
+ "get": {
+ "tags": [
+ "lineage"
+ ],
+ "summary": "Get Job Runs",
+ "description": "Get recent runs of a job.\n\nReturns execution history for the specified job.",
+ "operationId": "get_job_runs_api_v1_lineage_job__job_id__runs_get",
+ "security": [
+ {
+ "APIKeyHeader": []
+ },
+ {
+ "HTTPBearer": []
+ }
+ ],
+ "parameters": [
+ {
+ "name": "job_id",
+ "in": "path",
+ "required": true,
+ "schema": {
+ "type": "string",
+ "title": "Job Id"
+ }
+ },
+ {
+ "name": "limit",
+ "in": "query",
+ "required": false,
+ "schema": {
+ "type": "integer",
+ "maximum": 100,
+ "minimum": 1,
+ "description": "Maximum runs to return",
+ "default": 10,
+ "title": "Limit"
+ },
+ "description": "Maximum runs to return"
+ },
+ {
+ "name": "provider",
+ "in": "query",
+ "required": false,
+ "schema": {
+ "type": "string",
+ "description": "Lineage provider to use",
+ "default": "dbt",
+ "title": "Provider"
+ },
+ "description": "Lineage provider to use"
+ },
+ {
+ "name": "manifest_path",
+ "in": "query",
+ "required": false,
+ "schema": {
+ "anyOf": [
+ {
+ "type": "string"
+ },
+ {
+ "type": "null"
+ }
+ ],
+ "description": "Path to dbt manifest.json",
+ "title": "Manifest Path"
+ },
+ "description": "Path to dbt manifest.json"
+ },
+ {
+ "name": "base_url",
+ "in": "query",
+ "required": false,
+ "schema": {
+ "anyOf": [
+ {
+ "type": "string"
+ },
+ {
+ "type": "null"
+ }
+ ],
+ "description": "Base URL for API-based providers",
+ "title": "Base Url"
+ },
+ "description": "Base URL for API-based providers"
+ }
+ ],
+ "responses": {
+ "200": {
+ "description": "Successful Response",
+ "content": {
+ "application/json": {
+ "schema": {
+ "$ref": "#/components/schemas/JobRunsResponse"
+ }
+ }
+ }
+ },
+ "422": {
+ "description": "Validation Error",
+ "content": {
+ "application/json": {
+ "schema": {
+ "$ref": "#/components/schemas/HTTPValidationError"
+ }
+ }
+ }
+ }
+ }
+ }
+ },
+ "/api/v1/lineage/search": {
+ "get": {
+ "tags": [
+ "lineage"
+ ],
+ "summary": "Search Datasets",
+ "description": "Search for datasets by name or description.\n\nReturns datasets matching the search query.",
+ "operationId": "search_datasets_api_v1_lineage_search_get",
+ "security": [
+ {
+ "APIKeyHeader": []
+ },
+ {
+ "HTTPBearer": []
+ }
+ ],
+ "parameters": [
+ {
+ "name": "q",
+ "in": "query",
+ "required": true,
+ "schema": {
+ "type": "string",
+ "minLength": 1,
+ "description": "Search query",
+ "title": "Q"
+ },
+ "description": "Search query"
+ },
+ {
+ "name": "limit",
+ "in": "query",
+ "required": false,
+ "schema": {
+ "type": "integer",
+ "maximum": 100,
+ "minimum": 1,
+ "description": "Maximum results",
+ "default": 20,
+ "title": "Limit"
+ },
+ "description": "Maximum results"
+ },
+ {
+ "name": "provider",
+ "in": "query",
+ "required": false,
+ "schema": {
+ "type": "string",
+ "description": "Lineage provider to use",
+ "default": "dbt",
+ "title": "Provider"
+ },
+ "description": "Lineage provider to use"
+ },
+ {
+ "name": "manifest_path",
+ "in": "query",
+ "required": false,
+ "schema": {
+ "anyOf": [
+ {
+ "type": "string"
+ },
+ {
+ "type": "null"
+ }
+ ],
+ "description": "Path to dbt manifest.json",
+ "title": "Manifest Path"
+ },
+ "description": "Path to dbt manifest.json"
+ },
+ {
+ "name": "base_url",
+ "in": "query",
+ "required": false,
+ "schema": {
+ "anyOf": [
+ {
+ "type": "string"
+ },
+ {
+ "type": "null"
+ }
+ ],
+ "description": "Base URL for API-based providers",
+ "title": "Base Url"
+ },
+ "description": "Base URL for API-based providers"
+ }
+ ],
+ "responses": {
+ "200": {
+ "description": "Successful Response",
+ "content": {
+ "application/json": {
+ "schema": {
+ "$ref": "#/components/schemas/SearchResultsResponse"
+ }
+ }
+ }
+ },
+ "422": {
+ "description": "Validation Error",
+ "content": {
+ "application/json": {
+ "schema": {
+ "$ref": "#/components/schemas/HTTPValidationError"
+ }
+ }
+ }
+ }
+ }
+ }
+ },
+ "/api/v1/lineage/datasets": {
+ "get": {
+ "tags": [
+ "lineage"
+ ],
+ "summary": "List Datasets",
+ "description": "List datasets with optional filters.\n\nReturns datasets from the lineage provider.",
+ "operationId": "list_datasets_api_v1_lineage_datasets_get",
+ "security": [
+ {
+ "APIKeyHeader": []
+ },
+ {
+ "HTTPBearer": []
+ }
+ ],
+ "parameters": [
+ {
+ "name": "platform",
+ "in": "query",
+ "required": false,
+ "schema": {
+ "anyOf": [
+ {
+ "type": "string"
+ },
+ {
+ "type": "null"
+ }
+ ],
+ "description": "Filter by platform",
+ "title": "Platform"
+ },
+ "description": "Filter by platform"
+ },
+ {
+ "name": "database",
+ "in": "query",
+ "required": false,
+ "schema": {
+ "anyOf": [
+ {
+ "type": "string"
+ },
+ {
+ "type": "null"
+ }
+ ],
+ "description": "Filter by database",
+ "title": "Database"
+ },
+ "description": "Filter by database"
+ },
+ {
+ "name": "schema",
+ "in": "query",
+ "required": false,
+ "schema": {
+ "anyOf": [
+ {
+ "type": "string"
+ },
+ {
+ "type": "null"
+ }
+ ],
+ "description": "Filter by schema",
+ "title": "Schema"
+ },
+ "description": "Filter by schema"
+ },
+ {
+ "name": "limit",
+ "in": "query",
+ "required": false,
+ "schema": {
+ "type": "integer",
+ "maximum": 1000,
+ "minimum": 1,
+ "description": "Maximum results",
+ "default": 100,
+ "title": "Limit"
+ },
+ "description": "Maximum results"
+ },
+ {
+ "name": "provider",
+ "in": "query",
+ "required": false,
+ "schema": {
+ "type": "string",
+ "description": "Lineage provider to use",
+ "default": "dbt",
+ "title": "Provider"
+ },
+ "description": "Lineage provider to use"
+ },
+ {
+ "name": "manifest_path",
+ "in": "query",
+ "required": false,
+ "schema": {
+ "anyOf": [
+ {
+ "type": "string"
+ },
+ {
+ "type": "null"
+ }
+ ],
+ "description": "Path to dbt manifest.json",
+ "title": "Manifest Path"
+ },
+ "description": "Path to dbt manifest.json"
+ },
+ {
+ "name": "base_url",
+ "in": "query",
+ "required": false,
+ "schema": {
+ "anyOf": [
+ {
+ "type": "string"
+ },
+ {
+ "type": "null"
+ }
+ ],
+ "description": "Base URL for API-based providers",
+ "title": "Base Url"
+ },
+ "description": "Base URL for API-based providers"
+ }
+ ],
+ "responses": {
+ "200": {
+ "description": "Successful Response",
+ "content": {
+ "application/json": {
+ "schema": {
+ "$ref": "#/components/schemas/SearchResultsResponse"
+ }
+ }
+ }
+ },
+ "422": {
+ "description": "Validation Error",
+ "content": {
+ "application/json": {
+ "schema": {
+ "$ref": "#/components/schemas/HTTPValidationError"
+ }
+ }
+ }
+ }
+ }
+ }
+ },
+ "/api/v1/lineage/dataset/{dataset_id}": {
+ "get": {
+ "tags": [
+ "lineage"
+ ],
+ "summary": "Get Dataset",
+ "description": "Get dataset details.\n\nReturns metadata for a specific dataset.",
+ "operationId": "get_dataset_api_v1_lineage_dataset__dataset_id__get",
+ "security": [
+ {
+ "APIKeyHeader": []
+ },
+ {
+ "HTTPBearer": []
+ }
+ ],
+ "parameters": [
+ {
+ "name": "dataset_id",
+ "in": "path",
+ "required": true,
+ "schema": {
+ "type": "string",
+ "title": "Dataset Id"
+ }
+ },
+ {
+ "name": "provider",
+ "in": "query",
+ "required": false,
+ "schema": {
+ "type": "string",
+ "description": "Lineage provider to use",
+ "default": "dbt",
+ "title": "Provider"
+ },
+ "description": "Lineage provider to use"
+ },
+ {
+ "name": "manifest_path",
+ "in": "query",
+ "required": false,
+ "schema": {
+ "anyOf": [
+ {
+ "type": "string"
+ },
+ {
+ "type": "null"
+ }
+ ],
+ "description": "Path to dbt manifest.json",
+ "title": "Manifest Path"
+ },
+ "description": "Path to dbt manifest.json"
+ },
+ {
+ "name": "base_url",
+ "in": "query",
+ "required": false,
+ "schema": {
+ "anyOf": [
+ {
+ "type": "string"
+ },
+ {
+ "type": "null"
+ }
+ ],
+ "description": "Base URL for API-based providers",
+ "title": "Base Url"
+ },
+ "description": "Base URL for API-based providers"
+ }
+ ],
+ "responses": {
+ "200": {
+ "description": "Successful Response",
+ "content": {
+ "application/json": {
+ "schema": {
+ "$ref": "#/components/schemas/DatasetResponse"
+ }
+ }
+ }
+ },
+ "422": {
+ "description": "Validation Error",
+ "content": {
+ "application/json": {
+ "schema": {
+ "$ref": "#/components/schemas/HTTPValidationError"
+ }
+ }
+ }
+ }
+ }
+ }
+ },
+ "/api/v1/context/bundles": {
+ "post": {
+ "tags": [
+ "context"
+ ],
+ "summary": "Create Bundle",
+ "description": "Create a context bundle for the given assets.\n\nThis endpoint resolves asset references to concrete datasources,\ngathers context (lineage, operational facts, anomalies), and returns\na cacheable bundle.\n\nIf any asset reference is ambiguous (matches multiple datasources),\nreturns 409 with candidates and a hint for disambiguation.\n\nThe response includes:\n- bundle_id: Unique identifier for this bundle\n- resolved_assets: Per-asset resolution with datasource binding\n- bundle_hash: Server-computed cache key (also returned as ETag header)\n- Context data (lineage, operational, anomalies) based on request flags",
+ "operationId": "create_bundle_api_v1_context_bundles_post",
+ "requestBody": {
+ "content": {
+ "application/json": {
+ "schema": {
+ "$ref": "#/components/schemas/CreateBundleRequest"
+ }
+ }
+ },
+ "required": true
+ },
+ "responses": {
+ "200": {
+ "description": "Successful Response",
+ "content": {
+ "application/json": {
+ "schema": {
+ "$ref": "#/components/schemas/ContextBundleResponse"
+ }
+ }
+ }
+ },
+ "409": {
+ "description": "Ambiguous assets - multiple datasources match",
+ "content": {
+ "application/json": {
+ "schema": {
+ "$ref": "#/components/schemas/AmbiguousAssetsResponse"
+ }
+ }
+ }
+ },
+ "422": {
+ "description": "Validation Error",
+ "content": {
+ "application/json": {
+ "schema": {
+ "$ref": "#/components/schemas/HTTPValidationError"
+ }
+ }
+ }
+ }
+ },
+ "security": [
+ {
+ "APIKeyHeader": []
+ },
+ {
+ "HTTPBearer": []
+ }
+ ]
+ }
+ },
+ "/api/v1/context/bundles/{bundle_id}": {
+ "get": {
+ "tags": [
+ "context"
+ ],
+ "summary": "Get Bundle",
+ "description": "Get an existing context bundle by ID.\n\nReturns the bundle if found and belongs to the authenticated tenant.",
+ "operationId": "get_bundle_api_v1_context_bundles__bundle_id__get",
+ "security": [
+ {
+ "APIKeyHeader": []
+ },
+ {
+ "HTTPBearer": []
+ }
+ ],
+ "parameters": [
+ {
+ "name": "bundle_id",
+ "in": "path",
+ "required": true,
+ "schema": {
+ "type": "string",
+ "title": "Bundle Id"
+ }
+ }
+ ],
+ "responses": {
+ "200": {
+ "description": "Successful Response",
+ "content": {
+ "application/json": {
+ "schema": {
+ "$ref": "#/components/schemas/ContextBundleResponse"
+ }
+ }
+ }
+ },
+ "422": {
+ "description": "Validation Error",
+ "content": {
+ "application/json": {
+ "schema": {
+ "$ref": "#/components/schemas/HTTPValidationError"
+ }
+ }
+ }
+ }
+ }
+ }
+ },
+ "/api/v1/context/diff": {
+ "post": {
+ "tags": [
+ "context"
+ ],
+ "summary": "Compute Diff",
+ "description": "Compute metric difference over a time window.\n\nCompares the current value of a metric to its historical value\nbased on the specified time window.\n\nSupported metrics:\n- row_count: Number of rows in the dataset\n- null_rate: Percentage of null values\n- distinct_count: Number of distinct values\n- freshness: Time since last update\n\nReturns the current and previous values, absolute delta,\npercentage change, and trend direction.",
+ "operationId": "compute_diff_api_v1_context_diff_post",
+ "requestBody": {
+ "content": {
+ "application/json": {
+ "schema": {
+ "$ref": "#/components/schemas/DiffRequest"
+ }
+ }
+ },
+ "required": true
+ },
+ "responses": {
+ "200": {
+ "description": "Successful Response",
+ "content": {
+ "application/json": {
+ "schema": {
+ "$ref": "#/components/schemas/DiffResponse"
+ }
+ }
+ }
+ },
+ "422": {
+ "description": "Validation Error",
+ "content": {
+ "application/json": {
+ "schema": {
+ "$ref": "#/components/schemas/HTTPValidationError"
+ }
+ }
+ }
+ }
+ },
+ "security": [
+ {
+ "APIKeyHeader": []
+ },
+ {
+ "HTTPBearer": []
+ }
+ ]
+ }
+ },
+ "/api/v1/context/explain": {
+ "post": {
+ "tags": [
+ "context"
+ ],
+ "summary": "Explain Context",
+ "description": "Get an AI-powered explanation of the context bundle.\n\nAnalyzes the assets, lineage, and anomalies in the bundle\nand provides a natural language explanation with insights\nand recommendations.\n\nFocus areas:\n- anomalies: Focus on detected data quality issues\n- lineage: Focus on data dependencies and flow\n- data_quality: General data quality assessment",
+ "operationId": "explain_context_api_v1_context_explain_post",
+ "requestBody": {
+ "content": {
+ "application/json": {
+ "schema": {
+ "$ref": "#/components/schemas/ExplainRequest"
+ }
+ }
+ },
+ "required": true
+ },
+ "responses": {
+ "200": {
+ "description": "Successful Response",
+ "content": {
+ "application/json": {
+ "schema": {
+ "$ref": "#/components/schemas/ExplainResponse"
+ }
+ }
+ }
+ },
+ "422": {
+ "description": "Validation Error",
+ "content": {
+ "application/json": {
+ "schema": {
+ "$ref": "#/components/schemas/HTTPValidationError"
+ }
+ }
+ }
+ }
+ },
+ "security": [
+ {
+ "APIKeyHeader": []
+ },
+ {
+ "HTTPBearer": []
+ }
+ ]
+ }
+ },
+ "/api/v1/notifications": {
+ "get": {
+ "tags": [
+ "notifications"
+ ],
+ "summary": "List Notifications",
+ "description": "List notifications for the current user.\n\nUses cursor-based pagination for efficient traversal.\nCursor format: base64(created_at|id)",
+ "operationId": "list_notifications_api_v1_notifications_get",
+ "security": [
+ {
+ "APIKeyHeader": []
+ },
+ {
+ "HTTPBearer": []
+ }
+ ],
+ "parameters": [
+ {
+ "name": "limit",
+ "in": "query",
+ "required": false,
+ "schema": {
+ "type": "integer",
+ "maximum": 100,
+ "minimum": 1,
+ "description": "Max notifications to return",
+ "default": 50,
+ "title": "Limit"
+ },
+ "description": "Max notifications to return"
+ },
+ {
+ "name": "cursor",
+ "in": "query",
+ "required": false,
+ "schema": {
+ "anyOf": [
+ {
+ "type": "string"
+ },
+ {
+ "type": "null"
+ }
+ ],
+ "description": "Pagination cursor",
+ "title": "Cursor"
+ },
+ "description": "Pagination cursor"
+ },
+ {
+ "name": "unread_only",
+ "in": "query",
+ "required": false,
+ "schema": {
+ "type": "boolean",
+ "description": "Only return unread notifications",
+ "default": false,
+ "title": "Unread Only"
+ },
+ "description": "Only return unread notifications"
+ }
+ ],
+ "responses": {
+ "200": {
+ "description": "Successful Response",
+ "content": {
+ "application/json": {
+ "schema": {
+ "$ref": "#/components/schemas/NotificationListResponse"
+ }
+ }
+ }
+ },
+ "422": {
+ "description": "Validation Error",
+ "content": {
+ "application/json": {
+ "schema": {
+ "$ref": "#/components/schemas/HTTPValidationError"
+ }
+ }
+ }
+ }
+ }
+ }
+ },
+ "/api/v1/notifications/{notification_id}/read": {
+ "put": {
+ "tags": [
+ "notifications"
+ ],
+ "summary": "Mark Notification Read",
+ "description": "Mark a notification as read.\n\nIdempotent - returns 204 even if already read.\nReturns 404 if notification doesn't exist or belongs to another tenant.",
+ "operationId": "mark_notification_read_api_v1_notifications__notification_id__read_put",
+ "security": [
+ {
+ "APIKeyHeader": []
+ },
+ {
+ "HTTPBearer": []
+ }
+ ],
+ "parameters": [
+ {
+ "name": "notification_id",
+ "in": "path",
+ "required": true,
+ "schema": {
+ "type": "string",
+ "format": "uuid",
+ "title": "Notification Id"
+ }
+ }
+ ],
+ "responses": {
+ "204": {
+ "description": "Successful Response"
+ },
+ "422": {
+ "description": "Validation Error",
+ "content": {
+ "application/json": {
+ "schema": {
+ "$ref": "#/components/schemas/HTTPValidationError"
+ }
+ }
+ }
+ }
+ }
+ }
+ },
+ "/api/v1/notifications/read-all": {
+ "post": {
+ "tags": [
+ "notifications"
+ ],
+ "summary": "Mark All Notifications Read",
+ "description": "Mark all notifications as read for the current user.\n\nReturns count of notifications marked and a cursor pointing to\nthe newest marked notification for resumability.",
+ "operationId": "mark_all_notifications_read_api_v1_notifications_read_all_post",
+ "responses": {
+ "200": {
+ "description": "Successful Response",
+ "content": {
+ "application/json": {
+ "schema": {
+ "$ref": "#/components/schemas/MarkAllReadResponse"
+ }
+ }
+ }
+ }
+ },
+ "security": [
+ {
+ "APIKeyHeader": []
+ },
+ {
+ "HTTPBearer": []
+ }
+ ]
+ }
+ },
+ "/api/v1/notifications/unread-count": {
+ "get": {
+ "tags": [
+ "notifications"
+ ],
+ "summary": "Get Unread Count",
+ "description": "Get count of unread notifications for the current user.",
+ "operationId": "get_unread_count_api_v1_notifications_unread_count_get",
+ "responses": {
+ "200": {
+ "description": "Successful Response",
+ "content": {
+ "application/json": {
+ "schema": {
+ "$ref": "#/components/schemas/UnreadCountResponse"
+ }
+ }
+ }
+ }
+ },
+ "security": [
+ {
+ "APIKeyHeader": []
+ },
+ {
+ "HTTPBearer": []
+ }
+ ]
+ }
+ },
+ "/api/v1/notifications/stream": {
+ "get": {
+ "tags": [
+ "notifications"
+ ],
+ "summary": "Notification Stream",
+ "description": "Stream real-time notifications via Server-Sent Events.\n\nBrowser EventSource can't send headers, so JWT is accepted via query param.\nThe auth middleware already handles `?token=` for SSE endpoints.\n\nEvents:\n- `notification`: New notification (includes cursor for resume)\n- `heartbeat`: Keep-alive every 30 seconds\n\nExample:\n GET /notifications/stream?token=&after=\n\nReturns:\n EventSourceResponse with SSE stream.",
+ "operationId": "notification_stream_api_v1_notifications_stream_get",
+ "security": [
+ {
+ "APIKeyHeader": []
+ },
+ {
+ "HTTPBearer": []
+ }
+ ],
+ "parameters": [
+ {
+ "name": "after",
+ "in": "query",
+ "required": false,
+ "schema": {
+ "anyOf": [
+ {
+ "type": "string"
+ },
+ {
+ "type": "null"
+ }
+ ],
+ "description": "Resume from notification ID (for reconnect)",
+ "title": "After"
+ },
+ "description": "Resume from notification ID (for reconnect)"
+ }
+ ],
+ "responses": {
+ "200": {
+ "description": "Successful Response",
+ "content": {
+ "application/json": {
+ "schema": {}
+ }
+ }
+ },
+ "422": {
+ "description": "Validation Error",
+ "content": {
+ "application/json": {
+ "schema": {
+ "$ref": "#/components/schemas/HTTPValidationError"
+ }
+ }
+ }
+ }
+ }
+ }
+ },
+ "/api/v1/investigation-feedback/": {
+ "post": {
+ "tags": [
+ "investigation-feedback"
+ ],
+ "summary": "Submit Feedback",
+ "description": "Submit feedback on a hypothesis, query, evidence, synthesis, or investigation.",
+ "operationId": "submit_feedback_api_v1_investigation_feedback__post",
+ "requestBody": {
+ "content": {
+ "application/json": {
+ "schema": {
+ "$ref": "#/components/schemas/FeedbackCreate"
+ }
+ }
+ },
+ "required": true
+ },
+ "responses": {
+ "201": {
+ "description": "Successful Response",
+ "content": {
+ "application/json": {
+ "schema": {
+ "$ref": "#/components/schemas/FeedbackResponse"
+ }
+ }
+ }
+ },
+ "422": {
+ "description": "Validation Error",
+ "content": {
+ "application/json": {
+ "schema": {
+ "$ref": "#/components/schemas/HTTPValidationError"
+ }
+ }
+ }
+ }
+ },
+ "security": [
+ {
+ "APIKeyHeader": []
+ },
+ {
+ "HTTPBearer": []
+ }
+ ]
+ }
+ },
+ "/api/v1/investigation-feedback/investigations/{investigation_id}": {
+ "get": {
+ "tags": [
+ "investigation-feedback"
+ ],
+ "summary": "Get Investigation Feedback",
+ "description": "Get current user's feedback for an investigation.\n\nArgs:\n investigation_id: The investigation to get feedback for.\n auth: Authentication context.\n db: Application database.\n\nReturns:\n List of feedback items for the investigation.",
+ "operationId": "get_investigation_feedback_api_v1_investigation_feedback_investigations__investigation_id__get",
+ "security": [
+ {
+ "APIKeyHeader": []
+ },
+ {
+ "HTTPBearer": []
+ }
+ ],
+ "parameters": [
+ {
+ "name": "investigation_id",
+ "in": "path",
+ "required": true,
+ "schema": {
+ "type": "string",
+ "format": "uuid",
+ "title": "Investigation Id"
+ }
+ }
+ ],
+ "responses": {
+ "200": {
+ "description": "Successful Response",
+ "content": {
+ "application/json": {
+ "schema": {
+ "type": "array",
+ "items": {
+ "$ref": "#/components/schemas/FeedbackItem"
+ },
+ "title": "Response Get Investigation Feedback Api V1 Investigation Feedback Investigations Investigation Id Get"
}
}
}
@@ -5551,14 +7215,14 @@
}
}
},
- "/api/v1/lineage/dataset/{dataset_id}": {
+ "/api/v1/datasets/{dataset_id}/schema-comments": {
"get": {
"tags": [
- "lineage"
+ "schema-comments"
],
- "summary": "Get Dataset",
- "description": "Get dataset details.\n\nReturns metadata for a specific dataset.",
- "operationId": "get_dataset_api_v1_lineage_dataset__dataset_id__get",
+ "summary": "List Schema Comments",
+ "description": "List schema comments for a dataset.",
+ "operationId": "list_schema_comments_api_v1_datasets__dataset_id__schema_comments_get",
"security": [
{
"APIKeyHeader": []
@@ -5574,41 +7238,12 @@
"required": true,
"schema": {
"type": "string",
+ "format": "uuid",
"title": "Dataset Id"
}
},
{
- "name": "provider",
- "in": "query",
- "required": false,
- "schema": {
- "type": "string",
- "description": "Lineage provider to use",
- "default": "dbt",
- "title": "Provider"
- },
- "description": "Lineage provider to use"
- },
- {
- "name": "manifest_path",
- "in": "query",
- "required": false,
- "schema": {
- "anyOf": [
- {
- "type": "string"
- },
- {
- "type": "null"
- }
- ],
- "description": "Path to dbt manifest.json",
- "title": "Manifest Path"
- },
- "description": "Path to dbt manifest.json"
- },
- {
- "name": "base_url",
+ "name": "field_name",
"in": "query",
"required": false,
"schema": {
@@ -5620,10 +7255,8 @@
"type": "null"
}
],
- "description": "Base URL for API-based providers",
- "title": "Base Url"
- },
- "description": "Base URL for API-based providers"
+ "title": "Field Name"
+ }
}
],
"responses": {
@@ -5632,7 +7265,11 @@
"content": {
"application/json": {
"schema": {
- "$ref": "#/components/schemas/DatasetResponse"
+ "type": "array",
+ "items": {
+ "$ref": "#/components/schemas/SchemaCommentResponse"
+ },
+ "title": "Response List Schema Comments Api V1 Datasets Dataset Id Schema Comments Get"
}
}
}
@@ -5648,43 +7285,51 @@
}
}
}
- }
- },
- "/api/v1/context/bundles": {
+ },
"post": {
"tags": [
- "context"
+ "schema-comments"
+ ],
+ "summary": "Create Schema Comment",
+ "description": "Create a schema comment.",
+ "operationId": "create_schema_comment_api_v1_datasets__dataset_id__schema_comments_post",
+ "security": [
+ {
+ "APIKeyHeader": []
+ },
+ {
+ "HTTPBearer": []
+ }
+ ],
+ "parameters": [
+ {
+ "name": "dataset_id",
+ "in": "path",
+ "required": true,
+ "schema": {
+ "type": "string",
+ "format": "uuid",
+ "title": "Dataset Id"
+ }
+ }
],
- "summary": "Create Bundle",
- "description": "Create a context bundle for the given assets.\n\nThis endpoint resolves asset references to concrete datasources,\ngathers context (lineage, operational facts, anomalies), and returns\na cacheable bundle.\n\nIf any asset reference is ambiguous (matches multiple datasources),\nreturns 409 with candidates and a hint for disambiguation.\n\nThe response includes:\n- bundle_id: Unique identifier for this bundle\n- resolved_assets: Per-asset resolution with datasource binding\n- bundle_hash: Server-computed cache key (also returned as ETag header)\n- Context data (lineage, operational, anomalies) based on request flags",
- "operationId": "create_bundle_api_v1_context_bundles_post",
"requestBody": {
+ "required": true,
"content": {
"application/json": {
"schema": {
- "$ref": "#/components/schemas/CreateBundleRequest"
+ "$ref": "#/components/schemas/SchemaCommentCreate"
}
}
- },
- "required": true
+ }
},
"responses": {
- "200": {
+ "201": {
"description": "Successful Response",
"content": {
"application/json": {
"schema": {
- "$ref": "#/components/schemas/ContextBundleResponse"
- }
- }
- }
- },
- "409": {
- "description": "Ambiguous assets - multiple datasources match",
- "content": {
- "application/json": {
- "schema": {
- "$ref": "#/components/schemas/AmbiguousAssetsResponse"
+ "$ref": "#/components/schemas/SchemaCommentResponse"
}
}
}
@@ -5699,25 +7344,17 @@
}
}
}
- },
- "security": [
- {
- "APIKeyHeader": []
- },
- {
- "HTTPBearer": []
- }
- ]
+ }
}
},
- "/api/v1/context/bundles/{bundle_id}": {
- "get": {
+ "/api/v1/datasets/{dataset_id}/schema-comments/{comment_id}": {
+ "patch": {
"tags": [
- "context"
+ "schema-comments"
],
- "summary": "Get Bundle",
- "description": "Get an existing context bundle by ID.\n\nReturns the bundle if found and belongs to the authenticated tenant.",
- "operationId": "get_bundle_api_v1_context_bundles__bundle_id__get",
+ "summary": "Update Schema Comment",
+ "description": "Update a schema comment.",
+ "operationId": "update_schema_comment_api_v1_datasets__dataset_id__schema_comments__comment_id__patch",
"security": [
{
"APIKeyHeader": []
@@ -5728,22 +7365,43 @@
],
"parameters": [
{
- "name": "bundle_id",
+ "name": "dataset_id",
"in": "path",
"required": true,
"schema": {
"type": "string",
- "title": "Bundle Id"
+ "format": "uuid",
+ "title": "Dataset Id"
+ }
+ },
+ {
+ "name": "comment_id",
+ "in": "path",
+ "required": true,
+ "schema": {
+ "type": "string",
+ "format": "uuid",
+ "title": "Comment Id"
}
}
],
+ "requestBody": {
+ "required": true,
+ "content": {
+ "application/json": {
+ "schema": {
+ "$ref": "#/components/schemas/SchemaCommentUpdate"
+ }
+ }
+ }
+ },
"responses": {
"200": {
"description": "Successful Response",
"content": {
"application/json": {
"schema": {
- "$ref": "#/components/schemas/ContextBundleResponse"
+ "$ref": "#/components/schemas/SchemaCommentResponse"
}
}
}
@@ -5759,33 +7417,100 @@
}
}
}
- }
- },
- "/api/v1/context/diff": {
- "post": {
+ },
+ "delete": {
"tags": [
- "context"
+ "schema-comments"
],
- "summary": "Compute Diff",
- "description": "Compute metric difference over a time window.\n\nCompares the current value of a metric to its historical value\nbased on the specified time window.\n\nSupported metrics:\n- row_count: Number of rows in the dataset\n- null_rate: Percentage of null values\n- distinct_count: Number of distinct values\n- freshness: Time since last update\n\nReturns the current and previous values, absolute delta,\npercentage change, and trend direction.",
- "operationId": "compute_diff_api_v1_context_diff_post",
- "requestBody": {
- "content": {
- "application/json": {
- "schema": {
- "$ref": "#/components/schemas/DiffRequest"
+ "summary": "Delete Schema Comment",
+ "description": "Delete a schema comment.",
+ "operationId": "delete_schema_comment_api_v1_datasets__dataset_id__schema_comments__comment_id__delete",
+ "security": [
+ {
+ "APIKeyHeader": []
+ },
+ {
+ "HTTPBearer": []
+ }
+ ],
+ "parameters": [
+ {
+ "name": "dataset_id",
+ "in": "path",
+ "required": true,
+ "schema": {
+ "type": "string",
+ "format": "uuid",
+ "title": "Dataset Id"
+ }
+ },
+ {
+ "name": "comment_id",
+ "in": "path",
+ "required": true,
+ "schema": {
+ "type": "string",
+ "format": "uuid",
+ "title": "Comment Id"
+ }
+ }
+ ],
+ "responses": {
+ "204": {
+ "description": "Successful Response"
+ },
+ "422": {
+ "description": "Validation Error",
+ "content": {
+ "application/json": {
+ "schema": {
+ "$ref": "#/components/schemas/HTTPValidationError"
+ }
}
}
+ }
+ }
+ }
+ },
+ "/api/v1/datasets/{dataset_id}/knowledge-comments": {
+ "get": {
+ "tags": [
+ "knowledge-comments"
+ ],
+ "summary": "List Knowledge Comments",
+ "description": "List knowledge comments for a dataset.",
+ "operationId": "list_knowledge_comments_api_v1_datasets__dataset_id__knowledge_comments_get",
+ "security": [
+ {
+ "APIKeyHeader": []
},
- "required": true
- },
+ {
+ "HTTPBearer": []
+ }
+ ],
+ "parameters": [
+ {
+ "name": "dataset_id",
+ "in": "path",
+ "required": true,
+ "schema": {
+ "type": "string",
+ "format": "uuid",
+ "title": "Dataset Id"
+ }
+ }
+ ],
"responses": {
"200": {
"description": "Successful Response",
"content": {
"application/json": {
"schema": {
- "$ref": "#/components/schemas/DiffResponse"
+ "type": "array",
+ "items": {
+ "$ref": "#/components/schemas/KnowledgeCommentResponse"
+ },
+ "title": "Response List Knowledge Comments Api V1 Datasets Dataset Id Knowledge Comments Get"
}
}
}
@@ -5800,7 +7525,15 @@
}
}
}
- },
+ }
+ },
+ "post": {
+ "tags": [
+ "knowledge-comments"
+ ],
+ "summary": "Create Knowledge Comment",
+ "description": "Create a knowledge comment.",
+ "operationId": "create_knowledge_comment_api_v1_datasets__dataset_id__knowledge_comments_post",
"security": [
{
"APIKeyHeader": []
@@ -5808,34 +7541,36 @@
{
"HTTPBearer": []
}
- ]
- }
- },
- "/api/v1/context/explain": {
- "post": {
- "tags": [
- "context"
],
- "summary": "Explain Context",
- "description": "Get an AI-powered explanation of the context bundle.\n\nAnalyzes the assets, lineage, and anomalies in the bundle\nand provides a natural language explanation with insights\nand recommendations.\n\nFocus areas:\n- anomalies: Focus on detected data quality issues\n- lineage: Focus on data dependencies and flow\n- data_quality: General data quality assessment",
- "operationId": "explain_context_api_v1_context_explain_post",
+ "parameters": [
+ {
+ "name": "dataset_id",
+ "in": "path",
+ "required": true,
+ "schema": {
+ "type": "string",
+ "format": "uuid",
+ "title": "Dataset Id"
+ }
+ }
+ ],
"requestBody": {
+ "required": true,
"content": {
"application/json": {
"schema": {
- "$ref": "#/components/schemas/ExplainRequest"
+ "$ref": "#/components/schemas/KnowledgeCommentCreate"
}
}
- },
- "required": true
+ }
},
"responses": {
- "200": {
+ "201": {
"description": "Successful Response",
"content": {
"application/json": {
"schema": {
- "$ref": "#/components/schemas/ExplainResponse"
+ "$ref": "#/components/schemas/KnowledgeCommentResponse"
}
}
}
@@ -5850,25 +7585,17 @@
}
}
}
- },
- "security": [
- {
- "APIKeyHeader": []
- },
- {
- "HTTPBearer": []
- }
- ]
+ }
}
},
- "/api/v1/notifications": {
- "get": {
+ "/api/v1/datasets/{dataset_id}/knowledge-comments/{comment_id}": {
+ "patch": {
"tags": [
- "notifications"
+ "knowledge-comments"
],
- "summary": "List Notifications",
- "description": "List notifications for the current user.\n\nUses cursor-based pagination for efficient traversal.\nCursor format: base64(created_at|id)",
- "operationId": "list_notifications_api_v1_notifications_get",
+ "summary": "Update Knowledge Comment",
+ "description": "Update a knowledge comment.",
+ "operationId": "update_knowledge_comment_api_v1_datasets__dataset_id__knowledge_comments__comment_id__patch",
"security": [
{
"APIKeyHeader": []
@@ -5879,57 +7606,43 @@
],
"parameters": [
{
- "name": "limit",
- "in": "query",
- "required": false,
- "schema": {
- "type": "integer",
- "maximum": 100,
- "minimum": 1,
- "description": "Max notifications to return",
- "default": 50,
- "title": "Limit"
- },
- "description": "Max notifications to return"
- },
- {
- "name": "cursor",
- "in": "query",
- "required": false,
+ "name": "dataset_id",
+ "in": "path",
+ "required": true,
"schema": {
- "anyOf": [
- {
- "type": "string"
- },
- {
- "type": "null"
- }
- ],
- "description": "Pagination cursor",
- "title": "Cursor"
- },
- "description": "Pagination cursor"
+ "type": "string",
+ "format": "uuid",
+ "title": "Dataset Id"
+ }
},
{
- "name": "unread_only",
- "in": "query",
- "required": false,
+ "name": "comment_id",
+ "in": "path",
+ "required": true,
"schema": {
- "type": "boolean",
- "description": "Only return unread notifications",
- "default": false,
- "title": "Unread Only"
- },
- "description": "Only return unread notifications"
+ "type": "string",
+ "format": "uuid",
+ "title": "Comment Id"
+ }
}
],
+ "requestBody": {
+ "required": true,
+ "content": {
+ "application/json": {
+ "schema": {
+ "$ref": "#/components/schemas/KnowledgeCommentUpdate"
+ }
+ }
+ }
+ },
"responses": {
"200": {
"description": "Successful Response",
"content": {
"application/json": {
"schema": {
- "$ref": "#/components/schemas/NotificationListResponse"
+ "$ref": "#/components/schemas/KnowledgeCommentResponse"
}
}
}
@@ -5945,16 +7658,14 @@
}
}
}
- }
- },
- "/api/v1/notifications/{notification_id}/read": {
- "put": {
+ },
+ "delete": {
"tags": [
- "notifications"
+ "knowledge-comments"
],
- "summary": "Mark Notification Read",
- "description": "Mark a notification as read.\n\nIdempotent - returns 204 even if already read.\nReturns 404 if notification doesn't exist or belongs to another tenant.",
- "operationId": "mark_notification_read_api_v1_notifications__notification_id__read_put",
+ "summary": "Delete Knowledge Comment",
+ "description": "Delete a knowledge comment.",
+ "operationId": "delete_knowledge_comment_api_v1_datasets__dataset_id__knowledge_comments__comment_id__delete",
"security": [
{
"APIKeyHeader": []
@@ -5965,13 +7676,23 @@
],
"parameters": [
{
- "name": "notification_id",
+ "name": "dataset_id",
"in": "path",
"required": true,
"schema": {
"type": "string",
"format": "uuid",
- "title": "Notification Id"
+ "title": "Dataset Id"
+ }
+ },
+ {
+ "name": "comment_id",
+ "in": "path",
+ "required": true,
+ "schema": {
+ "type": "string",
+ "format": "uuid",
+ "title": "Comment Id"
}
}
],
@@ -5992,74 +7713,14 @@
}
}
},
- "/api/v1/notifications/read-all": {
+ "/api/v1/comments/{comment_type}/{comment_id}/vote": {
"post": {
"tags": [
- "notifications"
- ],
- "summary": "Mark All Notifications Read",
- "description": "Mark all notifications as read for the current user.\n\nReturns count of notifications marked and a cursor pointing to\nthe newest marked notification for resumability.",
- "operationId": "mark_all_notifications_read_api_v1_notifications_read_all_post",
- "responses": {
- "200": {
- "description": "Successful Response",
- "content": {
- "application/json": {
- "schema": {
- "$ref": "#/components/schemas/MarkAllReadResponse"
- }
- }
- }
- }
- },
- "security": [
- {
- "APIKeyHeader": []
- },
- {
- "HTTPBearer": []
- }
- ]
- }
- },
- "/api/v1/notifications/unread-count": {
- "get": {
- "tags": [
- "notifications"
- ],
- "summary": "Get Unread Count",
- "description": "Get count of unread notifications for the current user.",
- "operationId": "get_unread_count_api_v1_notifications_unread_count_get",
- "responses": {
- "200": {
- "description": "Successful Response",
- "content": {
- "application/json": {
- "schema": {
- "$ref": "#/components/schemas/UnreadCountResponse"
- }
- }
- }
- }
- },
- "security": [
- {
- "APIKeyHeader": []
- },
- {
- "HTTPBearer": []
- }
- ]
- }
- },
- "/api/v1/notifications/stream": {
- "get": {
- "tags": [
- "notifications"
+ "comment-votes"
],
- "summary": "Notification Stream",
- "description": "Stream real-time notifications via Server-Sent Events.\n\nBrowser EventSource can't send headers, so JWT is accepted via query param.\nThe auth middleware already handles `?token=` for SSE endpoints.\n\nEvents:\n- `notification`: New notification (includes cursor for resume)\n- `heartbeat`: Keep-alive every 30 seconds\n\nExample:\n GET /notifications/stream?token=&after=\n\nReturns:\n EventSourceResponse with SSE stream.",
- "operationId": "notification_stream_api_v1_notifications_stream_get",
+ "summary": "Vote On Comment",
+ "description": "Vote on a comment.",
+ "operationId": "vote_on_comment_api_v1_comments__comment_type___comment_id__vote_post",
"security": [
{
"APIKeyHeader": []
@@ -6070,32 +7731,42 @@
],
"parameters": [
{
- "name": "after",
- "in": "query",
- "required": false,
+ "name": "comment_type",
+ "in": "path",
+ "required": true,
"schema": {
- "anyOf": [
- {
- "type": "string"
- },
- {
- "type": "null"
- }
+ "enum": [
+ "schema",
+ "knowledge"
],
- "description": "Resume from notification ID (for reconnect)",
- "title": "After"
- },
- "description": "Resume from notification ID (for reconnect)"
+ "type": "string",
+ "title": "Comment Type"
+ }
+ },
+ {
+ "name": "comment_id",
+ "in": "path",
+ "required": true,
+ "schema": {
+ "type": "string",
+ "format": "uuid",
+ "title": "Comment Id"
+ }
}
],
- "responses": {
- "200": {
- "description": "Successful Response",
- "content": {
- "application/json": {
- "schema": {}
+ "requestBody": {
+ "required": true,
+ "content": {
+ "application/json": {
+ "schema": {
+ "$ref": "#/components/schemas/VoteCreate"
}
}
+ }
+ },
+ "responses": {
+ "204": {
+ "description": "Successful Response"
},
"422": {
"description": "Validation Error",
@@ -6108,36 +7779,50 @@
}
}
}
- }
- },
- "/api/v1/investigation-feedback/": {
- "post": {
+ },
+ "delete": {
"tags": [
- "investigation-feedback"
+ "comment-votes"
],
- "summary": "Submit Feedback",
- "description": "Submit feedback on a hypothesis, query, evidence, synthesis, or investigation.",
- "operationId": "submit_feedback_api_v1_investigation_feedback__post",
- "requestBody": {
- "content": {
- "application/json": {
- "schema": {
- "$ref": "#/components/schemas/FeedbackCreate"
- }
+ "summary": "Remove Vote",
+ "description": "Remove vote from a comment.",
+ "operationId": "remove_vote_api_v1_comments__comment_type___comment_id__vote_delete",
+ "security": [
+ {
+ "APIKeyHeader": []
+ },
+ {
+ "HTTPBearer": []
+ }
+ ],
+ "parameters": [
+ {
+ "name": "comment_type",
+ "in": "path",
+ "required": true,
+ "schema": {
+ "enum": [
+ "schema",
+ "knowledge"
+ ],
+ "type": "string",
+ "title": "Comment Type"
}
},
- "required": true
- },
- "responses": {
- "201": {
- "description": "Successful Response",
- "content": {
- "application/json": {
- "schema": {
- "$ref": "#/components/schemas/FeedbackResponse"
- }
- }
+ {
+ "name": "comment_id",
+ "in": "path",
+ "required": true,
+ "schema": {
+ "type": "string",
+ "format": "uuid",
+ "title": "Comment Id"
}
+ }
+ ],
+ "responses": {
+ "204": {
+ "description": "Successful Response"
},
"422": {
"description": "Validation Error",
@@ -6149,25 +7834,17 @@
}
}
}
- },
- "security": [
- {
- "APIKeyHeader": []
- },
- {
- "HTTPBearer": []
- }
- ]
+ }
}
},
- "/api/v1/investigation-feedback/investigations/{investigation_id}": {
+ "/api/v1/sla-policies": {
"get": {
"tags": [
- "investigation-feedback"
+ "sla-policies"
],
- "summary": "Get Investigation Feedback",
- "description": "Get current user's feedback for an investigation.\n\nArgs:\n investigation_id: The investigation to get feedback for.\n auth: Authentication context.\n db: Application database.\n\nReturns:\n List of feedback items for the investigation.",
- "operationId": "get_investigation_feedback_api_v1_investigation_feedback_investigations__investigation_id__get",
+ "summary": "List Sla Policies",
+ "description": "List SLA policies for the tenant.",
+ "operationId": "list_sla_policies_api_v1_sla_policies_get",
"security": [
{
"APIKeyHeader": []
@@ -6178,14 +7855,16 @@
],
"parameters": [
{
- "name": "investigation_id",
- "in": "path",
- "required": true,
+ "name": "include_default",
+ "in": "query",
+ "required": false,
"schema": {
- "type": "string",
- "format": "uuid",
- "title": "Investigation Id"
- }
+ "type": "boolean",
+ "description": "Include default policy",
+ "default": true,
+ "title": "Include Default"
+ },
+ "description": "Include default policy"
}
],
"responses": {
@@ -6194,11 +7873,7 @@
"content": {
"application/json": {
"schema": {
- "type": "array",
- "items": {
- "$ref": "#/components/schemas/FeedbackItem"
- },
- "title": "Response Get Investigation Feedback Api V1 Investigation Feedback Investigations Investigation Id Get"
+ "$ref": "#/components/schemas/SLAPolicyListResponse"
}
}
}
@@ -6214,16 +7889,14 @@
}
}
}
- }
- },
- "/api/v1/datasets/{dataset_id}/schema-comments": {
- "get": {
+ },
+ "post": {
"tags": [
- "schema-comments"
+ "sla-policies"
],
- "summary": "List Schema Comments",
- "description": "List schema comments for a dataset.",
- "operationId": "list_schema_comments_api_v1_datasets__dataset_id__schema_comments_get",
+ "summary": "Create Sla Policy",
+ "description": "Create a new SLA policy.\n\nRequires admin scope. If is_default is true, clears any existing default.",
+ "operationId": "create_sla_policy_api_v1_sla_policies_post",
"security": [
{
"APIKeyHeader": []
@@ -6232,45 +7905,23 @@
"HTTPBearer": []
}
],
- "parameters": [
- {
- "name": "dataset_id",
- "in": "path",
- "required": true,
- "schema": {
- "type": "string",
- "format": "uuid",
- "title": "Dataset Id"
- }
- },
- {
- "name": "field_name",
- "in": "query",
- "required": false,
- "schema": {
- "anyOf": [
- {
- "type": "string"
- },
- {
- "type": "null"
- }
- ],
- "title": "Field Name"
+ "requestBody": {
+ "required": true,
+ "content": {
+ "application/json": {
+ "schema": {
+ "$ref": "#/components/schemas/SLAPolicyCreate"
+ }
}
}
- ],
+ },
"responses": {
- "200": {
+ "201": {
"description": "Successful Response",
"content": {
"application/json": {
"schema": {
- "type": "array",
- "items": {
- "$ref": "#/components/schemas/SchemaCommentResponse"
- },
- "title": "Response List Schema Comments Api V1 Datasets Dataset Id Schema Comments Get"
+ "$ref": "#/components/schemas/SLAPolicyResponse"
}
}
}
@@ -6286,14 +7937,54 @@
}
}
}
- },
- "post": {
+ }
+ },
+ "/api/v1/sla-policies/default": {
+ "get": {
"tags": [
- "schema-comments"
+ "sla-policies"
],
- "summary": "Create Schema Comment",
- "description": "Create a schema comment.",
- "operationId": "create_schema_comment_api_v1_datasets__dataset_id__schema_comments_post",
+ "summary": "Get Default Sla Policy",
+ "description": "Get the default SLA policy for the tenant.\n\nReturns None if no default policy is configured.",
+ "operationId": "get_default_sla_policy_api_v1_sla_policies_default_get",
+ "responses": {
+ "200": {
+ "description": "Successful Response",
+ "content": {
+ "application/json": {
+ "schema": {
+ "anyOf": [
+ {
+ "$ref": "#/components/schemas/SLAPolicyResponse"
+ },
+ {
+ "type": "null"
+ }
+ ],
+ "title": "Response Get Default Sla Policy Api V1 Sla Policies Default Get"
+ }
+ }
+ }
+ }
+ },
+ "security": [
+ {
+ "APIKeyHeader": []
+ },
+ {
+ "HTTPBearer": []
+ }
+ ]
+ }
+ },
+ "/api/v1/sla-policies/{policy_id}": {
+ "get": {
+ "tags": [
+ "sla-policies"
+ ],
+ "summary": "Get Sla Policy",
+ "description": "Get an SLA policy by ID.",
+ "operationId": "get_sla_policy_api_v1_sla_policies__policy_id__get",
"security": [
{
"APIKeyHeader": []
@@ -6304,33 +7995,23 @@
],
"parameters": [
{
- "name": "dataset_id",
+ "name": "policy_id",
"in": "path",
"required": true,
"schema": {
"type": "string",
"format": "uuid",
- "title": "Dataset Id"
+ "title": "Policy Id"
}
}
],
- "requestBody": {
- "required": true,
- "content": {
- "application/json": {
- "schema": {
- "$ref": "#/components/schemas/SchemaCommentCreate"
- }
- }
- }
- },
"responses": {
- "201": {
+ "200": {
"description": "Successful Response",
"content": {
"application/json": {
"schema": {
- "$ref": "#/components/schemas/SchemaCommentResponse"
+ "$ref": "#/components/schemas/SLAPolicyResponse"
}
}
}
@@ -6346,16 +8027,14 @@
}
}
}
- }
- },
- "/api/v1/datasets/{dataset_id}/schema-comments/{comment_id}": {
+ },
"patch": {
"tags": [
- "schema-comments"
+ "sla-policies"
],
- "summary": "Update Schema Comment",
- "description": "Update a schema comment.",
- "operationId": "update_schema_comment_api_v1_datasets__dataset_id__schema_comments__comment_id__patch",
+ "summary": "Update Sla Policy",
+ "description": "Update an SLA policy.\n\nRequires admin scope. If is_default is set to true, clears any existing default.",
+ "operationId": "update_sla_policy_api_v1_sla_policies__policy_id__patch",
"security": [
{
"APIKeyHeader": []
@@ -6366,23 +8045,13 @@
],
"parameters": [
{
- "name": "dataset_id",
- "in": "path",
- "required": true,
- "schema": {
- "type": "string",
- "format": "uuid",
- "title": "Dataset Id"
- }
- },
- {
- "name": "comment_id",
+ "name": "policy_id",
"in": "path",
"required": true,
"schema": {
"type": "string",
"format": "uuid",
- "title": "Comment Id"
+ "title": "Policy Id"
}
}
],
@@ -6391,7 +8060,7 @@
"content": {
"application/json": {
"schema": {
- "$ref": "#/components/schemas/SchemaCommentUpdate"
+ "$ref": "#/components/schemas/SLAPolicyUpdate"
}
}
}
@@ -6402,7 +8071,7 @@
"content": {
"application/json": {
"schema": {
- "$ref": "#/components/schemas/SchemaCommentResponse"
+ "$ref": "#/components/schemas/SLAPolicyResponse"
}
}
}
@@ -6421,11 +8090,11 @@
},
"delete": {
"tags": [
- "schema-comments"
+ "sla-policies"
],
- "summary": "Delete Schema Comment",
- "description": "Delete a schema comment.",
- "operationId": "delete_schema_comment_api_v1_datasets__dataset_id__schema_comments__comment_id__delete",
+ "summary": "Delete Sla Policy",
+ "description": "Delete an SLA policy.\n\nRequires admin scope. Issues using this policy will have sla_policy_id set to NULL.",
+ "operationId": "delete_sla_policy_api_v1_sla_policies__policy_id__delete",
"security": [
{
"APIKeyHeader": []
@@ -6436,23 +8105,13 @@
],
"parameters": [
{
- "name": "dataset_id",
- "in": "path",
- "required": true,
- "schema": {
- "type": "string",
- "format": "uuid",
- "title": "Dataset Id"
- }
- },
- {
- "name": "comment_id",
+ "name": "policy_id",
"in": "path",
"required": true,
"schema": {
"type": "string",
"format": "uuid",
- "title": "Comment Id"
+ "title": "Policy Id"
}
}
],
@@ -6473,14 +8132,14 @@
}
}
},
- "/api/v1/datasets/{dataset_id}/knowledge-comments": {
- "get": {
+ "/api/v1/integrations/webhook-generic": {
+ "post": {
"tags": [
- "knowledge-comments"
+ "integrations"
],
- "summary": "List Knowledge Comments",
- "description": "List knowledge comments for a dataset.",
- "operationId": "list_knowledge_comments_api_v1_datasets__dataset_id__knowledge_comments_get",
+ "summary": "Receive Generic Webhook",
+ "description": "Receive a generic webhook to create an issue.\n\nThis endpoint allows external systems to create issues via HTTP webhook.\nRequests must be signed with HMAC-SHA256 using the shared secret.\n\nOptional JSON Schema validation can be configured via:\n- WEBHOOK_JSON_SCHEMA environment variable (inline JSON schema)\n- X-JSON-Schema header (base64-encoded JSON schema, overrides env config)\n\nIdempotency: If source_provider and source_external_id are provided,\nduplicate webhooks will return the existing issue instead of creating\na new one.",
+ "operationId": "receive_generic_webhook_api_v1_integrations_webhook_generic_post",
"security": [
{
"APIKeyHeader": []
@@ -6491,27 +8150,47 @@
],
"parameters": [
{
- "name": "dataset_id",
- "in": "path",
- "required": true,
+ "name": "x-webhook-signature",
+ "in": "header",
+ "required": false,
"schema": {
- "type": "string",
- "format": "uuid",
- "title": "Dataset Id"
+ "anyOf": [
+ {
+ "type": "string"
+ },
+ {
+ "type": "null"
+ }
+ ],
+ "title": "X-Webhook-Signature"
}
+ },
+ {
+ "name": "x-json-schema",
+ "in": "header",
+ "required": false,
+ "schema": {
+ "anyOf": [
+ {
+ "type": "string"
+ },
+ {
+ "type": "null"
+ }
+ ],
+ "description": "Base64-encoded JSON Schema",
+ "title": "X-Json-Schema"
+ },
+ "description": "Base64-encoded JSON Schema"
}
],
"responses": {
- "200": {
+ "201": {
"description": "Successful Response",
"content": {
"application/json": {
"schema": {
- "type": "array",
- "items": {
- "$ref": "#/components/schemas/KnowledgeCommentResponse"
- },
- "title": "Response List Knowledge Comments Api V1 Datasets Dataset Id Knowledge Comments Get"
+ "$ref": "#/components/schemas/WebhookIssueResponse"
}
}
}
@@ -6527,14 +8206,16 @@
}
}
}
- },
+ }
+ },
+ "/api/v1/dataset-repo-mappings": {
"post": {
"tags": [
- "knowledge-comments"
+ "dataset-repo-mappings"
],
- "summary": "Create Knowledge Comment",
- "description": "Create a knowledge comment.",
- "operationId": "create_knowledge_comment_api_v1_datasets__dataset_id__knowledge_comments_post",
+ "summary": "Create Repo Mapping",
+ "description": "Create a dataset-to-repository mapping.",
+ "operationId": "create_repo_mapping_api_v1_dataset_repo_mappings_post",
"security": [
{
"APIKeyHeader": []
@@ -6543,35 +8224,23 @@
"HTTPBearer": []
}
],
- "parameters": [
- {
- "name": "dataset_id",
- "in": "path",
- "required": true,
- "schema": {
- "type": "string",
- "format": "uuid",
- "title": "Dataset Id"
- }
- }
- ],
"requestBody": {
"required": true,
"content": {
"application/json": {
"schema": {
- "$ref": "#/components/schemas/KnowledgeCommentCreate"
+ "$ref": "#/components/schemas/CreateRepoMappingRequest"
}
}
}
},
"responses": {
- "201": {
+ "200": {
"description": "Successful Response",
"content": {
"application/json": {
"schema": {
- "$ref": "#/components/schemas/KnowledgeCommentResponse"
+ "$ref": "#/components/schemas/RepoMappingResponse"
}
}
}
@@ -6587,16 +8256,14 @@
}
}
}
- }
- },
- "/api/v1/datasets/{dataset_id}/knowledge-comments/{comment_id}": {
- "patch": {
+ },
+ "get": {
"tags": [
- "knowledge-comments"
+ "dataset-repo-mappings"
],
- "summary": "Update Knowledge Comment",
- "description": "Update a knowledge comment.",
- "operationId": "update_knowledge_comment_api_v1_datasets__dataset_id__knowledge_comments__comment_id__patch",
+ "summary": "List Repo Mappings",
+ "description": "List dataset-to-repository mappings with optional filters.",
+ "operationId": "list_repo_mappings_api_v1_dataset_repo_mappings_get",
"security": [
{
"APIKeyHeader": []
@@ -6607,99 +8274,71 @@
],
"parameters": [
{
- "name": "dataset_id",
- "in": "path",
- "required": true,
+ "name": "source",
+ "in": "query",
+ "required": false,
"schema": {
- "type": "string",
- "format": "uuid",
- "title": "Dataset Id"
+ "anyOf": [
+ {
+ "type": "string"
+ },
+ {
+ "type": "null"
+ }
+ ],
+ "title": "Source"
}
},
{
- "name": "comment_id",
- "in": "path",
- "required": true,
+ "name": "confirmed",
+ "in": "query",
+ "required": false,
"schema": {
- "type": "string",
- "format": "uuid",
- "title": "Comment Id"
- }
- }
- ],
- "requestBody": {
- "required": true,
- "content": {
- "application/json": {
- "schema": {
- "$ref": "#/components/schemas/KnowledgeCommentUpdate"
- }
- }
- }
- },
- "responses": {
- "200": {
- "description": "Successful Response",
- "content": {
- "application/json": {
- "schema": {
- "$ref": "#/components/schemas/KnowledgeCommentResponse"
- }
- }
- }
- },
- "422": {
- "description": "Validation Error",
- "content": {
- "application/json": {
- "schema": {
- "$ref": "#/components/schemas/HTTPValidationError"
+ "anyOf": [
+ {
+ "type": "boolean"
+ },
+ {
+ "type": "null"
}
- }
+ ],
+ "title": "Confirmed"
}
- }
- }
- },
- "delete": {
- "tags": [
- "knowledge-comments"
- ],
- "summary": "Delete Knowledge Comment",
- "description": "Delete a knowledge comment.",
- "operationId": "delete_knowledge_comment_api_v1_datasets__dataset_id__knowledge_comments__comment_id__delete",
- "security": [
- {
- "APIKeyHeader": []
},
{
- "HTTPBearer": []
- }
- ],
- "parameters": [
- {
- "name": "dataset_id",
- "in": "path",
- "required": true,
+ "name": "limit",
+ "in": "query",
+ "required": false,
"schema": {
- "type": "string",
- "format": "uuid",
- "title": "Dataset Id"
+ "type": "integer",
+ "maximum": 200,
+ "minimum": 1,
+ "default": 50,
+ "title": "Limit"
}
},
{
- "name": "comment_id",
- "in": "path",
- "required": true,
+ "name": "offset",
+ "in": "query",
+ "required": false,
"schema": {
- "type": "string",
- "format": "uuid",
- "title": "Comment Id"
+ "type": "integer",
+ "minimum": 0,
+ "default": 0,
+ "title": "Offset"
}
}
],
"responses": {
- "204": {
- "description": "Successful Response"
+ "200": {
+ "description": "Successful Response",
+ "content": {
+ "application/json": {
+ "schema": {
+ "$ref": "#/components/schemas/RepoMappingListResponse"
+ }
+ }
+ }
},
"422": {
"description": "Validation Error",
@@ -6714,14 +8353,14 @@
}
}
},
- "/api/v1/comments/{comment_type}/{comment_id}/vote": {
- "post": {
+ "/api/v1/dataset-repo-mappings/{mapping_id}": {
+ "put": {
"tags": [
- "comment-votes"
+ "dataset-repo-mappings"
],
- "summary": "Vote On Comment",
- "description": "Vote on a comment.",
- "operationId": "vote_on_comment_api_v1_comments__comment_type___comment_id__vote_post",
+ "summary": "Update Repo Mapping",
+ "description": "Update a dataset-to-repository mapping.",
+ "operationId": "update_repo_mapping_api_v1_dataset_repo_mappings__mapping_id__put",
"security": [
{
"APIKeyHeader": []
@@ -6732,26 +8371,13 @@
],
"parameters": [
{
- "name": "comment_type",
- "in": "path",
- "required": true,
- "schema": {
- "enum": [
- "schema",
- "knowledge"
- ],
- "type": "string",
- "title": "Comment Type"
- }
- },
- {
- "name": "comment_id",
+ "name": "mapping_id",
"in": "path",
"required": true,
"schema": {
"type": "string",
"format": "uuid",
- "title": "Comment Id"
+ "title": "Mapping Id"
}
}
],
@@ -6760,14 +8386,21 @@
"content": {
"application/json": {
"schema": {
- "$ref": "#/components/schemas/VoteCreate"
+ "$ref": "#/components/schemas/UpdateRepoMappingRequest"
}
}
}
},
"responses": {
- "204": {
- "description": "Successful Response"
+ "200": {
+ "description": "Successful Response",
+ "content": {
+ "application/json": {
+ "schema": {
+ "$ref": "#/components/schemas/RepoMappingResponse"
+ }
+ }
+ }
},
"422": {
"description": "Validation Error",
@@ -6783,11 +8416,11 @@
},
"delete": {
"tags": [
- "comment-votes"
+ "dataset-repo-mappings"
],
- "summary": "Remove Vote",
- "description": "Remove vote from a comment.",
- "operationId": "remove_vote_api_v1_comments__comment_type___comment_id__vote_delete",
+ "summary": "Delete Repo Mapping",
+ "description": "Delete a dataset-to-repository mapping.",
+ "operationId": "delete_repo_mapping_api_v1_dataset_repo_mappings__mapping_id__delete",
"security": [
{
"APIKeyHeader": []
@@ -6798,75 +8431,15 @@
],
"parameters": [
{
- "name": "comment_type",
- "in": "path",
- "required": true,
- "schema": {
- "enum": [
- "schema",
- "knowledge"
- ],
- "type": "string",
- "title": "Comment Type"
- }
- },
- {
- "name": "comment_id",
+ "name": "mapping_id",
"in": "path",
"required": true,
"schema": {
"type": "string",
"format": "uuid",
- "title": "Comment Id"
- }
- }
- ],
- "responses": {
- "204": {
- "description": "Successful Response"
- },
- "422": {
- "description": "Validation Error",
- "content": {
- "application/json": {
- "schema": {
- "$ref": "#/components/schemas/HTTPValidationError"
- }
- }
+ "title": "Mapping Id"
}
}
- }
- }
- },
- "/api/v1/sla-policies": {
- "get": {
- "tags": [
- "sla-policies"
- ],
- "summary": "List Sla Policies",
- "description": "List SLA policies for the tenant.",
- "operationId": "list_sla_policies_api_v1_sla_policies_get",
- "security": [
- {
- "APIKeyHeader": []
- },
- {
- "HTTPBearer": []
- }
- ],
- "parameters": [
- {
- "name": "include_default",
- "in": "query",
- "required": false,
- "schema": {
- "type": "boolean",
- "description": "Include default policy",
- "default": true,
- "title": "Include Default"
- },
- "description": "Include default policy"
- }
],
"responses": {
"200": {
@@ -6874,7 +8447,11 @@
"content": {
"application/json": {
"schema": {
- "$ref": "#/components/schemas/SLAPolicyListResponse"
+ "type": "object",
+ "additionalProperties": {
+ "type": "boolean"
+ },
+ "title": "Response Delete Repo Mapping Api V1 Dataset Repo Mappings Mapping Id Delete"
}
}
}
@@ -6890,39 +8467,33 @@
}
}
}
- },
+ }
+ },
+ "/api/v1/dataset-repo-mappings/bulk": {
"post": {
"tags": [
- "sla-policies"
- ],
- "summary": "Create Sla Policy",
- "description": "Create a new SLA policy.\n\nRequires admin scope. If is_default is true, clears any existing default.",
- "operationId": "create_sla_policy_api_v1_sla_policies_post",
- "security": [
- {
- "APIKeyHeader": []
- },
- {
- "HTTPBearer": []
- }
+ "dataset-repo-mappings"
],
+ "summary": "Bulk Import Repo Mappings",
+ "description": "Bulk import dataset-to-repository mappings.",
+ "operationId": "bulk_import_repo_mappings_api_v1_dataset_repo_mappings_bulk_post",
"requestBody": {
- "required": true,
"content": {
"application/json": {
"schema": {
- "$ref": "#/components/schemas/SLAPolicyCreate"
+ "$ref": "#/components/schemas/BulkImportRequest"
}
}
- }
+ },
+ "required": true
},
"responses": {
- "201": {
+ "200": {
"description": "Successful Response",
"content": {
"application/json": {
"schema": {
- "$ref": "#/components/schemas/SLAPolicyResponse"
+ "$ref": "#/components/schemas/BulkImportResponse"
}
}
}
@@ -6937,36 +8508,6 @@
}
}
}
- }
- }
- },
- "/api/v1/sla-policies/default": {
- "get": {
- "tags": [
- "sla-policies"
- ],
- "summary": "Get Default Sla Policy",
- "description": "Get the default SLA policy for the tenant.\n\nReturns None if no default policy is configured.",
- "operationId": "get_default_sla_policy_api_v1_sla_policies_default_get",
- "responses": {
- "200": {
- "description": "Successful Response",
- "content": {
- "application/json": {
- "schema": {
- "anyOf": [
- {
- "$ref": "#/components/schemas/SLAPolicyResponse"
- },
- {
- "type": "null"
- }
- ],
- "title": "Response Get Default Sla Policy Api V1 Sla Policies Default Get"
- }
- }
- }
- }
},
"security": [
{
@@ -6978,14 +8519,14 @@
]
}
},
- "/api/v1/sla-policies/{policy_id}": {
- "get": {
+ "/api/v1/dataset-repo-mappings/import-dbt-manifest": {
+ "post": {
"tags": [
- "sla-policies"
+ "dataset-repo-mappings"
],
- "summary": "Get Sla Policy",
- "description": "Get an SLA policy by ID.",
- "operationId": "get_sla_policy_api_v1_sla_policies__policy_id__get",
+ "summary": "Import Dbt Manifest",
+ "description": "Import dataset-to-repository mappings from a dbt manifest.json.",
+ "operationId": "import_dbt_manifest_api_v1_dataset_repo_mappings_import_dbt_manifest_post",
"security": [
{
"APIKeyHeader": []
@@ -6996,72 +8537,48 @@
],
"parameters": [
{
- "name": "policy_id",
- "in": "path",
+ "name": "repo_owner",
+ "in": "query",
"required": true,
"schema": {
"type": "string",
- "format": "uuid",
- "title": "Policy Id"
- }
- }
- ],
- "responses": {
- "200": {
- "description": "Successful Response",
- "content": {
- "application/json": {
- "schema": {
- "$ref": "#/components/schemas/SLAPolicyResponse"
- }
- }
- }
- },
- "422": {
- "description": "Validation Error",
- "content": {
- "application/json": {
- "schema": {
- "$ref": "#/components/schemas/HTTPValidationError"
- }
- }
- }
- }
- }
- },
- "patch": {
- "tags": [
- "sla-policies"
- ],
- "summary": "Update Sla Policy",
- "description": "Update an SLA policy.\n\nRequires admin scope. If is_default is set to true, clears any existing default.",
- "operationId": "update_sla_policy_api_v1_sla_policies__policy_id__patch",
- "security": [
- {
- "APIKeyHeader": []
+ "minLength": 1,
+ "title": "Repo Owner"
+ }
},
{
- "HTTPBearer": []
- }
- ],
- "parameters": [
- {
- "name": "policy_id",
- "in": "path",
+ "name": "repo_name",
+ "in": "query",
"required": true,
"schema": {
"type": "string",
- "format": "uuid",
- "title": "Policy Id"
+ "minLength": 1,
+ "title": "Repo Name"
+ }
+ },
+ {
+ "name": "branch",
+ "in": "query",
+ "required": false,
+ "schema": {
+ "anyOf": [
+ {
+ "type": "string"
+ },
+ {
+ "type": "null"
+ }
+ ],
+ "title": "Branch"
}
}
],
"requestBody": {
"required": true,
"content": {
- "application/json": {
+ "multipart/form-data": {
"schema": {
- "$ref": "#/components/schemas/SLAPolicyUpdate"
+ "$ref": "#/components/schemas/Body_import_dbt_manifest_api_v1_dataset_repo_mappings_import_dbt_manifest_post"
}
}
}
@@ -7072,7 +8589,7 @@
"content": {
"application/json": {
"schema": {
- "$ref": "#/components/schemas/SLAPolicyResponse"
+ "$ref": "#/components/schemas/DbtManifestImportResponse"
}
}
}
@@ -7088,14 +8605,16 @@
}
}
}
- },
- "delete": {
+ }
+ },
+ "/api/v1/dataset-repo-mappings/suggestions": {
+ "get": {
"tags": [
- "sla-policies"
+ "dataset-repo-mappings"
],
- "summary": "Delete Sla Policy",
- "description": "Delete an SLA policy.\n\nRequires admin scope. Issues using this policy will have sla_policy_id set to NULL.",
- "operationId": "delete_sla_policy_api_v1_sla_policies__policy_id__delete",
+ "summary": "List Suggestions",
+ "description": "List unconfirmed (suggested) dataset-to-repository mappings.",
+ "operationId": "list_suggestions_api_v1_dataset_repo_mappings_suggestions_get",
"security": [
{
"APIKeyHeader": []
@@ -7106,19 +8625,39 @@
],
"parameters": [
{
- "name": "policy_id",
- "in": "path",
- "required": true,
+ "name": "limit",
+ "in": "query",
+ "required": false,
"schema": {
- "type": "string",
- "format": "uuid",
- "title": "Policy Id"
+ "type": "integer",
+ "maximum": 200,
+ "minimum": 1,
+ "default": 50,
+ "title": "Limit"
+ }
+ },
+ {
+ "name": "offset",
+ "in": "query",
+ "required": false,
+ "schema": {
+ "type": "integer",
+ "minimum": 0,
+ "default": 0,
+ "title": "Offset"
}
}
],
"responses": {
- "204": {
- "description": "Successful Response"
+ "200": {
+ "description": "Successful Response",
+ "content": {
+ "application/json": {
+ "schema": {
+ "$ref": "#/components/schemas/RepoMappingListResponse"
+ }
+ }
+ }
},
"422": {
"description": "Validation Error",
@@ -7133,14 +8672,14 @@
}
}
},
- "/api/v1/integrations/webhook-generic": {
+ "/api/v1/dataset-repo-mappings/{mapping_id}/confirm": {
"post": {
"tags": [
- "integrations"
+ "dataset-repo-mappings"
],
- "summary": "Receive Generic Webhook",
- "description": "Receive a generic webhook to create an issue.\n\nThis endpoint allows external systems to create issues via HTTP webhook.\nRequests must be signed with HMAC-SHA256 using the shared secret.\n\nOptional JSON Schema validation can be configured via:\n- WEBHOOK_JSON_SCHEMA environment variable (inline JSON schema)\n- X-JSON-Schema header (base64-encoded JSON schema, overrides env config)\n\nIdempotency: If source_provider and source_external_id are provided,\nduplicate webhooks will return the existing issue instead of creating\na new one.",
- "operationId": "receive_generic_webhook_api_v1_integrations_webhook_generic_post",
+ "summary": "Confirm Suggestion",
+ "description": "Confirm a suggested mapping, promoting it to explicit.",
+ "operationId": "confirm_suggestion_api_v1_dataset_repo_mappings__mapping_id__confirm_post",
"security": [
{
"APIKeyHeader": []
@@ -7151,47 +8690,23 @@
],
"parameters": [
{
- "name": "x-webhook-signature",
- "in": "header",
- "required": false,
+ "name": "mapping_id",
+ "in": "path",
+ "required": true,
"schema": {
- "anyOf": [
- {
- "type": "string"
- },
- {
- "type": "null"
- }
- ],
- "title": "X-Webhook-Signature"
+ "type": "string",
+ "format": "uuid",
+ "title": "Mapping Id"
}
- },
- {
- "name": "x-json-schema",
- "in": "header",
- "required": false,
- "schema": {
- "anyOf": [
- {
- "type": "string"
- },
- {
- "type": "null"
- }
- ],
- "description": "Base64-encoded JSON Schema",
- "title": "X-Json-Schema"
- },
- "description": "Base64-encoded JSON Schema"
}
],
"responses": {
- "201": {
+ "200": {
"description": "Successful Response",
"content": {
"application/json": {
"schema": {
- "$ref": "#/components/schemas/WebhookIssueResponse"
+ "$ref": "#/components/schemas/RepoMappingResponse"
}
}
}
@@ -7209,14 +8724,14 @@
}
}
},
- "/api/v1/dataset-repo-mappings": {
+ "/api/v1/dataset-repo-mappings/{mapping_id}/dismiss": {
"post": {
"tags": [
"dataset-repo-mappings"
],
- "summary": "Create Repo Mapping",
- "description": "Create a dataset-to-repository mapping.",
- "operationId": "create_repo_mapping_api_v1_dataset_repo_mappings_post",
+ "summary": "Dismiss Suggestion",
+ "description": "Dismiss (delete) a suggested mapping.",
+ "operationId": "dismiss_suggestion_api_v1_dataset_repo_mappings__mapping_id__dismiss_post",
"security": [
{
"APIKeyHeader": []
@@ -7225,23 +8740,29 @@
"HTTPBearer": []
}
],
- "requestBody": {
- "required": true,
- "content": {
- "application/json": {
- "schema": {
- "$ref": "#/components/schemas/CreateRepoMappingRequest"
- }
+ "parameters": [
+ {
+ "name": "mapping_id",
+ "in": "path",
+ "required": true,
+ "schema": {
+ "type": "string",
+ "format": "uuid",
+ "title": "Mapping Id"
}
}
- },
+ ],
"responses": {
"200": {
"description": "Successful Response",
"content": {
"application/json": {
"schema": {
- "$ref": "#/components/schemas/RepoMappingResponse"
+ "type": "object",
+ "additionalProperties": {
+ "type": "boolean"
+ },
+ "title": "Response Dismiss Suggestion Api V1 Dataset Repo Mappings Mapping Id Dismiss Post"
}
}
}
@@ -7257,14 +8778,16 @@
}
}
}
- },
+ }
+ },
+ "/api/v1/datasets/{dataset_id}/repo": {
"get": {
"tags": [
- "dataset-repo-mappings"
+ "datasets"
],
- "summary": "List Repo Mappings",
- "description": "List dataset-to-repository mappings with optional filters.",
- "operationId": "list_repo_mappings_api_v1_dataset_repo_mappings_get",
+ "summary": "Resolve Dataset Repo",
+ "description": "Resolve the best repository mapping for a dataset.\n\nReturns 200 with primary=null when no mapping found (never 404).\nThis enables graceful degradation in investigations.",
+ "operationId": "resolve_dataset_repo_api_v1_datasets__dataset_id__repo_get",
"security": [
{
"APIKeyHeader": []
@@ -7275,58 +8798,22 @@
],
"parameters": [
{
- "name": "source",
- "in": "query",
- "required": false,
- "schema": {
- "anyOf": [
- {
- "type": "string"
- },
- {
- "type": "null"
- }
- ],
- "title": "Source"
- }
- },
- {
- "name": "confirmed",
- "in": "query",
- "required": false,
- "schema": {
- "anyOf": [
- {
- "type": "boolean"
- },
- {
- "type": "null"
- }
- ],
- "title": "Confirmed"
- }
- },
- {
- "name": "limit",
- "in": "query",
- "required": false,
+ "name": "dataset_id",
+ "in": "path",
+ "required": true,
"schema": {
- "type": "integer",
- "maximum": 200,
- "minimum": 1,
- "default": 50,
- "title": "Limit"
+ "type": "string",
+ "title": "Dataset Id"
}
},
{
- "name": "offset",
+ "name": "include_all",
"in": "query",
"required": false,
"schema": {
- "type": "integer",
- "minimum": 0,
- "default": 0,
- "title": "Offset"
+ "type": "boolean",
+ "default": false,
+ "title": "Include All"
}
}
],
@@ -7336,7 +8823,7 @@
"content": {
"application/json": {
"schema": {
- "$ref": "#/components/schemas/RepoMappingListResponse"
+ "$ref": "#/components/schemas/DatasetRepoResponse"
}
}
}
@@ -7354,14 +8841,14 @@
}
}
},
- "/api/v1/dataset-repo-mappings/{mapping_id}": {
- "put": {
+ "/api/v1/git/repos": {
+ "post": {
"tags": [
- "dataset-repo-mappings"
+ "git"
],
- "summary": "Update Repo Mapping",
- "description": "Update a dataset-to-repository mapping.",
- "operationId": "update_repo_mapping_api_v1_dataset_repo_mappings__mapping_id__put",
+ "summary": "Connect Git Repo",
+ "description": "Connect a new git repository for pipeline change tracking.",
+ "operationId": "connect_git_repo_api_v1_git_repos_post",
"security": [
{
"APIKeyHeader": []
@@ -7370,24 +8857,12 @@
"HTTPBearer": []
}
],
- "parameters": [
- {
- "name": "mapping_id",
- "in": "path",
- "required": true,
- "schema": {
- "type": "string",
- "format": "uuid",
- "title": "Mapping Id"
- }
- }
- ],
"requestBody": {
"required": true,
"content": {
"application/json": {
"schema": {
- "$ref": "#/components/schemas/UpdateRepoMappingRequest"
+ "$ref": "#/components/schemas/ConnectGitRepoRequest"
}
}
}
@@ -7398,7 +8873,7 @@
"content": {
"application/json": {
"schema": {
- "$ref": "#/components/schemas/RepoMappingResponse"
+ "$ref": "#/components/schemas/GitRepoResponse"
}
}
}
@@ -7415,13 +8890,13 @@
}
}
},
- "delete": {
+ "get": {
"tags": [
- "dataset-repo-mappings"
+ "git"
],
- "summary": "Delete Repo Mapping",
- "description": "Delete a dataset-to-repository mapping.",
- "operationId": "delete_repo_mapping_api_v1_dataset_repo_mappings__mapping_id__delete",
+ "summary": "List Git Repos",
+ "description": "List connected git repositories.",
+ "operationId": "list_git_repos_api_v1_git_repos_get",
"security": [
{
"APIKeyHeader": []
@@ -7432,13 +8907,43 @@
],
"parameters": [
{
- "name": "mapping_id",
- "in": "path",
- "required": true,
+ "name": "provider",
+ "in": "query",
+ "required": false,
"schema": {
- "type": "string",
- "format": "uuid",
- "title": "Mapping Id"
+ "anyOf": [
+ {
+ "type": "string",
+ "pattern": "^(github|gitlab|bitbucket)$"
+ },
+ {
+ "type": "null"
+ }
+ ],
+ "title": "Provider"
+ }
+ },
+ {
+ "name": "limit",
+ "in": "query",
+ "required": false,
+ "schema": {
+ "type": "integer",
+ "maximum": 200,
+ "minimum": 1,
+ "default": 50,
+ "title": "Limit"
+ }
+ },
+ {
+ "name": "offset",
+ "in": "query",
+ "required": false,
+ "schema": {
+ "type": "integer",
+ "minimum": 0,
+ "default": 0,
+ "title": "Offset"
}
}
],
@@ -7448,11 +8953,7 @@
"content": {
"application/json": {
"schema": {
- "type": "object",
- "additionalProperties": {
- "type": "boolean"
- },
- "title": "Response Delete Repo Mapping Api V1 Dataset Repo Mappings Mapping Id Delete"
+ "$ref": "#/components/schemas/GitRepoListResponse"
}
}
}
@@ -7470,31 +8971,41 @@
}
}
},
- "/api/v1/dataset-repo-mappings/bulk": {
- "post": {
+ "/api/v1/git/repos/{repo_id}": {
+ "get": {
"tags": [
- "dataset-repo-mappings"
+ "git"
],
- "summary": "Bulk Import Repo Mappings",
- "description": "Bulk import dataset-to-repository mappings.",
- "operationId": "bulk_import_repo_mappings_api_v1_dataset_repo_mappings_bulk_post",
- "requestBody": {
- "content": {
- "application/json": {
- "schema": {
- "$ref": "#/components/schemas/BulkImportRequest"
- }
- }
+ "summary": "Get Git Repo",
+ "description": "Get a single git repository by ID.",
+ "operationId": "get_git_repo_api_v1_git_repos__repo_id__get",
+ "security": [
+ {
+ "APIKeyHeader": []
},
- "required": true
- },
+ {
+ "HTTPBearer": []
+ }
+ ],
+ "parameters": [
+ {
+ "name": "repo_id",
+ "in": "path",
+ "required": true,
+ "schema": {
+ "type": "string",
+ "format": "uuid",
+ "title": "Repo Id"
+ }
+ }
+ ],
"responses": {
"200": {
"description": "Successful Response",
"content": {
"application/json": {
"schema": {
- "$ref": "#/components/schemas/BulkImportResponse"
+ "$ref": "#/components/schemas/GitRepoResponse"
}
}
}
@@ -7509,25 +9020,15 @@
}
}
}
- },
- "security": [
- {
- "APIKeyHeader": []
- },
- {
- "HTTPBearer": []
- }
- ]
- }
- },
- "/api/v1/dataset-repo-mappings/import-dbt-manifest": {
- "post": {
+ }
+ },
+ "put": {
"tags": [
- "dataset-repo-mappings"
+ "git"
],
- "summary": "Import Dbt Manifest",
- "description": "Import dataset-to-repository mappings from a dbt manifest.json.",
- "operationId": "import_dbt_manifest_api_v1_dataset_repo_mappings_import_dbt_manifest_post",
+ "summary": "Update Git Repo",
+ "description": "Update a git repository's settings.",
+ "operationId": "update_git_repo_api_v1_git_repos__repo_id__put",
"security": [
{
"APIKeyHeader": []
@@ -7538,48 +9039,22 @@
],
"parameters": [
{
- "name": "repo_owner",
- "in": "query",
- "required": true,
- "schema": {
- "type": "string",
- "minLength": 1,
- "title": "Repo Owner"
- }
- },
- {
- "name": "repo_name",
- "in": "query",
+ "name": "repo_id",
+ "in": "path",
"required": true,
"schema": {
"type": "string",
- "minLength": 1,
- "title": "Repo Name"
- }
- },
- {
- "name": "branch",
- "in": "query",
- "required": false,
- "schema": {
- "anyOf": [
- {
- "type": "string"
- },
- {
- "type": "null"
- }
- ],
- "title": "Branch"
+ "format": "uuid",
+ "title": "Repo Id"
}
}
],
"requestBody": {
"required": true,
"content": {
- "multipart/form-data": {
+ "application/json": {
"schema": {
- "$ref": "#/components/schemas/Body_import_dbt_manifest_api_v1_dataset_repo_mappings_import_dbt_manifest_post"
+ "$ref": "#/components/schemas/UpdateGitRepoRequest"
}
}
}
@@ -7590,7 +9065,7 @@
"content": {
"application/json": {
"schema": {
- "$ref": "#/components/schemas/DbtManifestImportResponse"
+ "$ref": "#/components/schemas/GitRepoResponse"
}
}
}
@@ -7606,16 +9081,14 @@
}
}
}
- }
- },
- "/api/v1/dataset-repo-mappings/suggestions": {
- "get": {
+ },
+ "delete": {
"tags": [
- "dataset-repo-mappings"
+ "git"
],
- "summary": "List Suggestions",
- "description": "List unconfirmed (suggested) dataset-to-repository mappings.",
- "operationId": "list_suggestions_api_v1_dataset_repo_mappings_suggestions_get",
+ "summary": "Delete Git Repo",
+ "description": "Disconnect a git repository (cascades to code_changes).",
+ "operationId": "delete_git_repo_api_v1_git_repos__repo_id__delete",
"security": [
{
"APIKeyHeader": []
@@ -7626,26 +9099,13 @@
],
"parameters": [
{
- "name": "limit",
- "in": "query",
- "required": false,
- "schema": {
- "type": "integer",
- "maximum": 200,
- "minimum": 1,
- "default": 50,
- "title": "Limit"
- }
- },
- {
- "name": "offset",
- "in": "query",
- "required": false,
+ "name": "repo_id",
+ "in": "path",
+ "required": true,
"schema": {
- "type": "integer",
- "minimum": 0,
- "default": 0,
- "title": "Offset"
+ "type": "string",
+ "format": "uuid",
+ "title": "Repo Id"
}
}
],
@@ -7655,7 +9115,11 @@
"content": {
"application/json": {
"schema": {
- "$ref": "#/components/schemas/RepoMappingListResponse"
+ "type": "object",
+ "additionalProperties": {
+ "type": "boolean"
+ },
+ "title": "Response Delete Git Repo Api V1 Git Repos Repo Id Delete"
}
}
}
@@ -7673,14 +9137,14 @@
}
}
},
- "/api/v1/dataset-repo-mappings/{mapping_id}/confirm": {
+ "/api/v1/git/repos/{repo_id}/sync": {
"post": {
"tags": [
- "dataset-repo-mappings"
+ "git"
],
- "summary": "Confirm Suggestion",
- "description": "Confirm a suggested mapping, promoting it to explicit.",
- "operationId": "confirm_suggestion_api_v1_dataset_repo_mappings__mapping_id__confirm_post",
+ "summary": "Trigger Sync",
+ "description": "Trigger an immediate sync for a repository.\n\nReturns 202 Accepted; sync runs in the background.",
+ "operationId": "trigger_sync_api_v1_git_repos__repo_id__sync_post",
"security": [
{
"APIKeyHeader": []
@@ -7691,23 +9155,23 @@
],
"parameters": [
{
- "name": "mapping_id",
+ "name": "repo_id",
"in": "path",
"required": true,
"schema": {
"type": "string",
"format": "uuid",
- "title": "Mapping Id"
+ "title": "Repo Id"
}
}
],
"responses": {
- "200": {
+ "202": {
"description": "Successful Response",
"content": {
"application/json": {
"schema": {
- "$ref": "#/components/schemas/RepoMappingResponse"
+ "$ref": "#/components/schemas/SyncTriggerResponse"
}
}
}
@@ -7725,14 +9189,14 @@
}
}
},
- "/api/v1/dataset-repo-mappings/{mapping_id}/dismiss": {
- "post": {
+ "/api/v1/git/repos/{repo_id}/changes": {
+ "get": {
"tags": [
- "dataset-repo-mappings"
+ "git"
],
- "summary": "Dismiss Suggestion",
- "description": "Dismiss (delete) a suggested mapping.",
- "operationId": "dismiss_suggestion_api_v1_dataset_repo_mappings__mapping_id__dismiss_post",
+ "summary": "List Repo Changes",
+ "description": "List code changes for a repository with optional time range filter.",
+ "operationId": "list_repo_changes_api_v1_git_repos__repo_id__changes_get",
"security": [
{
"APIKeyHeader": []
@@ -7743,13 +9207,70 @@
],
"parameters": [
{
- "name": "mapping_id",
+ "name": "repo_id",
"in": "path",
"required": true,
"schema": {
"type": "string",
"format": "uuid",
- "title": "Mapping Id"
+ "title": "Repo Id"
+ }
+ },
+ {
+ "name": "since",
+ "in": "query",
+ "required": false,
+ "schema": {
+ "anyOf": [
+ {
+ "type": "string",
+ "format": "date-time"
+ },
+ {
+ "type": "null"
+ }
+ ],
+ "title": "Since"
+ }
+ },
+ {
+ "name": "until",
+ "in": "query",
+ "required": false,
+ "schema": {
+ "anyOf": [
+ {
+ "type": "string",
+ "format": "date-time"
+ },
+ {
+ "type": "null"
+ }
+ ],
+ "title": "Until"
+ }
+ },
+ {
+ "name": "limit",
+ "in": "query",
+ "required": false,
+ "schema": {
+ "type": "integer",
+ "maximum": 500,
+ "minimum": 1,
+ "default": 100,
+ "title": "Limit"
+ }
+ },
+ {
+ "name": "offset",
+ "in": "query",
+ "required": false,
+ "schema": {
+ "type": "integer",
+ "minimum": 0,
+ "default": 0,
+ "title": "Offset"
}
}
],
@@ -7759,11 +9280,7 @@
"content": {
"application/json": {
"schema": {
- "type": "object",
- "additionalProperties": {
- "type": "boolean"
- },
- "title": "Response Dismiss Suggestion Api V1 Dataset Repo Mappings Mapping Id Dismiss Post"
+ "$ref": "#/components/schemas/CodeChangeListResponse"
}
}
}
@@ -7781,14 +9298,14 @@
}
}
},
- "/api/v1/datasets/{dataset_id}/repo": {
+ "/api/v1/git/changes/by-asset": {
"get": {
"tags": [
- "datasets"
+ "git"
],
- "summary": "Resolve Dataset Repo",
- "description": "Resolve the best repository mapping for a dataset.\n\nReturns 200 with primary=null when no mapping found (never 404).\nThis enables graceful degradation in investigations.",
- "operationId": "resolve_dataset_repo_api_v1_datasets__dataset_id__repo_get",
+ "summary": "Find Changes By Asset",
+ "description": "Find code changes affecting a given asset across all tenant repos.\n\nThis endpoint is called by the investigation agent to correlate\ndata anomalies with code changes.",
+ "operationId": "find_changes_by_asset_api_v1_git_changes_by_asset_get",
"security": [
{
"APIKeyHeader": []
@@ -7799,22 +9316,58 @@
],
"parameters": [
{
- "name": "dataset_id",
- "in": "path",
+ "name": "asset_name",
+ "in": "query",
"required": true,
"schema": {
"type": "string",
- "title": "Dataset Id"
+ "title": "Asset Name"
}
},
{
- "name": "include_all",
+ "name": "since",
"in": "query",
"required": false,
"schema": {
- "type": "boolean",
- "default": false,
- "title": "Include All"
+ "anyOf": [
+ {
+ "type": "string",
+ "format": "date-time"
+ },
+ {
+ "type": "null"
+ }
+ ],
+ "title": "Since"
+ }
+ },
+ {
+ "name": "until",
+ "in": "query",
+ "required": false,
+ "schema": {
+ "anyOf": [
+ {
+ "type": "string",
+ "format": "date-time"
+ },
+ {
+ "type": "null"
+ }
+ ],
+ "title": "Until"
+ }
+ },
+ {
+ "name": "limit",
+ "in": "query",
+ "required": false,
+ "schema": {
+ "type": "integer",
+ "maximum": 200,
+ "minimum": 1,
+ "default": 50,
+ "title": "Limit"
}
}
],
@@ -7824,7 +9377,7 @@
"content": {
"application/json": {
"schema": {
- "$ref": "#/components/schemas/DatasetRepoResponse"
+ "$ref": "#/components/schemas/CodeChangeListResponse"
}
}
}
@@ -9815,6 +11368,20 @@
],
"title": "Body_import_dbt_manifest_api_v1_dataset_repo_mappings_import_dbt_manifest_post"
},
+ "Body_import_snapshot_archive_api_v1_investigations_import_post": {
+ "properties": {
+ "file": {
+ "type": "string",
+ "format": "binary",
+ "title": "File"
+ }
+ },
+ "type": "object",
+ "required": [
+ "file"
+ ],
+ "title": "Body_import_snapshot_archive_api_v1_investigations_import_post"
+ },
"BranchStateResponse": {
"properties": {
"branch_id": {
@@ -10103,6 +11670,226 @@
"title": "ChainVerificationResponse",
"description": "Response from evidence chain verification."
},
+ "CodeChangeListResponse": {
+ "properties": {
+ "items": {
+ "items": {
+ "$ref": "#/components/schemas/CodeChangeResponse"
+ },
+ "type": "array",
+ "title": "Items"
+ },
+ "total": {
+ "type": "integer",
+ "title": "Total"
+ }
+ },
+ "type": "object",
+ "required": [
+ "items",
+ "total"
+ ],
+ "title": "CodeChangeListResponse",
+ "description": "Response for a list of code changes."
+ },
+ "CodeChangeResponse": {
+ "properties": {
+ "id": {
+ "type": "string",
+ "title": "Id"
+ },
+ "repo_id": {
+ "type": "string",
+ "title": "Repo Id"
+ },
+ "commit_hash": {
+ "type": "string",
+ "title": "Commit Hash"
+ },
+ "author_name": {
+ "anyOf": [
+ {
+ "type": "string"
+ },
+ {
+ "type": "null"
+ }
+ ],
+ "title": "Author Name"
+ },
+ "author_email": {
+ "anyOf": [
+ {
+ "type": "string"
+ },
+ {
+ "type": "null"
+ }
+ ],
+ "title": "Author Email"
+ },
+ "message": {
+ "anyOf": [
+ {
+ "type": "string"
+ },
+ {
+ "type": "null"
+ }
+ ],
+ "title": "Message"
+ },
+ "committed_at": {
+ "anyOf": [
+ {
+ "type": "string",
+ "format": "date-time"
+ },
+ {
+ "type": "null"
+ }
+ ],
+ "title": "Committed At"
+ },
+ "affected_assets": {
+ "items": {
+ "additionalProperties": true,
+ "type": "object"
+ },
+ "type": "array",
+ "title": "Affected Assets"
+ },
+ "files_changed": {
+ "anyOf": [
+ {
+ "items": {
+ "type": "string"
+ },
+ "type": "array"
+ },
+ {
+ "type": "null"
+ }
+ ],
+ "title": "Files Changed"
+ },
+ "created_at": {
+ "type": "string",
+ "format": "date-time",
+ "title": "Created At"
+ }
+ },
+ "type": "object",
+ "required": [
+ "id",
+ "repo_id",
+ "commit_hash",
+ "author_name",
+ "author_email",
+ "message",
+ "committed_at",
+ "affected_assets",
+ "files_changed",
+ "created_at"
+ ],
+ "title": "CodeChangeResponse",
+ "description": "Response for a code change (commit)."
+ },
+ "CodifyFormat": {
+ "type": "string",
+ "enum": [
+ "gx",
+ "dbt",
+ "soda",
+ "sql"
+ ],
+ "title": "CodifyFormat",
+ "description": "Output format for test generation."
+ },
+ "CodifyRequest": {
+ "properties": {
+ "format": {
+ "$ref": "#/components/schemas/CodifyFormat",
+ "default": "sql"
+ }
+ },
+ "type": "object",
+ "title": "CodifyRequest",
+ "description": "Request body for codifying an investigation."
+ },
+ "CodifyResponse": {
+ "properties": {
+ "investigation_id": {
+ "type": "string",
+ "format": "uuid",
+ "title": "Investigation Id"
+ },
+ "format": {
+ "type": "string",
+ "title": "Format"
+ },
+ "content": {
+ "type": "string",
+ "title": "Content"
+ },
+ "tests": {
+ "items": {
+ "$ref": "#/components/schemas/CodifyTestResponse"
+ },
+ "type": "array",
+ "title": "Tests"
+ },
+ "confidence": {
+ "type": "number",
+ "title": "Confidence"
+ }
+ },
+ "type": "object",
+ "required": [
+ "investigation_id",
+ "format",
+ "content",
+ "tests",
+ "confidence"
+ ],
+ "title": "CodifyResponse",
+ "description": "Response for codifying an investigation."
+ },
+ "CodifyTestResponse": {
+ "properties": {
+ "test_type": {
+ "type": "string",
+ "title": "Test Type"
+ },
+ "column": {
+ "anyOf": [
+ {
+ "type": "string"
+ },
+ {
+ "type": "null"
+ }
+ ],
+ "title": "Column"
+ },
+ "table": {
+ "type": "string",
+ "title": "Table"
+ },
+ "description": {
+ "type": "string",
+ "title": "Description"
+ }
+ },
+ "type": "object",
+ "required": [
+ "test_type",
+ "table",
+ "description"
+ ],
+ "title": "CodifyTestResponse",
+ "description": "A single test extracted from the investigation."
+ },
"ColumnLineageListResponse": {
"properties": {
"lineage": {
@@ -10128,17 +11915,63 @@
},
"target_column": {
"type": "string",
- "title": "Target Column"
+ "title": "Target Column"
+ },
+ "source_dataset": {
+ "type": "string",
+ "title": "Source Dataset"
+ },
+ "source_column": {
+ "type": "string",
+ "title": "Source Column"
+ },
+ "transformation": {
+ "anyOf": [
+ {
+ "type": "string"
+ },
+ {
+ "type": "null"
+ }
+ ],
+ "title": "Transformation"
+ },
+ "confidence": {
+ "type": "number",
+ "title": "Confidence",
+ "default": 1.0
+ }
+ },
+ "type": "object",
+ "required": [
+ "target_dataset",
+ "target_column",
+ "source_dataset",
+ "source_column"
+ ],
+ "title": "ColumnLineageResponse",
+ "description": "Response for column lineage."
+ },
+ "ConnectGitRepoRequest": {
+ "properties": {
+ "name": {
+ "type": "string",
+ "maxLength": 200,
+ "minLength": 1,
+ "title": "Name"
},
- "source_dataset": {
+ "url": {
"type": "string",
- "title": "Source Dataset"
+ "maxLength": 500,
+ "minLength": 1,
+ "title": "Url"
},
- "source_column": {
+ "provider": {
"type": "string",
- "title": "Source Column"
+ "pattern": "^(github|gitlab|bitbucket)$",
+ "title": "Provider"
},
- "transformation": {
+ "access_token": {
"anyOf": [
{
"type": "string"
@@ -10147,23 +11980,36 @@
"type": "null"
}
],
- "title": "Transformation"
+ "title": "Access Token"
},
- "confidence": {
- "type": "number",
- "title": "Confidence",
- "default": 1.0
+ "tracked_paths": {
+ "anyOf": [
+ {
+ "items": {
+ "type": "string"
+ },
+ "type": "array"
+ },
+ {
+ "type": "null"
+ }
+ ],
+ "title": "Tracked Paths"
+ },
+ "default_branch": {
+ "type": "string",
+ "title": "Default Branch",
+ "default": "main"
}
},
"type": "object",
"required": [
- "target_dataset",
- "target_column",
- "source_dataset",
- "source_column"
+ "name",
+ "url",
+ "provider"
],
- "title": "ColumnLineageResponse",
- "description": "Response for column lineage."
+ "title": "ConnectGitRepoRequest",
+ "description": "Request to connect a git repository."
},
"ContextBundleResponse": {
"properties": {
@@ -10192,7 +12038,7 @@
"lineage": {
"anyOf": [
{
- "$ref": "#/components/schemas/dataing__entrypoints__api__routes__bundles__LineageGraphResponse"
+ "$ref": "#/components/schemas/LineageGraphResponse"
},
{
"type": "null"
@@ -10435,6 +12281,70 @@
"title": "CreateRepoMappingRequest",
"description": "Request to create a dataset-to-repository mapping."
},
+ "CreateSessionRequest": {
+ "properties": {
+ "parent_investigation_id": {
+ "anyOf": [
+ {
+ "type": "string",
+ "format": "uuid"
+ },
+ {
+ "type": "null"
+ }
+ ],
+ "title": "Parent Investigation Id",
+ "description": "Optional parent investigation to link to"
+ },
+ "title": {
+ "anyOf": [
+ {
+ "type": "string"
+ },
+ {
+ "type": "null"
+ }
+ ],
+ "title": "Title",
+ "description": "Optional session title"
+ },
+ "metadata": {
+ "additionalProperties": true,
+ "type": "object",
+ "title": "Metadata"
+ }
+ },
+ "type": "object",
+ "title": "CreateSessionRequest",
+ "description": "Request to create a new assistant session."
+ },
+ "CreateSessionResponse": {
+ "properties": {
+ "session_id": {
+ "type": "string",
+ "format": "uuid",
+ "title": "Session Id"
+ },
+ "investigation_id": {
+ "type": "string",
+ "format": "uuid",
+ "title": "Investigation Id"
+ },
+ "created_at": {
+ "type": "string",
+ "format": "date-time",
+ "title": "Created At"
+ }
+ },
+ "type": "object",
+ "required": [
+ "session_id",
+ "investigation_id",
+ "created_at"
+ ],
+ "title": "CreateSessionResponse",
+ "description": "Response from creating a session."
+ },
"CreateUserRequest": {
"properties": {
"email": {
@@ -11325,6 +13235,15 @@
"title": "ExplainResponse",
"description": "Response for explanation."
},
+ "ExportFormat": {
+ "type": "string",
+ "enum": [
+ "json",
+ "markdown"
+ ],
+ "title": "ExportFormat",
+ "description": "Export format options."
+ },
"FeedbackCreate": {
"properties": {
"target_type": {
@@ -11335,7 +13254,8 @@
"evidence",
"synthesis",
"investigation",
- "recommendation"
+ "recommendation",
+ "assistant_message"
],
"title": "Target Type"
},
@@ -11344,8 +13264,15 @@
"title": "Target Id"
},
"investigation_id": {
- "type": "string",
- "format": "uuid",
+ "anyOf": [
+ {
+ "type": "string",
+ "format": "uuid"
+ },
+ {
+ "type": "null"
+ }
+ ],
"title": "Investigation Id"
},
"rating": {
@@ -11383,7 +13310,6 @@
"required": [
"target_type",
"target_id",
- "investigation_id",
"rating"
],
"title": "FeedbackCreate",
@@ -11470,6 +13396,126 @@
"title": "FeedbackResponse",
"description": "Response after submitting feedback."
},
+ "GitRepoListResponse": {
+ "properties": {
+ "items": {
+ "items": {
+ "$ref": "#/components/schemas/GitRepoResponse"
+ },
+ "type": "array",
+ "title": "Items"
+ },
+ "total": {
+ "type": "integer",
+ "title": "Total"
+ }
+ },
+ "type": "object",
+ "required": [
+ "items",
+ "total"
+ ],
+ "title": "GitRepoListResponse",
+ "description": "Response for a list of git repositories."
+ },
+ "GitRepoResponse": {
+ "properties": {
+ "id": {
+ "type": "string",
+ "title": "Id"
+ },
+ "name": {
+ "type": "string",
+ "title": "Name"
+ },
+ "url": {
+ "type": "string",
+ "title": "Url"
+ },
+ "provider": {
+ "type": "string",
+ "title": "Provider"
+ },
+ "tracked_paths": {
+ "anyOf": [
+ {
+ "items": {
+ "type": "string"
+ },
+ "type": "array"
+ },
+ {
+ "type": "null"
+ }
+ ],
+ "title": "Tracked Paths"
+ },
+ "default_branch": {
+ "type": "string",
+ "title": "Default Branch"
+ },
+ "last_sync_at": {
+ "anyOf": [
+ {
+ "type": "string",
+ "format": "date-time"
+ },
+ {
+ "type": "null"
+ }
+ ],
+ "title": "Last Sync At"
+ },
+ "sync_status": {
+ "type": "string",
+ "title": "Sync Status"
+ },
+ "sync_error": {
+ "anyOf": [
+ {
+ "type": "string"
+ },
+ {
+ "type": "null"
+ }
+ ],
+ "title": "Sync Error"
+ },
+ "created_at": {
+ "type": "string",
+ "format": "date-time",
+ "title": "Created At"
+ },
+ "updated_at": {
+ "anyOf": [
+ {
+ "type": "string",
+ "format": "date-time"
+ },
+ {
+ "type": "null"
+ }
+ ],
+ "title": "Updated At"
+ }
+ },
+ "type": "object",
+ "required": [
+ "id",
+ "name",
+ "url",
+ "provider",
+ "tracked_paths",
+ "default_branch",
+ "last_sync_at",
+ "sync_status",
+ "sync_error",
+ "created_at",
+ "updated_at"
+ ],
+ "title": "GitRepoResponse",
+ "description": "Response for a git repository."
+ },
"HTTPValidationError": {
"properties": {
"detail": {
@@ -11483,6 +13529,41 @@
"type": "object",
"title": "HTTPValidationError"
},
+ "ImportSnapshotResponse": {
+ "properties": {
+ "investigation_id": {
+ "type": "string",
+ "format": "uuid",
+ "title": "Investigation Id"
+ },
+ "status": {
+ "type": "string",
+ "title": "Status",
+ "default": "imported"
+ },
+ "original_investigation_id": {
+ "type": "string",
+ "title": "Original Investigation Id"
+ },
+ "evidence_count": {
+ "type": "integer",
+ "title": "Evidence Count"
+ },
+ "is_replay": {
+ "type": "boolean",
+ "title": "Is Replay",
+ "default": true
+ }
+ },
+ "type": "object",
+ "required": [
+ "investigation_id",
+ "original_investigation_id",
+ "evidence_count"
+ ],
+ "title": "ImportSnapshotResponse",
+ "description": "Response for importing a snapshot archive."
+ },
"InvestigationListItem": {
"properties": {
"investigation_id": {
@@ -12768,40 +14849,35 @@
"LineageGraphResponse": {
"properties": {
"root": {
- "type": "string",
+ "anyOf": [
+ {
+ "type": "string"
+ },
+ {
+ "type": "null"
+ }
+ ],
"title": "Root"
},
"datasets": {
"additionalProperties": {
- "$ref": "#/components/schemas/DatasetResponse"
+ "additionalProperties": true,
+ "type": "object"
},
"type": "object",
"title": "Datasets"
},
"edges": {
"items": {
- "$ref": "#/components/schemas/LineageEdgeResponse"
+ "$ref": "#/components/schemas/LineageEdge"
},
"type": "array",
"title": "Edges"
- },
- "jobs": {
- "additionalProperties": {
- "$ref": "#/components/schemas/JobResponse"
- },
- "type": "object",
- "title": "Jobs"
- }
- },
- "type": "object",
- "required": [
- "root",
- "datasets",
- "edges",
- "jobs"
- ],
+ }
+ },
+ "type": "object",
"title": "LineageGraphResponse",
- "description": "Response for a lineage graph."
+ "description": "Lineage graph response."
},
"LineageProviderResponse": {
"properties": {
@@ -12856,6 +14932,23 @@
"title": "LineageProvidersResponse",
"description": "Response for listing lineage providers."
},
+ "ListSessionsResponse": {
+ "properties": {
+ "sessions": {
+ "items": {
+ "$ref": "#/components/schemas/SessionSummary"
+ },
+ "type": "array",
+ "title": "Sessions"
+ }
+ },
+ "type": "object",
+ "required": [
+ "sessions"
+ ],
+ "title": "ListSessionsResponse",
+ "description": "Response from listing sessions."
+ },
"LoginRequest": {
"properties": {
"email": {
@@ -12943,6 +15036,73 @@
"title": "MatchedPatternResponse",
"description": "A pattern that was matched during investigation."
},
+ "MessageResponse": {
+ "properties": {
+ "id": {
+ "type": "string",
+ "format": "uuid",
+ "title": "Id"
+ },
+ "role": {
+ "$ref": "#/components/schemas/MessageRole"
+ },
+ "content": {
+ "type": "string",
+ "title": "Content"
+ },
+ "tool_calls": {
+ "anyOf": [
+ {
+ "items": {
+ "additionalProperties": true,
+ "type": "object"
+ },
+ "type": "array"
+ },
+ {
+ "type": "null"
+ }
+ ],
+ "title": "Tool Calls"
+ },
+ "created_at": {
+ "type": "string",
+ "format": "date-time",
+ "title": "Created At"
+ },
+ "token_count": {
+ "anyOf": [
+ {
+ "type": "integer"
+ },
+ {
+ "type": "null"
+ }
+ ],
+ "title": "Token Count"
+ }
+ },
+ "type": "object",
+ "required": [
+ "id",
+ "role",
+ "content",
+ "created_at"
+ ],
+ "title": "MessageResponse",
+ "description": "A message in a session."
+ },
+ "MessageRole": {
+ "type": "string",
+ "enum": [
+ "user",
+ "assistant",
+ "system",
+ "tool"
+ ],
+ "title": "MessageRole",
+ "description": "Message role types."
+ },
"ModifyRequest": {
"properties": {
"comment": {
@@ -13152,6 +15312,112 @@
"title": "OrgMemberResponse",
"description": "Response for an org member."
},
+ "PageContext": {
+ "properties": {
+ "route": {
+ "type": "string",
+ "title": "Route"
+ },
+ "route_pattern": {
+ "type": "string",
+ "title": "Route Pattern"
+ },
+ "route_params": {
+ "additionalProperties": {
+ "type": "string"
+ },
+ "type": "object",
+ "title": "Route Params"
+ },
+ "page_type": {
+ "type": "string",
+ "title": "Page Type"
+ },
+ "page_title": {
+ "type": "string",
+ "title": "Page Title"
+ },
+ "page_data": {
+ "additionalProperties": true,
+ "type": "object",
+ "title": "Page Data"
+ },
+ "errors": {
+ "items": {
+ "$ref": "#/components/schemas/PageContextError"
+ },
+ "type": "array",
+ "title": "Errors"
+ }
+ },
+ "type": "object",
+ "required": [
+ "route",
+ "route_pattern",
+ "page_type",
+ "page_title"
+ ],
+ "title": "PageContext",
+ "description": "Context about the page the user is currently viewing."
+ },
+ "PageContextError": {
+ "properties": {
+ "type": {
+ "type": "string",
+ "title": "Type",
+ "description": "Error type: api, react, or console"
+ },
+ "message": {
+ "type": "string",
+ "title": "Message"
+ },
+ "status": {
+ "anyOf": [
+ {
+ "type": "integer"
+ },
+ {
+ "type": "null"
+ }
+ ],
+ "title": "Status"
+ },
+ "url": {
+ "anyOf": [
+ {
+ "type": "string"
+ },
+ {
+ "type": "null"
+ }
+ ],
+ "title": "Url"
+ },
+ "timestamp": {
+ "type": "integer",
+ "title": "Timestamp"
+ },
+ "stack_preview": {
+ "anyOf": [
+ {
+ "type": "string"
+ },
+ {
+ "type": "null"
+ }
+ ],
+ "title": "Stack Preview"
+ }
+ },
+ "type": "object",
+ "required": [
+ "type",
+ "message",
+ "timestamp"
+ ],
+ "title": "PageContextError",
+ "description": "A frontend error captured by the error bus."
+ },
"PasswordResetConfirm": {
"properties": {
"token": {
@@ -13470,6 +15736,64 @@
"title": "QueryResponse",
"description": "Response for query execution."
},
+ "RecentCatchResponse": {
+ "properties": {
+ "test_id": {
+ "type": "string",
+ "title": "Test Id"
+ },
+ "run_at": {
+ "type": "string",
+ "title": "Run At"
+ },
+ "failure_message": {
+ "anyOf": [
+ {
+ "type": "string"
+ },
+ {
+ "type": "null"
+ }
+ ],
+ "title": "Failure Message"
+ },
+ "test_type": {
+ "type": "string",
+ "title": "Test Type"
+ },
+ "table": {
+ "type": "string",
+ "title": "Table"
+ },
+ "column": {
+ "anyOf": [
+ {
+ "type": "string"
+ },
+ {
+ "type": "null"
+ }
+ ],
+ "title": "Column"
+ },
+ "investigation_id": {
+ "type": "string",
+ "title": "Investigation Id"
+ }
+ },
+ "type": "object",
+ "required": [
+ "test_id",
+ "run_at",
+ "failure_message",
+ "test_type",
+ "table",
+ "column",
+ "investigation_id"
+ ],
+ "title": "RecentCatchResponse",
+ "description": "A recent test failure catch."
+ },
"RecentInvestigation": {
"properties": {
"id": {
@@ -14427,21 +16751,140 @@
"properties": {
"status": {
"type": "string",
- "title": "Status"
+ "title": "Status"
+ },
+ "investigation_id": {
+ "type": "string",
+ "format": "uuid",
+ "title": "Investigation Id"
+ }
+ },
+ "type": "object",
+ "required": [
+ "status",
+ "investigation_id"
+ ],
+ "title": "SendMessageResponse",
+ "description": "Response for sending a message."
+ },
+ "SessionDetailResponse": {
+ "properties": {
+ "id": {
+ "type": "string",
+ "format": "uuid",
+ "title": "Id"
+ },
+ "investigation_id": {
+ "type": "string",
+ "format": "uuid",
+ "title": "Investigation Id"
+ },
+ "title": {
+ "anyOf": [
+ {
+ "type": "string"
+ },
+ {
+ "type": "null"
+ }
+ ],
+ "title": "Title"
+ },
+ "created_at": {
+ "type": "string",
+ "format": "date-time",
+ "title": "Created At"
+ },
+ "last_activity": {
+ "type": "string",
+ "format": "date-time",
+ "title": "Last Activity"
+ },
+ "token_count": {
+ "type": "integer",
+ "title": "Token Count"
+ },
+ "messages": {
+ "items": {
+ "$ref": "#/components/schemas/MessageResponse"
+ },
+ "type": "array",
+ "title": "Messages"
+ },
+ "parent_investigation_id": {
+ "anyOf": [
+ {
+ "type": "string",
+ "format": "uuid"
+ },
+ {
+ "type": "null"
+ }
+ ],
+ "title": "Parent Investigation Id"
+ }
+ },
+ "type": "object",
+ "required": [
+ "id",
+ "investigation_id",
+ "title",
+ "created_at",
+ "last_activity",
+ "token_count",
+ "messages"
+ ],
+ "title": "SessionDetailResponse",
+ "description": "Full session details with messages."
+ },
+ "SessionSummary": {
+ "properties": {
+ "id": {
+ "type": "string",
+ "format": "uuid",
+ "title": "Id"
+ },
+ "title": {
+ "anyOf": [
+ {
+ "type": "string"
+ },
+ {
+ "type": "null"
+ }
+ ],
+ "title": "Title"
+ },
+ "created_at": {
+ "type": "string",
+ "format": "date-time",
+ "title": "Created At"
},
- "investigation_id": {
+ "last_activity": {
"type": "string",
- "format": "uuid",
- "title": "Investigation Id"
+ "format": "date-time",
+ "title": "Last Activity"
+ },
+ "message_count": {
+ "type": "integer",
+ "title": "Message Count"
+ },
+ "token_count": {
+ "type": "integer",
+ "title": "Token Count"
}
},
"type": "object",
"required": [
- "status",
- "investigation_id"
+ "id",
+ "title",
+ "created_at",
+ "last_activity",
+ "message_count",
+ "token_count"
],
- "title": "SendMessageResponse",
- "description": "Response for sending a message."
+ "title": "SessionSummary",
+ "description": "Summary of a session for listing."
},
"SeverityOverride": {
"properties": {
@@ -14486,6 +16929,76 @@
"title": "SeverityOverride",
"description": "Override SLA times for a specific severity."
},
+ "SnapshotCheckpointParam": {
+ "type": "string",
+ "enum": [
+ "start",
+ "hypothesis_generated",
+ "evidence_collected",
+ "complete",
+ "failed"
+ ],
+ "title": "SnapshotCheckpointParam",
+ "description": "Valid checkpoint values for snapshot download."
+ },
+ "SnapshotListItem": {
+ "properties": {
+ "checkpoint": {
+ "type": "string",
+ "title": "Checkpoint"
+ },
+ "captured_at": {
+ "type": "string",
+ "title": "Captured At"
+ },
+ "storage_path": {
+ "type": "string",
+ "title": "Storage Path"
+ },
+ "size_bytes": {
+ "anyOf": [
+ {
+ "type": "integer"
+ },
+ {
+ "type": "null"
+ }
+ ],
+ "title": "Size Bytes"
+ }
+ },
+ "type": "object",
+ "required": [
+ "checkpoint",
+ "captured_at",
+ "storage_path"
+ ],
+ "title": "SnapshotListItem",
+ "description": "Snapshot metadata for listing."
+ },
+ "SnapshotListResponse": {
+ "properties": {
+ "investigation_id": {
+ "type": "string",
+ "format": "uuid",
+ "title": "Investigation Id"
+ },
+ "snapshots": {
+ "items": {
+ "$ref": "#/components/schemas/SnapshotListItem"
+ },
+ "type": "array",
+ "title": "Snapshots"
+ }
+ },
+ "type": "object",
+ "required": [
+ "investigation_id",
+ "snapshots"
+ ],
+ "title": "SnapshotListResponse",
+ "description": "Response for listing available snapshots."
+ },
"SourceTypeResponse": {
"properties": {
"type": {
@@ -14712,6 +17225,25 @@
"title": "SyncResponse",
"description": "Response for schema sync."
},
+ "SyncTriggerResponse": {
+ "properties": {
+ "message": {
+ "type": "string",
+ "title": "Message"
+ },
+ "sync_status": {
+ "type": "string",
+ "title": "Sync Status"
+ }
+ },
+ "type": "object",
+ "required": [
+ "message",
+ "sync_status"
+ ],
+ "title": "SyncTriggerResponse",
+ "description": "Response for sync trigger."
+ },
"TagCreate": {
"properties": {
"name": {
@@ -15457,6 +17989,32 @@
"title": "TemporalStatusResponse",
"description": "Status response for Temporal-based investigations."
},
+ "TestAdoptionRequest": {
+ "properties": {
+ "test_id": {
+ "type": "string",
+ "format": "uuid",
+ "title": "Test Id"
+ },
+ "adopted_by": {
+ "anyOf": [
+ {
+ "type": "string"
+ },
+ {
+ "type": "null"
+ }
+ ],
+ "title": "Adopted By"
+ }
+ },
+ "type": "object",
+ "required": [
+ "test_id"
+ ],
+ "title": "TestAdoptionRequest",
+ "description": "Request to mark a test as adopted."
+ },
"TestConnectionRequest": {
"properties": {
"type": {
@@ -15518,6 +18076,76 @@
"title": "TestConnectionResponse",
"description": "Response for testing a connection."
},
+ "TestRunResultRequest": {
+ "properties": {
+ "test_id": {
+ "type": "string",
+ "format": "uuid",
+ "title": "Test Id"
+ },
+ "passed": {
+ "type": "boolean",
+ "title": "Passed"
+ },
+ "failure_message": {
+ "anyOf": [
+ {
+ "type": "string"
+ },
+ {
+ "type": "null"
+ }
+ ],
+ "title": "Failure Message"
+ }
+ },
+ "type": "object",
+ "required": [
+ "test_id",
+ "passed"
+ ],
+ "title": "TestRunResultRequest",
+ "description": "Request to record a test run result."
+ },
+ "TestTrackingStatsResponse": {
+ "properties": {
+ "tests_generated": {
+ "type": "integer",
+ "title": "Tests Generated"
+ },
+ "tests_adopted": {
+ "type": "integer",
+ "title": "Tests Adopted"
+ },
+ "tests_run": {
+ "type": "integer",
+ "title": "Tests Run"
+ },
+ "issues_caught": {
+ "type": "integer",
+ "title": "Issues Caught"
+ },
+ "adoption_rate": {
+ "type": "number",
+ "title": "Adoption Rate"
+ },
+ "effectiveness_rate": {
+ "type": "number",
+ "title": "Effectiveness Rate"
+ }
+ },
+ "type": "object",
+ "required": [
+ "tests_generated",
+ "tests_adopted",
+ "tests_run",
+ "issues_caught",
+ "adoption_rate",
+ "effectiveness_rate"
+ ],
+ "title": "TestTrackingStatsResponse",
+ "description": "Test tracking statistics response."
+ },
"TokenResponse": {
"properties": {
"access_token": {
@@ -15597,6 +18225,51 @@
"title": "UnreadCountResponse",
"description": "Unread notification count response."
},
+ "UpdateGitRepoRequest": {
+ "properties": {
+ "name": {
+ "anyOf": [
+ {
+ "type": "string",
+ "maxLength": 200,
+ "minLength": 1
+ },
+ {
+ "type": "null"
+ }
+ ],
+ "title": "Name"
+ },
+ "tracked_paths": {
+ "anyOf": [
+ {
+ "items": {
+ "type": "string"
+ },
+ "type": "array"
+ },
+ {
+ "type": "null"
+ }
+ ],
+ "title": "Tracked Paths"
+ },
+ "default_branch": {
+ "anyOf": [
+ {
+ "type": "string"
+ },
+ {
+ "type": "null"
+ }
+ ],
+ "title": "Default Branch"
+ }
+ },
+ "type": "object",
+ "title": "UpdateGitRepoRequest",
+ "description": "Request to update a git repository."
+ },
"UpdateRepoMappingRequest": {
"properties": {
"dataset_pattern": {
@@ -16136,38 +18809,51 @@
"title": "WeeklyUsageResponse",
"description": "Weekly usage statistics response."
},
- "dataing__entrypoints__api__routes__bundles__LineageGraphResponse": {
+ "dataing__entrypoints__api__routes__assistant__SendMessageRequest": {
"properties": {
- "root": {
+ "content": {
+ "type": "string",
+ "maxLength": 32000,
+ "minLength": 1,
+ "title": "Content"
+ },
+ "page_context": {
"anyOf": [
{
- "type": "string"
+ "$ref": "#/components/schemas/PageContext"
},
{
"type": "null"
}
- ],
- "title": "Root"
- },
- "datasets": {
- "additionalProperties": {
- "additionalProperties": true,
- "type": "object"
- },
- "type": "object",
- "title": "Datasets"
+ ]
+ }
+ },
+ "type": "object",
+ "required": [
+ "content"
+ ],
+ "title": "SendMessageRequest",
+ "description": "Request to send a message."
+ },
+ "dataing__entrypoints__api__routes__assistant__SendMessageResponse": {
+ "properties": {
+ "message_id": {
+ "type": "string",
+ "format": "uuid",
+ "title": "Message Id"
},
- "edges": {
- "items": {
- "$ref": "#/components/schemas/LineageEdge"
- },
- "type": "array",
- "title": "Edges"
+ "status": {
+ "type": "string",
+ "title": "Status",
+ "default": "processing"
}
},
"type": "object",
- "title": "LineageGraphResponse",
- "description": "Lineage graph response."
+ "required": [
+ "message_id"
+ ],
+ "title": "SendMessageResponse",
+ "description": "Response from sending a message."
},
"dataing__entrypoints__api__routes__credentials__TestConnectionResponse": {
"properties": {
@@ -16204,6 +18890,44 @@
],
"title": "TestConnectionResponse",
"description": "Response for testing credentials."
+ },
+ "dataing__entrypoints__api__routes__lineage__LineageGraphResponse": {
+ "properties": {
+ "root": {
+ "type": "string",
+ "title": "Root"
+ },
+ "datasets": {
+ "additionalProperties": {
+ "$ref": "#/components/schemas/DatasetResponse"
+ },
+ "type": "object",
+ "title": "Datasets"
+ },
+ "edges": {
+ "items": {
+ "$ref": "#/components/schemas/LineageEdgeResponse"
+ },
+ "type": "array",
+ "title": "Edges"
+ },
+ "jobs": {
+ "additionalProperties": {
+ "$ref": "#/components/schemas/JobResponse"
+ },
+ "type": "object",
+ "title": "Jobs"
+ }
+ },
+ "type": "object",
+ "required": [
+ "root",
+ "datasets",
+ "edges",
+ "jobs"
+ ],
+ "title": "LineageGraphResponse",
+ "description": "Response for a lineage graph."
}
},
"securitySchemes": {
diff --git a/python-packages/dataing/pyproject.toml b/python-packages/dataing/pyproject.toml
index 1bd7ce0b5..3b80e5458 100644
--- a/python-packages/dataing/pyproject.toml
+++ b/python-packages/dataing/pyproject.toml
@@ -34,6 +34,7 @@ dependencies = [
"redis>=5.0.0",
"jsonschema>=4.20.0",
"rfc8785>=0.1.4",
+ "docker>=7.0.0",
]
[project.optional-dependencies]
diff --git a/python-packages/dataing/src/dataing/adapters/investigation_feedback/types.py b/python-packages/dataing/src/dataing/adapters/investigation_feedback/types.py
index f7abcca52..25076d9cc 100644
--- a/python-packages/dataing/src/dataing/adapters/investigation_feedback/types.py
+++ b/python-packages/dataing/src/dataing/adapters/investigation_feedback/types.py
@@ -42,6 +42,7 @@ class EventType(Enum):
FEEDBACK_INVESTIGATION = "feedback.investigation"
FEEDBACK_RECOMMENDATION = "feedback.recommendation"
FEEDBACK_FIX = "feedback.fix"
+ FEEDBACK_ASSISTANT_MESSAGE = "feedback.assistant_message"
# Fix events
FIX_PROPOSED = "fix.proposed"
diff --git a/python-packages/dataing/src/dataing/agents/assistant.py b/python-packages/dataing/src/dataing/agents/assistant.py
new file mode 100644
index 000000000..496518cf7
--- /dev/null
+++ b/python-packages/dataing/src/dataing/agents/assistant.py
@@ -0,0 +1,483 @@
+"""DataingAssistant - Unified AI assistant for Dataing platform.
+
+Provides help with infrastructure debugging, data questions, and investigation support.
+"""
+
+from __future__ import annotations
+
+import logging
+from pathlib import Path
+from typing import TYPE_CHECKING, Any
+
+from bond import BondAgent, StreamHandlers
+from pydantic_ai.models.anthropic import AnthropicModel
+from pydantic_ai.providers.anthropic import AnthropicProvider
+from pydantic_ai.tools import Tool
+
+from dataing.agents.tools.docker import (
+ find_unhealthy_docker_containers,
+ get_docker_container_health,
+ get_docker_container_stats,
+ get_docker_container_status,
+ list_docker_containers,
+)
+from dataing.agents.tools.local_files import (
+ list_directory,
+ read_local_file,
+ search_in_files,
+)
+from dataing.agents.tools.log_providers import (
+ LocalFileLogProvider,
+ LogProviderConfig,
+)
+from dataing.agents.tools.log_providers.base import LogSource
+
+if TYPE_CHECKING:
+ from uuid import UUID
+
+logger = logging.getLogger(__name__)
+
+# System prompt for the Dataing Assistant
+ASSISTANT_SYSTEM_PROMPT = """You are the Dataing Assistant, an AI helper for the Dataing platform.
+
+## CRITICAL: Investigation Before Advice
+
+You have tools. USE THEM. Never give generic troubleshooting advice.
+
+When a user asks about infrastructure, data, or debugging:
+1. FIRST use your tools to investigate the actual system state
+2. THEN provide findings based on evidence you gathered
+3. NEVER respond with generic advice like "check your config" - that's useless
+
+## Your Capabilities
+
+- **Infrastructure debugging** - Docker containers, logs, config files
+- **Data questions** - Connected datasources, schemas
+- **Investigation support** - Context on investigations, findings
+- **Code understanding** - Local files, search, git history
+
+## Investigation Methodology
+
+For debugging questions, follow this pattern:
+
+1. **Observe** - Use tools to see actual state:
+ - `list_docker_containers()` - What's running?
+ - `get_docker_container_status("name")` - Container details
+ - `find_unhealthy_docker_containers()` - Any problems?
+
+2. **Gather evidence** - Read relevant files:
+ - `list_directory("demo/")` - What config files exist?
+ - `read_local_file("path")` - Read the actual config
+ - `search_in_files("keyword", "directory")` - Find related code
+
+3. **Analyze** - Look for discrepancies between:
+ - What the config says should happen
+ - What's actually happening (container state, logs)
+
+4. **Report** - Share specific findings with evidence
+
+## Allowed Directories
+
+Use these paths (NOT "." which will be blocked):
+- `demo/` - Docker configs, init scripts, fixtures
+- `python-packages/` - Backend Python code
+- `frontend/` - Frontend React code
+- `docs/` - Documentation
+
+## Page Awareness
+
+You receive context about what page the user is currently viewing in the Dataing UI.
+Use this to:
+- Answer "what am I looking at?" by describing the page and its data
+- Diagnose frontend errors from the "Recent Frontend Errors" section
+- Provide page-specific help based on the page type
+- Reference relevant entities (investigation IDs, dataset names, etc.) without asking
+
+## Response Format
+
+- Use markdown formatting
+- Code blocks with language hints
+- Show what you found, not generic advice
+- Include specific file paths and line numbers when relevant
+"""
+
+
+class DataingAssistant:
+ """Unified AI assistant for Dataing platform.
+
+ Provides help with:
+ - Infrastructure debugging (Docker, logs, config files)
+ - Data questions via connected datasources
+ - Investigation context and findings
+ - Git history and code understanding
+ """
+
+ def __init__(
+ self,
+ api_key: str,
+ tenant_id: UUID | str,
+ *,
+ model: str = "claude-sonnet-4-20250514",
+ repo_path: str | Path = ".",
+ github_token: str | None = None,
+ log_directories: list[str] | None = None,
+ max_retries: int = 2,
+ ) -> None:
+ """Initialize the Dataing Assistant.
+
+ Args:
+ api_key: Anthropic API key.
+ tenant_id: Tenant ID for multi-tenancy isolation.
+ model: LLM model to use (default: Claude Sonnet for speed).
+ repo_path: Path to local git repository.
+ github_token: Optional GitHub token for git tools.
+ log_directories: Directories to scan for log files.
+ max_retries: Max retries on LLM errors.
+ """
+ self._tenant_id = str(tenant_id)
+ self._repo_path = Path(repo_path)
+ self._github_token = github_token
+
+ # Setup LLM provider
+ provider = AnthropicProvider(api_key=api_key)
+ self._model = AnthropicModel(model, provider=provider)
+
+ # Setup log provider for local files
+ self._log_provider = self._create_log_provider(log_directories or [])
+
+ # Build tool list
+ tools = self._build_tools()
+ logger.info(f"DataingAssistant initialized with {len(tools)} tools")
+ for tool in tools:
+ logger.info(f" - Tool: {tool.name}")
+
+ # Create the agent
+ self._agent: BondAgent[str, None] = BondAgent(
+ name="dataing-assistant",
+ instructions=ASSISTANT_SYSTEM_PROMPT,
+ model=self._model,
+ toolsets=[tools],
+ max_retries=max_retries,
+ )
+
+ def _create_log_provider(self, log_directories: list[str]) -> LocalFileLogProvider:
+ """Create a local file log provider.
+
+ Args:
+ log_directories: Directories to scan for logs.
+
+ Returns:
+ Configured LocalFileLogProvider.
+ """
+ config = LogProviderConfig(
+ source=LogSource.LOCAL_FILE,
+ name="Local Logs",
+ settings={"directories": log_directories},
+ )
+ return LocalFileLogProvider(
+ config=config,
+ log_directories=[Path(d) for d in log_directories],
+ )
+
+ def _build_tools(self) -> list[Tool[Any]]:
+ """Build the list of tools for the assistant.
+
+ Returns:
+ List of PydanticAI Tool instances.
+ """
+ tools: list[Tool[Any]] = []
+
+ # Local file tools
+ tools.append(Tool(read_local_file))
+ tools.append(Tool(search_in_files))
+ tools.append(Tool(list_directory))
+
+ # Docker tools
+ tools.append(Tool(list_docker_containers))
+ tools.append(Tool(get_docker_container_status))
+ tools.append(Tool(get_docker_container_health))
+ tools.append(Tool(get_docker_container_stats))
+ tools.append(Tool(find_unhealthy_docker_containers))
+
+ # Log tools
+ tools.append(Tool(self._get_logs))
+ tools.append(Tool(self._search_logs))
+ tools.append(Tool(self._get_recent_errors))
+
+ # Git tools (from bond-agent) - loaded lazily if available
+ git_tools = self._load_git_tools()
+ tools.extend(git_tools)
+
+ return tools
+
+ def _load_git_tools(self) -> list[Tool[Any]]:
+ """Load git tools from bond-agent if available.
+
+ Returns:
+ List of git-related tools.
+ """
+ tools: list[Tool[Any]] = []
+
+ try:
+ from bond.tools.githunter import githunter_toolset
+
+ # githunter_toolset is already a list of tools
+ tools.extend(githunter_toolset)
+ logger.info("Loaded githunter toolset")
+ except ImportError:
+ logger.debug("githunter tools not available")
+ except Exception as e:
+ logger.warning(f"Failed to load githunter tools: {e}")
+
+ if self._github_token:
+ try:
+ from bond.tools.github import github_toolset
+
+ # github_toolset is already a list of tools
+ tools.extend(github_toolset)
+ logger.info("Loaded github toolset")
+ except ImportError:
+ logger.debug("github tools not available")
+ except Exception as e:
+ logger.warning(f"Failed to load github tools: {e}")
+
+ return tools
+
+ async def _get_logs(
+ self,
+ source: str,
+ max_entries: int = 50,
+ filter_pattern: str | None = None,
+ ) -> str:
+ """Get logs from a source file.
+
+ Args:
+ source: Path to the log file.
+ max_entries: Maximum entries to return.
+ filter_pattern: Optional pattern to filter logs.
+
+ Returns:
+ Formatted log entries or error message.
+ """
+ result = await self._log_provider.get_logs(
+ source_id=source,
+ max_entries=max_entries,
+ filter_pattern=filter_pattern,
+ )
+
+ if not result.success:
+ return f"Error reading logs: {result.error}"
+
+ if not result.entries:
+ return f"No log entries found in {source}"
+
+ lines = [f"Logs from {source} ({len(result.entries)} entries):"]
+ for entry in result.entries:
+ ts = entry.timestamp.isoformat() if entry.timestamp else "?"
+ level = f"[{entry.level}]" if entry.level else ""
+ lines.append(f" {ts} {level} {entry.message[:200]}")
+
+ if result.truncated:
+ lines.append(f" ... (truncated, {result.next_token} more available)")
+
+ return "\n".join(lines)
+
+ async def _search_logs(
+ self,
+ pattern: str,
+ source: str | None = None,
+ max_entries: int = 20,
+ ) -> str:
+ """Search logs for a pattern.
+
+ Args:
+ pattern: Search pattern.
+ source: Optional specific log file to search.
+ max_entries: Maximum entries to return.
+
+ Returns:
+ Formatted search results.
+ """
+ result = await self._log_provider.search_logs(
+ pattern=pattern,
+ source_id=source,
+ max_entries=max_entries,
+ )
+
+ if not result.success:
+ return f"Error searching logs: {result.error}"
+
+ if not result.entries:
+ return f"No log entries matching '{pattern}'"
+
+ lines = [f"Found {len(result.entries)} entries matching '{pattern}':"]
+ for entry in result.entries:
+ ts = entry.timestamp.isoformat() if entry.timestamp else "?"
+ src = entry.source or "?"
+ lines.append(f" [{src}] {ts}: {entry.message[:150]}")
+
+ return "\n".join(lines)
+
+ async def _get_recent_errors(
+ self,
+ source: str,
+ max_entries: int = 10,
+ ) -> str:
+ """Get recent errors from a log file.
+
+ Args:
+ source: Path to the log file.
+ max_entries: Maximum errors to return.
+
+ Returns:
+ Formatted error entries.
+ """
+ result = await self._log_provider.get_recent_errors(
+ source_id=source,
+ max_entries=max_entries,
+ )
+
+ if not result.success:
+ return f"Error reading log errors: {result.error}"
+
+ if not result.entries:
+ return f"No errors found in {source}"
+
+ lines = [f"Recent errors from {source} ({len(result.entries)} found):"]
+ for entry in result.entries:
+ ts = entry.timestamp.isoformat() if entry.timestamp else "?"
+ lines.append(f" {ts}: {entry.message[:200]}")
+ # Include context if available
+ ctx_before = entry.metadata.get("context_before", [])
+ ctx_after = entry.metadata.get("context_after", [])
+ if ctx_before:
+ lines.append(f" Context before: {ctx_before[-1][:100]}")
+ if ctx_after:
+ lines.append(f" Context after: {ctx_after[0][:100]}")
+
+ return "\n".join(lines)
+
+ async def ask(
+ self,
+ question: str,
+ *,
+ session_id: str | None = None,
+ handlers: StreamHandlers | None = None,
+ context: dict[str, Any] | None = None,
+ ) -> str:
+ """Ask the assistant a question.
+
+ Args:
+ question: The user's question or request.
+ session_id: Optional session ID for conversation continuity.
+ handlers: Optional streaming handlers for real-time output.
+ context: Optional additional context (e.g., current investigation).
+
+ Returns:
+ The assistant's response.
+
+ Raises:
+ Exception: If the LLM call fails.
+ """
+ # Build user prompt with optional context
+ prompt = question
+ if context:
+ context_str = self._format_context(context)
+ prompt = f"{context_str}\n\nUser question: {question}"
+
+ # Add session context for conversation continuity
+ dynamic_instructions = None
+ if session_id:
+ dynamic_instructions = f"Session ID: {session_id}\nTenant ID: {self._tenant_id}"
+
+ result = await self._agent.ask(
+ prompt,
+ dynamic_instructions=dynamic_instructions,
+ handlers=handlers,
+ )
+
+ return str(result)
+
+ def _format_context(self, context: dict[str, Any]) -> str:
+ """Format additional context for the prompt.
+
+ Args:
+ context: Context dictionary.
+
+ Returns:
+ Formatted context string.
+ """
+ lines = ["## Current Context"]
+
+ # Page context (what the user is currently viewing)
+ if "page_context" in context:
+ pc = context["page_context"]
+ lines.append("")
+ lines.append("### Current Page")
+ if pc.get("page_title"):
+ lines.append(f"- Page: {pc['page_title']}")
+ lines.append(f"- Route: {pc.get('route', 'unknown')}")
+ lines.append(f"- Page type: {pc.get('page_type', 'unknown')}")
+
+ # Include page-specific data
+ page_data = pc.get("page_data", {})
+ for key, value in page_data.items():
+ lines.append(f"- {key}: {value}")
+
+ # Include route params
+ route_params = pc.get("route_params", {})
+ for key, value in route_params.items():
+ lines.append(f"- {key}: {value}")
+
+ # Include recent frontend errors
+ page_errors = pc.get("errors", [])
+ if page_errors:
+ lines.append("")
+ lines.append("### Recent Frontend Errors")
+ for err in page_errors[-5:]:
+ err_type = err.get("type", "unknown").upper()
+ msg = err.get("message", "Unknown error")
+ status = err.get("status")
+ url = err.get("url")
+ prefix = f"[{err_type}]"
+ detail = msg
+ if status:
+ detail = f"HTTP {status}: {msg}"
+ lines.append(f"- {prefix} {detail}")
+ if url:
+ lines.append(f" Endpoint: {url}")
+
+ if "investigation" in context:
+ inv = context["investigation"]
+ lines.append(f"- Investigation: {inv.get('id', 'unknown')}")
+ lines.append(f"- Status: {inv.get('status', 'unknown')}")
+ if inv.get("finding"):
+ lines.append(f"- Finding: {inv['finding'].get('root_cause', 'pending')}")
+
+ if "datasource" in context:
+ ds = context["datasource"]
+ lines.append(f"- Connected to: {ds.get('name', 'unknown')} ({ds.get('type', '')})")
+
+ if "recent_alerts" in context:
+ alerts = context["recent_alerts"]
+ lines.append(f"- Recent alerts: {len(alerts)}")
+
+ return "\n".join(lines)
+
+
+def create_assistant(
+ api_key: str,
+ tenant_id: UUID | str,
+ **kwargs: Any,
+) -> DataingAssistant:
+ """Create a DataingAssistant instance.
+
+ Args:
+ api_key: Anthropic API key.
+ tenant_id: Tenant ID for isolation.
+ **kwargs: Additional arguments passed to DataingAssistant.
+
+ Returns:
+ Configured DataingAssistant instance.
+ """
+ return DataingAssistant(api_key=api_key, tenant_id=tenant_id, **kwargs)
diff --git a/python-packages/dataing/src/dataing/agents/tools/__init__.py b/python-packages/dataing/src/dataing/agents/tools/__init__.py
new file mode 100644
index 000000000..758f22703
--- /dev/null
+++ b/python-packages/dataing/src/dataing/agents/tools/__init__.py
@@ -0,0 +1,18 @@
+"""Assistant tools package.
+
+Provides a unified tool registry for the Dataing Assistant agent.
+"""
+
+from dataing.agents.tools.registry import (
+ ToolCategory,
+ ToolConfig,
+ ToolRegistry,
+ get_default_registry,
+)
+
+__all__ = [
+ "ToolCategory",
+ "ToolConfig",
+ "ToolRegistry",
+ "get_default_registry",
+]
diff --git a/python-packages/dataing/src/dataing/agents/tools/docker.py b/python-packages/dataing/src/dataing/agents/tools/docker.py
new file mode 100644
index 000000000..5d39b96d7
--- /dev/null
+++ b/python-packages/dataing/src/dataing/agents/tools/docker.py
@@ -0,0 +1,624 @@
+"""Docker status tool for the Dataing Assistant.
+
+Provides tools to check Docker container status, health, and resource usage.
+"""
+
+from __future__ import annotations
+
+import asyncio
+import logging
+from dataclasses import dataclass, field
+from typing import Any
+
+from dataing.agents.tools.registry import ToolCategory, ToolRegistry
+
+logger = logging.getLogger(__name__)
+
+
+@dataclass
+class ContainerStatus:
+ """Status information for a Docker container.
+
+ Attributes:
+ id: Container short ID.
+ name: Container name.
+ status: Container state (running, exited, etc.).
+ image: Image name and tag.
+ created: Creation timestamp.
+ started: Start timestamp.
+ ports: Port mappings.
+ health: Health check status.
+ error: Error message if status fetch failed.
+ """
+
+ id: str
+ name: str
+ status: str
+ image: str
+ created: str | None = None
+ started: str | None = None
+ ports: dict[str, Any] = field(default_factory=dict)
+ health: dict[str, Any] = field(default_factory=dict)
+ error: str | None = None
+
+
+@dataclass
+class ContainerSummary:
+ """Summary of a container for listings.
+
+ Attributes:
+ id: Container short ID.
+ name: Container name.
+ status: Container state.
+ image: Image name.
+ """
+
+ id: str
+ name: str
+ status: str
+ image: str
+
+
+@dataclass
+class DockerStatusResult:
+ """Result from Docker status operations.
+
+ Attributes:
+ success: Whether the operation succeeded.
+ containers: List of container summaries.
+ container: Single container status (for get_status).
+ error: Error message if operation failed.
+ """
+
+ success: bool = True
+ containers: list[ContainerSummary] = field(default_factory=list)
+ container: ContainerStatus | None = None
+ error: str | None = None
+
+
+class DockerStatusTool:
+ """Tool for checking Docker container status.
+
+ Provides read-only access to Docker container information:
+ - List all containers with status
+ - Get detailed status for a specific container
+ - Check container health
+ - Get container resource usage
+ """
+
+ def __init__(self, docker_host: str | None = None) -> None:
+ """Initialize the Docker status tool.
+
+ Args:
+ docker_host: Docker host URL. If None, uses environment defaults.
+ """
+ self._docker_host = docker_host
+ self._client: Any = None
+
+ def _get_client(self) -> Any:
+ """Get or create Docker client.
+
+ Returns:
+ Docker client instance.
+
+ Raises:
+ ImportError: If docker package not installed.
+ RuntimeError: If Docker connection fails.
+ """
+ if self._client is not None:
+ return self._client
+
+ try:
+ import docker
+ except ImportError as err:
+ raise ImportError(
+ "docker package is required for Docker tools. " "Install with: pip install docker"
+ ) from err
+
+ try:
+ if self._docker_host:
+ self._client = docker.DockerClient(base_url=self._docker_host)
+ else:
+ # Use default from environment
+ self._client = docker.from_env()
+
+ # Test connection
+ self._client.ping()
+ return self._client
+
+ except Exception as e:
+ raise RuntimeError(f"Failed to connect to Docker: {e}") from e
+
+ async def list_containers(self, include_stopped: bool = True) -> DockerStatusResult:
+ """List all Docker containers with their status.
+
+ Args:
+ include_stopped: Whether to include stopped containers.
+
+ Returns:
+ DockerStatusResult with container list.
+ """
+ try:
+ client = self._get_client()
+
+ loop = asyncio.get_event_loop()
+ containers = await loop.run_in_executor(
+ None, lambda: client.containers.list(all=include_stopped)
+ )
+
+ summaries = [
+ ContainerSummary(
+ id=c.short_id,
+ name=c.name,
+ status=c.status,
+ image=c.image.tags[0] if c.image.tags else "unknown",
+ )
+ for c in containers
+ ]
+
+ return DockerStatusResult(success=True, containers=summaries)
+
+ except ImportError as e:
+ return DockerStatusResult(success=False, error=str(e))
+ except RuntimeError as e:
+ return DockerStatusResult(success=False, error=str(e))
+ except Exception as e:
+ logger.exception("Failed to list containers")
+ return DockerStatusResult(success=False, error=f"Failed to list containers: {e}")
+
+ async def get_container_status(self, container_id: str) -> DockerStatusResult:
+ """Get detailed status for a specific container.
+
+ Args:
+ container_id: Container name or ID.
+
+ Returns:
+ DockerStatusResult with container details.
+ """
+ try:
+ client = self._get_client()
+
+ loop = asyncio.get_event_loop()
+ container = await loop.run_in_executor(
+ None, lambda: client.containers.get(container_id)
+ )
+
+ status = ContainerStatus(
+ id=container.short_id,
+ name=container.name,
+ status=container.status,
+ image=container.image.tags[0] if container.image.tags else "unknown",
+ created=container.attrs.get("Created"),
+ started=container.attrs.get("State", {}).get("StartedAt"),
+ ports=container.attrs.get("NetworkSettings", {}).get("Ports", {}),
+ health=container.attrs.get("State", {}).get("Health", {}),
+ )
+
+ return DockerStatusResult(success=True, container=status)
+
+ except ImportError as e:
+ return DockerStatusResult(success=False, error=str(e))
+ except RuntimeError as e:
+ return DockerStatusResult(success=False, error=str(e))
+ except Exception as e:
+ logger.exception(f"Failed to get container status: {container_id}")
+ return DockerStatusResult(
+ success=False,
+ error=f"Failed to get container status: {e}",
+ container=ContainerStatus(
+ id="", name=container_id, status="unknown", image="", error=str(e)
+ ),
+ )
+
+ async def get_container_health(self, container_id: str) -> dict[str, Any]:
+ """Get health check status for a container.
+
+ Args:
+ container_id: Container name or ID.
+
+ Returns:
+ Health check information dict.
+ """
+ result = await self.get_container_status(container_id)
+
+ if not result.success or not result.container:
+ return {
+ "healthy": False,
+ "error": result.error or "Container not found",
+ }
+
+ health = result.container.health
+
+ if not health:
+ return {
+ "healthy": None, # No health check configured
+ "status": result.container.status,
+ "message": "No health check configured for this container",
+ }
+
+ return {
+ "healthy": health.get("Status") == "healthy",
+ "status": health.get("Status", "unknown"),
+ "failing_streak": health.get("FailingStreak", 0),
+ "log": health.get("Log", [])[-3:], # Last 3 health check results
+ }
+
+ async def get_container_stats(self, container_id: str) -> dict[str, Any]:
+ """Get resource usage statistics for a container.
+
+ Args:
+ container_id: Container name or ID.
+
+ Returns:
+ Resource usage statistics.
+ """
+ try:
+ client = self._get_client()
+
+ loop = asyncio.get_event_loop()
+ container = await loop.run_in_executor(
+ None, lambda: client.containers.get(container_id)
+ )
+
+ if container.status != "running":
+ return {
+ "error": f"Container is not running (status: {container.status})",
+ "container": container_id,
+ }
+
+ # Get stats (non-streaming)
+ stats = await loop.run_in_executor(None, lambda: container.stats(stream=False))
+
+ # Calculate CPU percentage
+ cpu_delta = (
+ stats["cpu_stats"]["cpu_usage"]["total_usage"]
+ - stats["precpu_stats"]["cpu_usage"]["total_usage"]
+ )
+ system_delta = (
+ stats["cpu_stats"]["system_cpu_usage"] - stats["precpu_stats"]["system_cpu_usage"]
+ )
+ cpu_percent = 0.0
+ if system_delta > 0:
+ cpu_percent = (cpu_delta / system_delta) * 100.0
+
+ # Calculate memory usage
+ mem_usage = stats["memory_stats"].get("usage", 0)
+ mem_limit = stats["memory_stats"].get("limit", 1)
+ mem_percent = (mem_usage / mem_limit) * 100.0 if mem_limit > 0 else 0.0
+
+ return {
+ "cpu_percent": round(cpu_percent, 2),
+ "memory_usage_mb": round(mem_usage / (1024 * 1024), 2),
+ "memory_limit_mb": round(mem_limit / (1024 * 1024), 2),
+ "memory_percent": round(mem_percent, 2),
+ "network_rx_bytes": stats.get("networks", {}).get("eth0", {}).get("rx_bytes", 0),
+ "network_tx_bytes": stats.get("networks", {}).get("eth0", {}).get("tx_bytes", 0),
+ }
+
+ except ImportError as e:
+ return {"error": str(e)}
+ except RuntimeError as e:
+ return {"error": str(e)}
+ except Exception as e:
+ logger.exception(f"Failed to get container stats: {container_id}")
+ return {"error": f"Failed to get container stats: {e}"}
+
+ async def find_unhealthy_containers(self) -> list[dict[str, Any]]:
+ """Find all containers that are unhealthy or not running.
+
+ Returns:
+ List of unhealthy container information.
+ """
+ result = await self.list_containers(include_stopped=True)
+
+ if not result.success:
+ return [{"error": result.error}]
+
+ unhealthy = []
+ for container in result.containers:
+ if container.status != "running":
+ unhealthy.append(
+ {
+ "id": container.id,
+ "name": container.name,
+ "status": container.status,
+ "reason": "not_running",
+ }
+ )
+ else:
+ # Check health for running containers
+ health = await self.get_container_health(container.name)
+ if health.get("healthy") is False:
+ unhealthy.append(
+ {
+ "id": container.id,
+ "name": container.name,
+ "status": container.status,
+ "reason": "unhealthy",
+ "health_status": str(health.get("status", "unknown")),
+ }
+ )
+
+ return unhealthy
+
+
+# Default tool instance
+_default_tool: DockerStatusTool | None = None
+
+
+def get_docker_tool(docker_host: str | None = None) -> DockerStatusTool:
+ """Get the Docker status tool instance.
+
+ Args:
+ docker_host: Docker host URL.
+
+ Returns:
+ DockerStatusTool instance.
+ """
+ global _default_tool
+ if _default_tool is None:
+ _default_tool = DockerStatusTool(docker_host=docker_host)
+ return _default_tool
+
+
+# Tool functions for agent registration
+
+
+async def list_docker_containers(include_stopped: bool = True) -> str:
+ """List all Docker containers with their status.
+
+ Args:
+ include_stopped: Whether to include stopped containers (default: True).
+
+ Returns:
+ Formatted string with container list or error message.
+ """
+ logger.info(f"[TOOL CALLED] list_docker_containers: include_stopped={include_stopped}")
+ tool = get_docker_tool()
+ result = await tool.list_containers(include_stopped=include_stopped)
+
+ if not result.success:
+ if "Permission denied" in str(result.error):
+ return (
+ "Cannot access Docker (permission denied). "
+ "Use file reading tools instead:\n"
+ "- Read demo/docker-compose.demo.yml for container configuration\n"
+ "- Read demo/init-pgduckdb.sql for database initialization\n"
+ "- List demo/fixtures/ for data files"
+ )
+ return f"Error listing containers: {result.error}"
+
+ if not result.containers:
+ return "No containers found."
+
+ lines = ["Docker Containers:"]
+ for c in result.containers:
+ status_indicator = "🟢" if c.status == "running" else "🔴"
+ lines.append(f" {status_indicator} {c.name} ({c.id}) - {c.status} [{c.image}]")
+
+ return "\n".join(lines)
+
+
+async def get_docker_container_status(container_id: str) -> str:
+ """Get detailed status for a specific Docker container.
+
+ Args:
+ container_id: Container name or ID.
+
+ Returns:
+ Formatted string with container details or error message.
+ """
+ logger.info(f"[TOOL CALLED] get_docker_container_status: {container_id}")
+ tool = get_docker_tool()
+ result = await tool.get_container_status(container_id)
+
+ if not result.success:
+ return f"Error getting container status: {result.error}"
+
+ if not result.container:
+ return f"Container not found: {container_id}"
+
+ c = result.container
+ lines = [
+ f"Container: {c.name}",
+ f" ID: {c.id}",
+ f" Status: {c.status}",
+ f" Image: {c.image}",
+ ]
+
+ if c.created:
+ lines.append(f" Created: {c.created}")
+ if c.started:
+ lines.append(f" Started: {c.started}")
+ if c.ports:
+ lines.append(f" Ports: {_format_ports(c.ports)}")
+ if c.health:
+ health_status = c.health.get("Status", "unknown")
+ lines.append(f" Health: {health_status}")
+
+ return "\n".join(lines)
+
+
+async def get_docker_container_health(container_id: str) -> str:
+ """Get health check status for a Docker container.
+
+ Args:
+ container_id: Container name or ID.
+
+ Returns:
+ Formatted string with health information or error message.
+ """
+ tool = get_docker_tool()
+ health = await tool.get_container_health(container_id)
+
+ if "error" in health:
+ return f"Error getting health status: {health['error']}"
+
+ if health.get("healthy") is None:
+ return f"Container {container_id}: {health.get('message', 'No health check configured')}"
+
+ status_emoji = "✅" if health.get("healthy") else "❌"
+ lines = [
+ f"{status_emoji} Container {container_id}: {health.get('status', 'unknown')}",
+ ]
+
+ if health.get("failing_streak", 0) > 0:
+ lines.append(f" Failing streak: {health['failing_streak']}")
+
+ if health.get("log"):
+ lines.append(" Recent health checks:")
+ for log_entry in health["log"]:
+ exit_code = log_entry.get("ExitCode", "?")
+ output = log_entry.get("Output", "").strip()[:100]
+ lines.append(f" - Exit {exit_code}: {output}")
+
+ return "\n".join(lines)
+
+
+async def get_docker_container_stats(container_id: str) -> str:
+ """Get resource usage statistics for a Docker container.
+
+ Args:
+ container_id: Container name or ID.
+
+ Returns:
+ Formatted string with resource usage or error message.
+ """
+ tool = get_docker_tool()
+ stats = await tool.get_container_stats(container_id)
+
+ if "error" in stats:
+ return f"Error getting container stats: {stats['error']}"
+
+ lines = [
+ f"Resource Usage for {container_id}:",
+ f" CPU: {stats['cpu_percent']}%",
+ f" Memory: {stats['memory_usage_mb']} MB / {stats['memory_limit_mb']} MB "
+ f"({stats['memory_percent']}%)",
+ f" Network RX: {_format_bytes(stats['network_rx_bytes'])}",
+ f" Network TX: {_format_bytes(stats['network_tx_bytes'])}",
+ ]
+
+ return "\n".join(lines)
+
+
+async def find_unhealthy_docker_containers() -> str:
+ """Find all Docker containers that are unhealthy or not running.
+
+ Returns:
+ Formatted string with unhealthy container list or success message.
+ """
+ logger.info("[TOOL CALLED] find_unhealthy_docker_containers")
+ tool = get_docker_tool()
+ unhealthy = await tool.find_unhealthy_containers()
+
+ if not unhealthy:
+ return "✅ All containers are healthy and running."
+
+ if len(unhealthy) == 1 and "error" in unhealthy[0]:
+ error_msg = unhealthy[0]["error"]
+ if "Permission denied" in str(error_msg):
+ return (
+ "Cannot access Docker (permission denied). "
+ "Check container status manually with: docker ps\n"
+ "For initialization issues, read demo/init-pgduckdb.sql"
+ )
+ return f"Error checking containers: {error_msg}"
+
+ lines = ["⚠️ Unhealthy or stopped containers:"]
+ for c in unhealthy:
+ if "error" in c:
+ continue
+ reason = c.get("reason", "unknown")
+ if reason == "not_running":
+ lines.append(f" 🔴 {c['name']} - {c['status']}")
+ else:
+ lines.append(f" ⚠️ {c['name']} - health: {c.get('health_status', 'unhealthy')}")
+
+ return "\n".join(lines)
+
+
+def _format_ports(ports: dict[str, Any]) -> str:
+ """Format port mappings for display.
+
+ Args:
+ ports: Port mapping dict from Docker API.
+
+ Returns:
+ Formatted port string.
+ """
+ if not ports:
+ return "none"
+
+ formatted = []
+ for container_port, host_bindings in ports.items():
+ if host_bindings:
+ for binding in host_bindings:
+ host_port = binding.get("HostPort", "?")
+ formatted.append(f"{host_port}->{container_port}")
+ else:
+ formatted.append(container_port)
+
+ return ", ".join(formatted)
+
+
+def _format_bytes(num_bytes: int) -> str:
+ """Format bytes for human-readable display.
+
+ Args:
+ num_bytes: Number of bytes.
+
+ Returns:
+ Human-readable string.
+ """
+ value: float = float(num_bytes)
+ for unit in ["B", "KB", "MB", "GB"]:
+ if abs(value) < 1024.0:
+ return f"{value:.1f} {unit}"
+ value /= 1024.0
+ return f"{value:.1f} TB"
+
+
+def register_docker_tools(registry: ToolRegistry) -> None:
+ """Register Docker status tools with the tool registry.
+
+ Args:
+ registry: Tool registry instance.
+ """
+ from collections.abc import Callable
+
+ tools: list[tuple[str, Callable[..., Any], str]] = [
+ (
+ "list_docker_containers",
+ list_docker_containers,
+ "List all Docker containers with their status",
+ ),
+ (
+ "get_docker_container_status",
+ get_docker_container_status,
+ "Get detailed status for a specific Docker container",
+ ),
+ (
+ "get_docker_container_health",
+ get_docker_container_health,
+ "Get health check status for a Docker container",
+ ),
+ (
+ "get_docker_container_stats",
+ get_docker_container_stats,
+ "Get resource usage statistics for a Docker container",
+ ),
+ (
+ "find_unhealthy_docker_containers",
+ find_unhealthy_docker_containers,
+ "Find all Docker containers that are unhealthy or not running",
+ ),
+ ]
+
+ for name, func, description in tools:
+ registry.register_tool(
+ name=name,
+ category=ToolCategory.DOCKER,
+ description=description,
+ func=func,
+ )
diff --git a/python-packages/dataing/src/dataing/agents/tools/local_files.py b/python-packages/dataing/src/dataing/agents/tools/local_files.py
new file mode 100644
index 000000000..1093a4008
--- /dev/null
+++ b/python-packages/dataing/src/dataing/agents/tools/local_files.py
@@ -0,0 +1,701 @@
+"""Local file reader tool with safety features.
+
+Provides safe read access to local files with:
+- Directory allowlist enforcement
+- Path traversal prevention
+- Sensitive file blocking
+- Size limits
+"""
+
+from __future__ import annotations
+
+import fnmatch
+import logging
+from dataclasses import dataclass
+from pathlib import Path
+
+from pydantic_ai.tools import Tool
+
+from dataing.agents.tools.registry import ToolCategory, get_default_registry
+from dataing.core.parsing import (
+ DataParser,
+ JsonParser,
+ LogParser,
+ TextParser,
+ YamlParser,
+)
+
logger = logging.getLogger(__name__)

# Maximum file size to read (100KB); larger files must be read via line ranges.
MAX_FILE_SIZE = 100 * 1024

# Allowed directories (relative to repo root); enforced by
# LocalFileReader.is_path_allowed as prefix matches.
ALLOWED_DIRS = [
    "python-packages/",
    "frontend/",
    "demo/",
    "docs/",
]

# Allowed file patterns in root directory (fnmatch-style globs)
ALLOWED_ROOT_PATTERNS = [
    "docker-compose*.yml",
    "docker-compose*.yaml",
    "*.md",
    "justfile",
    "pyproject.toml",
    "package.json",
    "Makefile",
    ".gitignore",
]

# Blocked patterns (always rejected; matched case-insensitively against
# the filename only, not the full path)
BLOCKED_PATTERNS = [
    ".env",
    ".env.*",
    "*.pem",
    "*.key",
    "*.crt",
    "*secret*",
    "*credential*",
    "*password*",
    "*token*",
    "*.p12",
    "*.pfx",
    "id_rsa*",
    "id_ed25519*",
    "*.sqlite",
    "*.db",
]
+
+
@dataclass
class FileReadResult:
    """Result of reading a file via LocalFileReader.

    Attributes:
        success: Whether the read succeeded.
        content: File content if successful, else None.
        error: Error message if failed.
        truncated: Whether content was truncated due to size limits.
        file_type: Detected file type (see LocalFileReader._detect_file_type).
        line_count: Number of lines in the file (or chunk), if known.
    """

    success: bool
    content: str | None
    error: str | None = None
    truncated: bool = False
    file_type: str | None = None
    line_count: int | None = None
+
+
class LocalFileReader:
    """Safe local file reader with security features.

    Enforces directory allowlists, blocks sensitive files by filename
    pattern, and prevents path traversal attacks (including via symlinks).
    """

    def __init__(
        self,
        repo_root: Path,
        allowed_dirs: list[str] | None = None,
        allowed_root_patterns: list[str] | None = None,
        blocked_patterns: list[str] | None = None,
        max_file_size: int | None = None,
    ) -> None:
        """Initialize the file reader.

        Args:
            repo_root: Repository root directory.
            allowed_dirs: Allowed subdirectories (default: ALLOWED_DIRS).
            allowed_root_patterns: Allowed patterns in root (default: ALLOWED_ROOT_PATTERNS).
            blocked_patterns: Blocked file patterns (default: BLOCKED_PATTERNS).
            max_file_size: Maximum file size in bytes (default: MAX_FILE_SIZE).
        """
        self.repo_root = repo_root.resolve()
        # Explicit None checks so an intentionally-empty list is honoured
        # instead of silently falling back to the permissive defaults
        # (the previous `x or DEFAULT` form re-enabled defaults for []).
        self.allowed_dirs = allowed_dirs if allowed_dirs is not None else ALLOWED_DIRS
        self.allowed_root_patterns = (
            allowed_root_patterns
            if allowed_root_patterns is not None
            else ALLOWED_ROOT_PATTERNS
        )
        self.blocked_patterns = (
            blocked_patterns if blocked_patterns is not None else BLOCKED_PATTERNS
        )
        self.max_file_size = MAX_FILE_SIZE if max_file_size is None else max_file_size

        # Initialize parsers, all capped at the same size limit.
        self._text_parser = TextParser(max_file_size=self.max_file_size)
        self._yaml_parser = YamlParser(max_file_size=self.max_file_size)
        self._json_parser = JsonParser(max_file_size=self.max_file_size)
        self._log_parser = LogParser(max_file_size=self.max_file_size)
        self._data_parser = DataParser(max_file_size=self.max_file_size)

    def is_path_allowed(self, file_path: str) -> tuple[bool, str | None]:
        """Check if a path is allowed to be read.

        Args:
            file_path: Relative or absolute path.

        Returns:
            Tuple of (is_allowed, error_message).
        """
        try:
            resolved = self._resolve_path(file_path)
        except ValueError as e:
            return False, str(e)

        # Check if blocked by pattern (case-insensitive, filename only)
        filename = resolved.name
        for pattern in self.blocked_patterns:
            if fnmatch.fnmatch(filename.lower(), pattern.lower()):
                alternative = self._suggest_alternative(resolved)
                # Bug fix: the f-string previously had no placeholder, so the
                # offending path was never included in the error message.
                msg = f"Cannot read '{file_path}' - blocked for security"
                if alternative:
                    msg += f". Try: {alternative}"
                return False, msg

        # Check if within allowed directory
        rel_path = self._get_relative_path(resolved)
        if rel_path is None:
            return False, f"Path '{file_path}' is outside the repository"

        # Check if in allowed subdirectory
        for allowed_dir in self.allowed_dirs:
            # Handle both "python-packages" and "python-packages/" matching
            normalized_allowed = allowed_dir.rstrip("/")
            if rel_path == normalized_allowed or rel_path.startswith(normalized_allowed + "/"):
                return True, None

        # Check if matches allowed root pattern (root-level files only)
        if "/" not in rel_path:
            for pattern in self.allowed_root_patterns:
                if fnmatch.fnmatch(rel_path, pattern):
                    return True, None

        return False, (f"Path '{rel_path}' is not in allowed directories: {self.allowed_dirs}")

    def read_file(
        self,
        file_path: str,
        start_line: int | None = None,
        end_line: int | None = None,
    ) -> FileReadResult:
        """Read a file safely.

        Args:
            file_path: Relative or absolute path to the file.
            start_line: Optional 1-indexed start line.
            end_line: Optional 1-indexed end line.

        Returns:
            FileReadResult with content or error.
        """
        # Validate path before touching the filesystem
        is_allowed, error = self.is_path_allowed(file_path)
        if not is_allowed:
            logger.warning(f"Blocked file access: {file_path} - {error}")
            return FileReadResult(success=False, content=None, error=error)

        try:
            resolved = self._resolve_path(file_path)
        except ValueError as e:
            return FileReadResult(success=False, content=None, error=str(e))

        # Check if file exists
        if not resolved.exists():
            return FileReadResult(success=False, content=None, error=f"File not found: {file_path}")

        if not resolved.is_file():
            return FileReadResult(success=False, content=None, error=f"Not a file: {file_path}")

        # Check if symlink points outside allowed area
        if resolved.is_symlink():
            real_path = resolved.resolve()
            is_target_allowed, _ = self.is_path_allowed(str(real_path))
            if not is_target_allowed:
                return FileReadResult(
                    success=False,
                    content=None,
                    error="Symlink target is outside allowed directories",
                )

        # Check file size
        file_size = resolved.stat().st_size
        if file_size > self.max_file_size:
            return FileReadResult(
                success=False,
                content=None,
                error=(
                    f"File too large ({file_size:,} bytes). "
                    f"Max size: {self.max_file_size:,} bytes. "
                    f"Try requesting specific line ranges."
                ),
            )

        # Detect file type and parse
        file_type = self._detect_file_type(resolved)

        try:
            if start_line or end_line:
                # Line-range read (delegated to the text parser)
                chunk = self._text_parser.read_file(
                    resolved,
                    start_line=start_line or 1,
                    end_line=end_line,
                )
                return FileReadResult(
                    success=True,
                    content=chunk.content,
                    truncated=chunk.truncated,
                    file_type=file_type,
                    line_count=chunk.total_lines,
                )
            else:
                # Full file read; undecodable bytes are replaced, not fatal
                content = resolved.read_text(encoding="utf-8", errors="replace")
                line_count = len(content.splitlines())

                return FileReadResult(
                    success=True,
                    content=content,
                    truncated=False,
                    file_type=file_type,
                    line_count=line_count,
                )

        except Exception as e:
            logger.exception(f"Error reading file: {file_path}")
            return FileReadResult(success=False, content=None, error=f"Error reading file: {e}")

    def search_files(
        self,
        pattern: str,
        directory: str | None = None,
        max_results: int = 100,
    ) -> list[tuple[str, int, str]]:
        """Search for pattern in files.

        Args:
            pattern: Search pattern (plain text).
            directory: Optional subdirectory to search.
            max_results: Maximum results to return.

        Returns:
            List of (file_path, line_number, line_content) tuples.
        """
        results: list[tuple[str, int, str]] = []

        # Determine search root
        if directory:
            is_allowed, error = self.is_path_allowed(directory)
            if not is_allowed:
                logger.warning(f"Blocked directory search: {directory} - {error}")
                return []
            search_root = self._resolve_path(directory)
        else:
            search_root = self.repo_root

        # Search files
        for path in search_root.rglob("*"):
            if len(results) >= max_results:
                break

            if not path.is_file():
                continue

            # Check if allowed
            rel_path = self._get_relative_path(path)
            if rel_path is None:
                continue

            is_allowed, _ = self.is_path_allowed(str(path))
            if not is_allowed:
                continue

            # Skip binary files
            if self._is_binary(path):
                continue

            # Search in file; unreadable files are skipped silently
            try:
                matches = self._text_parser.search_lines(
                    path,
                    pattern,
                    max_results=max_results - len(results),
                )
                for line_num, line_content in matches:
                    results.append((rel_path, line_num, line_content))
            except Exception:
                continue

        return results

    def list_files(
        self,
        directory: str,
        pattern: str = "*",
        max_results: int = 100,
    ) -> list[str]:
        """List files in a directory.

        Args:
            directory: Directory to list.
            pattern: Glob pattern (default: all files).
            max_results: Maximum results to return.

        Returns:
            List of relative file paths.
        """
        is_allowed, error = self.is_path_allowed(directory)
        if not is_allowed:
            logger.warning(f"Blocked directory listing: {directory} - {error}")
            return []

        try:
            resolved = self._resolve_path(directory)
        except ValueError:
            return []

        if not resolved.is_dir():
            return []

        results: list[str] = []
        for path in resolved.glob(pattern):
            if len(results) >= max_results:
                break

            rel_path = self._get_relative_path(path)
            if rel_path is None:
                continue

            # Check if allowed (each entry re-checked against blocklist)
            is_allowed, _ = self.is_path_allowed(str(path))
            if not is_allowed:
                continue

            results.append(rel_path)

        return results

    def _resolve_path(self, file_path: str) -> Path:
        """Resolve a path safely.

        Args:
            file_path: Relative or absolute path.

        Returns:
            Resolved absolute path.

        Raises:
            ValueError: If path escapes repository.
        """
        path = Path(file_path)

        # If absolute, use directly
        if path.is_absolute():
            resolved = path.resolve()
        else:
            resolved = (self.repo_root / path).resolve()

        # Check for path traversal (".." segments resolved above)
        try:
            resolved.relative_to(self.repo_root)
        except ValueError:
            raise ValueError(f"Path traversal detected: {file_path}") from None

        return resolved

    def _get_relative_path(self, path: Path) -> str | None:
        """Get path relative to repo root.

        Args:
            path: Absolute path.

        Returns:
            Relative path string, or None if outside repo.
        """
        try:
            return str(path.relative_to(self.repo_root))
        except ValueError:
            return None

    def _detect_file_type(self, path: Path) -> str:
        """Detect file type from extension.

        Args:
            path: File path.

        Returns:
            File type string.
        """
        suffix = path.suffix.lower()
        name = path.name.lower()

        if suffix in (".yml", ".yaml"):
            return "yaml"
        elif suffix == ".json":
            return "json"
        elif suffix in (".py", ".pyi"):
            return "python"
        elif suffix in (".ts", ".tsx", ".js", ".jsx"):
            return "typescript"
        elif suffix == ".md":
            return "markdown"
        elif suffix == ".sql":
            return "sql"
        elif suffix in (".csv", ".tsv"):
            return "csv"
        elif suffix == ".parquet":
            return "parquet"
        elif suffix == ".log" or "log" in name:
            return "log"
        elif suffix == ".toml":
            return "toml"
        elif suffix in (".sh", ".bash"):
            return "shell"
        elif suffix == ".dockerfile" or name == "dockerfile":
            return "dockerfile"
        else:
            return "text"

    def _is_binary(self, path: Path) -> bool:
        """Check if a file is binary (by extension only, no content sniffing).

        Args:
            path: File path.

        Returns:
            True if binary file.
        """
        binary_extensions = {
            ".png",
            ".jpg",
            ".jpeg",
            ".gif",
            ".ico",
            ".svg",
            ".woff",
            ".woff2",
            ".ttf",
            ".eot",
            ".pdf",
            ".zip",
            ".tar",
            ".gz",
            ".bz2",
            ".xz",
            ".exe",
            ".dll",
            ".so",
            ".dylib",
            ".pyc",
            ".pyo",
            ".class",
            ".o",
            ".a",
            ".parquet",
        }
        return path.suffix.lower() in binary_extensions

    def _suggest_alternative(self, path: Path) -> str | None:
        """Suggest an alternative to a blocked file.

        Args:
            path: Blocked file path.

        Returns:
            Alternative suggestion or None.
        """
        name = path.name.lower()

        if name == ".env":
            # Check for .env.example (safe to expose)
            example = path.parent / ".env.example"
            if example.exists():
                return str(example.relative_to(self.repo_root))

        return None
+
+
# Process-wide reader singleton, created lazily by get_file_reader().
_reader: LocalFileReader | None = None


def get_file_reader(repo_root: Path | None = None) -> LocalFileReader:
    """Return the shared LocalFileReader, creating it on first use.

    Args:
        repo_root: Repository root (auto-detected if not provided).

    Returns:
        LocalFileReader instance.
    """
    import os

    global _reader
    if _reader is not None:
        return _reader

    root = repo_root
    if root is None:
        # DATAING_REPO_ROOT wins (Docker); otherwise derive the root from
        # this file's location (local dev).
        env_root = os.environ.get("DATAING_REPO_ROOT")
        if env_root:
            root = Path(env_root)
            logger.info(f"Using DATAING_REPO_ROOT: {root}")
        else:
            root = Path(__file__).resolve().parents[5]
            logger.info(f"Auto-detected repo root: {root}")

    _reader = LocalFileReader(root)
    return _reader
+
+
def reset_file_reader() -> None:
    """Drop the cached LocalFileReader singleton.

    The next get_file_reader() call will rebuild it; intended for tests.
    """
    global _reader
    _reader = None
+
+
+# Tool function for agent
+async def read_local_file(
+ file_path: str,
+ start_line: int | None = None,
+ end_line: int | None = None,
+) -> str:
+ """Read a file from the repository.
+
+ Args:
+ file_path: Path relative to repository root.
+ start_line: Optional 1-indexed start line for partial reads.
+ end_line: Optional 1-indexed end line for partial reads.
+
+ Returns:
+ File contents or error message.
+ """
+ logger.info(f"[TOOL CALLED] read_local_file: {file_path}")
+ reader = get_file_reader()
+ result = reader.read_file(file_path, start_line, end_line)
+
+ if result.success:
+ header = f"[{result.file_type}] {file_path}"
+ if result.line_count:
+ header += f" ({result.line_count} lines)"
+ if result.truncated:
+ header += " [TRUNCATED]"
+ logger.info(f"[TOOL RESULT] read_local_file: success, {result.line_count} lines")
+ return f"{header}\n\n{result.content}"
+ else:
+ logger.info(f"[TOOL RESULT] read_local_file: error - {result.error}")
+ return f"Error: {result.error}"
+
+
async def search_in_files(
    pattern: str,
    directory: str | None = None,
    max_results: int = 50,
) -> str:
    """Search repository files for a plain-text pattern.

    Args:
        pattern: Text pattern to search for.
        directory: Optional subdirectory to search in.
        max_results: Maximum results to return (default: 50).

    Returns:
        Search results grouped by file, or an error message.
    """
    logger.info(f"[TOOL CALLED] search_in_files: pattern='{pattern}', directory={directory}")
    matches = get_file_reader().search_files(pattern, directory, max_results)

    if not matches:
        logger.info("[TOOL RESULT] search_in_files: no matches")
        return f"No matches found for '{pattern}'"

    output = [f"Found {len(matches)} matches for '{pattern}':\n"]
    previous_file: str | None = None

    # Group hits under a single heading per file.
    for match_path, match_line, match_text in matches:
        if match_path != previous_file:
            previous_file = match_path
            output.append(f"\n{match_path}:")
        output.append(f" {match_line}: {match_text[:100]}")

    logger.info(f"[TOOL RESULT] search_in_files: {len(matches)} matches")
    return "\n".join(output)
+
+
async def list_directory(
    directory: str,
    pattern: str = "*",
) -> str:
    """List repository files under a directory.

    Args:
        directory: Directory path relative to repository root.
            ALLOWED directories: python-packages/, frontend/, demo/, docs/
            Use 'demo/' to find Docker and infrastructure configuration.
        pattern: Glob pattern used to filter entries (default: "*").

    Returns:
        File listing or error message.
    """
    logger.info(f"[TOOL CALLED] list_directory: directory='{directory}', pattern='{pattern}'")
    reader = get_file_reader()

    # Check allowance up front so the agent gets an actionable error.
    allowed, error = reader.is_path_allowed(directory)
    if not allowed:
        logger.info(f"[TOOL RESULT] list_directory: blocked - {error}")
        return (
            f"Cannot list '{directory}': {error}\n\n"
            f"ALLOWED directories you can list:\n"
            f" - demo/ (Docker configs, init scripts, fixtures)\n"
            f" - python-packages/ (backend code)\n"
            f" - frontend/ (frontend code)\n"
            f" - docs/ (documentation)\n\n"
            f"Try: list_directory('demo/') to see infrastructure configuration."
        )

    entries = reader.list_files(directory, pattern)
    if not entries:
        logger.info("[TOOL RESULT] list_directory: no files found")
        return f"No files found in '{directory}' matching '{pattern}'"

    logger.info(f"[TOOL RESULT] list_directory: {len(entries)} files")
    return f"Files in {directory}:\n" + "\n".join(f" {f}" for f in entries)
+
+
# pydantic-ai Tool wrappers so the functions can be attached to agents directly.
read_file_tool = Tool(read_local_file)
search_files_tool = Tool(search_in_files)
list_dir_tool = Tool(list_directory)
+
+
def register_local_file_tools() -> None:
    """Register local file tools with the default registry."""
    registry = get_default_registry()

    # (name, callable, description, priority) — lower priority sorts first.
    file_tools = [
        (
            "read_local_file",
            read_local_file,
            "Read a file from the repository with safety checks",
            10,
        ),
        (
            "search_in_files",
            search_in_files,
            "Search for a pattern across repository files",
            20,
        ),
        (
            "list_directory",
            list_directory,
            "List files in a repository directory",
            30,
        ),
    ]

    for tool_name, tool_func, tool_description, tool_priority in file_tools:
        registry.register_tool(
            name=tool_name,
            category=ToolCategory.FILES,
            description=tool_description,
            func=tool_func,
            priority=tool_priority,
        )
+
+
# Toolset for direct use (the same tools register_local_file_tools registers).
local_files_toolset = [read_file_tool, search_files_tool, list_dir_tool]
diff --git a/python-packages/dataing/src/dataing/agents/tools/log_providers/__init__.py b/python-packages/dataing/src/dataing/agents/tools/log_providers/__init__.py
new file mode 100644
index 000000000..63cab069a
--- /dev/null
+++ b/python-packages/dataing/src/dataing/agents/tools/log_providers/__init__.py
@@ -0,0 +1,28 @@
+"""Log provider interface and implementations.
+
+Provides pluggable access to logs from various sources:
+- Local file system
+- Docker containers
+- CloudWatch Logs
+"""
+
+from dataing.agents.tools.log_providers.base import LogProvider, LogProviderConfig
+from dataing.agents.tools.log_providers.docker import DockerLogProvider
+from dataing.agents.tools.log_providers.local import LocalFileLogProvider
+
# Public API; CloudWatchLogProvider is appended below only when importable.
__all__ = [
    "LogProvider",
    "LogProviderConfig",
    "LocalFileLogProvider",
    "DockerLogProvider",
]

# CloudWatch provider is optional - only available with boto3
# NOTE(review): cloudwatch.py imports boto3 lazily inside _get_client, so this
# import succeeds even without boto3 installed — the guard only protects
# against the cloudwatch module itself being absent. Confirm intent.
try:
    from dataing.agents.tools.log_providers.cloudwatch import ( # noqa: F401
        CloudWatchLogProvider,
    )

    __all__.append("CloudWatchLogProvider")
except ImportError:
    pass
diff --git a/python-packages/dataing/src/dataing/agents/tools/log_providers/base.py b/python-packages/dataing/src/dataing/agents/tools/log_providers/base.py
new file mode 100644
index 000000000..54857ad15
--- /dev/null
+++ b/python-packages/dataing/src/dataing/agents/tools/log_providers/base.py
@@ -0,0 +1,287 @@
+"""Base log provider protocol and types.
+
+Defines the interface for log providers and common types.
+"""
+
+from __future__ import annotations
+
+from abc import ABC, abstractmethod
+from dataclasses import dataclass, field
+from datetime import datetime
+from enum import Enum
+from typing import Any, Protocol, runtime_checkable
+
+
class LogSource(str, Enum):
    """Types of log sources a provider can expose."""

    LOCAL_FILE = "local_file"  # plain files on the local filesystem
    DOCKER = "docker"  # container logs via the Docker API
    CLOUDWATCH = "cloudwatch"  # AWS CloudWatch Logs
    KUBERNETES = "kubernetes"  # reserved; no provider implemented in this package yet
+
+
@dataclass
class LogProviderConfig:
    """Configuration for a log provider.

    Attributes:
        source: The type of log source.
        name: Human-readable name for this provider.
        enabled: Whether the provider is enabled.
        settings: Provider-specific settings (free-form key/value pairs).
    """

    source: LogSource
    name: str
    enabled: bool = True
    settings: dict[str, Any] = field(default_factory=dict)
+
+
@dataclass
class LogEntry:
    """A single log entry.

    Attributes:
        timestamp: When the log was produced; None when the source did not
            supply a parseable timestamp.
        message: The log message content.
        level: Log level (INFO, ERROR, etc.) if detected.
        source: Where the log came from (file path, container, log group, ...).
        metadata: Additional metadata; a "raw" key holding the original line
            is also searched by BaseLogProvider pattern matching.
    """

    timestamp: datetime | None
    message: str
    level: str | None = None
    source: str | None = None
    metadata: dict[str, Any] = field(default_factory=dict)
+
+
@dataclass
class LogResult:
    """Result of fetching logs.

    Attributes:
        entries: Log entries retrieved.
        source: The source of these logs ("multiple" for cross-source search).
        truncated: Whether results were truncated.
        next_token: Token for pagination if available.
        error: Error message if fetch failed; None means success.
    """

    entries: list[LogEntry]
    source: str
    truncated: bool = False
    next_token: str | None = None
    error: str | None = None

    @property
    def success(self) -> bool:
        """True when no error was recorded for this fetch."""
        return self.error is None
+
+
@runtime_checkable
class LogProvider(Protocol):
    """Protocol for log providers.

    All log providers must implement this interface. Decorated with
    @runtime_checkable, so isinstance() checks test for these attributes
    structurally (presence only, not signatures).
    """

    @property
    def source_type(self) -> LogSource:
        """Get the type of log source."""
        ...

    @property
    def name(self) -> str:
        """Get the provider name."""
        ...

    async def list_sources(self) -> list[str]:
        """List available log sources.

        Returns:
            List of source identifiers (file paths, container names, etc.).
        """
        ...

    async def get_logs(
        self,
        source_id: str,
        start_time: datetime | None = None,
        end_time: datetime | None = None,
        max_entries: int = 100,
        filter_pattern: str | None = None,
        next_token: str | None = None,
    ) -> LogResult:
        """Get logs from a source.

        Args:
            source_id: The source identifier.
            start_time: Start of time range.
            end_time: End of time range.
            max_entries: Maximum entries to return.
            filter_pattern: Pattern to filter logs.
            next_token: Token for pagination.

        Returns:
            LogResult with entries or error.
        """
        ...

    async def search_logs(
        self,
        pattern: str,
        source_id: str | None = None,
        start_time: datetime | None = None,
        end_time: datetime | None = None,
        max_entries: int = 50,
    ) -> LogResult:
        """Search logs for a pattern.

        Args:
            pattern: Search pattern.
            source_id: Optional source to search in.
            start_time: Start of time range.
            end_time: End of time range.
            max_entries: Maximum entries to return.

        Returns:
            LogResult with matching entries.
        """
        ...
+
+
class BaseLogProvider(ABC):
    """Base class for log providers.

    Implements a generic ``search_logs`` on top of the abstract
    ``list_sources``/``get_logs`` primitives; subclasses may override it
    with a backend-native search when one exists.
    """

    def __init__(self, config: LogProviderConfig) -> None:
        """Initialize the provider.

        Args:
            config: Provider configuration.
        """
        self._config = config

    @property
    @abstractmethod
    def source_type(self) -> LogSource:
        """Get the type of log source."""
        ...

    @property
    def name(self) -> str:
        """Get the provider name."""
        return self._config.name

    @property
    def enabled(self) -> bool:
        """Check if the provider is enabled."""
        return self._config.enabled

    @abstractmethod
    async def list_sources(self) -> list[str]:
        """List available log sources."""
        ...

    @abstractmethod
    async def get_logs(
        self,
        source_id: str,
        start_time: datetime | None = None,
        end_time: datetime | None = None,
        max_entries: int = 100,
        filter_pattern: str | None = None,
        next_token: str | None = None,
    ) -> LogResult:
        """Get logs from a source."""
        ...

    async def search_logs(
        self,
        pattern: str,
        source_id: str | None = None,
        start_time: datetime | None = None,
        end_time: datetime | None = None,
        max_entries: int = 50,
    ) -> LogResult:
        """Search logs for a pattern.

        Default implementation fetches logs and filters client-side.
        Subclasses can override for more efficient search.

        Args:
            pattern: Search pattern (matched case-insensitively).
            source_id: Optional source to search in; all sources if None.
            start_time: Start of time range.
            end_time: End of time range.
            max_entries: Maximum entries to return.

        Returns:
            LogResult with matching entries.
        """
        # Hoisted out of the loops: the lowered pattern is loop-invariant.
        pattern_lower = pattern.lower()

        # If source specified, search in it
        if source_id:
            result = await self.get_logs(
                source_id,
                start_time=start_time,
                end_time=end_time,
                max_entries=max_entries * 10,  # Fetch more to filter
                filter_pattern=pattern,
            )
            # Filter client-side if provider doesn't support filter
            if pattern and result.success:
                matched = [e for e in result.entries if self._matches_pattern(e, pattern_lower)]
                # Bug fix: flag truncation when the client-side cap drops
                # matches (previously the provider's flag was left as-is).
                if len(matched) > max_entries:
                    result.truncated = True
                result.entries = matched[:max_entries]
            return result

        # Search across all sources
        sources = await self.list_sources()
        all_entries: list[LogEntry] = []
        truncated = False

        for src in sources:
            if len(all_entries) >= max_entries:
                # Cap reached with sources left unscanned: result incomplete.
                truncated = True
                break

            result = await self.get_logs(
                src,
                start_time=start_time,
                end_time=end_time,
                max_entries=max_entries - len(all_entries),
                filter_pattern=pattern,
            )

            if result.success:
                # Filter client-side
                all_entries.extend(
                    e for e in result.entries if self._matches_pattern(e, pattern_lower)
                )
                # Bug fix: propagate backend truncation. The old check
                # `len(all_entries) > max_entries` could never be true because
                # every fetch is capped at the remaining budget.
                truncated = truncated or result.truncated

        return LogResult(
            entries=all_entries[:max_entries],
            source="multiple",
            truncated=truncated,
        )

    def _matches_pattern(self, entry: LogEntry, pattern_lower: str) -> bool:
        """Check if entry matches a search pattern.

        Checks message, level, and raw line in metadata.

        Args:
            entry: Log entry to check.
            pattern_lower: Lowercase search pattern.

        Returns:
            True if pattern found in entry.
        """
        # Check message
        if pattern_lower in entry.message.lower():
            return True

        # Check level
        if entry.level and pattern_lower in entry.level.lower():
            return True

        # Check raw line in metadata
        raw = entry.metadata.get("raw", "")
        if raw and pattern_lower in raw.lower():
            return True

        return False
diff --git a/python-packages/dataing/src/dataing/agents/tools/log_providers/cloudwatch.py b/python-packages/dataing/src/dataing/agents/tools/log_providers/cloudwatch.py
new file mode 100644
index 000000000..8d6cf3fc0
--- /dev/null
+++ b/python-packages/dataing/src/dataing/agents/tools/log_providers/cloudwatch.py
@@ -0,0 +1,266 @@
+"""CloudWatch Logs provider.
+
+Reads logs from AWS CloudWatch Logs using IAM role authentication.
+"""
+
+from __future__ import annotations
+
+import asyncio
+import logging
+from datetime import datetime
+from typing import Any
+
+from dataing.agents.tools.log_providers.base import (
+ BaseLogProvider,
+ LogEntry,
+ LogProviderConfig,
+ LogResult,
+ LogSource,
+)
+
+logger = logging.getLogger(__name__)
+
+
class CloudWatchLogProvider(BaseLogProvider):
    """Log provider for AWS CloudWatch Logs.

    Reads logs from CloudWatch using boto3 with IAM role authentication.
    The boto3 client is created lazily so this module imports cleanly
    without boto3 installed.
    """

    def __init__(
        self,
        config: LogProviderConfig,
        region_name: str | None = None,
        log_group_prefix: str | None = None,
    ) -> None:
        """Initialize the CloudWatch log provider.

        Args:
            config: Provider configuration.
            region_name: AWS region (default: from environment).
            log_group_prefix: Prefix to filter log groups.
        """
        super().__init__(config)
        self._region = region_name
        self._log_group_prefix = log_group_prefix
        self._client: Any = None

    def _get_client(self) -> Any:
        """Get or create CloudWatch Logs client.

        Returns:
            boto3 logs client.

        Raises:
            ImportError: If boto3 not installed.
            RuntimeError: If AWS connection fails.
        """
        if self._client is not None:
            return self._client

        try:
            import boto3
        except ImportError as err:
            raise ImportError(
                "boto3 is required for CloudWatch log provider. Install with: pip install boto3"
            ) from err

        try:
            kwargs: dict[str, Any] = {}
            if self._region:
                kwargs["region_name"] = self._region

            self._client = boto3.client("logs", **kwargs)
            return self._client

        except Exception as e:
            raise RuntimeError(f"Failed to create CloudWatch client: {e}") from e

    @property
    def source_type(self) -> LogSource:
        """Get the source type."""
        return LogSource.CLOUDWATCH

    async def list_sources(self) -> list[str]:
        """List available log groups.

        Returns:
            List of log group names (empty on any AWS error).
        """
        try:
            client = self._get_client()

            # Bug fix: get_event_loop() is deprecated inside coroutines;
            # get_running_loop() is the supported call here.
            loop = asyncio.get_running_loop()

            kwargs: dict[str, Any] = {}
            if self._log_group_prefix:
                kwargs["logGroupNamePrefix"] = self._log_group_prefix

            response = await loop.run_in_executor(
                None, lambda: client.describe_log_groups(**kwargs)
            )

            return [lg["logGroupName"] for lg in response.get("logGroups", [])]

        except Exception:
            logger.exception("Failed to list CloudWatch log groups")
            return []

    async def get_logs(
        self,
        source_id: str,
        start_time: datetime | None = None,
        end_time: datetime | None = None,
        max_entries: int = 100,
        filter_pattern: str | None = None,
        next_token: str | None = None,
    ) -> LogResult:
        """Get logs from a CloudWatch log group.

        Args:
            source_id: Log group name.
            start_time: Start of time range.
            end_time: End of time range.
            max_entries: Maximum entries to return.
            filter_pattern: CloudWatch Insights filter pattern.
            next_token: Token for pagination.

        Returns:
            LogResult with entries.
        """
        try:
            client = self._get_client()
            loop = asyncio.get_running_loop()

            # Build request parameters
            kwargs: dict[str, Any] = {
                "logGroupName": source_id,
                "limit": min(max_entries, 10000),  # CloudWatch max
            }

            if start_time:
                kwargs["startTime"] = int(start_time.timestamp() * 1000)
            if end_time:
                kwargs["endTime"] = int(end_time.timestamp() * 1000)
            if filter_pattern:
                kwargs["filterPattern"] = filter_pattern
            if next_token:
                kwargs["nextToken"] = next_token

            # Fetch events
            response = await loop.run_in_executor(None, lambda: client.filter_log_events(**kwargs))

            entries: list[LogEntry] = []
            for event in response.get("events", []):
                timestamp = None
                if "timestamp" in event:
                    # NOTE(review): CloudWatch timestamps are epoch millis in
                    # UTC; fromtimestamp() without tz renders them in the
                    # local zone — consider tz=timezone.utc. Confirm callers.
                    timestamp = datetime.fromtimestamp(event["timestamp"] / 1000)

                entries.append(
                    LogEntry(
                        timestamp=timestamp,
                        message=event.get("message", ""),
                        source=source_id,
                        metadata={
                            "log_stream": event.get("logStreamName"),
                            "event_id": event.get("eventId"),
                        },
                    )
                )

            return LogResult(
                entries=entries,
                source=source_id,
                truncated="nextToken" in response,
                next_token=response.get("nextToken"),
            )

        except Exception as e:
            logger.exception(f"Failed to get CloudWatch logs: {source_id}")
            return LogResult(
                entries=[],
                source=source_id,
                error=f"Failed to get CloudWatch logs: {e}",
            )

    async def list_log_streams(
        self,
        log_group: str,
        prefix: str | None = None,
        max_streams: int = 50,
    ) -> list[dict[str, Any]]:
        """List log streams in a log group, most recently active first.

        Args:
            log_group: Log group name.
            prefix: Stream name prefix to filter.
            max_streams: Maximum streams to return.

        Returns:
            List of log stream info dicts; a single {"error": ...} dict on failure.
        """
        try:
            client = self._get_client()
            loop = asyncio.get_running_loop()

            kwargs: dict[str, Any] = {
                "logGroupName": log_group,
                "limit": max_streams,
                "orderBy": "LastEventTime",
                "descending": True,
            }

            if prefix:
                kwargs["logStreamNamePrefix"] = prefix

            response = await loop.run_in_executor(
                None, lambda: client.describe_log_streams(**kwargs)
            )

            return [
                {
                    "name": stream["logStreamName"],
                    "last_event": datetime.fromtimestamp(
                        stream.get("lastEventTimestamp", 0) / 1000
                    ).isoformat()
                    if stream.get("lastEventTimestamp")
                    else None,
                    "stored_bytes": stream.get("storedBytes", 0),
                }
                for stream in response.get("logStreams", [])
            ]

        except Exception as e:
            logger.exception(f"Failed to list log streams: {log_group}")
            return [{"error": str(e)}]
+
+
def create_cloudwatch_provider(
    name: str = "CloudWatch",
    region_name: str | None = None,
    log_group_prefix: str | None = None,
) -> CloudWatchLogProvider:
    """Build a CloudWatchLogProvider together with its LogProviderConfig.

    Args:
        name: Provider name.
        region_name: AWS region.
        log_group_prefix: Prefix to filter log groups.

    Returns:
        Configured CloudWatchLogProvider.
    """
    provider_settings = {
        "region": region_name,
        "log_group_prefix": log_group_prefix,
    }
    provider_config = LogProviderConfig(
        source=LogSource.CLOUDWATCH,
        name=name,
        settings=provider_settings,
    )
    return CloudWatchLogProvider(
        config=provider_config,
        region_name=region_name,
        log_group_prefix=log_group_prefix,
    )
diff --git a/python-packages/dataing/src/dataing/agents/tools/log_providers/docker.py b/python-packages/dataing/src/dataing/agents/tools/log_providers/docker.py
new file mode 100644
index 000000000..2314a5cab
--- /dev/null
+++ b/python-packages/dataing/src/dataing/agents/tools/log_providers/docker.py
@@ -0,0 +1,323 @@
+"""Docker container log provider.
+
+Reads logs from Docker containers via the Docker API.
+"""
+
+from __future__ import annotations
+
+import asyncio
+import logging
+import re
+from datetime import datetime
+from typing import Any
+
+from dataing.agents.tools.log_providers.base import (
+ BaseLogProvider,
+ LogEntry,
+ LogProviderConfig,
+ LogResult,
+ LogSource,
+)
+
+logger = logging.getLogger(__name__)
+
+
class DockerLogProvider(BaseLogProvider):
    """Log provider for Docker containers.

    Reads logs from Docker containers using the Docker SDK.
    Supports:
    - Unix socket connection (default)
    - TCP connection with optional TLS
    - Environment-based auto-detection
    """

    # Timestamp prefix Docker prepends when logs are requested with
    # timestamps=True, e.g. "2024-01-15T10:30:45.123456789Z message".
    # Compiled once at class level so per-line parsing avoids recompilation.
    _TIMESTAMP_RE = re.compile(r"^(\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}(?:\.\d+)?Z?)\s+")

    # Ordered best-effort level detection patterns; first match wins.
    _LEVEL_PATTERNS: list[tuple[re.Pattern[str], str]] = [
        (re.compile(r"\b(DEBUG)\b", re.IGNORECASE), "debug"),
        (re.compile(r"\b(INFO)\b", re.IGNORECASE), "info"),
        (re.compile(r"\b(WARN(?:ING)?)\b", re.IGNORECASE), "warning"),
        (re.compile(r"\b(ERROR)\b", re.IGNORECASE), "error"),
        (re.compile(r"\b(FATAL|CRITICAL)\b", re.IGNORECASE), "critical"),
    ]

    def __init__(
        self,
        config: LogProviderConfig,
        docker_host: str | None = None,
    ) -> None:
        """Initialize the Docker log provider.

        Args:
            config: Provider configuration.
            docker_host: Docker host URL (default: from environment).
        """
        super().__init__(config)
        self._docker_host = docker_host
        # Lazily-created Docker SDK client; see _get_client().
        self._client: Any = None

    def _get_client(self) -> Any:
        """Get or create the Docker client (cached after first success).

        Returns:
            Docker client instance.

        Raises:
            ImportError: If the docker package is not installed.
            RuntimeError: If the Docker connection fails.
        """
        if self._client is not None:
            return self._client

        try:
            import docker
        except ImportError as err:
            raise ImportError(
                "docker package is required for Docker log provider. "
                "Install with: pip install docker"
            ) from err

        try:
            if self._docker_host:
                self._client = docker.DockerClient(base_url=self._docker_host)
            else:
                # Use default from environment
                self._client = docker.from_env()

            # Test connection eagerly so failures surface here, not mid-query.
            self._client.ping()
            return self._client

        except Exception as e:
            raise RuntimeError(f"Failed to connect to Docker: {e}") from e

    @property
    def source_type(self) -> LogSource:
        """Get the source type."""
        return LogSource.DOCKER

    async def list_sources(self) -> list[str]:
        """List available containers (running and stopped).

        Returns:
            List of container names/IDs; empty list on failure.
        """
        try:
            client = self._get_client()

            # get_running_loop() is the non-deprecated accessor inside a
            # coroutine (get_event_loop() is deprecated here since 3.10).
            loop = asyncio.get_running_loop()
            containers = await loop.run_in_executor(None, lambda: client.containers.list(all=True))

            return [c.name for c in containers]

        except Exception:
            logger.exception("Failed to list containers")
            return []

    async def get_logs(
        self,
        source_id: str,
        start_time: datetime | None = None,
        end_time: datetime | None = None,
        max_entries: int = 100,
        filter_pattern: str | None = None,
        next_token: str | None = None,
    ) -> LogResult:
        """Get logs from a container.

        Args:
            source_id: Container name or ID.
            start_time: Start of time range.
            end_time: End of time range.
            max_entries: Maximum entries to return.
            filter_pattern: Case-insensitive substring to filter log messages.
            next_token: Timestamp to start from (currently unused by this
                provider; retained for interface compatibility).

        Returns:
            LogResult with entries; on failure, an empty result with error set.
        """
        try:
            client = self._get_client()

            # Blocking SDK calls run in the default executor to keep the
            # event loop responsive.
            loop = asyncio.get_running_loop()
            container = await loop.run_in_executor(None, lambda: client.containers.get(source_id))

            # Build log arguments
            kwargs: dict[str, Any] = {
                "timestamps": True,
                "tail": max_entries * 2,  # Get extra for filtering
            }

            if start_time:
                kwargs["since"] = start_time
            if end_time:
                kwargs["until"] = end_time

            # Get logs
            logs = await loop.run_in_executor(
                None, lambda: container.logs(**kwargs).decode("utf-8", errors="replace")
            )

            # Parse log lines
            entries: list[LogEntry] = []
            for line in logs.splitlines():
                if not line.strip():
                    continue

                entry = self._parse_docker_log_line(line, source_id)

                # Apply pattern filter (case-insensitive substring match).
                if filter_pattern and filter_pattern.lower() not in entry.message.lower():
                    continue

                entries.append(entry)

                if len(entries) >= max_entries:
                    break

            return LogResult(
                entries=entries,
                source=source_id,
                truncated=len(entries) >= max_entries,
            )

        except Exception as e:
            logger.exception("Failed to get logs for container: %s", source_id)
            return LogResult(
                entries=[],
                source=source_id,
                error=f"Failed to get container logs: {e}",
            )

    async def get_container_status(self, container_id: str) -> dict[str, Any]:
        """Get container status information.

        Args:
            container_id: Container name or ID.

        Returns:
            Container status dict; on failure, a dict with "error" set.
        """
        try:
            client = self._get_client()

            loop = asyncio.get_running_loop()
            container = await loop.run_in_executor(
                None, lambda: client.containers.get(container_id)
            )

            return {
                "id": container.short_id,
                "name": container.name,
                "status": container.status,
                "image": container.image.tags[0] if container.image.tags else "unknown",
                "created": container.attrs.get("Created"),
                "started": container.attrs.get("State", {}).get("StartedAt"),
                "ports": container.attrs.get("NetworkSettings", {}).get("Ports", {}),
                "health": container.attrs.get("State", {}).get("Health", {}),
            }

        except Exception as e:
            logger.exception("Failed to get container status: %s", container_id)
            return {
                "error": str(e),
                "container": container_id,
            }

    async def list_containers_with_status(self) -> list[dict[str, Any]]:
        """List all containers with their status.

        Returns:
            List of container status dicts; a single-element error dict on failure.
        """
        try:
            client = self._get_client()

            loop = asyncio.get_running_loop()
            containers = await loop.run_in_executor(None, lambda: client.containers.list(all=True))

            return [
                {
                    "id": c.short_id,
                    "name": c.name,
                    "status": c.status,
                    "image": c.image.tags[0] if c.image.tags else "unknown",
                }
                for c in containers
            ]

        except Exception as e:
            logger.exception("Failed to list containers with status")
            return [{"error": str(e)}]

    def _parse_docker_log_line(self, line: str, source: str) -> LogEntry:
        """Parse a Docker log line into a LogEntry.

        Docker logs with timestamps look like:
            2024-01-15T10:30:45.123456789Z message

        Args:
            line: Raw log line.
            source: Container name.

        Returns:
            LogEntry with best-effort timestamp and level.
        """
        timestamp = None
        message = line
        level = None

        # Try to extract the leading timestamp, if present.
        match = self._TIMESTAMP_RE.match(line)
        if match:
            timestamp_str = match.group(1)
            message = line[match.end() :]

            try:
                # datetime.fromisoformat accepts at most microsecond
                # precision, so truncate Docker's nanosecond fractions.
                if "." in timestamp_str:
                    parts = timestamp_str.split(".")
                    microseconds = parts[1].rstrip("Z")[:6]
                    timestamp_str = f"{parts[0]}.{microseconds}"
                timestamp = datetime.fromisoformat(timestamp_str.replace("Z", "+00:00"))
            except ValueError:
                # Malformed prefix: leave timestamp as None.
                pass

        # Best-effort level detection in the message body; first match wins.
        for pattern, level_name in self._LEVEL_PATTERNS:
            if pattern.search(message):
                level = level_name
                break

        return LogEntry(
            timestamp=timestamp,
            message=message.strip(),
            level=level,
            source=source,
        )
+
+
def create_docker_provider(
    name: str = "Docker",
    docker_host: str | None = None,
) -> DockerLogProvider:
    """Build a ready-to-use Docker log provider.

    Args:
        name: Display name for the provider.
        docker_host: Docker host URL; None lets the provider fall back to
            its environment-based default.

    Returns:
        A DockerLogProvider wired with a matching LogProviderConfig.
    """
    return DockerLogProvider(
        config=LogProviderConfig(
            source=LogSource.DOCKER,
            name=name,
            settings={"docker_host": docker_host},
        ),
        docker_host=docker_host,
    )
diff --git a/python-packages/dataing/src/dataing/agents/tools/log_providers/local.py b/python-packages/dataing/src/dataing/agents/tools/log_providers/local.py
new file mode 100644
index 000000000..485e316a8
--- /dev/null
+++ b/python-packages/dataing/src/dataing/agents/tools/log_providers/local.py
@@ -0,0 +1,287 @@
+"""Local file log provider.
+
+Reads logs from local files with automatic rotation detection.
+"""
+
+from __future__ import annotations
+
+import logging
+from datetime import datetime
+from pathlib import Path
+
+from dataing.agents.tools.log_providers.base import (
+ BaseLogProvider,
+ LogEntry,
+ LogProviderConfig,
+ LogResult,
+ LogSource,
+)
+from dataing.core.parsing.log_parser import LogLevel, LogParser
+
+logger = logging.getLogger(__name__)
+
+
class LocalFileLogProvider(BaseLogProvider):
    """Log provider for local files.

    Reads logs from local filesystem with support for:
    - Single files and directories
    - Log rotation (*.log, *.log.1, etc.)
    - Multiple log formats

    Pagination: get_logs() uses a 1-based line number as next_token so a
    caller can resume exactly where the previous call stopped.
    """

    def __init__(
        self,
        config: LogProviderConfig,
        log_directories: list[Path] | None = None,
        log_patterns: list[str] | None = None,
    ) -> None:
        """Initialize the local file log provider.

        Args:
            config: Provider configuration.
            log_directories: Directories to scan for logs.
            log_patterns: Glob patterns for log files (default: "*.log", "*.log.*").
        """
        super().__init__(config)
        self._log_dirs = log_directories or []
        self._log_patterns = log_patterns or ["*.log", "*.log.*"]
        # Shared parser used by get_logs() and get_recent_errors().
        self._parser = LogParser()

    @property
    def source_type(self) -> LogSource:
        """Get the source type."""
        return LogSource.LOCAL_FILE

    def add_log_directory(self, directory: Path) -> None:
        """Add a directory to scan for logs.

        Args:
            directory: Directory path. Duplicates are ignored.
        """
        if directory not in self._log_dirs:
            self._log_dirs.append(directory)

    async def list_sources(self) -> list[str]:
        """List available log files.

        Returns:
            List of log file paths, newest first by modification time.
        """
        sources: list[str] = []

        for log_dir in self._log_dirs:
            if not log_dir.exists():
                continue

            for pattern in self._log_patterns:
                for path in log_dir.glob(pattern):
                    if path.is_file():
                        sources.append(str(path))

        # Sort by modification time (newest first)
        # NOTE(review): stat() can raise if a file disappears between the
        # glob above and this sort — confirm whether that race matters here.
        sources.sort(key=lambda p: Path(p).stat().st_mtime, reverse=True)

        return sources

    async def get_logs(
        self,
        source_id: str,
        start_time: datetime | None = None,
        end_time: datetime | None = None,
        max_entries: int = 100,
        filter_pattern: str | None = None,
        next_token: str | None = None,
    ) -> LogResult:
        """Get logs from a file.

        Args:
            source_id: Path to the log file.
            start_time: Start of time range (entries without a parsed
                timestamp always pass the time filters).
            end_time: End of time range.
            max_entries: Maximum entries to return.
            filter_pattern: Case-insensitive substring to filter logs.
            next_token: 1-based line number to start from.

        Returns:
            LogResult with entries; next_token is set when more lines remain.
        """
        path = Path(source_id)

        if not path.exists():
            return LogResult(
                entries=[],
                source=source_id,
                error=f"Log file not found: {source_id}",
            )

        if not path.is_file():
            return LogResult(
                entries=[],
                source=source_id,
                error=f"Not a file: {source_id}",
            )

        try:
            # Determine start line from token. A non-numeric token raises
            # ValueError, which falls through to the generic handler below.
            start_line = int(next_token) if next_token else 1

            # Parse the log file
            parsed_entries = self._parser.parse_file(
                path,
                max_entries=max_entries * 2,  # Get extra for filtering
                start_line=start_line,
            )

            # Convert to LogEntry and filter
            entries: list[LogEntry] = []
            last_processed_line = start_line

            for entry in parsed_entries:
                # Track every line we process (for truncation logic)
                last_processed_line = entry.line_number

                # Apply time filters
                if start_time and entry.timestamp and entry.timestamp < start_time:
                    continue
                if end_time and entry.timestamp and entry.timestamp > end_time:
                    continue

                # Apply pattern filter (check both message and raw line)
                if filter_pattern:
                    pattern_lower = filter_pattern.lower()
                    if (
                        pattern_lower not in entry.message.lower()
                        and pattern_lower not in entry.raw.lower()
                    ):
                        continue

                entries.append(
                    LogEntry(
                        timestamp=entry.timestamp,
                        message=entry.message,
                        level=entry.level.value if entry.level != LogLevel.UNKNOWN else None,
                        source=source_id,
                        metadata={
                            "line_number": entry.line_number,
                            "raw": entry.raw,
                        },
                    )
                )

                if len(entries) >= max_entries:
                    break

            # Determine if there are more entries: truncated either when we
            # filled max_entries, or when we stopped before EOF (e.g. the
            # parser's own max_entries cap of max_entries * 2 kicked in).
            total_lines = self._parser.get_summary(path).get("total_lines", 0)
            hit_max_entries = len(entries) >= max_entries
            reached_eof = last_processed_line >= total_lines
            truncated = hit_max_entries or not reached_eof

            return LogResult(
                entries=entries,
                source=source_id,
                truncated=truncated,
                next_token=str(last_processed_line + 1) if truncated else None,
            )

        except Exception as e:
            logger.exception(f"Error reading log file: {source_id}")
            return LogResult(
                entries=[],
                source=source_id,
                error=f"Error reading log file: {e}",
            )

    async def get_recent_errors(
        self,
        source_id: str,
        max_entries: int = 50,
        context_lines: int = 2,
    ) -> LogResult:
        """Get recent errors from a log file with context.

        Args:
            source_id: Path to the log file.
            max_entries: Maximum errors to return.
            context_lines: Lines of context around each error (stored in
                each entry's metadata, not as separate entries).

        Returns:
            LogResult with error entries and context.
        """
        path = Path(source_id)

        if not path.exists():
            return LogResult(
                entries=[],
                source=source_id,
                error=f"Log file not found: {source_id}",
            )

        try:
            # Delegates error detection to LogParser; each result carries
            # the matched entry plus surrounding context lines.
            errors = self._parser.find_errors(
                path,
                max_results=max_entries,
                context_lines=context_lines,
            )

            entries: list[LogEntry] = []
            for error_data in errors:
                entry = error_data["entry"]
                entries.append(
                    LogEntry(
                        timestamp=entry.timestamp,
                        message=entry.message,
                        level=entry.level.value,
                        source=source_id,
                        metadata={
                            "line_number": entry.line_number,
                            "context_before": error_data["context_before"],
                            "context_after": error_data["context_after"],
                        },
                    )
                )

            return LogResult(
                entries=entries,
                source=source_id,
            )

        except Exception as e:
            logger.exception(f"Error finding errors in: {source_id}")
            return LogResult(
                entries=[],
                source=source_id,
                error=f"Error finding errors: {e}",
            )
+
+
def create_local_provider(
    name: str = "Local Files",
    directories: list[str] | None = None,
    patterns: list[str] | None = None,
) -> LocalFileLogProvider:
    """Build a ready-to-use local-file log provider.

    Args:
        name: Display name for the provider.
        directories: Directories to scan for log files.
        patterns: Glob patterns identifying log files.

    Returns:
        A LocalFileLogProvider wired with a matching LogProviderConfig.
    """
    dir_list = directories or []
    config = LogProviderConfig(
        source=LogSource.LOCAL_FILE,
        name=name,
        settings={
            "directories": dir_list,
            "patterns": patterns or ["*.log", "*.log.*"],
        },
    )

    return LocalFileLogProvider(
        config=config,
        log_directories=[Path(d) for d in dir_list],
        log_patterns=patterns,
    )
diff --git a/python-packages/dataing/src/dataing/agents/tools/registry.py b/python-packages/dataing/src/dataing/agents/tools/registry.py
new file mode 100644
index 000000000..5b3ad8a63
--- /dev/null
+++ b/python-packages/dataing/src/dataing/agents/tools/registry.py
@@ -0,0 +1,299 @@
+"""Unified tool registry for the Dataing Assistant.
+
+This module provides a central registry for all tools available to the
+Dataing Assistant agent. Tools are organized by category and can be
+enabled/disabled per tenant.
+
+Usage:
+ registry = get_default_registry()
+ tools = registry.get_enabled_tools(tenant_id)
+
+ # Create agent with tools
+ agent = BondAgent(
+ name="assistant",
+ toolsets=tools,
+ ...
+ )
+"""
+
+from __future__ import annotations
+
+import logging
+from collections.abc import Callable
+from dataclasses import dataclass, field
+from enum import Enum
+from typing import TYPE_CHECKING, Any, Protocol, runtime_checkable
+from uuid import UUID
+
+from pydantic_ai.tools import Tool
+
+if TYPE_CHECKING:
+ pass
+
+logger = logging.getLogger(__name__)
+
+
class ToolCategory(str, Enum):
    """Categories for assistant tools.

    Subclasses ``str`` so members compare equal to and serialize as their
    plain string values (e.g. ``ToolCategory.GIT == "git"``).
    """

    FILES = "files"
    GIT = "git"
    DOCKER = "docker"
    LOGS = "logs"
    DATASOURCE = "datasource"
    ENVIRONMENT = "environment"
+
+
@runtime_checkable
class ToolProtocol(Protocol):
    """Structural protocol for async tool functions.

    ``runtime_checkable`` permits isinstance() checks, but note those only
    verify the presence of ``__call__``, not its signature or asyncness.
    """

    async def __call__(self, *args: Any, **kwargs: Any) -> Any:
        """Execute the tool."""
        ...
+
+
@dataclass
class ToolConfig:
    """Configuration for a registered tool.

    Attributes:
        name: Unique tool name.
        category: Tool category for grouping.
        description: Human-readable description.
        tool: The PydanticAI Tool instance.
        enabled_by_default: Whether the tool is enabled by default.
        requires_auth: Whether the tool requires authentication.
        priority: Tool priority within category (lower = higher priority).
    """

    name: str
    category: ToolCategory
    description: str
    tool: Tool[Any]
    enabled_by_default: bool = True
    # NOTE(review): requires_auth is stored but not enforced by ToolRegistry
    # in this module — presumably checked by callers; confirm.
    requires_auth: bool = True
    priority: int = 100  # used as the sort key in ToolRegistry.register()
+
+
@dataclass
class TenantToolConfig:
    """Per-tenant tool configuration.

    Attributes:
        enabled_tools: Set of explicitly enabled tool names.
        disabled_tools: Set of explicitly disabled tool names.
        tool_limits: Per-tool rate limits or restrictions.
    """

    enabled_tools: set[str] = field(default_factory=set)
    # Disabled wins over enabled when a name appears in both sets
    # (ToolRegistry.is_tool_enabled checks disabled_tools first).
    disabled_tools: set[str] = field(default_factory=set)
    # NOTE(review): tool_limits is never read in this module — presumably
    # consumed elsewhere; confirm before relying on its shape.
    tool_limits: dict[str, Any] = field(default_factory=dict)
+
+
class ToolRegistry:
    """Central registry for all assistant tools.

    The registry manages tool registration, per-tenant configuration,
    and provides tools to the BondAgent based on tenant settings.
    """

    def __init__(self) -> None:
        """Initialize an empty registry with one bucket per category."""
        self._tools: dict[str, ToolConfig] = {}
        self._tenant_configs: dict[UUID, TenantToolConfig] = {}
        # Tool names per category, kept sorted by priority (see register()).
        self._category_tools: dict[ToolCategory, list[str]] = {cat: [] for cat in ToolCategory}

    def register(self, config: ToolConfig) -> None:
        """Register a tool with the registry.

        Args:
            config: Tool configuration.

        Raises:
            ValueError: If a tool with the same name is already registered.
        """
        if config.name in self._tools:
            raise ValueError(f"Tool '{config.name}' is already registered")

        self._tools[config.name] = config
        self._category_tools[config.category].append(config.name)
        # Keep category listings ordered by priority (lower = first).
        self._category_tools[config.category].sort(key=lambda n: self._tools[n].priority)
        # Lazy %-style args: the message is only formatted if DEBUG is on.
        logger.debug("Registered tool: %s (%s)", config.name, config.category)

    def register_tool(
        self,
        name: str,
        category: ToolCategory,
        description: str,
        func: Callable[..., Any],
        *,
        enabled_by_default: bool = True,
        requires_auth: bool = True,
        priority: int = 100,
    ) -> None:
        """Register a tool function with the registry.

        Convenience wrapper that wraps *func* in a PydanticAI Tool and
        delegates to register().

        Args:
            name: Unique tool name.
            category: Tool category.
            description: Human-readable description.
            func: The tool function.
            enabled_by_default: Whether enabled by default.
            requires_auth: Whether requires authentication.
            priority: Tool priority (lower = higher priority).

        Raises:
            ValueError: If a tool with the same name is already registered.
        """
        tool = Tool(func)
        config = ToolConfig(
            name=name,
            category=category,
            description=description,
            tool=tool,
            enabled_by_default=enabled_by_default,
            requires_auth=requires_auth,
            priority=priority,
        )
        self.register(config)

    def get_tool(self, name: str) -> ToolConfig | None:
        """Get a tool configuration by name.

        Args:
            name: Tool name.

        Returns:
            Tool configuration or None if not found.
        """
        return self._tools.get(name)

    def get_tools_by_category(self, category: ToolCategory) -> list[ToolConfig]:
        """Get all tools in a category, ordered by priority.

        Args:
            category: Tool category.

        Returns:
            List of tool configurations.
        """
        return [self._tools[name] for name in self._category_tools[category]]

    def get_all_tools(self) -> list[ToolConfig]:
        """Get all registered tools.

        Returns:
            List of all tool configurations (registration order).
        """
        return list(self._tools.values())

    def set_tenant_config(self, tenant_id: UUID, config: TenantToolConfig) -> None:
        """Set tool configuration for a tenant.

        Args:
            tenant_id: Tenant UUID.
            config: Tenant-specific tool configuration.
        """
        self._tenant_configs[tenant_id] = config

    def get_tenant_config(self, tenant_id: UUID) -> TenantToolConfig:
        """Get tool configuration for a tenant.

        Args:
            tenant_id: Tenant UUID.

        Returns:
            Tenant-specific configuration (or a fresh default if not set).
        """
        return self._tenant_configs.get(tenant_id, TenantToolConfig())

    def is_tool_enabled(self, name: str, tenant_id: UUID | None = None) -> bool:
        """Check if a tool is enabled for a tenant.

        Args:
            name: Tool name.
            tenant_id: Optional tenant UUID.

        Returns:
            True if the tool is enabled; False for unknown tools.
        """
        tool = self._tools.get(name)
        if tool is None:
            return False

        if tenant_id is None:
            return tool.enabled_by_default

        config = self.get_tenant_config(tenant_id)

        # Explicit enable/disable takes precedence; disabled wins over enabled.
        if name in config.disabled_tools:
            return False
        if name in config.enabled_tools:
            return True

        return tool.enabled_by_default

    def get_enabled_tools(
        self,
        tenant_id: UUID | None = None,
        categories: list[ToolCategory] | None = None,
    ) -> list[Tool[Any]]:
        """Get all enabled tools for a tenant.

        Args:
            tenant_id: Optional tenant UUID.
            categories: Optional list of categories to filter (an empty
                list, like None, applies no filter).

        Returns:
            List of PydanticAI Tool instances.
        """
        return [
            config.tool
            for name, config in self._tools.items()
            if (not categories or config.category in categories)
            and self.is_tool_enabled(name, tenant_id)
        ]

    def enable_tool(self, tenant_id: UUID, name: str) -> None:
        """Enable a tool for a tenant.

        Args:
            tenant_id: Tenant UUID.
            name: Tool name.
        """
        config = self._tenant_configs.setdefault(tenant_id, TenantToolConfig())
        config.enabled_tools.add(name)
        config.disabled_tools.discard(name)

    def disable_tool(self, tenant_id: UUID, name: str) -> None:
        """Disable a tool for a tenant.

        Args:
            tenant_id: Tenant UUID.
            name: Tool name.
        """
        config = self._tenant_configs.setdefault(tenant_id, TenantToolConfig())
        config.disabled_tools.add(name)
        config.enabled_tools.discard(name)
+
+
# Singleton registry instance
_default_registry: ToolRegistry | None = None


def get_default_registry() -> ToolRegistry:
    """Return the process-wide ToolRegistry, creating it on first use.

    Returns:
        The lazily-created singleton ToolRegistry instance.
    """
    global _default_registry
    registry = _default_registry
    if registry is None:
        registry = ToolRegistry()
        _default_registry = registry
    return registry
+
+
def reset_registry() -> None:
    """Clear the cached default registry (for testing).

    The next get_default_registry() call will build a fresh instance.
    """
    global _default_registry
    _default_registry = None
diff --git a/python-packages/dataing/src/dataing/core/parsing/__init__.py b/python-packages/dataing/src/dataing/core/parsing/__init__.py
new file mode 100644
index 000000000..126bf9599
--- /dev/null
+++ b/python-packages/dataing/src/dataing/core/parsing/__init__.py
@@ -0,0 +1,22 @@
+"""Centralized file parsers for the Dataing Assistant.
+
+This module provides unified parsing utilities for different file types,
+with safe defaults and consistent interfaces.
+"""
+
+from dataing.core.parsing.data_parser import DataParser, SampleResult
+from dataing.core.parsing.json_parser import JsonParser
+from dataing.core.parsing.log_parser import LogEntry, LogParser
+from dataing.core.parsing.text_parser import TextChunk, TextParser
+from dataing.core.parsing.yaml_parser import YamlParser
+
+__all__ = [
+ "DataParser",
+ "JsonParser",
+ "LogParser",
+ "LogEntry",
+ "SampleResult",
+ "TextParser",
+ "TextChunk",
+ "YamlParser",
+]
diff --git a/python-packages/dataing/src/dataing/core/parsing/data_parser.py b/python-packages/dataing/src/dataing/core/parsing/data_parser.py
new file mode 100644
index 000000000..2962b94cd
--- /dev/null
+++ b/python-packages/dataing/src/dataing/core/parsing/data_parser.py
@@ -0,0 +1,492 @@
+"""Data file parser for CSV and Parquet sampling.
+
+Provides utilities for reading samples from data files
+without loading entire datasets into memory.
+"""
+
+from __future__ import annotations
+
+import csv
+import logging
+from dataclasses import dataclass, field
+from pathlib import Path
+from typing import Any
+
+logger = logging.getLogger(__name__)
+
+
@dataclass
class SampleResult:
    """Result of sampling a data file.

    Attributes:
        columns: List of column names.
        rows: Sample rows as list of dicts.
        total_rows: Total row count (if known).
        file_size: File size in bytes.
        format: Detected file format.
        schema: Column types if available.
        truncated: Whether sample was truncated.
    """

    columns: list[str]
    rows: list[dict[str, Any]]
    total_rows: int | None
    file_size: int
    format: str
    schema: dict[str, str] = field(default_factory=dict)
    truncated: bool = False


class DataParser:
    """Parser for data files (CSV, Parquet).

    Provides efficient sampling of data files without
    loading entire datasets into memory.
    """

    MAX_FILE_SIZE = 100 * 1024 * 1024  # 100 MB
    DEFAULT_SAMPLE_ROWS = 100
    MAX_SAMPLE_ROWS = 1000

    def __init__(
        self,
        max_file_size: int = MAX_FILE_SIZE,
        default_sample_rows: int = DEFAULT_SAMPLE_ROWS,
    ) -> None:
        """Initialize the data parser.

        Args:
            max_file_size: Maximum file size in bytes.
            default_sample_rows: Default number of rows to sample.
        """
        self.max_file_size = max_file_size
        self.default_sample_rows = default_sample_rows

    def sample_file(
        self,
        path: Path | str,
        n_rows: int | None = None,
        columns: list[str] | None = None,
    ) -> SampleResult:
        """Sample rows from a data file.

        Automatically detects file format (by extension) and uses the
        appropriate parser.

        Args:
            path: Path to the data file.
            n_rows: Number of rows to sample, capped at MAX_SAMPLE_ROWS
                (default / falsy: default_sample_rows).
            columns: Specific columns to include (default: all).

        Returns:
            SampleResult with sample data and metadata.

        Raises:
            FileNotFoundError: If file doesn't exist (raised by stat()).
            ValueError: If file exceeds size limit or format unsupported.
        """
        path = Path(path)
        n_rows = min(n_rows or self.default_sample_rows, self.MAX_SAMPLE_ROWS)

        # Check file size before reading anything.
        file_size = path.stat().st_size
        if file_size > self.max_file_size:
            raise ValueError(
                f"Data file exceeds size limit: {file_size:,} > {self.max_file_size:,} bytes"
            )

        # Detect format and parse
        suffix = path.suffix.lower()
        if suffix == ".csv":
            return self._sample_csv(path, n_rows, columns, file_size)
        elif suffix == ".tsv":
            return self._sample_csv(path, n_rows, columns, file_size, delimiter="\t")
        elif suffix == ".parquet":
            return self._sample_parquet(path, n_rows, columns, file_size)
        else:
            raise ValueError(f"Unsupported data file format: {suffix}")

    def get_schema(self, path: Path | str) -> dict[str, str]:
        """Get column schema from a data file.

        CSV/TSV schemas are inferred from a 100-row sample; Parquet schemas
        come from file metadata.

        Args:
            path: Path to the data file.

        Returns:
            Dict mapping column names to type descriptions.

        Raises:
            ValueError: If the file format is unsupported.
        """
        path = Path(path)
        suffix = path.suffix.lower()

        if suffix in (".csv", ".tsv"):
            return self._get_csv_schema(path, delimiter="\t" if suffix == ".tsv" else ",")
        elif suffix == ".parquet":
            return self._get_parquet_schema(path)
        else:
            raise ValueError(f"Unsupported data file format: {suffix}")

    def count_rows(self, path: Path | str) -> int:
        """Count rows in a data file (excluding the CSV/TSV header).

        Args:
            path: Path to the data file.

        Returns:
            Number of rows.

        Raises:
            ValueError: If the file format is unsupported.
        """
        path = Path(path)
        suffix = path.suffix.lower()

        if suffix in (".csv", ".tsv"):
            return self._count_csv_rows(path)
        elif suffix == ".parquet":
            return self._count_parquet_rows(path)
        else:
            raise ValueError(f"Unsupported data file format: {suffix}")

    def _sample_csv(
        self,
        path: Path,
        n_rows: int,
        columns: list[str] | None,
        file_size: int,
        delimiter: str = ",",
    ) -> SampleResult:
        """Sample rows from a CSV file.

        Args:
            path: Path to the CSV file.
            n_rows: Number of rows to sample.
            columns: Columns to include.
            file_size: File size in bytes.
            delimiter: CSV delimiter.

        Returns:
            SampleResult.
        """
        rows: list[dict[str, Any]] = []
        all_columns: list[str] = []
        total_rows = 0

        with path.open(encoding="utf-8", errors="replace") as f:
            # Sniff the dialect from a prefix of the file. On sniff failure,
            # fall back to a plain reader with the caller-supplied delimiter.
            # (Do NOT mutate csv.excel as a fallback: it is a shared,
            # module-level dialect class, and assigning its delimiter would
            # leak state into every other csv user in the process.)
            sniff_sample = f.read(8192)
            f.seek(0)

            try:
                dialect = csv.Sniffer().sniff(sniff_sample, delimiters=delimiter + ",;|")
                reader = csv.DictReader(f, dialect=dialect)
            except csv.Error:
                reader = csv.DictReader(f, delimiter=delimiter)

            if reader.fieldnames:
                all_columns = list(reader.fieldnames)

            # Count every data row, but only keep the first n_rows
            # (optionally projected to the requested columns).
            for row in reader:
                total_rows += 1
                if len(rows) < n_rows:
                    if columns:
                        row = {k: v for k, v in row.items() if k in columns}
                    rows.append(row)

        # Infer schema from sample
        schema = self._infer_csv_schema(rows, all_columns)

        return SampleResult(
            columns=columns if columns else all_columns,
            rows=rows,
            total_rows=total_rows,
            file_size=file_size,
            format="csv",
            schema=schema,
            truncated=total_rows > n_rows,
        )

    def _sample_parquet(
        self,
        path: Path,
        n_rows: int,
        columns: list[str] | None,
        file_size: int,
    ) -> SampleResult:
        """Sample rows from a Parquet file (first row group only).

        Args:
            path: Path to the Parquet file.
            n_rows: Number of rows to sample.
            columns: Columns to include.
            file_size: File size in bytes.

        Returns:
            SampleResult.

        Raises:
            ImportError: If pyarrow is not installed.
        """
        try:
            import pyarrow.parquet as pq
        except ImportError as err:
            raise ImportError(
                "pyarrow is required for Parquet support. Install with: pip install pyarrow"
            ) from err

        # Read metadata first: row count and column names are free.
        parquet_file = pq.ParquetFile(path)
        total_rows = parquet_file.metadata.num_rows
        all_columns = parquet_file.schema.names

        # Read only the first row group to bound memory use.
        table = parquet_file.read_row_groups(
            [0] if parquet_file.metadata.num_row_groups > 0 else [],
            columns=columns,
        )

        # Convert to dicts
        df = table.to_pandas()
        if len(df) > n_rows:
            df = df.head(n_rows)

        rows: list[dict[str, Any]] = df.to_dict(orient="records")

        # Schema from Parquet physical types.
        schema = {}
        for pq_field in parquet_file.schema:
            schema[pq_field.name] = str(pq_field.physical_type)

        return SampleResult(
            columns=columns if columns else all_columns,
            rows=rows,
            total_rows=total_rows,
            file_size=file_size,
            format="parquet",
            schema=schema,
            truncated=total_rows > n_rows,
        )

    def _get_csv_schema(self, path: Path, delimiter: str = ",") -> dict[str, str]:
        """Infer schema from CSV by sampling up to 100 rows.

        Args:
            path: Path to CSV file.
            delimiter: CSV delimiter.

        Returns:
            Column type mapping.
        """
        # Pass the real file size through so the SampleResult is accurate.
        sample = self._sample_csv(path, 100, None, path.stat().st_size, delimiter)
        return sample.schema

    def _get_parquet_schema(self, path: Path) -> dict[str, str]:
        """Get schema from Parquet file metadata.

        Args:
            path: Path to Parquet file.

        Returns:
            Column type mapping (physical types).

        Raises:
            ImportError: If pyarrow is not installed.
        """
        try:
            import pyarrow.parquet as pq
        except ImportError as err:
            raise ImportError(
                "pyarrow is required for Parquet support. Install with: pip install pyarrow"
            ) from err

        parquet_file = pq.ParquetFile(path)
        schema = {}
        for pq_field in parquet_file.schema:
            schema[pq_field.name] = str(pq_field.physical_type)
        return schema

    def _count_csv_rows(self, path: Path) -> int:
        """Count data rows in a CSV file (header excluded).

        Args:
            path: Path to CSV file.

        Returns:
            Row count.
        """
        count = 0
        with path.open(encoding="utf-8", errors="replace") as f:
            # Skip header
            next(f, None)
            for _ in f:
                count += 1
        return count

    def _count_parquet_rows(self, path: Path) -> int:
        """Count rows in a Parquet file from metadata (no data read).

        Args:
            path: Path to Parquet file.

        Returns:
            Row count.

        Raises:
            ImportError: If pyarrow is not installed.
        """
        try:
            import pyarrow.parquet as pq
        except ImportError as err:
            raise ImportError(
                "pyarrow is required for Parquet support. Install with: pip install pyarrow"
            ) from err

        parquet_file = pq.ParquetFile(path)
        num_rows: int = parquet_file.metadata.num_rows
        return num_rows

    def _infer_csv_schema(self, rows: list[dict[str, Any]], columns: list[str]) -> dict[str, str]:
        """Infer column types from sample rows.

        Args:
            rows: Sample rows.
            columns: Column names.

        Returns:
            Column type mapping ("unknown" for all-empty columns).
        """
        schema = {}

        for col in columns:
            # Only non-empty values participate in inference.
            values = [row.get(col) for row in rows if row.get(col)]

            if not values:
                schema[col] = "unknown"
                continue

            schema[col] = self._infer_value_type(values)

        return schema

    def _infer_value_type(self, values: list[Any]) -> str:
        """Infer a type name from a list of values.

        Classification is majority-vote: a type wins when at least 80% of
        the sampled values match it; otherwise "string".

        Note: "1"/"0" are intentionally counted as booleans (alongside
        true/false/yes/no) before the integer check runs.

        Args:
            values: Sample values.

        Returns:
            One of "integer", "float", "boolean", "datetime", "string",
            or "unknown" when no usable values are present.
        """
        # Sample up to 20 non-null values
        sample = [v for v in values[:20] if v is not None and v != ""]

        if not sample:
            return "unknown"

        # Per-type tallies; each value counts toward exactly one type.
        int_count = 0
        float_count = 0
        bool_count = 0
        date_count = 0

        for v in sample:
            v_str = str(v).strip()

            # Check boolean
            if v_str.lower() in ("true", "false", "yes", "no", "1", "0"):
                bool_count += 1
                continue

            # Check integer
            try:
                int(v_str)
                int_count += 1
                continue
            except ValueError:
                pass

            # Check float
            try:
                float(v_str)
                float_count += 1
                continue
            except ValueError:
                pass

            # Check date-like
            if self._looks_like_date(v_str):
                date_count += 1
                continue

        total = len(sample)
        threshold = 0.8  # 80% must match type

        if int_count / total >= threshold:
            return "integer"
        elif float_count / total >= threshold:
            return "float"
        elif bool_count / total >= threshold:
            return "boolean"
        elif date_count / total >= threshold:
            return "datetime"
        else:
            return "string"

    def _looks_like_date(self, value: str) -> bool:
        """Check if a value starts like a date.

        Args:
            value: String value.

        Returns:
            True if the value's prefix matches an ISO, US, or EU date shape.
        """
        import re

        date_patterns = [
            r"^\d{4}-\d{2}-\d{2}",  # ISO date
            r"^\d{2}/\d{2}/\d{4}",  # US date
            r"^\d{2}-\d{2}-\d{4}",  # EU date
        ]

        for pattern in date_patterns:
            if re.match(pattern, value):
                return True

        return False

    def format_sample_as_markdown(self, result: SampleResult, max_rows: int = 10) -> str:
        """Format a sample result as a markdown table.

        Cell values longer than 50 characters are truncated with "..." and
        pipe characters are escaped so the table stays well-formed.

        Args:
            result: SampleResult to format.
            max_rows: Maximum rows to include.

        Returns:
            Markdown string ("*No data*" for empty samples).
        """
        if not result.rows:
            return "*No data*"

        rows_to_show = result.rows[:max_rows]
        columns = result.columns

        # Build table
        lines = []

        # Header
        lines.append("| " + " | ".join(columns) + " |")
        lines.append("| " + " | ".join(["---"] * len(columns)) + " |")

        # Rows
        for row in rows_to_show:
            cells = []
            for col in columns:
                value = row.get(col, "")
                # Truncate long values
                value_str = str(value)
                if len(value_str) > 50:
                    value_str = value_str[:47] + "..."
                # Escape pipes
                value_str = value_str.replace("|", "\\|")
                cells.append(value_str)
            lines.append("| " + " | ".join(cells) + " |")

        if len(result.rows) > max_rows:
            lines.append(f"\n*... and {len(result.rows) - max_rows} more rows*")

        if result.truncated:
            lines.append(f"\n*Total rows in file: {result.total_rows:,}*")

        return "\n".join(lines)
diff --git a/python-packages/dataing/src/dataing/core/parsing/json_parser.py b/python-packages/dataing/src/dataing/core/parsing/json_parser.py
new file mode 100644
index 000000000..f2e6f3ef0
--- /dev/null
+++ b/python-packages/dataing/src/dataing/core/parsing/json_parser.py
@@ -0,0 +1,246 @@
+"""JSON file parser with safe loading and helpful summaries.
+
+Provides utilities for parsing JSON files with size limits
+and formatted summaries for LLM consumption.
+"""
+
+from __future__ import annotations
+
+import json
+import logging
+from pathlib import Path
+from typing import Any
+
+logger = logging.getLogger(__name__)
+
+
+class JsonParser:
+    """Parser for JSON files with safe loading.
+
+    Provides size-limited parsing and helpful summaries
+    for large JSON structures.
+    """
+
+    # Default cap on file size; guards against loading huge payloads into memory.
+    MAX_FILE_SIZE = 10 * 1024 * 1024  # 10 MB
+
+    def __init__(self, max_file_size: int = MAX_FILE_SIZE) -> None:
+        """Initialize the JSON parser.
+
+        Args:
+            max_file_size: Maximum file size in bytes.
+        """
+        self.max_file_size = max_file_size
+
+    def parse_file(self, path: Path | str) -> Any:
+        """Parse a JSON file.
+
+        Args:
+            path: Path to the JSON file.
+
+        Returns:
+            Parsed JSON content.
+
+        Raises:
+            FileNotFoundError: If file doesn't exist.
+            ValueError: If file exceeds size limit.
+            json.JSONDecodeError: If JSON is invalid.
+        """
+        path = Path(path)
+
+        # Check file size before reading so oversized files never hit memory.
+        file_size = path.stat().st_size
+        if file_size > self.max_file_size:
+            raise ValueError(
+                f"JSON file exceeds size limit: {file_size:,} > {self.max_file_size:,} bytes"
+            )
+
+        content = path.read_text(encoding="utf-8")
+        return self.parse_string(content)
+
+    def parse_string(self, content: str) -> Any:
+        """Parse a JSON string.
+
+        Args:
+            content: JSON content as string.
+
+        Returns:
+            Parsed JSON content.
+
+        Raises:
+            json.JSONDecodeError: If JSON is invalid.
+        """
+        try:
+            return json.loads(content)
+        except json.JSONDecodeError as e:
+            # Log before re-raising so parse failures are visible in logs.
+            logger.error(f"JSON parse error: {e}")
+            raise
+
+    def format_summary(
+        self,
+        data: Any,
+        max_depth: int = 3,
+        max_array_items: int = 5,
+    ) -> str:
+        """Format JSON data as a readable summary.
+
+        Useful for providing concise view of JSON content to LLMs.
+        The output is human-readable and NOT guaranteed to be valid JSON
+        (long strings and deep structures are elided).
+
+        Args:
+            data: Parsed JSON data.
+            max_depth: Maximum nesting depth to show.
+            max_array_items: Maximum array items to show before truncating.
+
+        Returns:
+            Formatted string summary.
+        """
+        return self._format_value(
+            data,
+            depth=0,
+            max_depth=max_depth,
+            max_array_items=max_array_items,
+        )
+
+    def get_schema_summary(self, data: Any) -> dict[str, Any]:
+        """Infer a schema summary from JSON data.
+
+        Useful for understanding the structure of large JSON files.
+
+        Args:
+            data: Parsed JSON data.
+
+        Returns:
+            Dict describing the structure.
+        """
+        return self._infer_schema(data)
+
+    def _format_value(
+        self,
+        value: Any,
+        depth: int,
+        max_depth: int,
+        max_array_items: int,
+    ) -> str:
+        """Recursively format a value.
+
+        Args:
+            value: Value to format.
+            depth: Current depth.
+            max_depth: Maximum depth.
+            max_array_items: Maximum array items.
+
+        Returns:
+            Formatted string.
+        """
+        indent = "  " * depth
+
+        # At the depth limit, collapse containers to a size annotation.
+        if depth >= max_depth:
+            if isinstance(value, dict):
+                return f"{{...}} ({len(value)} keys)"
+            elif isinstance(value, list):
+                return f"[...] ({len(value)} items)"
+            else:
+                return self._format_primitive(value)
+
+        if isinstance(value, dict):
+            if not value:
+                return "{}"
+            lines = ["{"]
+            for k, v in value.items():
+                formatted_v = self._format_value(v, depth + 1, max_depth, max_array_items)
+                lines.append(f'{indent}  "{k}": {formatted_v}')
+            lines.append(f"{indent}}}")
+            return "\n".join(lines)
+
+        elif isinstance(value, list):
+            if not value:
+                return "[]"
+            lines = ["["]
+            for i, item in enumerate(value):
+                if i >= max_array_items:
+                    # Stop early and report how many items were elided.
+                    lines.append(f"{indent}  ... ({len(value) - max_array_items} more items)")
+                    break
+                formatted_item = self._format_value(item, depth + 1, max_depth, max_array_items)
+                lines.append(f"{indent}  {formatted_item}")
+            lines.append(f"{indent}]")
+            return "\n".join(lines)
+
+        else:
+            return self._format_primitive(value)
+
+    def _format_primitive(self, value: Any) -> str:
+        """Format a primitive value.
+
+        Args:
+            value: Primitive value.
+
+        Returns:
+            Formatted string.
+        """
+        if isinstance(value, str):
+            if len(value) > 100:
+                # NOTE: the truncated form does not JSON-escape the sliced
+                # text — it is for display only, not for re-parsing.
+                return f'"{value[:100]}..." ({len(value)} chars)'
+            # json.dumps gives proper quoting/escaping for short strings.
+            return json.dumps(value)
+        elif value is None:
+            return "null"
+        # bool must be checked before falling through: bool is a subclass
+        # of int in Python and would otherwise render as 'True'/'False'.
+        elif isinstance(value, bool):
+            return "true" if value else "false"
+        else:
+            return str(value)
+
+    def _infer_schema(self, value: Any, path: str = "$") -> dict[str, Any]:
+        """Infer schema from a value.
+
+        Args:
+            value: Value to analyze.
+            path: JSON path to this value (used for recursion; not included
+                in the returned schema).
+
+        Returns:
+            Schema dict.
+        """
+        if isinstance(value, dict):
+            properties = {}
+            for k, v in value.items():
+                properties[k] = self._infer_schema(v, f"{path}.{k}")
+            return {"type": "object", "properties": properties}
+
+        elif isinstance(value, list):
+            if not value:
+                return {"type": "array", "items": {"type": "unknown"}}
+            # Sample first few items only — large arrays are not fully scanned.
+            item_types = set()
+            for item in value[:5]:
+                item_types.add(self._get_type_name(item))
+            return {
+                "type": "array",
+                "length": len(value),
+                "item_types": list(item_types),
+            }
+
+        else:
+            return {"type": self._get_type_name(value)}
+
+    def _get_type_name(self, value: Any) -> str:
+        """Get the JSON type name for a value.
+
+        Args:
+            value: Value to type.
+
+        Returns:
+            Type name string.
+        """
+        if value is None:
+            return "null"
+        # bool before int: bool subclasses int, so the order matters here.
+        elif isinstance(value, bool):
+            return "boolean"
+        elif isinstance(value, int):
+            return "integer"
+        elif isinstance(value, float):
+            return "number"
+        elif isinstance(value, str):
+            return "string"
+        elif isinstance(value, list):
+            return "array"
+        elif isinstance(value, dict):
+            return "object"
+        else:
+            return "unknown"
diff --git a/python-packages/dataing/src/dataing/core/parsing/log_parser.py b/python-packages/dataing/src/dataing/core/parsing/log_parser.py
new file mode 100644
index 000000000..1a9709ac6
--- /dev/null
+++ b/python-packages/dataing/src/dataing/core/parsing/log_parser.py
@@ -0,0 +1,490 @@
+"""Log file parser with pattern detection.
+
+Provides utilities for parsing log files, detecting common formats,
+and extracting structured log entries.
+"""
+
+from __future__ import annotations
+
+import logging
+import re
+from dataclasses import dataclass, field
+from datetime import datetime
+from enum import Enum
+from pathlib import Path
+from typing import Any
+
+logger = logging.getLogger(__name__)
+
+
+class LogLevel(str, Enum):
+    """Standard log levels.
+
+    Inherits from str so members compare equal to their lowercase string
+    values and serialize naturally (e.g. in JSON payloads).
+    """
+
+    DEBUG = "debug"
+    INFO = "info"
+    WARNING = "warning"
+    ERROR = "error"
+    CRITICAL = "critical"
+    # Fallback used when no level marker can be detected in a line.
+    UNKNOWN = "unknown"
+
+
+@dataclass
+class LogEntry:
+    """A parsed log entry.
+
+    Attributes:
+        timestamp: Parsed timestamp if detected.
+        level: Log level if detected.
+        message: The log message content.
+        source: Source/logger name if detected.
+        line_number: Original line number in file.
+        raw: The raw log line.
+        metadata: Additional parsed fields.
+    """
+
+    timestamp: datetime | None
+    level: LogLevel
+    message: str
+    source: str | None
+    line_number: int
+    raw: str
+    # For JSON-formatted log lines this holds the full decoded object;
+    # plain-text lines leave it as an empty dict.
+    metadata: dict[str, Any] = field(default_factory=dict)
+
+
+class LogParser:
+ """Parser for log files with format detection.
+
+ Supports common log formats including:
+ - Standard Python logging
+ - Docker container logs
+ - Nginx/Apache access logs
+ - JSON-formatted logs (structured logging)
+ """
+
+ MAX_FILE_SIZE = 50 * 1024 * 1024 # 50 MB for logs
+
+ # Common timestamp patterns
+ TIMESTAMP_PATTERNS = [
+ # ISO 8601: 2024-01-15T10:30:45.123Z
+ (
+ r"(\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}(?:\.\d+)?(?:Z|[+-]\d{2}:?\d{2})?)",
+ "%Y-%m-%dT%H:%M:%S",
+ ),
+ # Standard datetime: 2024-01-15 10:30:45
+ (r"(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2})", "%Y-%m-%d %H:%M:%S"),
+ # Compact: 20240115T103045
+ (r"(\d{8}T\d{6})", "%Y%m%dT%H%M%S"),
+ # Unix timestamp with brackets: [1705315845]
+ (r"\[(\d{10})\]", "epoch"),
+ ]
+
+ # Log level patterns (case-insensitive)
+ LEVEL_PATTERNS = [
+ (r"\b(DEBUG)\b", LogLevel.DEBUG),
+ (r"\b(INFO)\b", LogLevel.INFO),
+ (r"\b(WARN(?:ING)?)\b", LogLevel.WARNING),
+ (r"\b(ERROR)\b", LogLevel.ERROR),
+ (r"\b(CRIT(?:ICAL)?|FATAL)\b", LogLevel.CRITICAL),
+ ]
+
+ def __init__(self, max_file_size: int = MAX_FILE_SIZE) -> None:
+ """Initialize the log parser.
+
+ Args:
+ max_file_size: Maximum file size in bytes.
+ """
+ self.max_file_size = max_file_size
+
+ def parse_file(
+ self,
+ path: Path | str,
+ max_entries: int | None = None,
+ level_filter: LogLevel | None = None,
+ start_line: int = 1,
+ ) -> list[LogEntry]:
+ """Parse a log file into structured entries.
+
+ Args:
+ path: Path to the log file.
+ max_entries: Maximum entries to return.
+ level_filter: Only return entries of this level or higher.
+ start_line: 1-indexed line to start from.
+
+ Returns:
+ List of LogEntry objects.
+
+ Raises:
+ FileNotFoundError: If file doesn't exist.
+ ValueError: If file exceeds size limit.
+ """
+ path = Path(path)
+
+ # Check file size
+ file_size = path.stat().st_size
+ if file_size > self.max_file_size:
+ raise ValueError(
+ f"Log file exceeds size limit: {file_size:,} > {self.max_file_size:,} bytes"
+ )
+
+ content = path.read_text(encoding="utf-8", errors="replace")
+ return self.parse_lines(
+ content.splitlines(),
+ max_entries=max_entries,
+ level_filter=level_filter,
+ start_line=start_line,
+ )
+
+ def parse_lines(
+ self,
+ lines: list[str],
+ max_entries: int | None = None,
+ level_filter: LogLevel | None = None,
+ start_line: int = 1,
+ ) -> list[LogEntry]:
+ """Parse log lines into structured entries.
+
+ Args:
+ lines: List of log lines.
+ max_entries: Maximum entries to return.
+ level_filter: Only return entries of this level or higher.
+ start_line: Starting line number for numbering.
+
+ Returns:
+ List of LogEntry objects.
+ """
+ entries = []
+ level_priority = self._get_level_priority(level_filter) if level_filter else 0
+
+ for i, line in enumerate(lines):
+ if not line.strip():
+ continue
+
+ entry = self._parse_line(line, line_number=start_line + i)
+
+ # Apply level filter
+ if level_filter:
+ entry_priority = self._get_level_priority(entry.level)
+ if entry_priority < level_priority:
+ continue
+
+ entries.append(entry)
+
+ if max_entries and len(entries) >= max_entries:
+ break
+
+ return entries
+
+ def find_errors(
+ self,
+ path: Path | str,
+ max_results: int = 50,
+ context_lines: int = 2,
+ ) -> list[dict[str, Any]]:
+ """Find error entries with surrounding context.
+
+ Args:
+ path: Path to the log file.
+ max_results: Maximum errors to return.
+ context_lines: Number of lines before/after each error.
+
+ Returns:
+ List of error dicts with context.
+ """
+ path = Path(path)
+ content = path.read_text(encoding="utf-8", errors="replace")
+ lines = content.splitlines()
+
+ errors = []
+ for i, line in enumerate(lines):
+ entry = self._parse_line(line, line_number=i + 1)
+ if entry.level in (LogLevel.ERROR, LogLevel.CRITICAL):
+ # Get context
+ start = max(0, i - context_lines)
+ end = min(len(lines), i + context_lines + 1)
+
+ errors.append(
+ {
+ "entry": entry,
+ "context_before": lines[start:i],
+ "context_after": lines[i + 1 : end],
+ }
+ )
+
+ if len(errors) >= max_results:
+ break
+
+ return errors
+
+ def get_summary(self, path: Path | str) -> dict[str, Any]:
+ """Get a summary of a log file.
+
+ Args:
+ path: Path to the log file.
+
+ Returns:
+ Summary dict with counts and samples.
+ """
+ path = Path(path)
+ content = path.read_text(encoding="utf-8", errors="replace")
+ lines = content.splitlines()
+
+ level_counts: dict[str, int] = {}
+ first_timestamp: datetime | None = None
+ last_timestamp: datetime | None = None
+ sample_errors: list[str] = []
+
+ for i, line in enumerate(lines):
+ entry = self._parse_line(line, line_number=i + 1)
+
+ # Count levels
+ level_counts[entry.level.value] = level_counts.get(entry.level.value, 0) + 1
+
+ # Track timestamps
+ if entry.timestamp:
+ if first_timestamp is None:
+ first_timestamp = entry.timestamp
+ last_timestamp = entry.timestamp
+
+ # Sample errors
+ if entry.level in (LogLevel.ERROR, LogLevel.CRITICAL) and len(sample_errors) < 5:
+ sample_errors.append(entry.message[:200])
+
+ return {
+ "total_lines": len(lines),
+ "level_counts": level_counts,
+ "first_timestamp": first_timestamp.isoformat() if first_timestamp else None,
+ "last_timestamp": last_timestamp.isoformat() if last_timestamp else None,
+ "sample_errors": sample_errors,
+ }
+
+ def _parse_line(self, line: str, line_number: int) -> LogEntry:
+ """Parse a single log line.
+
+ Args:
+ line: The log line.
+ line_number: Line number in file.
+
+ Returns:
+ LogEntry object.
+ """
+ # Try JSON first
+ if line.strip().startswith("{"):
+ entry = self._parse_json_log(line, line_number)
+ if entry:
+ return entry
+
+ # Parse standard format
+ timestamp = self._extract_timestamp(line)
+ level = self._extract_level(line)
+ source = self._extract_source(line)
+ message = self._extract_message(line, timestamp, level, source)
+
+ return LogEntry(
+ timestamp=timestamp,
+ level=level,
+ message=message,
+ source=source,
+ line_number=line_number,
+ raw=line,
+ )
+
+ def _parse_json_log(self, line: str, line_number: int) -> LogEntry | None:
+ """Try to parse a JSON-formatted log line.
+
+ Args:
+ line: The log line.
+ line_number: Line number.
+
+ Returns:
+ LogEntry if valid JSON log, None otherwise.
+ """
+ import json
+
+ try:
+ data = json.loads(line)
+ if not isinstance(data, dict):
+ return None
+
+ # Extract common fields
+ timestamp = None
+ for ts_field in ["timestamp", "time", "@timestamp", "ts"]:
+ if ts_field in data:
+ timestamp = self._parse_timestamp_string(str(data[ts_field]))
+ break
+
+ level = LogLevel.UNKNOWN
+ for level_field in ["level", "severity", "lvl"]:
+ if level_field in data:
+ level = self._string_to_level(str(data[level_field]))
+ break
+
+ message = data.get("message", data.get("msg", str(data)))
+ source = data.get("logger", data.get("source", data.get("name")))
+
+ return LogEntry(
+ timestamp=timestamp,
+ level=level,
+ message=str(message),
+ source=str(source) if source else None,
+ line_number=line_number,
+ raw=line,
+ metadata=data,
+ )
+ except (json.JSONDecodeError, ValueError):
+ return None
+
+ def _extract_timestamp(self, line: str) -> datetime | None:
+ """Extract timestamp from a log line.
+
+ Args:
+ line: The log line.
+
+ Returns:
+ Parsed datetime or None.
+ """
+ for pattern, fmt in self.TIMESTAMP_PATTERNS:
+ match = re.search(pattern, line)
+ if match:
+ ts_str = match.group(1)
+ return self._parse_timestamp_string(ts_str, fmt)
+ return None
+
+ def _parse_timestamp_string(self, ts_str: str, fmt: str | None = None) -> datetime | None:
+ """Parse a timestamp string.
+
+ Args:
+ ts_str: Timestamp string.
+ fmt: Expected format.
+
+ Returns:
+ Parsed datetime or None.
+ """
+ if fmt == "epoch":
+ try:
+ return datetime.fromtimestamp(int(ts_str))
+ except (ValueError, OSError):
+ return None
+
+ # Try ISO format first
+ try:
+ # Handle timezone suffix
+ ts_str = ts_str.replace("Z", "+00:00")
+ return datetime.fromisoformat(ts_str)
+ except ValueError:
+ pass
+
+ # Try standard formats
+ for fmt in ["%Y-%m-%d %H:%M:%S", "%Y-%m-%dT%H:%M:%S", "%Y%m%dT%H%M%S"]:
+ try:
+ return datetime.strptime(ts_str[:19], fmt)
+ except ValueError:
+ continue
+
+ return None
+
+ def _extract_level(self, line: str) -> LogLevel:
+ """Extract log level from a line.
+
+ Args:
+ line: The log line.
+
+ Returns:
+ LogLevel enum value.
+ """
+ upper_line = line.upper()
+ for pattern, level in self.LEVEL_PATTERNS:
+ if re.search(pattern, upper_line):
+ return level
+ return LogLevel.UNKNOWN
+
+ def _string_to_level(self, level_str: str) -> LogLevel:
+ """Convert a string to LogLevel.
+
+ Args:
+ level_str: Level string.
+
+ Returns:
+ LogLevel enum value.
+ """
+ level_str = level_str.upper()
+ if "DEBUG" in level_str:
+ return LogLevel.DEBUG
+ elif "INFO" in level_str:
+ return LogLevel.INFO
+ elif "WARN" in level_str:
+ return LogLevel.WARNING
+ elif "ERROR" in level_str:
+ return LogLevel.ERROR
+ elif "CRIT" in level_str or "FATAL" in level_str:
+ return LogLevel.CRITICAL
+ return LogLevel.UNKNOWN
+
+ def _extract_source(self, line: str) -> str | None:
+ """Extract source/logger name from a line.
+
+ Args:
+ line: The log line.
+
+ Returns:
+ Source name or None.
+ """
+ # Common patterns: [source], , source:
+ patterns = [
+ r"\[([a-zA-Z0-9_.]+)\]", # [source]
+ r"<([a-zA-Z0-9_.]+)>", #
+ r"^\S+\s+\S+\s+([a-zA-Z0-9_.]+):", # timestamp level source:
+ ]
+
+ for pattern in patterns:
+ match = re.search(pattern, line)
+ if match:
+ return match.group(1)
+
+ return None
+
+ def _extract_message(
+ self,
+ line: str,
+ timestamp: datetime | None,
+ level: LogLevel,
+ source: str | None,
+ ) -> str:
+ """Extract the message portion of a log line.
+
+ Args:
+ line: The log line.
+ timestamp: Parsed timestamp.
+ level: Parsed level.
+ source: Parsed source.
+
+ Returns:
+ The message content.
+ """
+ # Simple heuristic: take everything after level indicator
+ for pattern, _ in self.LEVEL_PATTERNS:
+ match = re.search(pattern, line, re.IGNORECASE)
+ if match:
+ # Return everything after the level
+ return line[match.end() :].strip(" -:")
+
+ # Fallback: return the whole line
+ return line.strip()
+
+ def _get_level_priority(self, level: LogLevel) -> int:
+ """Get priority number for a log level.
+
+ Args:
+ level: Log level.
+
+ Returns:
+ Priority (higher = more severe).
+ """
+ priorities = {
+ LogLevel.DEBUG: 10,
+ LogLevel.INFO: 20,
+ LogLevel.WARNING: 30,
+ LogLevel.ERROR: 40,
+ LogLevel.CRITICAL: 50,
+ LogLevel.UNKNOWN: 0,
+ }
+ return priorities.get(level, 0)
diff --git a/python-packages/dataing/src/dataing/core/parsing/text_parser.py b/python-packages/dataing/src/dataing/core/parsing/text_parser.py
new file mode 100644
index 000000000..24b8a73de
--- /dev/null
+++ b/python-packages/dataing/src/dataing/core/parsing/text_parser.py
@@ -0,0 +1,211 @@
+"""Text file parser with smart chunking support.
+
+Provides utilities for reading text files with line-range chunking
+and safe handling of various encodings.
+"""
+
+from __future__ import annotations
+
+import logging
+from dataclasses import dataclass
+from pathlib import Path
+from typing import TYPE_CHECKING
+
+if TYPE_CHECKING:
+ pass
+
+logger = logging.getLogger(__name__)
+
+
+@dataclass
+class TextChunk:
+    """A chunk of text from a file.
+
+    Attributes:
+        content: The text content.
+        start_line: The 1-indexed start line number.
+        end_line: The 1-indexed end line number (inclusive).
+        total_lines: Total number of lines in the file.
+        truncated: Whether the content was truncated due to limits.
+    """
+
+    content: str
+    start_line: int
+    end_line: int
+    total_lines: int
+    # Set only when at least one selected line exceeded the parser's
+    # max_line_length and was shortened; narrowing the line range alone
+    # does not set this flag.
+    truncated: bool = False
+
+
+class TextParser:
+ """Parser for plain text files with chunking support.
+
+ Provides safe reading of text files with encoding detection,
+ line-range selection, and size limits.
+ """
+
+ DEFAULT_ENCODING = "utf-8"
+ FALLBACK_ENCODINGS = ["latin-1", "cp1252", "iso-8859-1"]
+ MAX_LINE_LENGTH = 10000
+ MAX_FILE_SIZE = 10 * 1024 * 1024 # 10 MB
+
+ def __init__(
+ self,
+ max_line_length: int = MAX_LINE_LENGTH,
+ max_file_size: int = MAX_FILE_SIZE,
+ ) -> None:
+ """Initialize the text parser.
+
+ Args:
+ max_line_length: Maximum characters per line before truncation.
+ max_file_size: Maximum file size in bytes.
+ """
+ self.max_line_length = max_line_length
+ self.max_file_size = max_file_size
+
+ def read_file(
+ self,
+ path: Path | str,
+ start_line: int = 1,
+ end_line: int | None = None,
+ max_lines: int | None = None,
+ ) -> TextChunk:
+ """Read a text file with optional line-range selection.
+
+ Args:
+ path: Path to the file.
+ start_line: 1-indexed start line (default: 1).
+ end_line: 1-indexed end line (inclusive, default: all).
+ max_lines: Maximum lines to return (overrides end_line).
+
+ Returns:
+ TextChunk with content and metadata.
+
+ Raises:
+ FileNotFoundError: If file doesn't exist.
+ ValueError: If file exceeds size limit.
+ UnicodeDecodeError: If file cannot be decoded.
+ """
+ path = Path(path)
+
+ # Check file size
+ file_size = path.stat().st_size
+ if file_size > self.max_file_size:
+ raise ValueError(
+ f"File exceeds size limit: {file_size:,} > {self.max_file_size:,} bytes"
+ )
+
+ # Read with encoding detection
+ content = self._read_with_fallback(path)
+ lines = content.splitlines()
+ total_lines = len(lines)
+
+ # Validate and adjust line range
+ start_line = max(1, start_line)
+ if end_line is None:
+ end_line = total_lines
+ else:
+ end_line = min(end_line, total_lines)
+
+ if max_lines is not None:
+ end_line = min(start_line + max_lines - 1, end_line)
+
+ # Extract requested lines (convert to 0-indexed)
+ selected_lines = lines[start_line - 1 : end_line]
+
+ # Truncate long lines
+ truncated = False
+ processed_lines = []
+ for line in selected_lines:
+ if len(line) > self.max_line_length:
+ processed_lines.append(line[: self.max_line_length] + "...")
+ truncated = True
+ else:
+ processed_lines.append(line)
+
+ return TextChunk(
+ content="\n".join(processed_lines),
+ start_line=start_line,
+ end_line=end_line,
+ total_lines=total_lines,
+ truncated=truncated,
+ )
+
+ def count_lines(self, path: Path | str) -> int:
+ """Count lines in a file without loading it fully.
+
+ Args:
+ path: Path to the file.
+
+ Returns:
+ Number of lines in the file.
+ """
+ path = Path(path)
+ content = self._read_with_fallback(path)
+ return len(content.splitlines())
+
+ def search_lines(
+ self,
+ path: Path | str,
+ pattern: str,
+ max_results: int = 100,
+ case_sensitive: bool = False,
+ ) -> list[tuple[int, str]]:
+ """Search for lines containing a pattern.
+
+ Args:
+ path: Path to the file.
+ pattern: Search pattern (plain text, not regex).
+ max_results: Maximum number of results to return.
+ case_sensitive: Whether to do case-sensitive matching.
+
+ Returns:
+ List of (line_number, line_content) tuples.
+ """
+ path = Path(path)
+ content = self._read_with_fallback(path)
+ lines = content.splitlines()
+
+ if not case_sensitive:
+ pattern = pattern.lower()
+
+ results: list[tuple[int, str]] = []
+ for i, line in enumerate(lines, 1):
+ check_line = line if case_sensitive else line.lower()
+ if pattern in check_line:
+ # Truncate if needed
+ if len(line) > self.max_line_length:
+ line = line[: self.max_line_length] + "..."
+ results.append((i, line))
+ if len(results) >= max_results:
+ break
+
+ return results
+
+ def _read_with_fallback(self, path: Path) -> str:
+ """Read file with encoding fallback.
+
+ Args:
+ path: Path to the file.
+
+ Returns:
+ File content as string.
+
+ Raises:
+ UnicodeDecodeError: If all encodings fail.
+ """
+ # Try default encoding first
+ try:
+ return path.read_text(encoding=self.DEFAULT_ENCODING)
+ except UnicodeDecodeError:
+ pass
+
+ # Try fallback encodings
+ for encoding in self.FALLBACK_ENCODINGS:
+ try:
+ return path.read_text(encoding=encoding)
+ except UnicodeDecodeError:
+ continue
+
+ # Last resort: read with errors='replace'
+ logger.warning(f"Could not decode {path} cleanly, using replacement characters")
+ return path.read_text(encoding=self.DEFAULT_ENCODING, errors="replace")
diff --git a/python-packages/dataing/src/dataing/core/parsing/yaml_parser.py b/python-packages/dataing/src/dataing/core/parsing/yaml_parser.py
new file mode 100644
index 000000000..fbb7554ce
--- /dev/null
+++ b/python-packages/dataing/src/dataing/core/parsing/yaml_parser.py
@@ -0,0 +1,166 @@
+"""YAML file parser with safe loading.
+
+Provides utilities for parsing YAML files with safe defaults
+and helpful error messages.
+"""
+
+from __future__ import annotations
+
+import logging
+from pathlib import Path
+from typing import Any
+
+import yaml # type: ignore[import-untyped]
+
+logger = logging.getLogger(__name__)
+
+
+class YamlParser:
+    """Parser for YAML files with safe loading.
+
+    Uses safe_load by default to prevent code execution.
+    Provides helpful error messages for common YAML issues.
+    """
+
+    # Default cap on file size; guards against loading huge documents.
+    MAX_FILE_SIZE = 5 * 1024 * 1024  # 5 MB
+
+    def __init__(self, max_file_size: int = MAX_FILE_SIZE) -> None:
+        """Initialize the YAML parser.
+
+        Args:
+            max_file_size: Maximum file size in bytes.
+        """
+        self.max_file_size = max_file_size
+
+    def parse_file(self, path: Path | str) -> Any:
+        """Parse a YAML file safely.
+
+        Args:
+            path: Path to the YAML file.
+
+        Returns:
+            Parsed YAML content (dict, list, or primitive).
+
+        Raises:
+            FileNotFoundError: If file doesn't exist.
+            ValueError: If file exceeds size limit.
+            yaml.YAMLError: If YAML is invalid.
+        """
+        path = Path(path)
+
+        # Check file size before reading so oversized files never hit memory.
+        file_size = path.stat().st_size
+        if file_size > self.max_file_size:
+            raise ValueError(
+                f"YAML file exceeds size limit: {file_size:,} > {self.max_file_size:,} bytes"
+            )
+
+        content = path.read_text(encoding="utf-8")
+        return self.parse_string(content)
+
+    def parse_string(self, content: str) -> Any:
+        """Parse a YAML string safely.
+
+        safe_load refuses arbitrary Python object construction, so untrusted
+        YAML cannot trigger code execution.
+
+        Args:
+            content: YAML content as string.
+
+        Returns:
+            Parsed YAML content.
+
+        Raises:
+            yaml.YAMLError: If YAML is invalid.
+        """
+        try:
+            return yaml.safe_load(content)
+        except yaml.YAMLError as e:
+            # Log before re-raising so parse failures are visible in logs.
+            logger.error(f"YAML parse error: {e}")
+            raise
+
+    def parse_file_all(self, path: Path | str) -> list[Any]:
+        """Parse a multi-document YAML file.
+
+        NOTE(review): unlike parse_string, parse errors here propagate
+        without being logged first — confirm whether that asymmetry is
+        intended.
+
+        Args:
+            path: Path to the YAML file.
+
+        Returns:
+            List of parsed documents.
+
+        Raises:
+            FileNotFoundError: If file doesn't exist.
+            ValueError: If file exceeds size limit.
+            yaml.YAMLError: If YAML is invalid.
+        """
+        path = Path(path)
+
+        # Check file size
+        file_size = path.stat().st_size
+        if file_size > self.max_file_size:
+            raise ValueError(
+                f"YAML file exceeds size limit: {file_size:,} > {self.max_file_size:,} bytes"
+            )
+
+        content = path.read_text(encoding="utf-8")
+        # safe_load_all lazily yields each "---"-separated document.
+        return list(yaml.safe_load_all(content))
+
+    def format_summary(self, data: Any, max_depth: int = 3) -> str:
+        """Format YAML data as a readable summary.
+
+        Useful for providing concise view of YAML content to LLMs.
+        The output is for display only, not valid YAML.
+
+        Args:
+            data: Parsed YAML data.
+            max_depth: Maximum nesting depth to show.
+
+        Returns:
+            Formatted string summary.
+        """
+        return self._format_value(data, depth=0, max_depth=max_depth)
+
+    def _format_value(self, value: Any, depth: int, max_depth: int) -> str:
+        """Recursively format a value.
+
+        Args:
+            value: Value to format.
+            depth: Current depth.
+            max_depth: Maximum depth.
+
+        Returns:
+            Formatted string.
+        """
+        indent = "  " * depth
+
+        # At the depth limit, collapse containers to a size annotation.
+        if depth >= max_depth:
+            if isinstance(value, dict):
+                return f"{{...}} ({len(value)} keys)"
+            elif isinstance(value, list):
+                return f"[...] ({len(value)} items)"
+            else:
+                return repr(value)
+
+        if isinstance(value, dict):
+            if not value:
+                return "{}"
+            lines = ["{"]
+            for k, v in value.items():
+                formatted_v = self._format_value(v, depth + 1, max_depth)
+                lines.append(f"{indent}  {k}: {formatted_v}")
+            lines.append(f"{indent}}}")
+            return "\n".join(lines)
+
+        elif isinstance(value, list):
+            if not value:
+                return "[]"
+            lines = ["["]
+            for item in value:
+                formatted_item = self._format_value(item, depth + 1, max_depth)
+                lines.append(f"{indent}  - {formatted_item}")
+            lines.append(f"{indent}]")
+            return "\n".join(lines)
+
+        elif isinstance(value, str):
+            # Long strings are elided with a character count for readability.
+            if len(value) > 100:
+                return f'"{value[:100]}..." ({len(value)} chars)'
+            return repr(value)
+
+        else:
+            return repr(value)
diff --git a/python-packages/dataing/src/dataing/entrypoints/api/deps.py b/python-packages/dataing/src/dataing/entrypoints/api/deps.py
index 1e46ee0c6..00dcfccde 100644
--- a/python-packages/dataing/src/dataing/entrypoints/api/deps.py
+++ b/python-packages/dataing/src/dataing/entrypoints/api/deps.py
@@ -95,6 +95,9 @@ def __init__(self) -> None:
self.github_client_id = os.getenv("GITHUB_CLIENT_ID", "")
self.github_client_secret = os.getenv("GITHUB_CLIENT_SECRET", "")
+ # Repository root path for assistant file access
+ self.repo_root = os.getenv("DATAING_REPO_ROOT", ".")
+
settings = Settings()
diff --git a/python-packages/dataing/src/dataing/entrypoints/api/routes/__init__.py b/python-packages/dataing/src/dataing/entrypoints/api/routes/__init__.py
index f6265c393..ed2c1d79c 100644
--- a/python-packages/dataing/src/dataing/entrypoints/api/routes/__init__.py
+++ b/python-packages/dataing/src/dataing/entrypoints/api/routes/__init__.py
@@ -8,6 +8,7 @@
from dataing.entrypoints.api.routes.analytics import router as analytics_router
from dataing.entrypoints.api.routes.approvals import router as approvals_router
from dataing.entrypoints.api.routes.asset_instances import router as asset_instances_router
+from dataing.entrypoints.api.routes.assistant import router as assistant_router
from dataing.entrypoints.api.routes.auth import router as auth_router
from dataing.entrypoints.api.routes.bundles import router as bundles_router
from dataing.entrypoints.api.routes.comment_votes import router as comment_votes_router
@@ -55,6 +56,7 @@
# Include all route modules
api_router.include_router(auth_router, prefix="/auth") # Auth routes (no API key required)
+api_router.include_router(assistant_router) # Dataing Assistant chat API
api_router.include_router(asset_instances_router) # Cross-datasource asset search
api_router.include_router(investigations_router) # Unified investigation API
api_router.include_router(issues_router) # Issues CRUD API
diff --git a/python-packages/dataing/src/dataing/entrypoints/api/routes/assistant.py b/python-packages/dataing/src/dataing/entrypoints/api/routes/assistant.py
new file mode 100644
index 000000000..6955c6d6e
--- /dev/null
+++ b/python-packages/dataing/src/dataing/entrypoints/api/routes/assistant.py
@@ -0,0 +1,877 @@
+"""API routes for Dataing Assistant.
+
+Provides endpoints for chat sessions, messages, and real-time streaming.
+Uses Temporal workflows for durable execution and observability.
+"""
+
+from __future__ import annotations
+
+import asyncio
+import logging
+from collections.abc import AsyncIterator
+from datetime import UTC, datetime
+from enum import Enum
+from typing import Annotated, Any
+from uuid import UUID
+
+from fastapi import APIRouter, Depends, HTTPException, Query, Request
+from pydantic import BaseModel, Field
+from sse_starlette.sse import EventSourceResponse
+from temporalio.service import RPCError
+
+from dataing.adapters.db.app_db import AppDatabase
+from dataing.core.json_utils import to_json_string
+from dataing.entrypoints.api.deps import get_app_db, settings
+from dataing.entrypoints.api.middleware.auth import ApiKeyContext, verify_api_key
+from dataing.temporal.client import TemporalAgentClient
+
# Module-level logger for assistant route handlers.
logger = logging.getLogger(__name__)

router = APIRouter(prefix="/assistant", tags=["assistant"])

# Type aliases for dependency injection
AuthDep = Annotated[ApiKeyContext, Depends(verify_api_key)]
AppDbDep = Annotated[AppDatabase, Depends(get_app_db)]

# SSE configuration
HEARTBEAT_INTERVAL_SECONDS = 15
MAX_STREAM_DURATION_SECONDS = 300  # 5 minutes

# In-memory message queue for active sessions (Redis in production)
# NOTE(review): process-local state — /messages and /stream must be served by
# the same worker process for the stream to see its queue. Confirm the
# deployment runs a single API process (or sticky routing) before shipping.
_active_streams: dict[str, asyncio.Queue[dict[str, Any]]] = {}
+
+
+# =============================================================================
+# Pydantic Models
+# =============================================================================
+
+
class CreateSessionRequest(BaseModel):
    """Request to create a new assistant session."""

    # Optional investigation this chat session should be attached to.
    parent_investigation_id: UUID | None = Field(
        None, description="Optional parent investigation to link to"
    )
    title: str | None = Field(None, description="Optional session title")
    # Free-form client metadata, stored verbatim with the session row.
    metadata: dict[str, Any] = Field(default_factory=dict)
+
+
class CreateSessionResponse(BaseModel):
    """Response from creating a session."""

    session_id: UUID
    # The investigation row created to back this session.
    investigation_id: UUID
    created_at: datetime
+
+
class SessionSummary(BaseModel):
    """Summary of a session for listing."""

    id: UUID
    title: str | None
    created_at: datetime
    last_activity: datetime
    # Number of messages stored for the session.
    message_count: int
    # Cumulative token usage; 0 when the session never reached the agent.
    token_count: int
+
+
class ListSessionsResponse(BaseModel):
    """Response from listing sessions."""

    sessions: list[SessionSummary]
+
+
class MessageRole(str, Enum):
    """Message role types.

    Values match the ``role`` column stored in ``assistant_messages``.
    """

    USER = "user"
    ASSISTANT = "assistant"
    SYSTEM = "system"
    TOOL = "tool"
+
+
class MessageResponse(BaseModel):
    """A message in a session."""

    id: UUID
    role: MessageRole
    content: str
    # Present only for messages that invoked tools.
    tool_calls: list[dict[str, Any]] | None = None
    created_at: datetime
    token_count: int | None = None
+
+
class SessionDetailResponse(BaseModel):
    """Full session details with messages."""

    id: UUID
    investigation_id: UUID
    title: str | None
    created_at: datetime
    last_activity: datetime
    token_count: int
    # Messages in ascending creation order.
    messages: list[MessageResponse]
    parent_investigation_id: UUID | None = None
+
+
class PageContextError(BaseModel):
    """A frontend error captured by the error bus."""

    type: str = Field(..., description="Error type: api, react, or console")
    message: str
    # HTTP status for api-type errors; None otherwise.
    status: int | None = None
    url: str | None = None
    # Client-side timestamp (epoch-based integer; unit set by the frontend).
    timestamp: int
    stack_preview: str | None = None
+
+
class PageContext(BaseModel):
    """Context about the page the user is currently viewing."""

    # Concrete route the user is on, and the pattern it matched.
    route: str
    route_pattern: str
    route_params: dict[str, str] = Field(default_factory=dict)
    page_type: str
    page_title: str
    # Arbitrary page-level data supplied by the frontend.
    page_data: dict[str, Any] = Field(default_factory=dict)
    # Recent frontend errors, forwarded so the assistant can debug them.
    errors: list[PageContextError] = Field(default_factory=list)
+
+
class SendMessageRequest(BaseModel):
    """Request to send a message."""

    # Non-empty user message, capped at 32k characters.
    content: str = Field(..., min_length=1, max_length=32000)
    page_context: PageContext | None = None
+
+
class SendMessageResponse(BaseModel):
    """Response from sending a message."""

    # ID of the stored user message (not the assistant reply).
    message_id: UUID
    status: str = "processing"
+
+
class SSEEventType(str, Enum):
    """SSE event types for streaming.

    COMPLETE and ERROR are terminal: the stream generator closes after
    emitting either of them.
    """

    TEXT = "text"
    TOOL_CALL = "tool_call"
    TOOL_RESULT = "tool_result"
    COMPLETE = "complete"
    ERROR = "error"
    HEARTBEAT = "heartbeat"
+
+
class ExportFormat(str, Enum):
    """Export format options."""

    JSON = "json"
    MARKDOWN = "markdown"
+
+
+# =============================================================================
+# Temporal Client Cache
+# =============================================================================
+
# Cache the Temporal client connection (lazy initialization)
_temporal_client: TemporalAgentClient | None = None
# Guards lazy initialization so concurrent first callers share one client.
_temporal_client_lock = asyncio.Lock()


async def get_temporal_client() -> TemporalAgentClient:
    """Get or create the Temporal agent client.

    The connection is created once and cached at module level. A lock with a
    double-check prevents a race where several concurrent first requests each
    open their own connection (the original check-then-connect was unguarded).

    Returns:
        Connected TemporalAgentClient.
    """
    global _temporal_client
    if _temporal_client is None:
        async with _temporal_client_lock:
            # Re-check under the lock: another task may have connected while
            # this one was waiting.
            if _temporal_client is None:
                _temporal_client = await TemporalAgentClient.connect(
                    host=settings.TEMPORAL_HOST,
                    namespace=settings.TEMPORAL_NAMESPACE,
                    task_queue=settings.TEMPORAL_TASK_QUEUE,
                )
                logger.info(
                    f"Temporal agent client connected: host={settings.TEMPORAL_HOST}, "
                    f"namespace={settings.TEMPORAL_NAMESPACE}"
                )
    return _temporal_client
+
+
+# =============================================================================
+# Helper Functions
+# =============================================================================
+
+
async def create_investigation_for_session(
    db: AppDatabase,
    tenant_id: UUID,
    user_id: UUID | None,
) -> UUID:
    """Create the backing investigation row for a new assistant session.

    Args:
        db: Application database.
        tenant_id: Tenant ID.
        user_id: User ID (may be None for API key auth).

    Returns:
        The created investigation UUID.
    """
    # Assistant sessions are modeled as investigations with a synthetic alert
    # payload so they flow through the existing investigation machinery.
    alert_payload = to_json_string({"type": "assistant_session", "description": "Assistant chat"})

    row = await db.fetch_one(
        """
        INSERT INTO investigations (tenant_id, alert, created_by)
        VALUES ($1, $2, $3)
        RETURNING id
        """,
        tenant_id,
        alert_payload,
        user_id,
    )

    if not row:
        raise RuntimeError("Failed to create investigation")

    investigation_id: UUID = row["id"]
    return investigation_id
+
+
+# =============================================================================
+# Session Endpoints
+# =============================================================================
+
+
@router.post("/sessions", response_model=CreateSessionResponse)
async def create_session(
    request: CreateSessionRequest,
    auth: AuthDep,
    db: AppDbDep,
) -> CreateSessionResponse:
    """Create a new assistant session.

    Each session is linked to an investigation for tracking and context.
    """
    # Every chat session is backed by an investigation record.
    investigation_id = await create_investigation_for_session(db, auth.tenant_id, auth.user_id)

    # API-key auth has no user; fall back to an all-zero sentinel UUID.
    # NOTE(review): confirm nothing treats the sentinel as a real user id.
    effective_user_id = auth.user_id or UUID("00000000-0000-0000-0000-000000000000")

    session_row = await db.fetch_one(
        """
        INSERT INTO assistant_sessions
        (investigation_id, tenant_id, user_id, parent_investigation_id, title, metadata)
        VALUES ($1, $2, $3, $4, $5, $6)
        RETURNING id, created_at
        """,
        investigation_id,
        auth.tenant_id,
        effective_user_id,
        request.parent_investigation_id,
        request.title,
        to_json_string(request.metadata),
    )

    if not session_row:
        raise HTTPException(status_code=500, detail="Failed to create session")

    return CreateSessionResponse(
        session_id=session_row["id"],
        investigation_id=investigation_id,
        created_at=session_row["created_at"],
    )
+
+
@router.get("/sessions", response_model=ListSessionsResponse)
async def list_sessions(
    auth: AuthDep,
    db: AppDbDep,
    limit: int = Query(20, ge=1, le=100),
    offset: int = Query(0, ge=0),
) -> ListSessionsResponse:
    """List the user's assistant sessions."""

    def to_summary(record: Any) -> SessionSummary:
        # token_count may be NULL for sessions that never reached the agent.
        return SessionSummary(
            id=record["id"],
            title=record["title"],
            created_at=record["created_at"],
            last_activity=record["last_activity"],
            message_count=record["message_count"],
            token_count=record["token_count"] or 0,
        )

    # A NULL $2 (API-key auth with no user) matches all sessions in the tenant.
    records = await db.fetch_all(
        """
        SELECT
            s.id,
            s.title,
            s.created_at,
            s.last_activity,
            s.token_count,
            COUNT(m.id) as message_count
        FROM assistant_sessions s
        LEFT JOIN assistant_messages m ON m.session_id = s.id
        WHERE s.tenant_id = $1
        AND ($2::uuid IS NULL OR s.user_id = $2)
        GROUP BY s.id
        ORDER BY s.last_activity DESC
        LIMIT $3 OFFSET $4
        """,
        auth.tenant_id,
        auth.user_id,
        limit,
        offset,
    )

    return ListSessionsResponse(sessions=[to_summary(record) for record in records])
+
+
@router.get("/investigations/{investigation_id}/sessions", response_model=ListSessionsResponse)
async def list_sessions_for_investigation(
    investigation_id: UUID,
    auth: AuthDep,
    db: AppDbDep,
    limit: int = Query(20, ge=1, le=100),
    offset: int = Query(0, ge=0),
) -> ListSessionsResponse:
    """List assistant sessions linked to an investigation.

    Returns sessions where the investigation is the parent.
    """
    records = await db.fetch_all(
        """
        SELECT
            s.id,
            s.title,
            s.created_at,
            s.last_activity,
            s.token_count,
            COUNT(m.id) as message_count
        FROM assistant_sessions s
        LEFT JOIN assistant_messages m ON m.session_id = s.id
        WHERE s.tenant_id = $1
        AND s.parent_investigation_id = $2
        GROUP BY s.id
        ORDER BY s.last_activity DESC
        LIMIT $3 OFFSET $4
        """,
        auth.tenant_id,
        investigation_id,
        limit,
        offset,
    )

    summaries: list[SessionSummary] = []
    for record in records:
        summaries.append(
            SessionSummary(
                id=record["id"],
                title=record["title"],
                created_at=record["created_at"],
                last_activity=record["last_activity"],
                message_count=record["message_count"],
                # NULL token_count is normalized to 0 for the API.
                token_count=record["token_count"] or 0,
            )
        )

    return ListSessionsResponse(sessions=summaries)
+
+
@router.get("/sessions/{session_id}", response_model=SessionDetailResponse)
async def get_session(
    session_id: UUID,
    auth: AuthDep,
    db: AppDbDep,
) -> SessionDetailResponse:
    """Get full session details with messages."""
    # Tenant check happens in the WHERE clause: a session belonging to
    # another tenant is indistinguishable from a missing one (404).
    session_row = await db.fetch_one(
        """
        SELECT id, investigation_id, title, created_at, last_activity,
               token_count, parent_investigation_id
        FROM assistant_sessions
        WHERE id = $1 AND tenant_id = $2
        """,
        session_id,
        auth.tenant_id,
    )
    if not session_row:
        raise HTTPException(status_code=404, detail="Session not found")

    # Messages in chronological order.
    rows = await db.fetch_all(
        """
        SELECT id, role, content, tool_calls, created_at, token_count
        FROM assistant_messages
        WHERE session_id = $1
        ORDER BY created_at ASC
        """,
        session_id,
    )

    history = [
        MessageResponse(
            id=record["id"],
            role=MessageRole(record["role"]),
            content=record["content"],
            tool_calls=record["tool_calls"],
            created_at=record["created_at"],
            token_count=record["token_count"],
        )
        for record in rows
    ]

    return SessionDetailResponse(
        id=session_row["id"],
        investigation_id=session_row["investigation_id"],
        title=session_row["title"],
        created_at=session_row["created_at"],
        last_activity=session_row["last_activity"],
        token_count=session_row["token_count"] or 0,
        messages=history,
        parent_investigation_id=session_row["parent_investigation_id"],
    )
+
+
@router.delete("/sessions/{session_id}")
async def delete_session(
    session_id: UUID,
    auth: AuthDep,
    db: AppDbDep,
) -> dict[str, str]:
    """Delete an assistant session."""
    command_tag = await db.execute(
        """
        DELETE FROM assistant_sessions
        WHERE id = $1 AND tenant_id = $2
        """,
        session_id,
        auth.tenant_id,
    )

    # The driver returns a command tag like "DELETE <n>"; "DELETE 0" means
    # no row matched the id/tenant pair.
    if command_tag == "DELETE 0":
        raise HTTPException(status_code=404, detail="Session not found")

    return {"status": "deleted"}
+
+
+# =============================================================================
+# Message Endpoints
+# =============================================================================
+
+
# Strong references to in-flight processing tasks. The event loop holds only
# weak references to tasks, so without this set a task created by
# asyncio.create_task() could be garbage-collected before it finishes
# (documented pitfall in the asyncio.create_task docs).
_background_tasks: set[asyncio.Task[None]] = set()


@router.post("/sessions/{session_id}/messages", response_model=SendMessageResponse)
async def send_message(
    session_id: UUID,
    request_body: SendMessageRequest,
    auth: AuthDep,
    db: AppDbDep,
) -> SendMessageResponse:
    """Send a message to the assistant.

    Stores the user message, then processes it in a background task whose
    events are consumed via the /stream endpoint.

    Raises:
        HTTPException: 404 if the session does not exist for this tenant,
            500 if the message row could not be inserted.
    """
    # Verify session exists and belongs to tenant
    session = await db.fetch_one(
        """
        SELECT id, investigation_id FROM assistant_sessions
        WHERE id = $1 AND tenant_id = $2
        """,
        session_id,
        auth.tenant_id,
    )

    if not session:
        raise HTTPException(status_code=404, detail="Session not found")

    # Store user message
    user_msg = await db.fetch_one(
        """
        INSERT INTO assistant_messages (session_id, role, content)
        VALUES ($1, 'user', $2)
        RETURNING id
        """,
        session_id,
        request_body.content,
    )

    if not user_msg:
        raise HTTPException(status_code=500, detail="Failed to store message")

    # Initialize the stream queue for this session.
    # NOTE(review): a second message on the same session replaces any existing
    # queue, orphaning an in-flight stream — confirm the client serializes
    # messages per session.
    queue: asyncio.Queue[dict[str, Any]] = asyncio.Queue()
    _active_streams[str(session_id)] = queue

    # Start background task to process the message, keeping a strong
    # reference until it completes so it cannot be garbage-collected.
    task = asyncio.create_task(
        _process_message(
            session_id=session_id,
            message_content=request_body.content,
            page_context=request_body.page_context,
            auth=auth,
            db=db,
            queue=queue,
        )
    )
    _background_tasks.add(task)
    task.add_done_callback(_background_tasks.discard)

    return SendMessageResponse(
        message_id=user_msg["id"],
        status="processing",
    )
+
+
async def _load_parent_investigation_context(
    db: AppDatabase,
    session_id: UUID,
    tenant_id: UUID,
) -> dict[str, Any] | None:
    """Load parent investigation context for a session.

    Args:
        db: Application database.
        session_id: The assistant session ID.
        tenant_id: Tenant ID for security check.

    Returns:
        Parent investigation context dict, or None when the session has no
        parent or the parent is not visible to this tenant.
    """
    # Resolve the session's parent pointer (tenant-scoped).
    session_row = await db.fetch_one(
        """
        SELECT parent_investigation_id FROM assistant_sessions
        WHERE id = $1 AND tenant_id = $2
        """,
        session_id,
        tenant_id,
    )
    parent_id = session_row.get("parent_investigation_id") if session_row else None
    if not parent_id:
        return None

    # Fetch the parent investigation itself, again scoped to the tenant.
    investigation = await db.fetch_one(
        """
        SELECT id, dataset_id, metric_name, status, severity,
               expected_value, actual_value, deviation_pct, anomaly_date,
               finding, events, metadata, created_at, completed_at
        FROM investigations
        WHERE id = $1 AND tenant_id = $2
        """,
        parent_id,
        tenant_id,
    )
    if not investigation:
        return None

    summary = {
        "id": str(investigation["id"]),
        "dataset_id": investigation["dataset_id"],
        "metric_name": investigation["metric_name"],
        "status": investigation["status"],
        "severity": investigation.get("severity"),
        "expected_value": investigation.get("expected_value"),
        "actual_value": investigation.get("actual_value"),
        "deviation_pct": investigation.get("deviation_pct"),
        "anomaly_date": investigation.get("anomaly_date"),
        "finding": investigation.get("finding"),
        "events": investigation.get("events"),
        "metadata": investigation.get("metadata"),
    }
    return {"parent_investigation": summary}
+
+
async def _process_message(
    session_id: UUID,
    message_content: str,
    page_context: PageContext | None,
    auth: ApiKeyContext,
    db: AppDatabase,
    queue: asyncio.Queue[dict[str, Any]],
) -> None:
    """Process a message via Temporal workflow and send events to the queue.

    This function:
    1. Gets or creates a Temporal workflow for the session
    2. Sends the message via signal
    3. Polls for the response
    4. Streams the response via SSE

    Args:
        session_id: The session ID.
        message_content: The user's message.
        page_context: Optional context about the page the user is viewing.
        auth: Authentication context.
        db: Application database.
        queue: Queue for SSE events.
    """
    try:
        # Get Temporal client
        temporal_client = await get_temporal_client()

        # Get conversation history for context (oldest first, capped at 50).
        # NOTE(review): the caller stores the user message before starting
        # this task, so `history` already contains `message_content`, which
        # is then also sent as the new message — confirm the workflow
        # deduplicates, otherwise the last turn is doubled.
        history_rows = await db.fetch_all(
            """
            SELECT role, content FROM assistant_messages
            WHERE session_id = $1
            ORDER BY created_at ASC
            LIMIT 50
            """,
            session_id,
        )

        # Build context with history
        history = [{"role": r["role"], "content": r["content"]} for r in history_rows]
        context: dict[str, Any] = {"history": history} if history else {}

        # Load parent investigation context if available
        parent_context = await _load_parent_investigation_context(db, session_id, auth.tenant_id)
        if parent_context:
            context.update(parent_context)

        # Attach page context if provided
        if page_context:
            context["page_context"] = page_context.model_dump()

        logger.info(f"[ASSISTANT] Processing message via Temporal: {message_content[:100]}...")

        # Get or create workflow for this session (workflow id == session id).
        session_str = str(session_id)
        workflow_exists = await temporal_client.workflow_exists(session_str)

        if workflow_exists:
            # Update context and send message to existing workflow
            logger.info(f"[ASSISTANT] Signaling existing workflow for session {session_str}")
            await temporal_client.update_context(session_str, context)
            await temporal_client.send_message(session_str, message_content)
        else:
            # Start new workflow with initial message
            logger.info(f"[ASSISTANT] Starting new workflow for session {session_str}")
            await temporal_client.start_session(
                agent_name="dataing-assistant",
                session_id=session_str,
                tenant_id=str(auth.tenant_id),
                context=context,
                initial_message=message_content,
            )

        # Poll for response with timeout; None signals the agent never replied.
        response = await temporal_client.wait_for_response(
            session_id=session_str,
            timeout_seconds=300,  # 5 minutes
            poll_interval=0.5,
        )

        if response is None:
            raise TimeoutError("Agent did not respond within timeout")

        logger.info(f"[ASSISTANT] Response length: {len(response)}")

        # Send the full response as a single text event (no token streaming).
        await queue.put(
            {
                "event": SSEEventType.TEXT.value,
                "data": to_json_string({"text": response}),
            }
        )

        # Store assistant response so it appears in future history fetches.
        await db.execute(
            """
            INSERT INTO assistant_messages (session_id, role, content)
            VALUES ($1, 'assistant', $2)
            """,
            session_id,
            response,
        )

        # Send completion event (terminal: the SSE generator stops on it).
        await queue.put(
            {
                "event": SSEEventType.COMPLETE.value,
                "data": to_json_string({"status": "complete"}),
            }
        )

    except RPCError as e:
        # Temporal transport/service failures surface as an SSE error event.
        logger.exception(f"Temporal RPC error for session {session_id}")
        await queue.put(
            {
                "event": SSEEventType.ERROR.value,
                "data": to_json_string({"error": f"Temporal error: {e}"}),
            }
        )

    except TimeoutError as e:
        logger.exception(f"Timeout processing message for session {session_id}")
        await queue.put(
            {
                "event": SSEEventType.ERROR.value,
                "data": to_json_string({"error": str(e)}),
            }
        )

    except Exception as e:
        # Catch-all boundary: any failure becomes a terminal SSE error event.
        # NOTE(review): str(e) is forwarded to the client verbatim — confirm
        # internal details are acceptable to expose.
        logger.exception(f"Error processing message for session {session_id}")
        await queue.put(
            {
                "event": SSEEventType.ERROR.value,
                "data": to_json_string({"error": str(e)}),
            }
        )

    finally:
        # Clean up the stream registry entry for this session.
        if str(session_id) in _active_streams:
            del _active_streams[str(session_id)]
+
+
@router.get("/sessions/{session_id}/stream")
async def stream_response(
    request: Request,
    session_id: UUID,
    auth: AuthDep,
    db: AppDbDep,
    last_event_id: int | None = Query(None, description="Resume from event ID"),
) -> EventSourceResponse:
    """Stream assistant responses via Server-Sent Events.

    Connect to this endpoint after sending a message to receive real-time
    updates including text chunks, tool calls, and completion status.

    Raises:
        HTTPException: 404 if the session does not exist for this tenant.
    """
    # Verify session exists
    session = await db.fetch_one(
        """
        SELECT id FROM assistant_sessions
        WHERE id = $1 AND tenant_id = $2
        """,
        session_id,
        auth.tenant_id,
    )

    if not session:
        raise HTTPException(status_code=404, detail="Session not found")

    async def event_generator() -> AsyncIterator[dict[str, Any]]:
        """Generate SSE events until completion, error, timeout, or disconnect."""
        queue = _active_streams.get(str(session_id))
        event_id = 0
        # BUG FIX: the duration cap was previously measured from
        # last_heartbeat, which resets every heartbeat (15s), so the
        # 5-minute limit could never trigger. Anchor it to stream start.
        stream_start = datetime.now(UTC)
        last_heartbeat = stream_start

        try:
            while True:
                # Check for client disconnect
                if await request.is_disconnected():
                    logger.info(f"Client disconnected from session {session_id}")
                    break

                # Enforce the overall stream duration limit.
                if (datetime.now(UTC) - stream_start).total_seconds() > MAX_STREAM_DURATION_SECONDS:
                    yield {
                        "event": "timeout",
                        "data": to_json_string({"message": "Stream timeout"}),
                    }
                    break

                # Try to get event from queue
                if queue:
                    try:
                        event = await asyncio.wait_for(queue.get(), timeout=1.0)
                        event_id += 1

                        # Skip events before last_event_id (for resumption).
                        # NOTE(review): skipped events are still consumed from
                        # the queue, so true replay is not possible — confirm
                        # this matches the client's resumption expectations.
                        if last_event_id and event_id <= last_event_id:
                            continue

                        event["id"] = str(event_id)
                        yield event

                        # COMPLETE and ERROR are terminal events.
                        if event.get("event") in (
                            SSEEventType.COMPLETE.value,
                            SSEEventType.ERROR.value,
                        ):
                            break

                    except TimeoutError:
                        # No event within 1s; fall through to heartbeat check.
                        pass

                # Send heartbeat to keep intermediaries from closing the stream.
                now = datetime.now(UTC)
                if (now - last_heartbeat).total_seconds() >= HEARTBEAT_INTERVAL_SECONDS:
                    yield {
                        "event": SSEEventType.HEARTBEAT.value,
                        "data": to_json_string({"timestamp": now.isoformat()}),
                    }
                    last_heartbeat = now

                # If no queue yet (client connected before POST /messages),
                # poll until the producer registers one.
                if not queue:
                    await asyncio.sleep(0.5)
                    queue = _active_streams.get(str(session_id))

        except asyncio.CancelledError:
            logger.info(f"SSE stream cancelled for session {session_id}")

    return EventSourceResponse(
        event_generator(),
        # Disable proxy buffering (nginx) so events flush immediately.
        headers={"X-Accel-Buffering": "no"},
    )
+
+
+# =============================================================================
+# Export Endpoint
+# =============================================================================
+
+
@router.post("/sessions/{session_id}/export")
async def export_session(
    session_id: UUID,
    auth: AuthDep,
    db: AppDbDep,
    format: ExportFormat = Query(ExportFormat.MARKDOWN),  # noqa: B008
) -> dict[str, Any]:
    """Export a session as JSON or Markdown."""
    # Reuse the detail endpoint (it also enforces the tenant check / 404).
    detail = await get_session(session_id, auth, db)

    if format == ExportFormat.JSON:
        return {
            "format": "json",
            "content": detail.model_dump(mode="json"),
        }

    # Markdown rendering: a header block, then one section per message.
    parts: list[str] = [
        "# Assistant Session",
        "",
        f"**Session ID:** {detail.id}",
        f"**Created:** {detail.created_at.isoformat()}",
        f"**Messages:** {len(detail.messages)}",
        "",
        "---",
        "",
    ]

    for message in detail.messages:
        parts.extend([f"## {message.role.value.upper()}", "", message.content, ""])

        if message.tool_calls:
            parts.append("**Tool Calls:**")
            parts.extend(f"- `{call.get('name', 'unknown')}`" for call in message.tool_calls)
            parts.append("")

        parts.extend(["---", ""])

    return {
        "format": "markdown",
        "content": "\n".join(parts),
    }
diff --git a/python-packages/dataing/src/dataing/entrypoints/api/routes/investigation_feedback.py b/python-packages/dataing/src/dataing/entrypoints/api/routes/investigation_feedback.py
index 32a5cd5d8..a3712d7f9 100644
--- a/python-packages/dataing/src/dataing/entrypoints/api/routes/investigation_feedback.py
+++ b/python-packages/dataing/src/dataing/entrypoints/api/routes/investigation_feedback.py
@@ -29,10 +29,16 @@ class FeedbackCreate(BaseModel):
"""Request body for submitting feedback."""
target_type: Literal[
- "hypothesis", "query", "evidence", "synthesis", "investigation", "recommendation"
+ "hypothesis",
+ "query",
+ "evidence",
+ "synthesis",
+ "investigation",
+ "recommendation",
+ "assistant_message",
]
target_id: str # Can be UUID or composite ID like "{investigation_id}-rec-{index}"
- investigation_id: UUID
+ investigation_id: UUID | None = None # Optional for assistant messages
rating: Literal[1, -1]
reason: str | None = None
comment: str | None = None
@@ -53,6 +59,7 @@ class FeedbackResponse(BaseModel):
"synthesis": EventType.FEEDBACK_SYNTHESIS,
"investigation": EventType.FEEDBACK_INVESTIGATION,
"recommendation": EventType.FEEDBACK_RECOMMENDATION,
+ "assistant_message": EventType.FEEDBACK_ASSISTANT_MESSAGE,
}
diff --git a/python-packages/dataing/src/dataing/entrypoints/temporal_worker.py b/python-packages/dataing/src/dataing/entrypoints/temporal_worker.py
index 1b5de9a97..462d1f017 100644
--- a/python-packages/dataing/src/dataing/entrypoints/temporal_worker.py
+++ b/python-packages/dataing/src/dataing/entrypoints/temporal_worker.py
@@ -2,8 +2,8 @@
This module creates a production-ready Temporal worker that:
- Connects to Temporal using settings from environment
-- Wires all 8 activities with factory closures capturing dependencies
-- Registers both InvestigationWorkflow and EvaluateHypothesisWorkflow
+- Wires all activities with factory closures capturing dependencies
+- Registers InvestigationWorkflow, EvaluateHypothesisWorkflow, and AgentWorkflow
- Sets appropriate concurrency limits
Usage:
@@ -35,6 +35,7 @@
from dataing.core.snapshot_store import LocalSnapshotStore
from dataing.entrypoints.api.deps import settings
from dataing.temporal.activities import (
+ make_agent_turn_activity,
make_capture_snapshot_activity,
make_check_patterns_activity,
make_counter_analyze_activity,
@@ -47,7 +48,13 @@
make_synthesize_activity,
)
from dataing.temporal.adapters import TemporalAgentAdapter
-from dataing.temporal.workflows import EvaluateHypothesisWorkflow, InvestigationWorkflow
+from dataing.temporal.agents import AgentRegistry
+from dataing.temporal.agents.assistant_agent import AssistantTemporalAgent
+from dataing.temporal.workflows import (
+ AgentWorkflow,
+ EvaluateHypothesisWorkflow,
+ InvestigationWorkflow,
+)
logging.basicConfig(
level=logging.INFO,
@@ -93,15 +100,46 @@ async def create_dependencies() -> dict[str, Any]:
snapshot_store = LocalSnapshotStore("/tmp/dataing/snapshots")
logger.info("Snapshot store initialized")
+ # Agent registry for unified agent execution
+ agent_registry = create_agent_registry()
+ logger.info("Agent registry initialized")
+
return {
"app_db": app_db,
"agent_adapter": agent_adapter,
"context_engine": context_engine,
"pattern_repository": pattern_repository,
"snapshot_store": snapshot_store,
+ "agent_registry": agent_registry,
}
def create_agent_registry() -> AgentRegistry:
    """Create and populate the agent registry.

    Called once at worker startup; the registry (and the agents inside it)
    is shared by all activity executions in this process.

    Returns:
        Configured AgentRegistry with all available agents.
    """
    registry = AgentRegistry()

    # Register the assistant agent
    # Note: Tenant ID will be passed via activity context
    # repo_path is set via DATAING_REPO_ROOT environment variable
    # NOTE(review): "worker-default" is a placeholder baked into a
    # process-wide singleton — confirm every activity really overrides it
    # per request, otherwise tenants could share agent state.
    assistant_agent = AssistantTemporalAgent(
        api_key=settings.anthropic_api_key,
        tenant_id="worker-default",  # Will be overridden by activity input
        model=settings.llm_model,
        repo_path=settings.repo_root,  # /repo in container, . locally
    )
    registry.register(assistant_agent)

    logger.info(
        f"Agent registry created with agents: {registry.list_agents()}, "
        f"repo_root={settings.repo_root}"
    )
    return registry
+
+
def create_activities(deps: dict[str, Any]) -> list[Any]:
"""Create all activity functions with injected dependencies.
@@ -116,6 +154,7 @@ def create_activities(deps: dict[str, Any]) -> list[Any]:
pattern_repository = deps["pattern_repository"]
app_db = deps["app_db"]
snapshot_store = deps["snapshot_store"]
+ agent_registry = deps["agent_registry"]
# Cache for adapters to avoid recreating them
adapter_cache: dict[str, BaseAdapter] = {}
@@ -213,6 +252,8 @@ async def execute_query(self, sql: str, datasource_id: str | None = None) -> dic
adapter_database = AdapterDatabase(get_adapter)
activities = [
+ # Agent turn activity (generic for all agents)
+ make_agent_turn_activity(registry=agent_registry),
# Snapshot capture (fire-and-forget)
make_capture_snapshot_activity(snapshot_store=snapshot_store),
# Context and pattern activities
@@ -269,7 +310,7 @@ async def run_worker() -> None:
worker = Worker(
client,
task_queue=settings.TEMPORAL_TASK_QUEUE,
- workflows=[InvestigationWorkflow, EvaluateHypothesisWorkflow],
+ workflows=[InvestigationWorkflow, EvaluateHypothesisWorkflow, AgentWorkflow],
activities=activities,
max_concurrent_activities=MAX_CONCURRENT_ACTIVITIES,
max_concurrent_workflow_tasks=MAX_CONCURRENT_WORKFLOW_TASKS,
diff --git a/python-packages/dataing/src/dataing/temporal/__init__.py b/python-packages/dataing/src/dataing/temporal/__init__.py
index ee37a5326..830c06478 100644
--- a/python-packages/dataing/src/dataing/temporal/__init__.py
+++ b/python-packages/dataing/src/dataing/temporal/__init__.py
@@ -3,8 +3,10 @@
This package provides:
- InvestigationWorkflow: Main workflow for investigation orchestration
- EvaluateHypothesisWorkflow: Child workflow for parallel hypothesis evaluation
-- Activities: All investigation step activities
-- TemporalInvestigationClient: High-level client for workflow interaction
+- AgentWorkflow: Generic workflow for running any registered agent
+- Activities: All investigation step activities + generic agent_turn activity
+- TemporalInvestigationClient: High-level client for investigation workflows
+- TemporalAgentClient: High-level client for agent workflows
- Worker: Temporal worker to process workflows
Usage:
@@ -12,13 +14,17 @@
python -m dataing.temporal.worker
# Or import components
- from dataing.temporal.workflows import InvestigationWorkflow, EvaluateHypothesisWorkflow
- from dataing.temporal.client import TemporalInvestigationClient
- from dataing.temporal.activities import gather_context, generate_hypotheses, synthesize
+ from dataing.temporal.workflows import InvestigationWorkflow, AgentWorkflow
+ from dataing.temporal.client import TemporalInvestigationClient, TemporalAgentClient
+ from dataing.temporal.agents import AgentRegistry, TemporalAgentProtocol
- # Client usage
+ # Investigation client usage
client = await TemporalInvestigationClient.connect()
handle = await client.start_investigation(...)
- await client.cancel_investigation(investigation_id)
- await client.send_user_input(investigation_id, {"feedback": "..."})
+
+ # Agent client usage
+ agent_client = await TemporalAgentClient.connect()
+ await agent_client.start_session("dataing-assistant", "sess-123", "tenant-1")
+ await agent_client.send_message("sess-123", "What's wrong?")
+ response = await agent_client.wait_for_response("sess-123")
"""
diff --git a/python-packages/dataing/src/dataing/temporal/activities/__init__.py b/python-packages/dataing/src/dataing/temporal/activities/__init__.py
index 51e7c51d3..6ff47bf57 100644
--- a/python-packages/dataing/src/dataing/temporal/activities/__init__.py
+++ b/python-packages/dataing/src/dataing/temporal/activities/__init__.py
@@ -15,6 +15,10 @@
# Factory functions (for production with dependency injection)
# Input/Result dataclasses
+from dataing.temporal.activities.agent_turn import (
+ AgentTurnActivityInput,
+ make_agent_turn_activity,
+)
from dataing.temporal.activities.capture_snapshot import (
CaptureSnapshotInput,
CaptureSnapshotResult,
@@ -68,6 +72,7 @@
__all__ = [
# Factory functions
+ "make_agent_turn_activity",
"make_capture_snapshot_activity",
"make_gather_context_activity",
"make_check_patterns_activity",
@@ -79,6 +84,7 @@
"make_counter_analyze_activity",
"make_finalize_evidence_chain_activity",
# Input/Result types
+ "AgentTurnActivityInput",
"CaptureSnapshotInput",
"CaptureSnapshotResult",
"GatherContextInput",
diff --git a/python-packages/dataing/src/dataing/temporal/activities/agent_turn.py b/python-packages/dataing/src/dataing/temporal/activities/agent_turn.py
new file mode 100644
index 000000000..76028e36c
--- /dev/null
+++ b/python-packages/dataing/src/dataing/temporal/activities/agent_turn.py
@@ -0,0 +1,126 @@
+"""Generic agent turn activity for Temporal workflows.
+
+This activity executes a single turn of any registered agent, providing
+consistent observability and logging across all agent types.
+"""
+
+from __future__ import annotations
+
+import logging
+import time
+from dataclasses import dataclass
+from typing import Any
+
+from temporalio import activity
+
+from dataing.temporal.agents.protocol import AgentTurnInput
+from dataing.temporal.agents.registry import AgentRegistry
+
+logger = logging.getLogger(__name__)
+
+
@dataclass
class AgentTurnActivityInput:
    """Temporal-serializable payload for the agent_turn activity.

    Carries everything the activity needs to route a single message to a
    registered agent: which agent to run, what was said, and which
    session/tenant the turn belongs to.
    """

    agent_name: str  # Registry key of the agent to invoke.
    message: str  # The user's message for this turn.
    session_id: str  # Conversation/session identifier.
    context: dict[str, Any]  # Extra context (history, page info, ...).
    tenant_id: str  # Tenant isolation key, merged into the turn context.
+
+
def make_agent_turn_activity(registry: AgentRegistry) -> Any:
    """Create the agent_turn activity with an injected registry.

    Args:
        registry: Agent registry containing registered agents.

    Returns:
        The agent_turn activity function (registered under "agent_turn").
    """

    @activity.defn(name="agent_turn")
    async def agent_turn(input: AgentTurnActivityInput) -> dict[str, Any]:
        """Execute a single turn of an agent.

        This activity:
        1. Looks up the agent in the registry
        2. Executes the turn with heartbeating
        3. Logs tool calls and metrics for observability
        4. Returns the serialized result

        Args:
            input: Agent turn input with message and context.

        Returns:
            Serialized AgentTurnResult as a dictionary.

        Raises:
            KeyError: If the agent is not registered.
            Exception: If the agent turn fails.
        """
        # Lazy %-style args: formatting is skipped when the level is disabled.
        logger.info(
            "Starting agent turn: agent=%s, session=%s",
            input.agent_name,
            input.session_id,
        )

        # Get the agent from the registry.
        try:
            agent = registry.get(input.agent_name)
        except KeyError:
            logger.error("Agent not found: %s", input.agent_name)
            raise  # Bare raise preserves the original traceback.

        # Heartbeat to indicate we're starting.
        activity.heartbeat(f"Starting turn for {input.agent_name}")

        start = time.monotonic()

        # Merge tenant_id into a fresh dict so the input context is not mutated.
        context = {**input.context, "tenant_id": input.tenant_id}

        # Execute the agent turn.
        result = await agent.run_turn(
            message=input.message,
            session_id=input.session_id,
            context=context,
        )

        duration_ms = int((time.monotonic() - start) * 1000)

        # Heartbeat with progress.
        activity.heartbeat(f"Turn completed for {input.agent_name}")

        # Log through the activity logger for Temporal visibility.
        activity.logger.info(
            "Agent turn completed: agent=%s, session=%s, tool_calls=%d, "
            "tokens=%d, duration_ms=%d, is_complete=%s",
            input.agent_name,
            input.session_id,
            len(result.tool_calls),
            result.tokens_used,
            duration_ms,
            result.is_complete,
        )

        # Return the serialized result (Temporal payloads must be plain data).
        result_dict: dict[str, Any] = result.to_dict()
        return result_dict

    return agent_turn
+
+
def from_input(input: AgentTurnInput) -> AgentTurnActivityInput:
    """Convert an AgentTurnInput into the activity-level input type.

    Both dataclasses share the same field names, so the conversion is a
    straight field-by-field copy.

    Args:
        input: AgentTurnInput instance.

    Returns:
        AgentTurnActivityInput for the activity.
    """
    field_names = ("agent_name", "message", "session_id", "context", "tenant_id")
    return AgentTurnActivityInput(
        **{name: getattr(input, name) for name in field_names}
    )
diff --git a/python-packages/dataing/src/dataing/temporal/agents/__init__.py b/python-packages/dataing/src/dataing/temporal/agents/__init__.py
new file mode 100644
index 000000000..0fcb87e8f
--- /dev/null
+++ b/python-packages/dataing/src/dataing/temporal/agents/__init__.py
@@ -0,0 +1,30 @@
+"""Unified Temporal interface for agents.
+
+This package provides a DRY interface that allows any agent (Investigation, Assistant,
+future agents) to run through Temporal with consistent observability.
+
+Design Decision: LLM Turns as Activities
+Each LLM request/response cycle is a Temporal activity. Tools execute within that
+activity and are logged, but don't create separate activities. This balances
+visibility with overhead.
+"""
+
+from dataing.temporal.agents.protocol import (
+ AgentTurnInput,
+ AgentTurnResult,
+ AgentWorkflowInput,
+ AgentWorkflowResult,
+ TemporalAgentProtocol,
+ ToolCall,
+)
+from dataing.temporal.agents.registry import AgentRegistry
+
+__all__ = [
+ "AgentRegistry",
+ "AgentTurnInput",
+ "AgentTurnResult",
+ "AgentWorkflowInput",
+ "AgentWorkflowResult",
+ "TemporalAgentProtocol",
+ "ToolCall",
+]
diff --git a/python-packages/dataing/src/dataing/temporal/agents/assistant_agent.py b/python-packages/dataing/src/dataing/temporal/agents/assistant_agent.py
new file mode 100644
index 000000000..5d6759ba0
--- /dev/null
+++ b/python-packages/dataing/src/dataing/temporal/agents/assistant_agent.py
@@ -0,0 +1,128 @@
+"""Assistant agent adapter for Temporal execution.
+
+Wraps the existing DataingAssistant for use with the unified Temporal interface.
+"""
+
+from __future__ import annotations
+
+import logging
+from pathlib import Path
+from typing import Any
+
+from dataing.agents.assistant import DataingAssistant
+from dataing.temporal.agents.base import BaseTemporalAgent
+from dataing.temporal.agents.protocol import AgentTurnResult
+
+logger = logging.getLogger(__name__)
+
+
class AssistantTemporalAgent(BaseTemporalAgent):
    """Wraps DataingAssistant for Temporal execution.

    This adapter allows the existing DataingAssistant to run through the
    unified Temporal agent workflow, providing:
    - Durable execution with automatic retries
    - Observability through Temporal UI
    - Consistent interface with other agents
    """

    AGENT_NAME = "dataing-assistant"  # Registry key for this agent.

    def __init__(
        self,
        api_key: str,
        tenant_id: str,
        *,
        model: str = "claude-sonnet-4-20250514",
        repo_path: str | Path = ".",
        github_token: str | None = None,
        log_directories: list[str] | None = None,
    ) -> None:
        """Initialize the assistant agent.

        Args:
            api_key: Anthropic API key.
            tenant_id: Tenant ID for multi-tenancy isolation.
            model: LLM model to use.
            repo_path: Path to local git repository.
            github_token: Optional GitHub token for git tools.
            log_directories: Directories to scan for log files.
        """
        super().__init__(name=self.AGENT_NAME, tenant_id=tenant_id)

        # Create the underlying DataingAssistant that does the real work.
        self._assistant = DataingAssistant(
            api_key=api_key,
            tenant_id=tenant_id,
            model=model,
            repo_path=repo_path,
            github_token=github_token,
            log_directories=log_directories,
        )

        # Lazy %-style args: no string formatting when the level is disabled.
        logger.info("AssistantTemporalAgent initialized for tenant %s", tenant_id)

    async def _execute_turn(
        self,
        message: str,
        session_id: str,
        context: dict[str, Any],
    ) -> AgentTurnResult:
        """Execute one LLM turn using the DataingAssistant.

        Args:
            message: The user's message or prompt.
            session_id: Session ID for conversation continuity.
            context: Additional context (history, page context, etc.).

        Returns:
            AgentTurnResult with response and metadata.
        """
        # %.100s truncates the logged message to 100 chars without slicing
        # eagerly; formatting only happens if INFO is enabled.
        logger.info(
            "Processing message for session %s: %.100s...", session_id, message
        )

        # Call the underlying assistant.
        # Note: DataingAssistant doesn't expose tool call details directly,
        # so we capture what we can from the response.
        response = await self._assistant.ask(
            message,
            session_id=session_id,
            context=context,
        )

        logger.info("Assistant response length: %d", len(response))

        # Create the result.
        # Since DataingAssistant handles tools internally, we rely on
        # the base class tool recording if we add instrumentation later.
        return AgentTurnResult(
            response=response,
            tool_calls=self._get_tool_calls(),  # From base class if instrumented
            is_complete=False,  # Assistant conversations are ongoing until user ends
            tokens_used=0,  # DataingAssistant doesn't expose token counts yet
            metadata={
                "session_id": session_id,
                "tenant_id": self._tenant_id,
            },
        )
+
+
def create_assistant_agent(
    api_key: str,
    tenant_id: str,
    **kwargs: Any,
) -> AssistantTemporalAgent:
    """Build a configured AssistantTemporalAgent.

    Thin factory wrapper: forwards the required credentials plus any extra
    keyword options straight through to the AssistantTemporalAgent
    constructor.

    Args:
        api_key: Anthropic API key.
        tenant_id: Tenant ID for multi-tenancy.
        **kwargs: Additional arguments passed to AssistantTemporalAgent.

    Returns:
        Configured AssistantTemporalAgent instance.
    """
    return AssistantTemporalAgent(api_key=api_key, tenant_id=tenant_id, **kwargs)
diff --git a/python-packages/dataing/src/dataing/temporal/agents/base.py b/python-packages/dataing/src/dataing/temporal/agents/base.py
new file mode 100644
index 000000000..5ecd05f20
--- /dev/null
+++ b/python-packages/dataing/src/dataing/temporal/agents/base.py
@@ -0,0 +1,149 @@
+"""Base class for Temporal-compatible agents.
+
+Provides common functionality for tool call logging and instrumentation.
+"""
+
+from __future__ import annotations
+
+import logging
+import time
+from abc import ABC, abstractmethod
+from typing import Any
+
+from dataing.temporal.agents.protocol import AgentTurnResult, ToolCall
+
+logger = logging.getLogger(__name__)
+
+
class BaseTemporalAgent(ABC):
    """Base class with tool call logging and common functionality.

    Subclasses should implement the `_execute_turn` method to perform the
    actual LLM interaction. This base class handles:
    - Tool call tracking and timing
    - Logging and instrumentation
    - Standard result formatting
    """

    def __init__(self, name: str, tenant_id: str | None = None) -> None:
        """Initialize the base agent.

        Args:
            name: Unique agent identifier.
            tenant_id: Optional tenant ID for multi-tenancy.
        """
        self._name = name
        self._tenant_id = tenant_id
        self._tool_calls: list[ToolCall] = []

    @property
    def name(self) -> str:
        """Unique agent identifier."""
        return self._name

    def _record_tool_call(
        self,
        name: str,
        arguments: dict[str, Any],
        result: str,
        duration_ms: int,
    ) -> None:
        """Record a tool call for logging.

        Args:
            name: Tool name.
            arguments: Tool arguments.
            result: Tool result (truncated to 1000 chars for logging).
            duration_ms: Execution time in milliseconds.
        """
        tool_call = ToolCall(
            name=name,
            arguments=arguments,
            # Slicing is a no-op for short strings, so no length check needed.
            result=result[:1000],
            duration_ms=duration_ms,
        )
        self._tool_calls.append(tool_call)
        # Lazy %-style args avoid formatting when DEBUG is disabled.
        logger.debug("Tool call recorded: %s (%dms)", name, duration_ms)

    def _clear_tool_calls(self) -> None:
        """Clear recorded tool calls for a new turn."""
        self._tool_calls = []

    def _get_tool_calls(self) -> list[ToolCall]:
        """Get recorded tool calls for the current turn.

        Returns:
            A copy of the tool calls recorded during the turn.
        """
        return self._tool_calls.copy()

    async def run_turn(
        self,
        message: str,
        session_id: str,
        context: dict[str, Any],
    ) -> AgentTurnResult:
        """Execute one LLM turn with instrumentation.

        Args:
            message: The user's message or prompt.
            session_id: Session ID for conversation continuity.
            context: Additional context (history, page context, etc.).

        Returns:
            AgentTurnResult with response, tool calls, and completion status.
        """
        # Clear previous turn's tool calls.
        self._clear_tool_calls()

        start_time = time.monotonic()

        try:
            # Execute the turn (implemented by subclass).
            result = await self._execute_turn(message, session_id, context)

            # If the subclass didn't report tool calls, attach the ones
            # recorded through _record_tool_call during this turn.
            if not result.tool_calls:
                result = AgentTurnResult(
                    response=result.response,
                    tool_calls=self._get_tool_calls(),
                    is_complete=result.is_complete,
                    tokens_used=result.tokens_used,
                    metadata=result.metadata,
                )

            duration_ms = int((time.monotonic() - start_time) * 1000)
            logger.info(
                "Agent turn completed: agent=%s, tools=%d, tokens=%d, duration_ms=%d",
                self._name,
                len(result.tool_calls),
                result.tokens_used,
                duration_ms,
            )

            return result

        except Exception:
            duration_ms = int((time.monotonic() - start_time) * 1000)
            # logger.exception records the traceback, unlike logger.error.
            logger.exception(
                "Agent turn failed: agent=%s, duration_ms=%d", self._name, duration_ms
            )
            raise

    @abstractmethod
    async def _execute_turn(
        self,
        message: str,
        session_id: str,
        context: dict[str, Any],
    ) -> AgentTurnResult:
        """Execute the actual LLM turn (implemented by subclass).

        Args:
            message: The user's message or prompt.
            session_id: Session ID for conversation continuity.
            context: Additional context (history, page context, etc.).

        Returns:
            AgentTurnResult with response, tool calls, and completion status.
        """
        ...
diff --git a/python-packages/dataing/src/dataing/temporal/agents/protocol.py b/python-packages/dataing/src/dataing/temporal/agents/protocol.py
new file mode 100644
index 000000000..be1b143f3
--- /dev/null
+++ b/python-packages/dataing/src/dataing/temporal/agents/protocol.py
@@ -0,0 +1,144 @@
+"""Protocol and types for Temporal-based agents.
+
+Defines the interface that all Temporal-compatible agents must implement.
+"""
+
+from __future__ import annotations
+
+from dataclasses import dataclass, field
+from typing import Any, Protocol, runtime_checkable
+
+
@dataclass
class ToolCall:
    """Record of a single tool invocation made during an LLM turn."""

    name: str  # Tool identifier.
    arguments: dict[str, Any]  # Arguments the tool was called with.
    result: str  # (Possibly truncated) tool output.
    duration_ms: int  # Wall-clock execution time in milliseconds.

    def to_dict(self) -> dict[str, Any]:
        """Serialize to a plain dictionary."""
        return dict(
            name=self.name,
            arguments=self.arguments,
            result=self.result,
            duration_ms=self.duration_ms,
        )

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> ToolCall:
        """Deserialize; "name" is required, other keys fall back to defaults."""
        return cls(
            data["name"],
            data.get("arguments", {}),
            data.get("result", ""),
            data.get("duration_ms", 0),
        )
+
+
@dataclass
class AgentTurnResult:
    """Outcome of one LLM turn, including any tool activity."""

    response: str  # The agent's textual reply.
    tool_calls: list[ToolCall]  # Tools invoked during the turn.
    is_complete: bool  # True if agent signals conversation complete.
    tokens_used: int  # Token count for the turn.
    metadata: dict[str, Any] = field(default_factory=dict)  # Agent-specific data.

    def to_dict(self) -> dict[str, Any]:
        """Serialize for Temporal payload transport."""
        return {
            "response": self.response,
            "tool_calls": [call.to_dict() for call in self.tool_calls],
            "is_complete": self.is_complete,
            "tokens_used": self.tokens_used,
            "metadata": self.metadata,
        }

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> AgentTurnResult:
        """Deserialize, tolerating missing keys with sensible defaults."""
        return cls(
            response=data.get("response", ""),
            tool_calls=[ToolCall.from_dict(item) for item in data.get("tool_calls", [])],
            is_complete=data.get("is_complete", False),
            tokens_used=data.get("tokens_used", 0),
            metadata=data.get("metadata", {}),
        )
+
+
@dataclass
class AgentTurnInput:
    """Input to the agent_turn activity."""

    agent_name: str  # Registry key of the agent to invoke.
    message: str  # The user's message for this turn.
    session_id: str  # Conversation/session identifier.
    context: dict[str, Any] = field(default_factory=dict)  # History, page context, etc.
    tenant_id: str = ""  # Tenant isolation key.

    def to_dict(self) -> dict[str, Any]:
        """Serialize for Temporal payload transport."""
        keys = ("agent_name", "message", "session_id", "context", "tenant_id")
        return {key: getattr(self, key) for key in keys}
+
+
@dataclass
class AgentWorkflowInput:
    """Input for starting an agent workflow (one session)."""

    agent_name: str  # Which registered agent runs the session.
    session_id: str  # Unique session identifier.
    tenant_id: str  # Tenant isolation key.
    context: dict[str, Any] = field(default_factory=dict)  # Initial session context.
    initial_message: str | None = None  # Optional first message to process.
+
+
@dataclass
class AgentWorkflowResult:
    """Result of a completed agent workflow."""

    session_id: str  # Session the workflow served.
    turns: list[dict[str, Any]] = field(default_factory=list)  # Serialized turn results.
    total_tokens: int = 0  # Sum of tokens across all turns.
    status: str = "completed"  # Final workflow status.
+
+
@runtime_checkable
class TemporalAgentProtocol(Protocol):
    """Structural interface for agents that run in Temporal.

    Any object exposing a `name` property and an async `run_turn` method
    satisfies this protocol and can be registered with the AgentRegistry
    for execution through the generic AgentWorkflow.
    """

    @property
    def name(self) -> str:
        """Unique agent identifier (e.g., 'dataing-assistant', 'investigation-agent')."""
        ...

    async def run_turn(
        self,
        message: str,
        session_id: str,
        context: dict[str, Any],
    ) -> AgentTurnResult:
        """Execute one LLM turn (may include tool calls).

        Args:
            message: The user's message or prompt.
            session_id: Session ID for conversation continuity.
            context: Additional context (history, page context, etc.).

        Returns:
            AgentTurnResult with response, tool calls, and completion status.
        """
        ...
diff --git a/python-packages/dataing/src/dataing/temporal/agents/registry.py b/python-packages/dataing/src/dataing/temporal/agents/registry.py
new file mode 100644
index 000000000..107ba34c9
--- /dev/null
+++ b/python-packages/dataing/src/dataing/temporal/agents/registry.py
@@ -0,0 +1,89 @@
+"""Registry for Temporal-compatible agents.
+
+Provides a central registry for agents that can be executed through Temporal workflows.
+"""
+
+from __future__ import annotations
+
+import logging
+from typing import TYPE_CHECKING
+
+if TYPE_CHECKING:
+ from dataing.temporal.agents.protocol import TemporalAgentProtocol
+
+logger = logging.getLogger(__name__)
+
+
class AgentRegistry:
    """Registry of agents available for Temporal execution.

    This is a simple registry pattern that allows agents to be registered
    and retrieved by name. The worker uses this to look up agents when
    executing the agent_turn activity.

    Usage:
        # Register an agent
        registry = AgentRegistry()
        registry.register(my_agent)

        # Get an agent
        agent = registry.get("dataing-assistant")
    """

    # Class-level logger keeps the class self-contained; lazy %-style args
    # below avoid formatting when the level is disabled.
    _log = logging.getLogger(__name__)

    def __init__(self) -> None:
        """Initialize an empty registry."""
        self._agents: dict[str, TemporalAgentProtocol] = {}

    def register(self, agent: TemporalAgentProtocol) -> None:
        """Register an agent.

        Args:
            agent: Agent implementing TemporalAgentProtocol.

        Raises:
            ValueError: If an agent with the same name is already registered.
        """
        if agent.name in self._agents:
            raise ValueError(f"Agent '{agent.name}' is already registered")
        self._agents[agent.name] = agent
        self._log.info("Registered agent: %s", agent.name)

    def get(self, name: str) -> TemporalAgentProtocol:
        """Get an agent by name.

        Args:
            name: The agent's unique identifier.

        Returns:
            The registered agent.

        Raises:
            KeyError: If no agent with the given name is registered.
        """
        if name not in self._agents:
            # Include the available names to make misconfiguration obvious.
            available = list(self._agents.keys())
            raise KeyError(f"Agent '{name}' not found. Available agents: {available}")
        return self._agents[name]

    def list_agents(self) -> list[str]:
        """List all registered agent names.

        Returns:
            List of agent names, in registration order.
        """
        return list(self._agents.keys())

    def has(self, name: str) -> bool:
        """Check if an agent is registered.

        Args:
            name: The agent's unique identifier.

        Returns:
            True if the agent is registered.
        """
        return name in self._agents

    def clear(self) -> None:
        """Clear all registered agents (useful for testing)."""
        self._agents.clear()
diff --git a/python-packages/dataing/src/dataing/temporal/client.py b/python-packages/dataing/src/dataing/temporal/client.py
index 5c8fb6d53..b9403610c 100644
--- a/python-packages/dataing/src/dataing/temporal/client.py
+++ b/python-packages/dataing/src/dataing/temporal/client.py
@@ -1,12 +1,15 @@
-"""Temporal client for interacting with investigation workflows."""
+"""Temporal client for interacting with investigation and agent workflows."""
from __future__ import annotations
from dataclasses import dataclass
from typing import Any
-from temporalio.client import Client
+from temporalio.client import Client, WorkflowHandle
+from temporalio.service import RPCError
+from dataing.temporal.agents.protocol import AgentWorkflowInput, AgentWorkflowResult
+from dataing.temporal.workflows.agent import AgentWorkflow, AgentWorkflowQueryStatus
from dataing.temporal.workflows.investigation import (
InvestigationInput,
InvestigationQueryStatus,
@@ -279,3 +282,324 @@ async def query_status(self, investigation_id: str) -> InvestigationQueryStatus:
handle = await self.get_handle(investigation_id)
status: InvestigationQueryStatus = await handle.query(InvestigationWorkflow.get_status)
return status
+
+
@dataclass
class AgentWorkflowStatus:
    """Combined status of an agent workflow.

    Mixes coarse Temporal execution state (always available) with
    fine-grained query results (only populated while the workflow runs).
    """

    workflow_id: str  # Temporal workflow ID.
    run_id: str | None  # Current run ID, if known.
    workflow_status: str  # Normalized Temporal workflow status.
    session_id: str | None = None  # From the workflow query, when running.
    agent_name: str | None = None  # From the workflow query, when running.
    turn_count: int | None = None  # Turns completed so far.
    total_tokens: int | None = None  # Tokens used so far.
    last_response: str | None = None  # Most recent agent response.
    is_processing: bool = False  # True while a turn is in flight.
+
+
class TemporalAgentClient:
    """Client for interacting with agent workflows via Temporal.

    This client provides a high-level interface for:
    - Starting agent sessions (workflows)
    - Sending messages via signals
    - Querying session status
    - Getting responses

    Usage:
        client = await TemporalAgentClient.connect(
            host="localhost:7233",
            namespace="default",
            task_queue="investigations",
        )

        # Start or get agent session
        handle = await client.start_or_get_session(
            agent_name="dataing-assistant",
            session_id="sess-123",
            tenant_id="tenant-1",
        )

        # Send message
        await client.send_message("sess-123", "What's wrong with the data?")

        # Poll for response
        status = await client.get_status("sess-123")
    """

    # Temporal workflow status names mapped to this client's vocabulary.
    _STATUS_MAP = {
        "RUNNING": "running",
        "COMPLETED": "completed",
        "FAILED": "failed",
        "CANCELED": "cancelled",
        "CANCELLED": "cancelled",
        "TERMINATED": "terminated",
        "TIMED_OUT": "timed_out",
    }

    # States after which the workflow will never produce another response.
    _TERMINAL_STATUSES = frozenset(
        {"completed", "failed", "cancelled", "terminated", "timed_out"}
    )

    def __init__(
        self,
        client: Client,
        task_queue: str = "investigations",
    ) -> None:
        """Initialize the Temporal agent client.

        Args:
            client: Temporal client connection.
            task_queue: Task queue for agent workflows.
        """
        self._client = client
        self._task_queue = task_queue

    @classmethod
    async def connect(
        cls,
        host: str = "localhost:7233",
        namespace: str = "default",
        task_queue: str = "investigations",
    ) -> TemporalAgentClient:
        """Connect to Temporal and create client.

        Args:
            host: Temporal server host.
            namespace: Temporal namespace.
            task_queue: Task queue for agent workflows.

        Returns:
            Connected TemporalAgentClient.
        """
        client = await Client.connect(target_host=host, namespace=namespace)
        return cls(client=client, task_queue=task_queue)

    def _workflow_id(self, session_id: str) -> str:
        """Generate workflow ID for a session.

        Args:
            session_id: Session ID.

        Returns:
            Workflow ID in format "assistant-{session_id}".
        """
        return f"assistant-{session_id}"

    @staticmethod
    def _raw_status_name(status: Any) -> str | None:
        """Extract the Temporal status name from a describe() status.

        Args:
            status: The `status` attribute of a workflow description
                (an enum-like object, a plain value, or None).

        Returns:
            The status name string, or None if the status is unset.
        """
        if status is None:
            return None
        return status.name if hasattr(status, "name") else str(status)

    async def start_session(
        self,
        agent_name: str,
        session_id: str,
        tenant_id: str,
        context: dict[str, Any] | None = None,
        initial_message: str | None = None,
    ) -> WorkflowHandle[AgentWorkflowResult, Any]:
        """Start a new agent session workflow.

        Args:
            agent_name: Name of the agent to use.
            session_id: Unique session ID.
            tenant_id: Tenant ID for multi-tenancy.
            context: Optional initial context.
            initial_message: Optional first message to process.

        Returns:
            Workflow handle for the session.
        """
        input_data = AgentWorkflowInput(
            agent_name=agent_name,
            session_id=session_id,
            tenant_id=tenant_id,
            context=context or {},
            initial_message=initial_message,
        )

        handle: WorkflowHandle[AgentWorkflowResult, Any] = await self._client.start_workflow(
            AgentWorkflow.run,
            input_data,
            id=self._workflow_id(session_id),
            task_queue=self._task_queue,
        )

        return handle

    async def get_handle(self, session_id: str) -> WorkflowHandle[AgentWorkflowResult, Any]:
        """Get a handle to an existing session workflow.

        Args:
            session_id: Session ID.

        Returns:
            Workflow handle for the session.
        """
        handle: WorkflowHandle[AgentWorkflowResult, Any] = self._client.get_workflow_handle(
            self._workflow_id(session_id),
            result_type=AgentWorkflowResult,
        )
        return handle

    async def workflow_exists(self, session_id: str) -> bool:
        """Check if a workflow exists for the session.

        Args:
            session_id: Session ID.

        Returns:
            True if the workflow exists and is currently running.
        """
        try:
            handle = await self.get_handle(session_id)
            desc = await handle.describe()
            return self._raw_status_name(desc.status) == "RUNNING"
        except RPCError:
            # describe() raises RPCError (e.g. NOT_FOUND) for unknown workflows.
            return False

    async def start_or_get_session(
        self,
        agent_name: str,
        session_id: str,
        tenant_id: str,
        context: dict[str, Any] | None = None,
    ) -> WorkflowHandle[AgentWorkflowResult, Any]:
        """Start a new session or get the existing running one.

        Args:
            agent_name: Name of the agent to use.
            session_id: Unique session ID.
            tenant_id: Tenant ID for multi-tenancy.
            context: Optional context.

        Returns:
            Workflow handle for the session.
        """
        if await self.workflow_exists(session_id):
            return await self.get_handle(session_id)
        return await self.start_session(
            agent_name=agent_name,
            session_id=session_id,
            tenant_id=tenant_id,
            context=context,
        )

    async def send_message(self, session_id: str, message: str) -> None:
        """Send a message to an agent session.

        Args:
            session_id: Session ID.
            message: The user's message.
        """
        handle = await self.get_handle(session_id)
        await handle.signal(AgentWorkflow.send_message, message)

    async def update_context(self, session_id: str, context: dict[str, Any]) -> None:
        """Update the context for a session.

        Args:
            session_id: Session ID.
            context: Context to merge with existing context.
        """
        handle = await self.get_handle(session_id)
        await handle.signal(AgentWorkflow.update_context, context)

    async def complete_session(self, session_id: str) -> None:
        """Complete a session and end the workflow.

        Args:
            session_id: Session ID.
        """
        handle = await self.get_handle(session_id)
        await handle.signal(AgentWorkflow.complete_session)

    async def get_status(self, session_id: str) -> AgentWorkflowStatus:
        """Get the status of an agent session.

        Args:
            session_id: Session ID.

        Returns:
            Session status including workflow state and progress.
        """
        handle = await self.get_handle(session_id)
        desc = await handle.describe()

        # Map Temporal status into this client's vocabulary.
        status_name = self._raw_status_name(desc.status)
        if status_name is None:
            workflow_status = "unknown"
        else:
            workflow_status = self._STATUS_MAP.get(status_name, "unknown")

        query_status: AgentWorkflowQueryStatus | None = None

        # If running, try to get detailed status via query. Queries can fail
        # transiently (e.g. during worker restarts); fall back to coarse status.
        if workflow_status == "running":
            try:
                query_status = await handle.query(AgentWorkflow.get_status)
            except Exception:
                pass

        return AgentWorkflowStatus(
            workflow_id=self._workflow_id(session_id),
            run_id=desc.run_id,
            workflow_status=workflow_status,
            session_id=query_status.session_id if query_status else None,
            agent_name=query_status.agent_name if query_status else None,
            turn_count=query_status.turn_count if query_status else None,
            total_tokens=query_status.total_tokens if query_status else None,
            last_response=query_status.last_response if query_status else None,
            is_processing=query_status.is_processing if query_status else False,
        )

    async def get_last_turn(self, session_id: str) -> dict[str, Any] | None:
        """Get the most recent turn from a session.

        Args:
            session_id: Session ID.

        Returns:
            Last turn dictionary or None if no turns yet.
        """
        handle = await self.get_handle(session_id)
        result: dict[str, Any] | None = await handle.query(AgentWorkflow.get_last_turn)
        return result

    async def wait_for_response(
        self,
        session_id: str,
        timeout_seconds: float = 300,
        poll_interval: float = 0.5,
    ) -> str | None:
        """Wait for the agent to finish processing and return the response.

        Args:
            session_id: Session ID.
            timeout_seconds: Maximum time to wait.
            poll_interval: Time between polls.

        Returns:
            The agent's response, or None if timeout.
        """
        import asyncio

        # get_event_loop() is deprecated inside coroutines; use the
        # running loop's monotonic clock for the deadline.
        loop = asyncio.get_running_loop()
        deadline = loop.time() + timeout_seconds

        while True:
            status = await self.get_status(session_id)

            # If not processing and we have a response, return it.
            if not status.is_processing and status.last_response:
                return status.last_response

            # Any terminal state (including terminated/timed_out) means no
            # further responses will arrive; return whatever we have.
            if status.workflow_status in self._TERMINAL_STATUSES:
                return status.last_response

            # Check timeout.
            if loop.time() >= deadline:
                return None

            await asyncio.sleep(poll_interval)
diff --git a/python-packages/dataing/src/dataing/temporal/workflows/__init__.py b/python-packages/dataing/src/dataing/temporal/workflows/__init__.py
index cc3fdc114..f2a97b209 100644
--- a/python-packages/dataing/src/dataing/temporal/workflows/__init__.py
+++ b/python-packages/dataing/src/dataing/temporal/workflows/__init__.py
@@ -1,5 +1,9 @@
"""Temporal workflow definitions for investigation orchestration."""
+from dataing.temporal.workflows.agent import (
+ AgentWorkflow,
+ AgentWorkflowQueryStatus,
+)
from dataing.temporal.workflows.evaluate_hypothesis import (
EvaluateHypothesisInput,
EvaluateHypothesisResult,
@@ -13,6 +17,7 @@
)
__all__ = [
+ # Investigation workflows
"InvestigationWorkflow",
"InvestigationInput",
"InvestigationResult",
@@ -20,4 +25,7 @@
"EvaluateHypothesisWorkflow",
"EvaluateHypothesisInput",
"EvaluateHypothesisResult",
+ # Generic agent workflow
+ "AgentWorkflow",
+ "AgentWorkflowQueryStatus",
]
diff --git a/python-packages/dataing/src/dataing/temporal/workflows/agent.py b/python-packages/dataing/src/dataing/temporal/workflows/agent.py
new file mode 100644
index 000000000..d4ebcc459
--- /dev/null
+++ b/python-packages/dataing/src/dataing/temporal/workflows/agent.py
@@ -0,0 +1,257 @@
+"""Generic agent workflow for Temporal execution.
+
+This workflow provides a unified way to run any registered agent through Temporal,
+with signals for message passing and queries for status monitoring.
+"""
+
+from __future__ import annotations
+
+from dataclasses import dataclass
+from datetime import timedelta
+from typing import Any
+
+from temporalio import workflow
+
+with workflow.unsafe.imports_passed_through():
+ from dataing.temporal.activities.agent_turn import AgentTurnActivityInput
+ from dataing.temporal.agents.protocol import (
+ AgentTurnResult,
+ AgentWorkflowInput,
+ AgentWorkflowResult,
+ )
+
+
@dataclass
class AgentWorkflowQueryStatus:
    """Status returned by the get_status query."""

    # Session identifier echoed back from the workflow input.
    session_id: str
    # Name of the registered agent this workflow is running.
    agent_name: str
    status: str  # "waiting", "processing", "complete", "error"
    # Number of completed turns recorded so far.
    turn_count: int
    # Running sum of tokens used across all turns.
    total_tokens: int
    # Response text of the most recent turn, or None before the first turn.
    last_response: str | None
    # True while an agent_turn activity is in flight.
    is_processing: bool
+
+
@workflow.defn
class AgentWorkflow:
    """Generic workflow for running any registered agent.

    This workflow:
    1. Waits for messages via signal
    2. Executes agent turns via the agent_turn activity
    3. Tracks conversation state and tokens
    4. Supports status queries

    Usage:
        # Start workflow
        handle = await client.start_workflow(
            AgentWorkflow.run,
            AgentWorkflowInput(
                agent_name="dataing-assistant",
                session_id="sess-123",
                tenant_id="tenant-456",
            ),
            id="assistant-sess-123",
            task_queue="investigations",
        )

        # Send message via signal
        await handle.signal(AgentWorkflow.send_message, "What's wrong?")

        # Query status
        status = await handle.query(AgentWorkflow.get_status)
    """

    def __init__(self) -> None:
        """Initialize workflow state."""
        # Completed turn records (message/response/tool_calls/tokens).
        self._turns: list[dict[str, Any]] = []
        # One of "waiting", "processing", "complete", "error".
        self._status = "waiting"
        # FIFO queue of messages received via signal.  A single-slot
        # attribute would silently drop any message whose signal arrives
        # while a previous turn is still being processed, so signals
        # enqueue here and the run loop dequeues in arrival order.
        self._pending_messages: list[str] = []
        self._total_tokens = 0
        self._last_response: str | None = None
        self._is_processing = False
        self._session_id = ""
        self._agent_name = ""
        self._tenant_id = ""
        self._context: dict[str, Any] = {}

    @workflow.signal
    def send_message(self, message: str) -> None:
        """Signal to send a new message to the agent.

        Messages are queued, so signals delivered while a turn is in
        flight are processed in order instead of overwriting each other.

        Args:
            message: The user's message to process.
        """
        workflow.logger.info(f"Received message signal: {message[:100]}...")
        self._pending_messages.append(message)

    @workflow.signal
    def update_context(self, context: dict[str, Any]) -> None:
        """Signal to update the workflow context.

        Args:
            context: New context to merge with existing context.
        """
        self._context.update(context)
        workflow.logger.info(f"Context updated with keys: {list(context.keys())}")

    @workflow.signal
    def complete_session(self) -> None:
        """Signal to complete the session and end the workflow."""
        workflow.logger.info("Received complete_session signal")
        self._status = "complete"

    @workflow.query
    def get_status(self) -> AgentWorkflowQueryStatus:
        """Query current workflow status.

        Returns:
            AgentWorkflowQueryStatus with current state.
        """
        return AgentWorkflowQueryStatus(
            session_id=self._session_id,
            agent_name=self._agent_name,
            status=self._status,
            turn_count=len(self._turns),
            total_tokens=self._total_tokens,
            last_response=self._last_response,
            is_processing=self._is_processing,
        )

    @workflow.query
    def get_turns(self) -> list[dict[str, Any]]:
        """Query all recorded turns.

        Returns:
            List of turn dictionaries.
        """
        return self._turns

    @workflow.query
    def get_last_turn(self) -> dict[str, Any] | None:
        """Query the most recent turn.

        Returns:
            Last turn dictionary or None if no turns yet.
        """
        return self._turns[-1] if self._turns else None

    @workflow.run
    async def run(self, input: AgentWorkflowInput) -> AgentWorkflowResult:
        """Execute the agent workflow.

        This workflow runs a loop that:
        1. Waits for a message signal
        2. Executes an agent turn
        3. Records the result
        4. Repeats until complete

        Args:
            input: Workflow input with agent name and session info.

        Returns:
            AgentWorkflowResult with all turns and totals.
        """
        self._session_id = input.session_id
        self._agent_name = input.agent_name
        self._tenant_id = input.tenant_id
        self._context = input.context.copy() if input.context else {}
        self._status = "waiting"

        workflow.logger.info(
            f"Starting agent workflow: agent={input.agent_name}, session={input.session_id}"
        )

        # Process initial message if provided
        if input.initial_message:
            self._pending_messages.append(input.initial_message)

        # Main processing loop
        while self._status not in ("complete", "error"):
            # Wait for a queued message or a completion signal.
            await workflow.wait_condition(
                lambda: bool(self._pending_messages) or self._status == "complete"
            )

            # Check if we should exit
            if self._status == "complete":
                break

            # Dequeue the oldest pending message (FIFO order).
            message = self._pending_messages.pop(0)

            # Process the message
            self._is_processing = True
            self._status = "processing"

            try:
                # Execute agent turn via activity
                activity_input = AgentTurnActivityInput(
                    agent_name=self._agent_name,
                    message=message,
                    session_id=self._session_id,
                    context=self._context,
                    tenant_id=self._tenant_id,
                )

                result_dict = await workflow.execute_activity(
                    "agent_turn",
                    activity_input,
                    start_to_close_timeout=timedelta(minutes=5),
                    heartbeat_timeout=timedelta(minutes=2),
                )

                # Parse result
                result = AgentTurnResult.from_dict(result_dict)

                # Record the turn
                turn_record = {
                    "message": message,
                    "response": result.response,
                    "tool_calls": [tc.to_dict() for tc in result.tool_calls],
                    "tokens_used": result.tokens_used,
                    "is_complete": result.is_complete,
                }
                self._turns.append(turn_record)
                self._total_tokens += result.tokens_used
                self._last_response = result.response

                workflow.logger.info(
                    f"Turn completed: turn={len(self._turns)}, tokens={result.tokens_used}"
                )

                # Honor a complete_session signal that arrived while the
                # activity was running: don't clobber it with "waiting".
                if result.is_complete or self._status == "complete":
                    self._status = "complete"
                else:
                    self._status = "waiting"

            except Exception as e:
                workflow.logger.error(f"Agent turn failed: {e}")
                self._status = "error"
                self._turns.append(
                    {
                        "message": message,
                        "error": str(e),
                    }
                )

            finally:
                self._is_processing = False

        workflow.logger.info(
            f"Agent workflow completed: turns={len(self._turns)}, tokens={self._total_tokens}"
        )

        return AgentWorkflowResult(
            session_id=self._session_id,
            turns=self._turns,
            total_tokens=self._total_tokens,
            status=self._status,
        )
diff --git a/python-packages/dataing/tests/unit/agents/test_assistant.py b/python-packages/dataing/tests/unit/agents/test_assistant.py
new file mode 100644
index 000000000..d19982831
--- /dev/null
+++ b/python-packages/dataing/tests/unit/agents/test_assistant.py
@@ -0,0 +1,412 @@
+"""Unit tests for DataingAssistant."""
+
+from __future__ import annotations
+
+from pathlib import Path
+from unittest.mock import AsyncMock, MagicMock, patch
+from uuid import UUID
+
+import pytest
+
+from dataing.agents.assistant import (
+ ASSISTANT_SYSTEM_PROMPT,
+ DataingAssistant,
+ create_assistant,
+)
+
+
class TestDataingAssistantInit:
    """Tests covering DataingAssistant construction."""

    @staticmethod
    def _build(**kwargs) -> DataingAssistant:
        """Construct an assistant with provider, model and agent patched out."""
        with patch("dataing.agents.assistant.AnthropicProvider"), patch(
            "dataing.agents.assistant.AnthropicModel"
        ), patch("dataing.agents.assistant.BondAgent"):
            return DataingAssistant(api_key="test-key", **kwargs)

    def test_init_minimal(self) -> None:
        """Only api_key and tenant_id are required; the rest defaults."""
        built = self._build(tenant_id="test-tenant")

        assert built._tenant_id == "test-tenant"
        assert built._repo_path == Path(".")
        assert built._github_token is None

    def test_init_with_uuid_tenant(self) -> None:
        """A UUID tenant ID is normalised to its string form."""
        tenant = UUID("12345678-1234-5678-1234-567812345678")

        built = self._build(tenant_id=tenant)

        assert built._tenant_id == str(tenant)

    def test_init_with_all_options(self) -> None:
        """Every optional constructor argument is accepted and stored."""
        built = self._build(
            tenant_id="test-tenant",
            model="claude-opus-4-20250514",
            repo_path="/path/to/repo",
            github_token="gh-token",
            log_directories=["/var/log", "/app/logs"],
            max_retries=5,
        )

        assert built._repo_path == Path("/path/to/repo")
        assert built._github_token == "gh-token"
+
+
class TestDataingAssistantTools:
    """Tests for the assistant's tool-set assembly."""

    @pytest.fixture
    def assistant(self) -> DataingAssistant:
        """Assistant with external collaborators patched out."""
        with patch("dataing.agents.assistant.AnthropicProvider"), patch(
            "dataing.agents.assistant.AnthropicModel"
        ), patch("dataing.agents.assistant.BondAgent"):
            return DataingAssistant(
                api_key="test-key",
                tenant_id="test-tenant",
            )

    @staticmethod
    def _tool_names(assistant: DataingAssistant) -> set[str]:
        """Collect the function name of every built tool."""
        return {
            tool.function.__name__
            for tool in assistant._build_tools()
            if hasattr(tool, "function")
        }

    def test_build_tools_includes_file_tools(self, assistant: DataingAssistant) -> None:
        """Local file tools are registered."""
        expected = {"read_local_file", "search_in_files", "list_directory"}
        assert expected <= self._tool_names(assistant)

    def test_build_tools_includes_docker_tools(self, assistant: DataingAssistant) -> None:
        """Docker inspection tools are registered."""
        expected = {
            "list_docker_containers",
            "get_docker_container_status",
            "get_docker_container_health",
            "get_docker_container_stats",
            "find_unhealthy_docker_containers",
        }
        assert expected <= self._tool_names(assistant)

    def test_build_tools_includes_log_tools(self, assistant: DataingAssistant) -> None:
        """Bound log-tool methods are registered."""
        expected = {"_get_logs", "_search_logs", "_get_recent_errors"}
        assert expected <= self._tool_names(assistant)
+
+
class TestDataingAssistantLogTools:
    """Behaviour of the assistant's bound log-tool methods."""

    @pytest.fixture
    def assistant(self) -> DataingAssistant:
        """Assistant with external collaborators patched out."""
        with patch("dataing.agents.assistant.AnthropicProvider"), patch(
            "dataing.agents.assistant.AnthropicModel"
        ), patch("dataing.agents.assistant.BondAgent"):
            return DataingAssistant(
                api_key="test-key",
                tenant_id="test-tenant",
            )

    @pytest.mark.asyncio
    async def test_get_logs_success(self, assistant: DataingAssistant) -> None:
        """A populated LogResult is rendered with count and messages."""
        from datetime import datetime

        from dataing.agents.tools.log_providers.base import LogEntry, LogResult

        entries = [
            LogEntry(
                timestamp=datetime(2024, 1, 15, 10, 30, 45),
                message="Application started",
                level="info",
                source="/app/logs/app.log",
            ),
            LogEntry(
                timestamp=datetime(2024, 1, 15, 10, 30, 46),
                message="Database connected",
                level="info",
                source="/app/logs/app.log",
            ),
        ]
        assistant._log_provider.get_logs = AsyncMock(
            return_value=LogResult(entries=entries, source="/app/logs/app.log")
        )

        rendered = await assistant._get_logs("/app/logs/app.log", max_entries=10)

        for fragment in ("2 entries", "Application started", "Database connected"):
            assert fragment in rendered

    @pytest.mark.asyncio
    async def test_get_logs_error(self, assistant: DataingAssistant) -> None:
        """Provider errors are surfaced in the rendered text."""
        from dataing.agents.tools.log_providers.base import LogResult

        assistant._log_provider.get_logs = AsyncMock(
            return_value=LogResult(
                entries=[],
                source="/nonexistent/log",
                error="File not found",
            )
        )

        rendered = await assistant._get_logs("/nonexistent/log")

        assert "Error reading logs" in rendered
        assert "File not found" in rendered

    @pytest.mark.asyncio
    async def test_search_logs_success(self, assistant: DataingAssistant) -> None:
        """Search hits are rendered with count and message text."""
        from datetime import datetime

        from dataing.agents.tools.log_providers.base import LogEntry, LogResult

        hit = LogEntry(
            timestamp=datetime(2024, 1, 15, 10, 30, 48),
            message="ERROR: Connection refused",
            level="error",
            source="/app/logs/app.log",
        )
        assistant._log_provider.search_logs = AsyncMock(
            return_value=LogResult(entries=[hit], source="multiple")
        )

        rendered = await assistant._search_logs("ERROR")

        assert "1 entries" in rendered
        assert "Connection refused" in rendered

    @pytest.mark.asyncio
    async def test_get_recent_errors_success(self, assistant: DataingAssistant) -> None:
        """Errors are rendered together with their context lines."""
        from datetime import datetime

        from dataing.agents.tools.log_providers.base import LogEntry, LogResult

        failure = LogEntry(
            timestamp=datetime(2024, 1, 15, 10, 30, 48),
            message="Failed to connect to database",
            level="error",
            source="/app/logs/app.log",
            metadata={
                "context_before": ["Attempting connection..."],
                "context_after": ["Retrying in 5 seconds"],
            },
        )
        assistant._log_provider.get_recent_errors = AsyncMock(
            return_value=LogResult(entries=[failure], source="/app/logs/app.log")
        )

        rendered = await assistant._get_recent_errors("/app/logs/app.log")

        for fragment in ("1 found", "Failed to connect", "Context before", "Context after"):
            assert fragment in rendered
+
+
class TestDataingAssistantAsk:
    """Tests for the DataingAssistant.ask entry point."""

    @pytest.fixture
    def assistant(self) -> DataingAssistant:
        """Assistant whose underlying agent is a mock answering 'Test response'."""
        fake_agent = MagicMock()
        fake_agent.ask = AsyncMock(return_value="Test response")
        with patch("dataing.agents.assistant.AnthropicProvider"), patch(
            "dataing.agents.assistant.AnthropicModel"
        ), patch("dataing.agents.assistant.BondAgent", return_value=fake_agent):
            built = DataingAssistant(
                api_key="test-key",
                tenant_id="test-tenant",
            )
        built._agent = fake_agent
        return built

    @pytest.mark.asyncio
    async def test_ask_simple_question(self, assistant: DataingAssistant) -> None:
        """A plain question is delegated once and the response returned."""
        answer = await assistant.ask("What containers are running?")

        assistant._agent.ask.assert_called_once()
        assert answer == "Test response"

    @pytest.mark.asyncio
    async def test_ask_with_session_id(self, assistant: DataingAssistant) -> None:
        """The session ID is forwarded to the underlying agent."""
        await assistant.ask("Check logs", session_id="session-123")

        assert "session-123" in str(assistant._agent.ask.call_args)

    @pytest.mark.asyncio
    async def test_ask_with_context(self, assistant: DataingAssistant) -> None:
        """Context values are folded into the prompt passed to the agent."""
        extra = {
            "investigation": {
                "id": "inv-123",
                "status": "in_progress",
            },
            "datasource": {
                "name": "production-db",
                "type": "postgresql",
            },
        }

        await assistant.ask("What's the status?", context=extra)

        prompt = assistant._agent.ask.call_args[0][0]
        assert "inv-123" in prompt
        assert "production-db" in prompt

    @pytest.mark.asyncio
    async def test_ask_with_handlers(self, assistant: DataingAssistant) -> None:
        """Streaming handlers are passed through as a keyword argument."""
        stream_handlers = MagicMock()

        await assistant.ask("Check health", handlers=stream_handlers)

        assert assistant._agent.ask.call_args.kwargs.get("handlers") == stream_handlers
+
+
class TestDataingAssistantContextFormatting:
    """Rendering of the optional context dict into prompt text."""

    @pytest.fixture
    def assistant(self) -> DataingAssistant:
        """Assistant with external collaborators patched out."""
        with patch("dataing.agents.assistant.AnthropicProvider"), patch(
            "dataing.agents.assistant.AnthropicModel"
        ), patch("dataing.agents.assistant.BondAgent"):
            return DataingAssistant(
                api_key="test-key",
                tenant_id="test-tenant",
            )

    def test_format_context_with_investigation(self, assistant: DataingAssistant) -> None:
        """Investigation id, status and finding all appear in the output."""
        rendered = assistant._format_context(
            {
                "investigation": {
                    "id": "inv-456",
                    "status": "completed",
                    "finding": {"root_cause": "Null values in column X"},
                },
            }
        )

        for fragment in ("inv-456", "completed", "Null values"):
            assert fragment in rendered

    def test_format_context_with_datasource(self, assistant: DataingAssistant) -> None:
        """Datasource name and type appear in the output."""
        rendered = assistant._format_context(
            {
                "datasource": {
                    "name": "analytics-dw",
                    "type": "snowflake",
                },
            }
        )

        assert "analytics-dw" in rendered
        assert "snowflake" in rendered

    def test_format_context_with_alerts(self, assistant: DataingAssistant) -> None:
        """Alert lists are summarised by their count."""
        rendered = assistant._format_context(
            {"recent_alerts": [{"id": "1"}, {"id": "2"}, {"id": "3"}]}
        )

        assert "Recent alerts: 3" in rendered
+
+
class TestCreateAssistant:
    """Tests for the create_assistant factory function."""

    @staticmethod
    def _build(**extra) -> DataingAssistant:
        """Invoke the factory with external collaborators patched out."""
        with patch("dataing.agents.assistant.AnthropicProvider"), patch(
            "dataing.agents.assistant.AnthropicModel"
        ), patch("dataing.agents.assistant.BondAgent"):
            return create_assistant(
                api_key="test-key",
                tenant_id="test-tenant",
                **extra,
            )

    def test_create_assistant_basic(self) -> None:
        """The factory returns a DataingAssistant bound to the tenant."""
        built = self._build()

        assert isinstance(built, DataingAssistant)
        assert built._tenant_id == "test-tenant"

    def test_create_assistant_with_kwargs(self) -> None:
        """Extra keyword arguments are forwarded to the constructor."""
        built = self._build(github_token="gh-token", log_directories=["/logs"])

        assert built._github_token == "gh-token"
+
+
class TestSystemPrompt:
    """Sanity checks on the static assistant system prompt."""

    def test_system_prompt_contains_capabilities(self) -> None:
        """The prompt names the three capability areas."""
        for capability in (
            "Infrastructure debugging",
            "Data questions",
            "Investigation support",
        ):
            assert capability in ASSISTANT_SYSTEM_PROMPT

    def test_system_prompt_contains_dataing_overview(self) -> None:
        """The prompt describes the Dataing platform."""
        for term in ("Dataing", "data quality", "Investigations"):
            assert term in ASSISTANT_SYSTEM_PROMPT

    def test_system_prompt_contains_approach_guidance(self) -> None:
        """The prompt includes guidance on tone and next steps."""
        for term in ("helpful", "reasoning", "next steps"):
            assert term in ASSISTANT_SYSTEM_PROMPT
diff --git a/python-packages/dataing/tests/unit/agents/tools/__init__.py b/python-packages/dataing/tests/unit/agents/tools/__init__.py
new file mode 100644
index 000000000..37464fbe6
--- /dev/null
+++ b/python-packages/dataing/tests/unit/agents/tools/__init__.py
@@ -0,0 +1 @@
+"""Tests for agents/tools package."""
diff --git a/python-packages/dataing/tests/unit/agents/tools/log_providers/__init__.py b/python-packages/dataing/tests/unit/agents/tools/log_providers/__init__.py
new file mode 100644
index 000000000..bf2aa3d6c
--- /dev/null
+++ b/python-packages/dataing/tests/unit/agents/tools/log_providers/__init__.py
@@ -0,0 +1 @@
+"""Tests for log providers."""
diff --git a/python-packages/dataing/tests/unit/agents/tools/log_providers/test_base.py b/python-packages/dataing/tests/unit/agents/tools/log_providers/test_base.py
new file mode 100644
index 000000000..0c744a76e
--- /dev/null
+++ b/python-packages/dataing/tests/unit/agents/tools/log_providers/test_base.py
@@ -0,0 +1,152 @@
+"""Tests for log provider base classes."""
+
+from __future__ import annotations
+
+from datetime import datetime
+
+from dataing.agents.tools.log_providers.base import (
+ LogEntry,
+ LogProviderConfig,
+ LogResult,
+ LogSource,
+)
+
+
class TestLogSource:
    """Tests for the LogSource enum."""

    def test_values(self) -> None:
        """Every member compares equal to its expected string value."""
        expected = {
            LogSource.LOCAL_FILE: "local_file",
            LogSource.DOCKER: "docker",
            LogSource.CLOUDWATCH: "cloudwatch",
            LogSource.KUBERNETES: "kubernetes",
        }
        for member, value in expected.items():
            assert member == value
+
+
class TestLogProviderConfig:
    """Tests for LogProviderConfig defaults and overrides."""

    def test_create_config(self) -> None:
        """A minimal config defaults to enabled with empty settings."""
        cfg = LogProviderConfig(
            source=LogSource.LOCAL_FILE,
            name="Test Provider",
        )

        assert cfg.source == LogSource.LOCAL_FILE
        assert cfg.name == "Test Provider"
        assert cfg.enabled is True
        assert cfg.settings == {}

    def test_create_with_settings(self) -> None:
        """An explicit enabled flag and settings dict are stored verbatim."""
        cfg = LogProviderConfig(
            source=LogSource.DOCKER,
            name="Docker Logs",
            enabled=False,
            settings={"host": "tcp://localhost:2375"},
        )

        assert cfg.enabled is False
        assert cfg.settings["host"] == "tcp://localhost:2375"
+
+
class TestLogEntry:
    """Tests for the LogEntry dataclass."""

    def test_create_entry(self) -> None:
        """All explicit fields are stored; metadata defaults to empty."""
        stamp = datetime.now()
        entry = LogEntry(
            timestamp=stamp,
            message="Test message",
            level="info",
            source="app.log",
        )

        assert entry.timestamp == stamp
        assert entry.message == "Test message"
        assert entry.level == "info"
        assert entry.source == "app.log"
        assert entry.metadata == {}

    def test_create_minimal_entry(self) -> None:
        """Only a message is required; everything else defaults to None."""
        entry = LogEntry(timestamp=None, message="Just a message")

        assert entry.message == "Just a message"
        for field_value in (entry.timestamp, entry.level, entry.source):
            assert field_value is None

    def test_entry_with_metadata(self) -> None:
        """Arbitrary metadata keys round-trip unchanged."""
        entry = LogEntry(
            timestamp=None,
            message="Test",
            metadata={"line": 42, "container": "app-1"},
        )

        assert entry.metadata["line"] == 42
        assert entry.metadata["container"] == "app-1"
+
+
class TestLogResult:
    """Tests for LogResult success, error and pagination semantics."""

    def test_successful_result(self) -> None:
        """A result with entries and no error reports success."""
        res = LogResult(
            entries=[
                LogEntry(timestamp=None, message="Entry 1"),
                LogEntry(timestamp=None, message="Entry 2"),
            ],
            source="test.log",
        )

        assert res.success
        assert res.source == "test.log"
        assert len(res.entries) == 2
        assert not res.truncated
        assert res.next_token is None
        assert res.error is None

    def test_failed_result(self) -> None:
        """Setting an error marks the result unsuccessful."""
        res = LogResult(
            entries=[],
            source="test.log",
            error="File not found",
        )

        assert not res.success
        assert len(res.entries) == 0
        assert res.error == "File not found"

    def test_truncated_result(self) -> None:
        """Truncation carries a continuation token and stays successful."""
        res = LogResult(
            entries=[LogEntry(timestamp=None, message="Entry 1")],
            source="test.log",
            truncated=True,
            next_token="line:100",
        )

        assert res.success
        assert res.truncated
        assert res.next_token == "line:100"

    def test_empty_result(self) -> None:
        """No entries without an error is still a success."""
        res = LogResult(entries=[], source="test.log")

        assert res.success
        assert len(res.entries) == 0
diff --git a/python-packages/dataing/tests/unit/agents/tools/log_providers/test_local.py b/python-packages/dataing/tests/unit/agents/tools/log_providers/test_local.py
new file mode 100644
index 000000000..0439bcc3f
--- /dev/null
+++ b/python-packages/dataing/tests/unit/agents/tools/log_providers/test_local.py
@@ -0,0 +1,195 @@
+"""Tests for local file log provider."""
+
+from __future__ import annotations
+
+from pathlib import Path
+
+import pytest
+
+from dataing.agents.tools.log_providers.base import LogProviderConfig, LogSource
+from dataing.agents.tools.log_providers.local import (
+ LocalFileLogProvider,
+ create_local_provider,
+)
+
+
@pytest.fixture
def sample_log_dir(tmp_path: Path) -> Path:
    """Populate a temporary directory with two parseable sample log files."""
    contents = {
        "app.log": (
            "2024-01-15 10:30:45 INFO Starting application\n"
            "2024-01-15 10:30:46 DEBUG Loading configuration\n"
            "2024-01-15 10:30:47 WARNING Config file not found, using defaults\n"
            "2024-01-15 10:30:48 ERROR Failed to connect to database\n"
            "2024-01-15 10:30:49 INFO Retrying connection\n"
            "2024-01-15 10:30:50 INFO Application started successfully\n"
        ),
        "worker.log": (
            "2024-01-15 11:00:00 INFO Worker started\n"
            "2024-01-15 11:00:01 INFO Processing job 123\n"
            "2024-01-15 11:00:02 ERROR Job 123 failed: timeout\n"
            "2024-01-15 11:00:03 INFO Processing job 124\n"
            "2024-01-15 11:00:04 INFO Job 124 completed\n"
        ),
    }

    log_dir = tmp_path / "logs"
    log_dir.mkdir()
    for filename, text in contents.items():
        (log_dir / filename).write_text(text)
    return log_dir
+
+
@pytest.fixture
def provider(sample_log_dir: Path) -> LocalFileLogProvider:
    """Provider configured to read from the sample log directory."""
    return LocalFileLogProvider(
        config=LogProviderConfig(
            source=LogSource.LOCAL_FILE,
            name="Test Logs",
        ),
        log_directories=[sample_log_dir],
    )
+
+
class TestLocalFileLogProvider:
    """Tests for LocalFileLogProvider against real temporary files."""

    def test_source_type(self, provider: LocalFileLogProvider) -> None:
        """The provider reports the local-file source type."""
        assert provider.source_type == LogSource.LOCAL_FILE

    def test_name(self, provider: LocalFileLogProvider) -> None:
        """The configured display name is exposed."""
        assert provider.name == "Test Logs"

    @pytest.mark.asyncio
    async def test_list_sources(self, provider: LocalFileLogProvider, sample_log_dir: Path) -> None:
        """Both sample log files are discovered."""
        sources = await provider.list_sources()

        assert len(sources) == 2
        for expected in ("app.log", "worker.log"):
            assert any(expected in s for s in sources)

    @pytest.mark.asyncio
    async def test_get_logs(self, provider: LocalFileLogProvider, sample_log_dir: Path) -> None:
        """Every line of app.log becomes one entry."""
        target = str(sample_log_dir / "app.log")

        result = await provider.get_logs(target)

        assert result.success
        assert result.source == target
        assert len(result.entries) == 6

    @pytest.mark.asyncio
    async def test_get_logs_max_entries(
        self, provider: LocalFileLogProvider, sample_log_dir: Path
    ) -> None:
        """max_entries caps the output and flags truncation."""
        result = await provider.get_logs(str(sample_log_dir / "app.log"), max_entries=3)

        assert result.success
        assert result.truncated
        assert len(result.entries) == 3

    @pytest.mark.asyncio
    async def test_get_logs_with_filter(
        self, provider: LocalFileLogProvider, sample_log_dir: Path
    ) -> None:
        """filter_pattern keeps only matching lines."""
        result = await provider.get_logs(
            str(sample_log_dir / "app.log"), filter_pattern="ERROR"
        )

        assert result.success
        assert len(result.entries) == 1
        # The parsed message excludes the level token; check level + raw line.
        only = result.entries[0]
        assert only.level == "error"
        assert "ERROR" in only.metadata["raw"]

    @pytest.mark.asyncio
    async def test_get_logs_nonexistent(self, provider: LocalFileLogProvider) -> None:
        """A missing file yields an unsuccessful result with an error message."""
        result = await provider.get_logs("/nonexistent/file.log")

        assert not result.success
        assert result.error is not None
        assert "not found" in result.error.lower()

    @pytest.mark.asyncio
    async def test_get_recent_errors(
        self, provider: LocalFileLogProvider, sample_log_dir: Path
    ) -> None:
        """Only the single ERROR line in app.log is returned."""
        result = await provider.get_recent_errors(str(sample_log_dir / "app.log"))

        assert result.success
        assert len(result.entries) == 1
        assert result.entries[0].level == "error"

    @pytest.mark.asyncio
    async def test_search_logs(self, provider: LocalFileLogProvider, sample_log_dir: Path) -> None:
        """Searching one source finds the matching line."""
        result = await provider.search_logs(
            "database", source_id=str(sample_log_dir / "app.log")
        )

        assert result.success
        assert len(result.entries) == 1
        assert "database" in result.entries[0].message.lower()

    @pytest.mark.asyncio
    async def test_search_all_sources(self, provider: LocalFileLogProvider) -> None:
        """Searching without a source covers every log file."""
        result = await provider.search_logs("ERROR")

        assert result.success
        assert len(result.entries) >= 2  # one ERROR line per sample file

    def test_add_log_directory(self, provider: LocalFileLogProvider, tmp_path: Path) -> None:
        """Newly added directories are tracked by the provider."""
        extra_dir = tmp_path / "new_logs"
        extra_dir.mkdir()

        provider.add_log_directory(extra_dir)

        assert extra_dir in provider._log_dirs
+
+
class TestCreateLocalProvider:
    """Tests for the create_local_provider helper."""

    def test_create_default(self) -> None:
        """Defaults produce a local-file provider named 'Local Files'."""
        built = create_local_provider()

        assert built.name == "Local Files"
        assert built.source_type == LogSource.LOCAL_FILE

    def test_create_with_directories(self, tmp_path: Path) -> None:
        """String directories are normalised to Paths and tracked."""
        log_dir = tmp_path / "logs"
        log_dir.mkdir()

        built = create_local_provider(
            name="Custom Logs",
            directories=[str(log_dir)],
        )

        assert built.name == "Custom Logs"
        assert Path(log_dir) in built._log_dirs

    def test_create_with_patterns(self) -> None:
        """Custom glob patterns are stored on the provider."""
        built = create_local_provider(patterns=["*.txt", "*.out"])

        for pattern in ("*.txt", "*.out"):
            assert pattern in built._log_patterns
diff --git a/python-packages/dataing/tests/unit/agents/tools/test_docker.py b/python-packages/dataing/tests/unit/agents/tools/test_docker.py
new file mode 100644
index 000000000..bffc1da15
--- /dev/null
+++ b/python-packages/dataing/tests/unit/agents/tools/test_docker.py
@@ -0,0 +1,493 @@
+"""Unit tests for Docker status tool."""
+
+from __future__ import annotations
+
+from unittest.mock import AsyncMock, MagicMock, patch
+
+import pytest
+
+from dataing.agents.tools.docker import (
+ ContainerStatus,
+ ContainerSummary,
+ DockerStatusResult,
+ DockerStatusTool,
+ _format_bytes,
+ _format_ports,
+ find_unhealthy_docker_containers,
+ get_docker_container_status,
+ list_docker_containers,
+ register_docker_tools,
+)
+from dataing.agents.tools.registry import ToolCategory, ToolRegistry
+
+
class TestContainerDataclasses:
    """Tests for the container dataclasses."""

    def test_container_status_create(self) -> None:
        """A fully-populated ContainerStatus keeps every field it was given."""
        full_status = ContainerStatus(
            id="abc123",
            name="test-container",
            status="running",
            image="nginx:latest",
            created="2024-01-15T10:00:00Z",
            started="2024-01-15T10:00:01Z",
            ports={"80/tcp": [{"HostPort": "8080"}]},
            health={"Status": "healthy"},
        )

        assert full_status.id == "abc123"
        assert full_status.name == "test-container"
        assert full_status.status == "running"
        assert full_status.image == "nginx:latest"
        assert full_status.health["Status"] == "healthy"

    def test_container_status_minimal(self) -> None:
        """Omitted optional fields fall back to empty containers / None."""
        bare_status = ContainerStatus(
            id="xyz",
            name="minimal",
            status="exited",
            image="alpine",
        )

        assert bare_status.id == "xyz"
        assert bare_status.ports == {}
        assert bare_status.health == {}
        assert bare_status.error is None

    def test_container_summary(self) -> None:
        """ContainerSummary round-trips its constructor arguments."""
        summary = ContainerSummary(
            id="abc123",
            name="test-container",
            status="running",
            image="nginx:latest",
        )

        assert summary.id == "abc123"
        assert summary.name == "test-container"

    def test_docker_status_result_success(self) -> None:
        """A successful result carries containers and no error."""
        ok_result = DockerStatusResult(
            success=True,
            containers=[
                ContainerSummary(id="abc", name="container1", status="running", image="nginx"),
            ],
        )

        assert ok_result.success is True
        assert len(ok_result.containers) == 1
        assert ok_result.error is None

    def test_docker_status_result_error(self) -> None:
        """A failed result exposes the error message."""
        failed_result = DockerStatusResult(success=False, error="Docker not available")

        assert failed_result.success is False
        assert failed_result.error == "Docker not available"
+
+
class TestDockerStatusTool:
    """Tests for DockerStatusTool class.

    No real Docker daemon is contacted: every test injects a MagicMock client
    directly into the tool's ``_client`` slot.
    """

    @pytest.fixture
    def mock_docker_client(self) -> MagicMock:
        """Create a mock Docker client whose ping() succeeds."""
        client = MagicMock()
        client.ping.return_value = True
        return client

    @pytest.fixture
    def mock_container(self) -> MagicMock:
        """Create a mock running container with health, ports and timestamps."""
        container = MagicMock()
        container.short_id = "abc123"
        container.name = "test-container"
        container.status = "running"

        mock_image = MagicMock()
        mock_image.tags = ["nginx:latest"]
        container.image = mock_image

        # Mirrors the attrs payload shape exposed by docker-py containers.
        container.attrs = {
            "Created": "2024-01-15T10:00:00Z",
            "State": {
                "StartedAt": "2024-01-15T10:00:01Z",
                "Health": {"Status": "healthy"},
            },
            "NetworkSettings": {
                "Ports": {"80/tcp": [{"HostPort": "8080"}]},
            },
        }
        return container

    @pytest.mark.asyncio
    async def test_list_containers_success(
        self, mock_docker_client: MagicMock, mock_container: MagicMock
    ) -> None:
        """Test listing containers successfully."""
        mock_docker_client.containers.list.return_value = [mock_container]

        tool = DockerStatusTool()
        # Inject the mock so no real daemon connection is attempted.
        tool._client = mock_docker_client

        result = await tool.list_containers()

        assert result.success is True
        assert len(result.containers) == 1
        assert result.containers[0].name == "test-container"
        assert result.containers[0].status == "running"

    @pytest.mark.asyncio
    async def test_list_containers_no_docker(self) -> None:
        """Test listing containers when Docker is not installed."""
        tool = DockerStatusTool()
        tool._client = None  # Reset client

        # Mock _get_client to raise ImportError
        with patch.object(tool, "_get_client") as mock_get:
            mock_get.side_effect = ImportError("docker package required")

            result = await tool.list_containers()

            # The ImportError should be captured into an error result, not raised.
            assert result.success is False
            assert "docker package required" in result.error

    @pytest.mark.asyncio
    async def test_get_container_status_success(
        self, mock_docker_client: MagicMock, mock_container: MagicMock
    ) -> None:
        """Test getting container status successfully."""
        mock_docker_client.containers.get.return_value = mock_container

        tool = DockerStatusTool()
        tool._client = mock_docker_client

        result = await tool.get_container_status("test-container")

        assert result.success is True
        assert result.container is not None
        assert result.container.name == "test-container"
        assert result.container.status == "running"
        assert result.container.health["Status"] == "healthy"

    @pytest.mark.asyncio
    async def test_get_container_status_not_found(self, mock_docker_client: MagicMock) -> None:
        """Test getting status for non-existent container."""
        mock_docker_client.containers.get.side_effect = Exception("Container not found")

        tool = DockerStatusTool()
        tool._client = mock_docker_client

        result = await tool.get_container_status("nonexistent")

        assert result.success is False
        assert "Container not found" in result.error

    @pytest.mark.asyncio
    async def test_get_container_health_healthy(
        self, mock_docker_client: MagicMock, mock_container: MagicMock
    ) -> None:
        """Test getting health for a healthy container."""
        mock_docker_client.containers.get.return_value = mock_container

        tool = DockerStatusTool()
        tool._client = mock_docker_client

        health = await tool.get_container_health("test-container")

        assert health["healthy"] is True
        assert health["status"] == "healthy"

    @pytest.mark.asyncio
    async def test_get_container_health_no_healthcheck(
        self, mock_docker_client: MagicMock, mock_container: MagicMock
    ) -> None:
        """Test getting health for container without health check."""
        # Empty Health dict simulates a container with no HEALTHCHECK defined.
        mock_container.attrs["State"]["Health"] = {}
        mock_docker_client.containers.get.return_value = mock_container

        tool = DockerStatusTool()
        tool._client = mock_docker_client

        health = await tool.get_container_health("test-container")

        # Tri-state: None means "unknown", not unhealthy.
        assert health["healthy"] is None
        assert "No health check configured" in health["message"]

    @pytest.mark.asyncio
    async def test_get_container_stats_success(
        self, mock_docker_client: MagicMock, mock_container: MagicMock
    ) -> None:
        """Test getting container stats."""
        # Shape mirrors docker-py's stats(stream=False) payload.
        mock_container.stats.return_value = {
            "cpu_stats": {
                "cpu_usage": {"total_usage": 200000000},
                "system_cpu_usage": 1000000000,
            },
            "precpu_stats": {
                "cpu_usage": {"total_usage": 100000000},
                "system_cpu_usage": 500000000,
            },
            "memory_stats": {
                "usage": 50 * 1024 * 1024,  # 50 MB
                "limit": 512 * 1024 * 1024,  # 512 MB
            },
            "networks": {
                "eth0": {
                    "rx_bytes": 1000000,
                    "tx_bytes": 500000,
                },
            },
        }
        mock_docker_client.containers.get.return_value = mock_container

        tool = DockerStatusTool()
        tool._client = mock_docker_client

        stats = await tool.get_container_stats("test-container")

        assert "error" not in stats
        assert stats["memory_usage_mb"] == 50.0
        assert stats["memory_limit_mb"] == 512.0

    @pytest.mark.asyncio
    async def test_get_container_stats_not_running(
        self, mock_docker_client: MagicMock, mock_container: MagicMock
    ) -> None:
        """Test getting stats for non-running container."""
        mock_container.status = "exited"
        mock_docker_client.containers.get.return_value = mock_container

        tool = DockerStatusTool()
        tool._client = mock_docker_client

        stats = await tool.get_container_stats("test-container")

        # Stats are reported as an error dict rather than raising.
        assert "error" in stats
        assert "not running" in stats["error"]

    @pytest.mark.asyncio
    async def test_find_unhealthy_containers(self, mock_docker_client: MagicMock) -> None:
        """Test finding unhealthy containers.

        One running+healthy and one exited container; only the exited one
        should be reported, with reason "not_running".
        """
        # Create running healthy container
        healthy_container = MagicMock()
        healthy_container.short_id = "abc123"
        healthy_container.name = "healthy-container"
        healthy_container.status = "running"
        healthy_container.image = MagicMock()
        healthy_container.image.tags = ["nginx:latest"]
        healthy_container.attrs = {
            "State": {"Health": {"Status": "healthy"}},
            "NetworkSettings": {"Ports": {}},
        }

        # Create stopped container
        stopped_container = MagicMock()
        stopped_container.short_id = "def456"
        stopped_container.name = "stopped-container"
        stopped_container.status = "exited"
        stopped_container.image = MagicMock()
        stopped_container.image.tags = ["alpine"]
        stopped_container.attrs = {
            "State": {},
            "NetworkSettings": {"Ports": {}},
        }

        mock_docker_client.containers.list.return_value = [
            healthy_container,
            stopped_container,
        ]
        mock_docker_client.containers.get.return_value = healthy_container

        tool = DockerStatusTool()
        tool._client = mock_docker_client

        unhealthy = await tool.find_unhealthy_containers()

        assert len(unhealthy) == 1
        assert unhealthy[0]["name"] == "stopped-container"
        assert unhealthy[0]["reason"] == "not_running"
+
+
class TestToolFunctions:
    """Tests for the module-level tool functions.

    Each test patches a DockerStatusTool method class-wide, then swaps in a
    fresh tool as the module's ``_default_tool`` so the function under test
    hits the patched method.
    """

    @pytest.mark.asyncio
    async def test_list_docker_containers_formatted(self) -> None:
        """Test list_docker_containers returns formatted output."""
        mock_result = DockerStatusResult(
            success=True,
            containers=[
                ContainerSummary(id="abc", name="web", status="running", image="nginx"),
                ContainerSummary(id="def", name="db", status="exited", image="postgres"),
            ],
        )

        with patch.object(
            DockerStatusTool,
            "list_containers",
            new_callable=AsyncMock,
            return_value=mock_result,
        ):
            # Reset the global tool
            import dataing.agents.tools.docker as docker_module

            docker_module._default_tool = DockerStatusTool()

            output = await list_docker_containers()

            assert "Docker Containers:" in output
            assert "web" in output
            assert "db" in output
            assert "🟢" in output  # Running indicator
            assert "🔴" in output  # Stopped indicator

    @pytest.mark.asyncio
    async def test_list_docker_containers_error(self) -> None:
        """Test list_docker_containers handles errors."""
        mock_result = DockerStatusResult(success=False, error="Connection refused")

        with patch.object(
            DockerStatusTool,
            "list_containers",
            new_callable=AsyncMock,
            return_value=mock_result,
        ):
            import dataing.agents.tools.docker as docker_module

            docker_module._default_tool = DockerStatusTool()

            output = await list_docker_containers()

            # Failures surface as a readable string, not an exception.
            assert "Error" in output
            assert "Connection refused" in output

    @pytest.mark.asyncio
    async def test_get_docker_container_status_formatted(self) -> None:
        """Test get_docker_container_status returns formatted output."""
        mock_result = DockerStatusResult(
            success=True,
            container=ContainerStatus(
                id="abc123",
                name="test-container",
                status="running",
                image="nginx:latest",
                created="2024-01-15T10:00:00Z",
                ports={"80/tcp": [{"HostPort": "8080"}]},
                health={"Status": "healthy"},
            ),
        )

        with patch.object(
            DockerStatusTool,
            "get_container_status",
            new_callable=AsyncMock,
            return_value=mock_result,
        ):
            import dataing.agents.tools.docker as docker_module

            docker_module._default_tool = DockerStatusTool()

            output = await get_docker_container_status("test-container")

            assert "test-container" in output
            assert "running" in output
            assert "nginx:latest" in output
            # Port bindings render as host->container/proto.
            assert "8080->80/tcp" in output

    @pytest.mark.asyncio
    async def test_find_unhealthy_docker_containers_all_healthy(self) -> None:
        """Test find_unhealthy_docker_containers when all healthy."""
        with patch.object(
            DockerStatusTool,
            "find_unhealthy_containers",
            new_callable=AsyncMock,
            return_value=[],
        ):
            import dataing.agents.tools.docker as docker_module

            docker_module._default_tool = DockerStatusTool()

            output = await find_unhealthy_docker_containers()

            assert "All containers are healthy" in output
            assert "✅" in output
+
+
class TestFormatHelpers:
    """Tests for the private formatting helpers."""

    def test_format_ports_with_bindings(self) -> None:
        """Host-bound ports render as host->container/proto."""
        rendered = _format_ports(
            {
                "80/tcp": [{"HostPort": "8080"}],
                "443/tcp": [{"HostPort": "8443"}],
            }
        )

        assert "8080->80/tcp" in rendered
        assert "8443->443/tcp" in rendered

    def test_format_ports_no_bindings(self) -> None:
        """Unbound ports (None or empty binding list) still appear by name."""
        rendered = _format_ports({"80/tcp": None, "443/tcp": []})

        assert "80/tcp" in rendered
        assert "443/tcp" in rendered

    def test_format_ports_empty(self) -> None:
        """An empty mapping renders as the literal string 'none'."""
        assert _format_ports({}) == "none"

    def test_format_bytes_bytes(self) -> None:
        """Values below 1 KB stay in bytes."""
        assert _format_bytes(100) == "100.0 B"

    def test_format_bytes_kilobytes(self) -> None:
        """1024 bytes is exactly 1.0 KB."""
        assert _format_bytes(1024) == "1.0 KB"

    def test_format_bytes_megabytes(self) -> None:
        """1024**2 bytes is exactly 1.0 MB."""
        assert _format_bytes(1024 * 1024) == "1.0 MB"

    def test_format_bytes_gigabytes(self) -> None:
        """1024**3 bytes is exactly 1.0 GB."""
        assert _format_bytes(1024 * 1024 * 1024) == "1.0 GB"
+
+
class TestToolRegistration:
    """Tests for registering Docker tools with a ToolRegistry."""

    def test_register_docker_tools(self) -> None:
        """All five Docker tools land in the registry they are given."""
        registry = ToolRegistry()

        register_docker_tools(registry)

        # Check against the registry instance we just populated (not a global).
        expected = {
            "list_docker_containers",
            "get_docker_container_status",
            "get_docker_container_health",
            "get_docker_container_stats",
            "find_unhealthy_docker_containers",
        }
        assert expected.issubset(set(registry._tools.keys()))

    def test_docker_tools_have_correct_category(self) -> None:
        """Every registered Docker tool is tagged with the DOCKER category."""
        registry = ToolRegistry()

        register_docker_tools(registry)

        assert all(
            tool_config.category == ToolCategory.DOCKER
            for tool_config in registry._tools.values()
        )
diff --git a/python-packages/dataing/tests/unit/agents/tools/test_local_files.py b/python-packages/dataing/tests/unit/agents/tools/test_local_files.py
new file mode 100644
index 000000000..c43fc6822
--- /dev/null
+++ b/python-packages/dataing/tests/unit/agents/tools/test_local_files.py
@@ -0,0 +1,330 @@
+"""Tests for local_files tool module."""
+
+from __future__ import annotations
+
+from pathlib import Path
+
+import pytest
+
+from dataing.agents.tools.local_files import (
+ LocalFileReader,
+ get_file_reader,
+ read_local_file,
+ reset_file_reader,
+ search_in_files,
+)
+
+
@pytest.fixture
def temp_repo(tmp_path: Path) -> Path:
    """Create a temporary repository structure.

    Lays out a minimal repo with allowed source directories, one disallowed
    directory, and a mix of readable, blocked (.env) and example files so the
    reader's allow/block rules can be exercised.

    Returns:
        The repo root (the pytest tmp_path).
    """
    # Create allowed directories
    (tmp_path / "python-packages" / "dataing" / "src").mkdir(parents=True)
    (tmp_path / "frontend" / "src").mkdir(parents=True)
    (tmp_path / "demo").mkdir()
    (tmp_path / "docs").mkdir()
    (tmp_path / "secret-dir").mkdir()

    # Create test files
    (tmp_path / "python-packages" / "dataing" / "src" / "main.py").write_text(
        "print('hello')\nprint('world')\n"
    )
    # Fix: the JSX literal was garbled (tags stripped, string split across
    # lines — a syntax error as written); reconstructed as a valid one-line
    # TSX snippet.
    (tmp_path / "frontend" / "src" / "App.tsx").write_text(
        "export const App = () => <div>Hello</div>;"
    )
    (tmp_path / "docker-compose.yml").write_text("services:\n app:\n image: test")
    (tmp_path / "README.md").write_text("# Test Repo\n\nThis is a test.")
    (tmp_path / ".env").write_text("SECRET_KEY=super_secret_123")
    (tmp_path / ".env.example").write_text("SECRET_KEY=your_key_here")
    (tmp_path / "secret-dir" / "data.txt").write_text("Not allowed!")

    return tmp_path
+
+
@pytest.fixture
def reader(temp_repo: Path) -> LocalFileReader:
    """Create a file reader rooted at the temp repo."""
    return LocalFileReader(temp_repo)
+
+
@pytest.fixture(autouse=True)
def reset_singleton() -> None:
    """Reset the module-level reader singleton before each test.

    Autouse so no test observes a reader initialized by a previous test.
    """
    reset_file_reader()
+
+
class TestLocalFileReader:
    """Tests for LocalFileReader class."""

    def test_read_allowed_file(self, reader: LocalFileReader) -> None:
        """Test reading an allowed file inside an allowed directory."""
        result = reader.read_file("python-packages/dataing/src/main.py")

        assert result.success
        assert result.content is not None
        assert "print('hello')" in result.content
        assert result.file_type == "python"
        assert result.line_count == 2

    def test_read_root_allowed_file(self, reader: LocalFileReader) -> None:
        """Test reading an allowed root file (docker-compose pattern)."""
        result = reader.read_file("docker-compose.yml")

        assert result.success
        assert "services:" in result.content
        assert result.file_type == "yaml"

    def test_read_markdown_file(self, reader: LocalFileReader) -> None:
        """Test reading a markdown file."""
        result = reader.read_file("README.md")

        assert result.success
        assert "# Test Repo" in result.content
        assert result.file_type == "markdown"

    def test_block_env_file(self, reader: LocalFileReader) -> None:
        """Test that .env files are blocked."""
        result = reader.read_file(".env")

        assert not result.success
        assert result.error is not None
        assert "blocked" in result.error.lower()
        assert ".env.example" in result.error  # Should suggest alternative

    def test_block_outside_allowed_dirs(self, reader: LocalFileReader) -> None:
        """Test that files outside allowed dirs are blocked."""
        result = reader.read_file("secret-dir/data.txt")

        assert not result.success
        assert "not in allowed directories" in result.error

    def test_path_traversal_blocked(self, reader: LocalFileReader, temp_repo: Path) -> None:
        """Test that path traversal is blocked."""
        # Try to escape using ..
        result = reader.read_file("python-packages/../../../etc/passwd")

        assert not result.success
        assert result.error is not None
        # Should either detect traversal or be outside repo

    def test_path_traversal_in_middle(self, reader: LocalFileReader) -> None:
        """Test traversal in middle of path."""
        result = reader.read_file("python-packages/dataing/../../../.env")

        assert not result.success

    def test_absolute_path_blocked(self, reader: LocalFileReader) -> None:
        """Test that absolute paths outside repo are blocked."""
        result = reader.read_file("/etc/passwd")

        assert not result.success

    def test_nonexistent_file(self, reader: LocalFileReader) -> None:
        """Test handling of nonexistent file (allowed path, missing file)."""
        result = reader.read_file("python-packages/nonexistent.py")

        assert not result.success
        assert "not found" in result.error.lower()

    def test_read_line_range(self, reader: LocalFileReader, temp_repo: Path) -> None:
        """Test reading a specific line range (inclusive of both endpoints)."""
        # Create a longer file
        lines = [f"Line {i}" for i in range(1, 101)]
        (temp_repo / "python-packages" / "long_file.py").write_text("\n".join(lines))

        result = reader.read_file("python-packages/long_file.py", start_line=10, end_line=15)

        assert result.success
        assert "Line 10" in result.content
        assert "Line 15" in result.content
        assert "Line 9" not in result.content
        assert "Line 16" not in result.content

    def test_file_too_large(self, reader: LocalFileReader, temp_repo: Path) -> None:
        """Test handling of oversized file."""
        # Create a reader with small size limit
        small_reader = LocalFileReader(temp_repo, max_file_size=100)

        # Create a file larger than limit
        (temp_repo / "python-packages" / "large.py").write_text("x" * 200)

        result = small_reader.read_file("python-packages/large.py")

        assert not result.success
        assert "too large" in result.error.lower()
+
+
class TestPathValidation:
    """Tests for path validation."""

    def test_is_path_allowed_in_allowed_dir(self, reader: LocalFileReader) -> None:
        """Paths under an allowed directory validate cleanly."""
        allowed, reason = reader.is_path_allowed("python-packages/test.py")
        assert allowed
        assert reason is None

    def test_is_path_allowed_root_pattern(self, reader: LocalFileReader) -> None:
        """Root-level docker-compose filename patterns are allowed."""
        for candidate in ("docker-compose.yml", "docker-compose.dev.yml"):
            allowed, _ = reader.is_path_allowed(candidate)
            assert allowed

    def test_is_path_blocked_pattern(self, reader: LocalFileReader) -> None:
        """Secret-looking filenames are rejected even inside allowed dirs."""
        blocked_names = (
            ".env",
            "credentials.json",
            "secret.yaml",
            "api_token.txt",
            "private.key",
            "cert.pem",
        )

        for blocked_name in blocked_names:
            allowed, _ = reader.is_path_allowed(f"python-packages/{blocked_name}")
            assert not allowed, f"{blocked_name} should be blocked"

    def test_is_path_outside_repo(self, reader: LocalFileReader) -> None:
        """Absolute paths outside the repo are rejected."""
        allowed, _ = reader.is_path_allowed("/etc/passwd")
        assert not allowed
+
+
class TestFileSearch:
    """Tests for file search functionality."""

    def test_search_files(self, reader: LocalFileReader, temp_repo: Path) -> None:
        """A term present in a source file produces at least one hit."""
        hits = reader.search_files("hello")

        assert len(hits) > 0
        # The python fixture file contains "hello".
        assert any("main.py" in hit[0] for hit in hits)

    def test_search_in_directory(self, reader: LocalFileReader, temp_repo: Path) -> None:
        """Scoped searches only return results under the given directory."""
        hits = reader.search_files("print", directory="python-packages")

        assert len(hits) > 0
        for hit in hits:
            assert "python-packages" in hit[0]

    def test_search_blocked_directory(self, reader: LocalFileReader) -> None:
        """Searching a disallowed directory yields nothing."""
        hits = reader.search_files("data", directory="secret-dir")

        assert not hits

    def test_search_max_results(self, reader: LocalFileReader, temp_repo: Path) -> None:
        """The max_results cap bounds the number of hits returned."""
        # Create many files whose content matches the query.
        for index in range(20):
            (temp_repo / "python-packages" / f"file_{index}.py").write_text("MATCH\n" * 10)

        hits = reader.search_files("MATCH", max_results=5)

        assert len(hits) <= 5
+
+
class TestListDirectory:
    """Tests for directory listing."""

    def test_list_directory(self, reader: LocalFileReader) -> None:
        """Listing an allowed directory surfaces its files."""
        entries = reader.list_files("python-packages/dataing/src")

        assert len(entries) > 0
        assert any("main.py" in entry for entry in entries)

    def test_list_with_pattern(self, reader: LocalFileReader, temp_repo: Path) -> None:
        """A glob pattern restricts the listing to matching files."""
        # Create files of more than one type so the filter has work to do.
        for filename in ("a.py", "b.py", "c.txt"):
            (temp_repo / "python-packages" / filename).write_text("")

        entries = reader.list_files("python-packages", pattern="*.py")

        assert all(entry.endswith(".py") for entry in entries)

    def test_list_blocked_directory(self, reader: LocalFileReader) -> None:
        """Listing a disallowed directory yields nothing."""
        entries = reader.list_files("secret-dir")

        assert not entries
+
+
class TestToolFunctions:
    """Tests for async tool functions.

    Each test resets the module-level reader singleton and re-initializes it
    rooted at the temp repo before calling the tool function.
    """

    @pytest.mark.asyncio
    async def test_read_local_file(self, temp_repo: Path) -> None:
        """Test read_local_file tool function."""
        # Initialize with temp repo
        reset_file_reader()
        get_file_reader(temp_repo)

        result = await read_local_file("docker-compose.yml")

        # Output includes a [file_type] tag, the path, and the content.
        assert "[yaml]" in result
        assert "docker-compose.yml" in result
        assert "services:" in result

    @pytest.mark.asyncio
    async def test_read_local_file_blocked(self, temp_repo: Path) -> None:
        """Test read_local_file returns an error string for blocked files."""
        reset_file_reader()
        get_file_reader(temp_repo)

        result = await read_local_file(".env")

        assert "Error:" in result
        assert "blocked" in result.lower()

    @pytest.mark.asyncio
    async def test_search_in_files(self, temp_repo: Path) -> None:
        """Test search_in_files tool function."""
        reset_file_reader()
        get_file_reader(temp_repo)

        result = await search_in_files("hello")

        assert "matches" in result.lower()
        assert "main.py" in result
+
+
class TestSymlinkSafety:
    """Tests for symlink handling.

    Both tests skip when the platform cannot create symlinks (e.g. Windows
    without the required privilege).
    """

    def test_symlink_to_blocked_file(self, reader: LocalFileReader, temp_repo: Path) -> None:
        """Test that symlinks to blocked files are rejected."""
        # Create symlink to .env
        link_path = temp_repo / "python-packages" / "env_link"
        try:
            link_path.symlink_to(temp_repo / ".env")
        except OSError:
            pytest.skip("Symlinks not supported")

        result = reader.read_file("python-packages/env_link")

        # Should be blocked either because target is .env or outside allowed
        assert not result.success

    def test_symlink_outside_repo(self, reader: LocalFileReader, temp_repo: Path) -> None:
        """Test that symlinks outside repo are rejected."""
        # Create symlink to /etc/passwd (if it exists)
        if not Path("/etc/passwd").exists():
            pytest.skip("Test requires /etc/passwd")

        link_path = temp_repo / "python-packages" / "passwd_link"
        try:
            link_path.symlink_to("/etc/passwd")
        except OSError:
            pytest.skip("Symlinks not supported")

        result = reader.read_file("python-packages/passwd_link")

        assert not result.success
diff --git a/python-packages/dataing/tests/unit/agents/tools/test_registry.py b/python-packages/dataing/tests/unit/agents/tools/test_registry.py
new file mode 100644
index 000000000..6a4e0e2fc
--- /dev/null
+++ b/python-packages/dataing/tests/unit/agents/tools/test_registry.py
@@ -0,0 +1,469 @@
+"""Tests for the unified tool registry."""
+
+from __future__ import annotations
+
+from uuid import uuid4
+
+import pytest
+from pydantic_ai.tools import Tool
+
+from dataing.agents.tools.registry import (
+ TenantToolConfig,
+ ToolCategory,
+ ToolConfig,
+ ToolRegistry,
+ get_default_registry,
+ reset_registry,
+)
+
+
@pytest.fixture
def registry() -> ToolRegistry:
    """Create a fresh registry for each test (no shared global state)."""
    return ToolRegistry()
+
+
@pytest.fixture
def sample_tool() -> Tool:
    """Create a sample Tool wrapping a trivial async function."""

    async def sample_func(arg: str) -> str:
        """Sample tool function."""
        return f"Result: {arg}"

    return Tool(sample_func)
+
+
@pytest.fixture
def sample_config(sample_tool: Tool) -> ToolConfig:
    """Create a sample tool config in the FILES category."""
    return ToolConfig(
        name="sample_tool",
        category=ToolCategory.FILES,
        description="A sample tool for testing",
        tool=sample_tool,
    )
+
+
class TestToolCategory:
    """Tests for ToolCategory enum."""

    def test_categories_exist(self) -> None:
        """Every expected category compares equal to its string value."""
        expected = {
            ToolCategory.FILES: "files",
            ToolCategory.GIT: "git",
            ToolCategory.DOCKER: "docker",
            ToolCategory.LOGS: "logs",
            ToolCategory.DATASOURCE: "datasource",
            ToolCategory.ENVIRONMENT: "environment",
        }

        for member, value in expected.items():
            assert member == value

    def test_category_is_string(self) -> None:
        """Category values are plain strings so they serialize cleanly."""
        assert isinstance(ToolCategory.FILES.value, str)
+
+
class TestToolConfig:
    """Tests for ToolConfig dataclass."""

    def test_default_values(self, sample_tool: Tool) -> None:
        """A minimal config picks up the documented defaults."""
        minimal = ToolConfig(
            name="test",
            category=ToolCategory.FILES,
            description="Test tool",
            tool=sample_tool,
        )

        # Defaults: enabled, auth required, priority 100.
        assert minimal.enabled_by_default is True
        assert minimal.requires_auth is True
        assert minimal.priority == 100

    def test_custom_values(self, sample_tool: Tool) -> None:
        """Explicit keyword arguments override every default."""
        custom = ToolConfig(
            name="test",
            category=ToolCategory.DOCKER,
            description="Test tool",
            tool=sample_tool,
            enabled_by_default=False,
            requires_auth=False,
            priority=50,
        )

        assert custom.enabled_by_default is False
        assert custom.requires_auth is False
        assert custom.priority == 50
+
+
class TestTenantToolConfig:
    """Tests for TenantToolConfig dataclass."""

    def test_default_empty_sets(self) -> None:
        """A fresh config starts with no enabled/disabled tools or limits."""
        fresh = TenantToolConfig()

        assert fresh.enabled_tools == set()
        assert fresh.disabled_tools == set()
        assert fresh.tool_limits == {}

    def test_custom_values(self) -> None:
        """Supplied collections are stored as given."""
        populated = TenantToolConfig(
            enabled_tools={"tool1", "tool2"},
            disabled_tools={"tool3"},
            tool_limits={"tool1": {"rate": 100}},
        )

        assert "tool1" in populated.enabled_tools
        assert "tool3" in populated.disabled_tools
        assert populated.tool_limits["tool1"]["rate"] == 100
+
+
class TestToolRegistry:
    """Tests for ToolRegistry class."""

    def test_register_tool(self, registry: ToolRegistry, sample_config: ToolConfig) -> None:
        """Test basic tool registration and lookup by name."""
        registry.register(sample_config)
        assert registry.get_tool("sample_tool") == sample_config

    def test_register_duplicate_raises(
        self, registry: ToolRegistry, sample_config: ToolConfig
    ) -> None:
        """Test that registering duplicate tool raises ValueError."""
        registry.register(sample_config)
        with pytest.raises(ValueError, match="already registered"):
            registry.register(sample_config)

    def test_register_tool_function(self, registry: ToolRegistry) -> None:
        """Test register_tool convenience method.

        register_tool wraps a bare async function into a ToolConfig so the
        caller doesn't have to construct the dataclass by hand.
        """

        async def my_tool(x: int) -> int:
            """Double the input."""
            return x * 2

        registry.register_tool(
            name="doubler",
            category=ToolCategory.ENVIRONMENT,
            description="Doubles a number",
            func=my_tool,
            enabled_by_default=False,
            priority=10,
        )

        config = registry.get_tool("doubler")
        assert config is not None
        assert config.name == "doubler"
        assert config.category == ToolCategory.ENVIRONMENT
        assert config.enabled_by_default is False
        assert config.priority == 10

    def test_get_tool_not_found(self, registry: ToolRegistry) -> None:
        """Test get_tool returns None for unknown tool."""
        assert registry.get_tool("nonexistent") is None

    def test_get_tools_by_category(self, registry: ToolRegistry, sample_tool: Tool) -> None:
        """Test filtering tools by category."""
        # Register tools in different categories
        registry.register(
            ToolConfig(
                name="file1",
                category=ToolCategory.FILES,
                description="File tool 1",
                tool=sample_tool,
            )
        )
        registry.register(
            ToolConfig(
                name="file2",
                category=ToolCategory.FILES,
                description="File tool 2",
                tool=sample_tool,
            )
        )
        registry.register(
            ToolConfig(
                name="docker1",
                category=ToolCategory.DOCKER,
                description="Docker tool",
                tool=sample_tool,
            )
        )

        file_tools = registry.get_tools_by_category(ToolCategory.FILES)
        assert len(file_tools) == 2
        assert all(t.category == ToolCategory.FILES for t in file_tools)

        docker_tools = registry.get_tools_by_category(ToolCategory.DOCKER)
        assert len(docker_tools) == 1

    def test_get_all_tools(self, registry: ToolRegistry, sample_tool: Tool) -> None:
        """Test getting all registered tools across categories."""
        registry.register(
            ToolConfig(
                name="tool1",
                category=ToolCategory.FILES,
                description="Tool 1",
                tool=sample_tool,
            )
        )
        registry.register(
            ToolConfig(
                name="tool2",
                category=ToolCategory.GIT,
                description="Tool 2",
                tool=sample_tool,
            )
        )

        all_tools = registry.get_all_tools()
        assert len(all_tools) == 2

    def test_priority_ordering(self, registry: ToolRegistry, sample_tool: Tool) -> None:
        """Test tools are ordered by priority within category.

        Lower priority values sort first regardless of registration order.
        """
        registry.register(
            ToolConfig(
                name="low_priority",
                category=ToolCategory.FILES,
                description="Low priority",
                tool=sample_tool,
                priority=200,
            )
        )
        registry.register(
            ToolConfig(
                name="high_priority",
                category=ToolCategory.FILES,
                description="High priority",
                tool=sample_tool,
                priority=10,
            )
        )

        file_tools = registry.get_tools_by_category(ToolCategory.FILES)
        assert file_tools[0].name == "high_priority"
        assert file_tools[1].name == "low_priority"
+
+
+class TestTenantConfiguration:
+ """Tests for per-tenant tool configuration."""
+
    def test_set_and_get_tenant_config(self, registry: ToolRegistry) -> None:
        """A stored tenant config is returned unchanged on lookup."""
        tenant_id = uuid4()
        config = TenantToolConfig(
            enabled_tools={"tool1"},
            disabled_tools={"tool2"},
        )

        registry.set_tenant_config(tenant_id, config)
        retrieved = registry.get_tenant_config(tenant_id)

        assert retrieved == config
+
+ def test_get_tenant_config_default(self, registry: ToolRegistry) -> None:
+ """Test default config for unknown tenant."""
+ unknown_tenant = uuid4()
+ config = registry.get_tenant_config(unknown_tenant)
+
+ assert config.enabled_tools == set()
+ assert config.disabled_tools == set()
+
+ def test_is_tool_enabled_default(
+ self, registry: ToolRegistry, sample_config: ToolConfig
+ ) -> None:
+ """Test is_tool_enabled with default settings."""
+ registry.register(sample_config)
+
+ # No tenant - uses default
+ assert registry.is_tool_enabled("sample_tool") is True
+
+ # Unknown tenant - uses default
+ assert registry.is_tool_enabled("sample_tool", uuid4()) is True
+
+ def test_is_tool_enabled_disabled_by_default(
+ self, registry: ToolRegistry, sample_tool: Tool
+ ) -> None:
+ """Test tool disabled by default."""
+ registry.register(
+ ToolConfig(
+ name="disabled_tool",
+ category=ToolCategory.FILES,
+ description="Disabled by default",
+ tool=sample_tool,
+ enabled_by_default=False,
+ )
+ )
+
+ assert registry.is_tool_enabled("disabled_tool") is False
+
+ def test_is_tool_enabled_tenant_override(
+ self, registry: ToolRegistry, sample_config: ToolConfig
+ ) -> None:
+ """Test tenant can override default enabled state."""
+ registry.register(sample_config)
+ tenant_id = uuid4()
+
+ # Disable for this tenant
+ registry.set_tenant_config(
+ tenant_id,
+ TenantToolConfig(disabled_tools={"sample_tool"}),
+ )
+
+ assert registry.is_tool_enabled("sample_tool", tenant_id) is False
+ # Other tenants still have it enabled
+ assert registry.is_tool_enabled("sample_tool", uuid4()) is True
+
+ def test_enable_tool_for_tenant(self, registry: ToolRegistry, sample_tool: Tool) -> None:
+ """Test enabling a disabled-by-default tool for tenant."""
+ registry.register(
+ ToolConfig(
+ name="premium_tool",
+ category=ToolCategory.LOGS,
+ description="Premium feature",
+ tool=sample_tool,
+ enabled_by_default=False,
+ )
+ )
+ tenant_id = uuid4()
+
+ # Initially disabled
+ assert registry.is_tool_enabled("premium_tool", tenant_id) is False
+
+ # Enable for tenant
+ registry.enable_tool(tenant_id, "premium_tool")
+ assert registry.is_tool_enabled("premium_tool", tenant_id) is True
+
+ def test_disable_tool_for_tenant(
+ self, registry: ToolRegistry, sample_config: ToolConfig
+ ) -> None:
+ """Test disabling an enabled tool for tenant."""
+ registry.register(sample_config)
+ tenant_id = uuid4()
+
+ # Initially enabled
+ assert registry.is_tool_enabled("sample_tool", tenant_id) is True
+
+ # Disable for tenant
+ registry.disable_tool(tenant_id, "sample_tool")
+ assert registry.is_tool_enabled("sample_tool", tenant_id) is False
+
+ def test_enable_removes_from_disabled(
+ self, registry: ToolRegistry, sample_config: ToolConfig
+ ) -> None:
+ """Test that enabling removes from disabled set."""
+ registry.register(sample_config)
+ tenant_id = uuid4()
+
+ registry.disable_tool(tenant_id, "sample_tool")
+ config = registry.get_tenant_config(tenant_id)
+ assert "sample_tool" in config.disabled_tools
+
+ registry.enable_tool(tenant_id, "sample_tool")
+ config = registry.get_tenant_config(tenant_id)
+ assert "sample_tool" not in config.disabled_tools
+ assert "sample_tool" in config.enabled_tools
+
+
+class TestGetEnabledTools:
+ """Tests for get_enabled_tools method."""
+
+ def test_returns_pydantic_tools(
+ self, registry: ToolRegistry, sample_config: ToolConfig
+ ) -> None:
+ """Test that get_enabled_tools returns Tool instances."""
+ registry.register(sample_config)
+ tools = registry.get_enabled_tools()
+
+ assert len(tools) == 1
+ assert isinstance(tools[0], Tool)
+
+ def test_filters_disabled_tools(self, registry: ToolRegistry, sample_tool: Tool) -> None:
+ """Test that disabled tools are filtered out."""
+ registry.register(
+ ToolConfig(
+ name="enabled",
+ category=ToolCategory.FILES,
+ description="Enabled",
+ tool=sample_tool,
+ )
+ )
+ registry.register(
+ ToolConfig(
+ name="disabled",
+ category=ToolCategory.FILES,
+ description="Disabled",
+ tool=sample_tool,
+ enabled_by_default=False,
+ )
+ )
+
+ tools = registry.get_enabled_tools()
+ assert len(tools) == 1
+
+ def test_filters_by_category(self, registry: ToolRegistry, sample_tool: Tool) -> None:
+ """Test filtering by categories."""
+ registry.register(
+ ToolConfig(
+ name="file_tool",
+ category=ToolCategory.FILES,
+ description="File",
+ tool=sample_tool,
+ )
+ )
+ registry.register(
+ ToolConfig(
+ name="docker_tool",
+ category=ToolCategory.DOCKER,
+ description="Docker",
+ tool=sample_tool,
+ )
+ )
+
+ file_tools = registry.get_enabled_tools(categories=[ToolCategory.FILES])
+ assert len(file_tools) == 1
+
+ both = registry.get_enabled_tools(categories=[ToolCategory.FILES, ToolCategory.DOCKER])
+ assert len(both) == 2
+
+ def test_respects_tenant_config(self, registry: ToolRegistry, sample_tool: Tool) -> None:
+ """Test tenant config affects enabled tools."""
+ registry.register(
+ ToolConfig(
+ name="tool1",
+ category=ToolCategory.FILES,
+ description="Tool 1",
+ tool=sample_tool,
+ )
+ )
+ registry.register(
+ ToolConfig(
+ name="tool2",
+ category=ToolCategory.FILES,
+ description="Tool 2",
+ tool=sample_tool,
+ )
+ )
+
+ tenant_id = uuid4()
+ registry.disable_tool(tenant_id, "tool1")
+
+ # Without tenant - both enabled
+ assert len(registry.get_enabled_tools()) == 2
+
+ # With tenant - only tool2 enabled
+ assert len(registry.get_enabled_tools(tenant_id)) == 1
+
+
+class TestSingleton:
+ """Tests for the singleton registry."""
+
+ def test_get_default_registry_returns_same_instance(self) -> None:
+ """Test singleton returns same instance."""
+ reset_registry()
+ reg1 = get_default_registry()
+ reg2 = get_default_registry()
+ assert reg1 is reg2
+
+ def test_reset_registry_clears_instance(self) -> None:
+ """Test reset creates new instance."""
+ reg1 = get_default_registry()
+ reset_registry()
+ reg2 = get_default_registry()
+ assert reg1 is not reg2
diff --git a/python-packages/dataing/tests/unit/core/parsing/__init__.py b/python-packages/dataing/tests/unit/core/parsing/__init__.py
new file mode 100644
index 000000000..98efcb933
--- /dev/null
+++ b/python-packages/dataing/tests/unit/core/parsing/__init__.py
@@ -0,0 +1 @@
+"""Tests for core/parsing package."""
diff --git a/python-packages/dataing/tests/unit/core/parsing/test_data_parser.py b/python-packages/dataing/tests/unit/core/parsing/test_data_parser.py
new file mode 100644
index 000000000..408581d7c
--- /dev/null
+++ b/python-packages/dataing/tests/unit/core/parsing/test_data_parser.py
@@ -0,0 +1,155 @@
+"""Tests for data_parser module."""
+
+from __future__ import annotations
+
+from pathlib import Path
+
+import pytest
+
+from dataing.core.parsing.data_parser import DataParser, SampleResult
+
+
+@pytest.fixture
+def data_parser() -> DataParser:
+ """Create a data parser instance."""
+ return DataParser()
+
+
+@pytest.fixture
+def sample_csv_file(tmp_path: Path) -> Path:
+ """Create a sample CSV file."""
+ content = """id,name,value,active
+1,Alice,100,true
+2,Bob,200,false
+3,Carol,300,true
+4,Dave,400,false
+5,Eve,500,true
+"""
+ file_path = tmp_path / "data.csv"
+ file_path.write_text(content)
+ return file_path
+
+
+@pytest.fixture
+def sample_tsv_file(tmp_path: Path) -> Path:
+ """Create a sample TSV file."""
+ content = "id\tname\tvalue\n1\tAlice\t100\n2\tBob\t200\n"
+ file_path = tmp_path / "data.tsv"
+ file_path.write_text(content)
+ return file_path
+
+
+class TestSampleResult:
+ """Tests for SampleResult dataclass."""
+
+ def test_result_creation(self) -> None:
+ """Test creating a sample result."""
+ result = SampleResult(
+ columns=["id", "name"],
+ rows=[{"id": "1", "name": "test"}],
+ total_rows=100,
+ file_size=1000,
+ format="csv",
+ )
+
+ assert len(result.columns) == 2
+ assert len(result.rows) == 1
+ assert result.total_rows == 100
+ assert not result.truncated
+
+
+class TestDataParser:
+ """Tests for DataParser class."""
+
+ def test_sample_csv(self, data_parser: DataParser, sample_csv_file: Path) -> None:
+ """Test sampling a CSV file."""
+ result = data_parser.sample_file(sample_csv_file)
+
+ assert isinstance(result, SampleResult)
+ assert result.format == "csv"
+ assert "id" in result.columns
+ assert "name" in result.columns
+ assert len(result.rows) == 5
+ assert result.rows[0]["name"] == "Alice"
+
+ def test_sample_csv_with_n_rows(self, data_parser: DataParser, sample_csv_file: Path) -> None:
+ """Test sampling a specific number of rows."""
+ result = data_parser.sample_file(sample_csv_file, n_rows=2)
+
+ assert len(result.rows) == 2
+ assert result.truncated
+
+ def test_sample_csv_specific_columns(
+ self, data_parser: DataParser, sample_csv_file: Path
+ ) -> None:
+ """Test sampling specific columns."""
+ result = data_parser.sample_file(sample_csv_file, columns=["id", "name"])
+
+ assert result.columns == ["id", "name"]
+ assert "value" not in result.rows[0]
+
+ def test_sample_tsv(self, data_parser: DataParser, sample_tsv_file: Path) -> None:
+ """Test sampling a TSV file."""
+ result = data_parser.sample_file(sample_tsv_file)
+
+ assert result.format == "csv" # TSV is a variant of CSV
+ assert len(result.rows) == 2
+ assert result.rows[0]["name"] == "Alice"
+
+ def test_get_schema(self, data_parser: DataParser, sample_csv_file: Path) -> None:
+ """Test getting schema from CSV."""
+ schema = data_parser.get_schema(sample_csv_file)
+
+ assert "id" in schema
+ assert "name" in schema
+ # Schema inference should detect types
+ assert schema["id"] == "integer"
+ assert schema["name"] == "string"
+ assert schema["active"] == "boolean"
+
+ def test_count_rows(self, data_parser: DataParser, sample_csv_file: Path) -> None:
+ """Test counting rows."""
+ count = data_parser.count_rows(sample_csv_file)
+ assert count == 5
+
+ def test_format_as_markdown(self, data_parser: DataParser, sample_csv_file: Path) -> None:
+ """Test formatting as markdown table."""
+ result = data_parser.sample_file(sample_csv_file, n_rows=2)
+ markdown = data_parser.format_sample_as_markdown(result)
+
+ assert "| id |" in markdown
+ assert "| name |" in markdown
+ assert "Alice" in markdown
+ assert "---" in markdown # Table separator
+
+ def test_file_not_found(self, data_parser: DataParser) -> None:
+ """Test handling of missing file."""
+ with pytest.raises(FileNotFoundError):
+ data_parser.sample_file("/nonexistent/file.csv")
+
+ def test_unsupported_format(self, data_parser: DataParser, tmp_path: Path) -> None:
+ """Test handling of unsupported format."""
+ file_path = tmp_path / "data.xlsx"
+ file_path.write_text("not excel")
+
+ with pytest.raises(ValueError, match="Unsupported"):
+ data_parser.sample_file(file_path)
+
+ def test_file_size_limit(self, data_parser: DataParser, tmp_path: Path) -> None:
+ """Test file size limit."""
+ parser = DataParser(max_file_size=100)
+ file_path = tmp_path / "large.csv"
+ file_path.write_text("a,b,c\n" + "1,2,3\n" * 100)
+
+ with pytest.raises(ValueError, match="exceeds size limit"):
+ parser.sample_file(file_path)
+
+ def test_empty_csv(self, data_parser: DataParser, tmp_path: Path) -> None:
+ """Test handling of empty CSV."""
+ file_path = tmp_path / "empty.csv"
+ file_path.write_text("id,name,value\n")
+
+ result = data_parser.sample_file(file_path)
+
+ assert len(result.rows) == 0
+ assert result.total_rows == 0
diff --git a/python-packages/dataing/tests/unit/core/parsing/test_json_parser.py b/python-packages/dataing/tests/unit/core/parsing/test_json_parser.py
new file mode 100644
index 000000000..e7f9b518e
--- /dev/null
+++ b/python-packages/dataing/tests/unit/core/parsing/test_json_parser.py
@@ -0,0 +1,133 @@
+"""Tests for json_parser module."""
+
+from __future__ import annotations
+
+import json
+from pathlib import Path
+
+import pytest
+
+from dataing.core.parsing.json_parser import JsonParser
+
+
+@pytest.fixture
+def json_parser() -> JsonParser:
+ """Create a JSON parser instance."""
+ return JsonParser()
+
+
+class TestJsonParser:
+ """Tests for JsonParser class."""
+
+ def test_parse_simple_dict(self, json_parser: JsonParser, tmp_path: Path) -> None:
+ """Test parsing a simple dict."""
+ file_path = tmp_path / "config.json"
+ file_path.write_text('{"key": "value", "number": 42}')
+
+ result = json_parser.parse_file(file_path)
+
+ assert result == {"key": "value", "number": 42}
+
+ def test_parse_nested_structure(self, json_parser: JsonParser, tmp_path: Path) -> None:
+ """Test parsing nested structures."""
+ data = {
+ "database": {
+ "host": "localhost",
+ "port": 5432,
+ "credentials": {"username": "admin", "password": "secret"},
+ }
+ }
+ file_path = tmp_path / "nested.json"
+ file_path.write_text(json.dumps(data))
+
+ result = json_parser.parse_file(file_path)
+
+ assert result["database"]["host"] == "localhost"
+ assert result["database"]["port"] == 5432
+ assert result["database"]["credentials"]["username"] == "admin"
+
+ def test_parse_array(self, json_parser: JsonParser, tmp_path: Path) -> None:
+ """Test parsing an array."""
+ data = [{"name": "item1", "value": 1}, {"name": "item2", "value": 2}]
+ file_path = tmp_path / "array.json"
+ file_path.write_text(json.dumps(data))
+
+ result = json_parser.parse_file(file_path)
+
+ assert len(result) == 2
+ assert result[0]["name"] == "item1"
+
+ def test_parse_string(self, json_parser: JsonParser) -> None:
+ """Test parsing a JSON string directly."""
+ result = json_parser.parse_string('{"key": "value", "list": ["a", "b"]}')
+
+ assert result["key"] == "value"
+ assert result["list"] == ["a", "b"]
+
+ def test_format_summary_simple(self, json_parser: JsonParser) -> None:
+ """Test format_summary with simple data."""
+ data = {"key": "value", "number": 42}
+ summary = json_parser.format_summary(data)
+
+ assert '"key"' in summary
+ assert '"value"' in summary
+
+ def test_format_summary_truncates_arrays(self, json_parser: JsonParser) -> None:
+ """Test format_summary truncates long arrays."""
+ data = {"items": list(range(20))}
+ summary = json_parser.format_summary(data, max_array_items=3)
+
+ assert "0" in summary
+ assert "1" in summary
+ assert "2" in summary
+ assert "more items" in summary
+
+ def test_format_summary_max_depth(self, json_parser: JsonParser) -> None:
+ """Test format_summary respects max_depth."""
+ data = {"l1": {"l2": {"l3": {"l4": "deep"}}}}
+ summary = json_parser.format_summary(data, max_depth=2)
+
+ assert "l1" in summary
+ assert "l2" in summary
+ assert "..." in summary
+
+ def test_get_schema_summary(self, json_parser: JsonParser) -> None:
+ """Test schema inference."""
+ data = {
+ "name": "test",
+ "count": 42,
+ "active": True,
+ "items": [1, 2, 3],
+ "nested": {"key": "value"},
+ }
+ schema = json_parser.get_schema_summary(data)
+
+ assert schema["type"] == "object"
+ assert "properties" in schema
+ assert schema["properties"]["name"]["type"] == "string"
+ assert schema["properties"]["count"]["type"] == "integer"
+ assert schema["properties"]["active"]["type"] == "boolean"
+ assert schema["properties"]["items"]["type"] == "array"
+ assert schema["properties"]["nested"]["type"] == "object"
+
+ def test_file_not_found(self, json_parser: JsonParser) -> None:
+ """Test handling of missing file."""
+ with pytest.raises(FileNotFoundError):
+ json_parser.parse_file("/nonexistent/file.json")
+
+ def test_file_size_limit(self, json_parser: JsonParser, tmp_path: Path) -> None:
+ """Test file size limit."""
+ parser = JsonParser(max_file_size=100)
+ file_path = tmp_path / "large.json"
+ file_path.write_text('{"key": "' + "x" * 200 + '"}')
+
+ with pytest.raises(ValueError, match="exceeds size limit"):
+ parser.parse_file(file_path)
+
+ def test_invalid_json(self, json_parser: JsonParser, tmp_path: Path) -> None:
+ """Test handling of invalid JSON."""
+ file_path = tmp_path / "invalid.json"
+ file_path.write_text('{"key": invalid}')
+
+ with pytest.raises(json.JSONDecodeError):
+ json_parser.parse_file(file_path)
diff --git a/python-packages/dataing/tests/unit/core/parsing/test_log_parser.py b/python-packages/dataing/tests/unit/core/parsing/test_log_parser.py
new file mode 100644
index 000000000..58e27b9bf
--- /dev/null
+++ b/python-packages/dataing/tests/unit/core/parsing/test_log_parser.py
@@ -0,0 +1,184 @@
+"""Tests for log_parser module."""
+
+from __future__ import annotations
+
+from datetime import datetime
+from pathlib import Path
+
+import pytest
+
+from dataing.core.parsing.log_parser import LogEntry, LogLevel, LogParser
+
+
+@pytest.fixture
+def log_parser() -> LogParser:
+ """Create a log parser instance."""
+ return LogParser()
+
+
+@pytest.fixture
+def sample_log_file(tmp_path: Path) -> Path:
+ """Create a sample log file."""
+ content = """2024-01-15 10:30:45 INFO Starting application
+2024-01-15 10:30:46 DEBUG Loading configuration
+2024-01-15 10:30:47 WARNING Config file not found, using defaults
+2024-01-15 10:30:48 ERROR Failed to connect to database
+2024-01-15 10:30:49 INFO Application started successfully
+"""
+ file_path = tmp_path / "app.log"
+ file_path.write_text(content)
+ return file_path
+
+
+class TestLogLevel:
+ """Tests for LogLevel enum."""
+
+ def test_levels_exist(self) -> None:
+ """Test all expected levels exist."""
+ assert LogLevel.DEBUG.value == "debug"
+ assert LogLevel.INFO.value == "info"
+ assert LogLevel.WARNING.value == "warning"
+ assert LogLevel.ERROR.value == "error"
+ assert LogLevel.CRITICAL.value == "critical"
+ assert LogLevel.UNKNOWN.value == "unknown"
+
+
+class TestLogEntry:
+ """Tests for LogEntry dataclass."""
+
+ def test_entry_creation(self) -> None:
+ """Test creating a log entry."""
+ entry = LogEntry(
+ timestamp=datetime(2024, 1, 15, 10, 30, 45),
+ level=LogLevel.INFO,
+ message="Test message",
+ source="test_module",
+ line_number=1,
+ raw="2024-01-15 10:30:45 INFO test_module: Test message",
+ )
+
+ assert entry.timestamp.year == 2024
+ assert entry.level == LogLevel.INFO
+ assert entry.message == "Test message"
+ assert entry.source == "test_module"
+
+
+class TestLogParser:
+ """Tests for LogParser class."""
+
+ def test_parse_file(self, log_parser: LogParser, sample_log_file: Path) -> None:
+ """Test parsing a log file."""
+ entries = log_parser.parse_file(sample_log_file)
+
+ assert len(entries) == 5
+ assert entries[0].level == LogLevel.INFO
+ assert entries[2].level == LogLevel.WARNING
+ assert entries[3].level == LogLevel.ERROR
+
+ def test_parse_with_max_entries(self, log_parser: LogParser, sample_log_file: Path) -> None:
+ """Test parsing with entry limit."""
+ entries = log_parser.parse_file(sample_log_file, max_entries=2)
+
+ assert len(entries) == 2
+
+ def test_parse_with_level_filter(self, log_parser: LogParser, sample_log_file: Path) -> None:
+ """Test parsing with level filter."""
+ entries = log_parser.parse_file(sample_log_file, level_filter=LogLevel.WARNING)
+
+ # Level filter keeps WARNING and above; this sample yields one WARNING and one ERROR
+ assert len(entries) == 2
+ assert all(
+ e.level in (LogLevel.WARNING, LogLevel.ERROR, LogLevel.CRITICAL) for e in entries
+ )
+
+ def test_parse_iso_timestamp(self, log_parser: LogParser, tmp_path: Path) -> None:
+ """Test parsing ISO 8601 timestamps."""
+ file_path = tmp_path / "iso.log"
+ file_path.write_text("2024-01-15T10:30:45.123Z INFO Message")
+
+ entries = log_parser.parse_file(file_path)
+
+ assert len(entries) == 1
+ assert entries[0].timestamp is not None
+ assert entries[0].timestamp.year == 2024
+
+ def test_parse_json_log(self, log_parser: LogParser, tmp_path: Path) -> None:
+ """Test parsing JSON-formatted log lines."""
+ content = '{"timestamp": "2024-01-15T10:30:45", "level": "INFO", "message": "Test"}\n'
+ file_path = tmp_path / "json.log"
+ file_path.write_text(content)
+
+ entries = log_parser.parse_file(file_path)
+
+ assert len(entries) == 1
+ assert entries[0].level == LogLevel.INFO
+ assert entries[0].message == "Test"
+
+ def test_find_errors(self, log_parser: LogParser, sample_log_file: Path) -> None:
+ """Test finding errors with context."""
+ errors = log_parser.find_errors(sample_log_file, context_lines=1)
+
+ assert len(errors) == 1
+ assert errors[0]["entry"].level == LogLevel.ERROR
+ assert len(errors[0]["context_before"]) == 1
+ assert len(errors[0]["context_after"]) == 1
+
+ def test_get_summary(self, log_parser: LogParser, sample_log_file: Path) -> None:
+ """Test log file summary."""
+ summary = log_parser.get_summary(sample_log_file)
+
+ assert summary["total_lines"] == 5
+ assert summary["level_counts"]["info"] == 2
+ assert summary["level_counts"]["debug"] == 1
+ assert summary["level_counts"]["warning"] == 1
+ assert summary["level_counts"]["error"] == 1
+ assert len(summary["sample_errors"]) == 1
+
+ def test_parse_lines_directly(self, log_parser: LogParser) -> None:
+ """Test parsing lines directly, without a file."""
+ lines = [
+ "2024-01-15 10:30:45 INFO First message",
+ "2024-01-15 10:30:46 ERROR Second message",
+ ]
+
+ entries = log_parser.parse_lines(lines)
+
+ assert len(entries) == 2
+ assert entries[0].level == LogLevel.INFO
+ assert entries[1].level == LogLevel.ERROR
+
+ def test_file_not_found(self, log_parser: LogParser) -> None:
+ """Test handling of missing file."""
+ with pytest.raises(FileNotFoundError):
+ log_parser.parse_file("/nonexistent/file.log")
+
+ def test_file_size_limit(self, log_parser: LogParser, tmp_path: Path) -> None:
+ """Test file size limit."""
+ parser = LogParser(max_file_size=100)
+ file_path = tmp_path / "large.log"
+ file_path.write_text("x" * 200)
+
+ with pytest.raises(ValueError, match="exceeds size limit"):
+ parser.parse_file(file_path)
+
+ def test_unknown_level(self, log_parser: LogParser) -> None:
+ """Test handling of unknown log level."""
+ entries = log_parser.parse_lines(["Some message without level"])
+
+ assert len(entries) == 1
+ assert entries[0].level == LogLevel.UNKNOWN
+
+ def test_multiline_support(self, log_parser: LogParser, tmp_path: Path) -> None:
+ """Test that each line is parsed independently."""
+ content = """2024-01-15 10:30:45 ERROR Exception occurred
+java.lang.NullPointerException
+ at com.example.Main.run(Main.java:42)
+2024-01-15 10:30:46 INFO Continuing execution
+"""
+ file_path = tmp_path / "multi.log"
+ file_path.write_text(content)
+
+ entries = log_parser.parse_file(file_path)
+
+ # Each line is a separate entry
+ assert len(entries) == 4
diff --git a/python-packages/dataing/tests/unit/core/parsing/test_text_parser.py b/python-packages/dataing/tests/unit/core/parsing/test_text_parser.py
new file mode 100644
index 000000000..683522f29
--- /dev/null
+++ b/python-packages/dataing/tests/unit/core/parsing/test_text_parser.py
@@ -0,0 +1,126 @@
+"""Tests for text_parser module."""
+
+from __future__ import annotations
+
+from pathlib import Path
+
+import pytest
+
+from dataing.core.parsing.text_parser import TextChunk, TextParser
+
+
+@pytest.fixture
+def text_parser() -> TextParser:
+ """Create a text parser instance."""
+ return TextParser()
+
+
+@pytest.fixture
+def sample_file(tmp_path: Path) -> Path:
+ """Create a sample text file."""
+ content = "\n".join([f"Line {i}" for i in range(1, 101)])
+ file_path = tmp_path / "sample.txt"
+ file_path.write_text(content)
+ return file_path
+
+
+class TestTextParser:
+ """Tests for TextParser class."""
+
+ def test_read_entire_file(self, text_parser: TextParser, sample_file: Path) -> None:
+ """Test reading an entire file."""
+ result = text_parser.read_file(sample_file)
+
+ assert isinstance(result, TextChunk)
+ assert result.total_lines == 100
+ assert result.start_line == 1
+ assert result.end_line == 100
+ assert "Line 1" in result.content
+ assert "Line 100" in result.content
+ assert not result.truncated
+
+ def test_read_line_range(self, text_parser: TextParser, sample_file: Path) -> None:
+ """Test reading a specific line range."""
+ result = text_parser.read_file(sample_file, start_line=10, end_line=20)
+
+ assert result.start_line == 10
+ assert result.end_line == 20
+ assert "Line 10" in result.content
+ assert "Line 20" in result.content
+ assert "Line 9" not in result.content
+ assert "Line 21" not in result.content
+
+ def test_read_with_max_lines(self, text_parser: TextParser, sample_file: Path) -> None:
+ """Test reading with max_lines limit."""
+ result = text_parser.read_file(sample_file, start_line=1, max_lines=5)
+
+ assert result.start_line == 1
+ assert result.end_line == 5
+ lines = result.content.split("\n")
+ assert len(lines) == 5
+
+ def test_count_lines(self, text_parser: TextParser, sample_file: Path) -> None:
+ """Test counting lines in a file."""
+ count = text_parser.count_lines(sample_file)
+ assert count == 100
+
+ def test_search_lines(self, text_parser: TextParser, sample_file: Path) -> None:
+ """Test searching for lines."""
+ results = text_parser.search_lines(sample_file, "Line 5")
+
+ # Matches "Line 5" plus "Line 50" through "Line 59" (11 matches total)
+ assert len(results) == 11
+ assert (5, "Line 5") in results
+ assert (50, "Line 50") in results
+
+ def test_search_case_insensitive(self, text_parser: TextParser, tmp_path: Path) -> None:
+ """Test case-insensitive search."""
+ file_path = tmp_path / "mixed_case.txt"
+ file_path.write_text("Hello World\nhello world\nHELLO WORLD")
+
+ results = text_parser.search_lines(file_path, "hello", case_sensitive=False)
+ assert len(results) == 3
+
+ results_sensitive = text_parser.search_lines(file_path, "Hello", case_sensitive=True)
+ assert len(results_sensitive) == 1
+
+ def test_search_max_results(self, text_parser: TextParser, sample_file: Path) -> None:
+ """Test search with max results limit."""
+ results = text_parser.search_lines(sample_file, "Line", max_results=5)
+ assert len(results) == 5
+
+ def test_long_line_truncation(self, text_parser: TextParser, tmp_path: Path) -> None:
+ """Test that long lines are truncated."""
+ parser = TextParser(max_line_length=50)
+ file_path = tmp_path / "long_lines.txt"
+ file_path.write_text("a" * 100 + "\nshort line")
+
+ result = parser.read_file(file_path)
+ lines = result.content.split("\n")
+
+ assert len(lines[0]) == 53 # 50 chars + "..."
+ assert lines[0].endswith("...")
+ assert result.truncated
+
+ def test_file_not_found(self, text_parser: TextParser) -> None:
+ """Test handling of missing file."""
+ with pytest.raises(FileNotFoundError):
+ text_parser.read_file("/nonexistent/file.txt")
+
+ def test_file_size_limit(self, text_parser: TextParser, tmp_path: Path) -> None:
+ """Test file size limit."""
+ parser = TextParser(max_file_size=100)
+ file_path = tmp_path / "large.txt"
+ file_path.write_text("x" * 200)
+
+ with pytest.raises(ValueError, match="exceeds size limit"):
+ parser.read_file(file_path)
+
+ def test_encoding_fallback(self, text_parser: TextParser, tmp_path: Path) -> None:
+ """Test encoding fallback for non-UTF-8 files."""
+ file_path = tmp_path / "latin1.txt"
+ # Write bytes that are valid Latin-1 but not UTF-8
+ file_path.write_bytes(b"Caf\xe9")
+
+ result = text_parser.read_file(file_path)
+ assert "Caf" in result.content
diff --git a/python-packages/dataing/tests/unit/core/parsing/test_yaml_parser.py b/python-packages/dataing/tests/unit/core/parsing/test_yaml_parser.py
new file mode 100644
index 000000000..324477c35
--- /dev/null
+++ b/python-packages/dataing/tests/unit/core/parsing/test_yaml_parser.py
@@ -0,0 +1,137 @@
+"""Tests for yaml_parser module."""
+
+from __future__ import annotations
+
+from pathlib import Path
+
+import pytest
+
+from dataing.core.parsing.yaml_parser import YamlParser
+
+
+@pytest.fixture
+def yaml_parser() -> YamlParser:
+ """Create a YAML parser instance."""
+ return YamlParser()
+
+
+class TestYamlParser:
+ """Tests for YamlParser class."""
+
+ def test_parse_simple_dict(self, yaml_parser: YamlParser, tmp_path: Path) -> None:
+ """Test parsing a simple dict."""
+ file_path = tmp_path / "config.yaml"
+ file_path.write_text("key: value\nnumber: 42")
+
+ result = yaml_parser.parse_file(file_path)
+
+ assert result == {"key": "value", "number": 42}
+
+ def test_parse_nested_structure(self, yaml_parser: YamlParser, tmp_path: Path) -> None:
+ """Test parsing nested structures."""
+ content = """
+database:
+ host: localhost
+ port: 5432
+ credentials:
+ username: admin
+ password: secret
+"""
+ file_path = tmp_path / "nested.yaml"
+ file_path.write_text(content)
+
+ result = yaml_parser.parse_file(file_path)
+
+ assert result["database"]["host"] == "localhost"
+ assert result["database"]["port"] == 5432
+ assert result["database"]["credentials"]["username"] == "admin"
+
+ def test_parse_list(self, yaml_parser: YamlParser, tmp_path: Path) -> None:
+ """Test parsing a list."""
+ content = """
+items:
+ - name: item1
+ value: 1
+ - name: item2
+ value: 2
+"""
+ file_path = tmp_path / "list.yaml"
+ file_path.write_text(content)
+
+ result = yaml_parser.parse_file(file_path)
+
+ assert len(result["items"]) == 2
+ assert result["items"][0]["name"] == "item1"
+
+ def test_parse_string(self, yaml_parser: YamlParser) -> None:
+ """Test parsing a YAML string directly."""
+ result = yaml_parser.parse_string("key: value\nlist:\n - a\n - b")
+
+ assert result["key"] == "value"
+ assert result["list"] == ["a", "b"]
+
+ def test_parse_multi_document(self, yaml_parser: YamlParser, tmp_path: Path) -> None:
+ """Test parsing multi-document YAML."""
+ content = """---
+doc: 1
+---
+doc: 2
+---
+doc: 3
+"""
+ file_path = tmp_path / "multi.yaml"
+ file_path.write_text(content)
+
+ result = yaml_parser.parse_file_all(file_path)
+
+ assert len(result) == 3
+ assert result[0]["doc"] == 1
+ assert result[2]["doc"] == 3
+
+ def test_format_summary_simple(self, yaml_parser: YamlParser) -> None:
+ """Test format_summary with simple data."""
+ data = {"key": "value", "number": 42}
+ summary = yaml_parser.format_summary(data)
+
+ assert "key" in summary
+ assert "'value'" in summary
+
+ def test_format_summary_nested(self, yaml_parser: YamlParser) -> None:
+ """Test format_summary with nested data."""
+ data = {
+ "level1": {
+ "level2": {
+ "level3": {"level4": "deep"},
+ },
+ },
+ }
+ summary = yaml_parser.format_summary(data, max_depth=2)
+
+ assert "level1" in summary
+ assert "level2" in summary
+ # level3 should be truncated
+ assert "..." in summary
+
+ def test_file_not_found(self, yaml_parser: YamlParser) -> None:
+ """Test handling of missing file."""
+ with pytest.raises(FileNotFoundError):
+ yaml_parser.parse_file("/nonexistent/file.yaml")
+
+ def test_file_size_limit(self, yaml_parser: YamlParser, tmp_path: Path) -> None:
+ """Test file size limit."""
+ parser = YamlParser(max_file_size=100)
+ file_path = tmp_path / "large.yaml"
+ file_path.write_text("key: " + "x" * 200)
+
+ with pytest.raises(ValueError, match="exceeds size limit"):
+ parser.parse_file(file_path)
+
+ def test_invalid_yaml(self, yaml_parser: YamlParser, tmp_path: Path) -> None:
+ """Test handling of invalid YAML."""
+ import yaml
+
+ file_path = tmp_path / "invalid.yaml"
+ file_path.write_text("key: [invalid\nbroken: yaml")
+
+ with pytest.raises(yaml.YAMLError):
+ yaml_parser.parse_file(file_path)
diff --git a/python-packages/dataing/tests/unit/entrypoints/api/routes/test_assistant.py b/python-packages/dataing/tests/unit/entrypoints/api/routes/test_assistant.py
new file mode 100644
index 000000000..d9bded55e
--- /dev/null
+++ b/python-packages/dataing/tests/unit/entrypoints/api/routes/test_assistant.py
@@ -0,0 +1,437 @@
+"""Unit tests for assistant API routes."""
+
+from __future__ import annotations
+
+from datetime import UTC, datetime
+from unittest.mock import AsyncMock, MagicMock, patch
+from uuid import UUID
+
+import pytest
+
+from dataing.entrypoints.api.routes.assistant import (
+ CreateSessionRequest,
+ CreateSessionResponse,
+ ExportFormat,
+ ListSessionsResponse,
+ MessageResponse,
+ MessageRole,
+ SendMessageRequest,
+ SendMessageResponse,
+ SessionDetailResponse,
+ SessionSummary,
+ router,
+)
+
+
class TestPydanticModels:
    """Tests for the Pydantic request/response models."""

    def test_create_session_request_minimal(self) -> None:
        """A bare CreateSessionRequest defaults every optional field."""
        request = CreateSessionRequest()

        assert request.parent_investigation_id is None
        assert request.title is None
        assert request.metadata == {}

    def test_create_session_request_full(self) -> None:
        """All CreateSessionRequest fields round-trip when supplied."""
        parent = UUID("12345678-1234-5678-1234-567812345678")

        request = CreateSessionRequest(
            parent_investigation_id=parent,
            title="Debug Session",
            metadata={"key": "value"},
        )

        assert request.parent_investigation_id == parent
        assert request.title == "Debug Session"
        assert request.metadata == {"key": "value"}

    def test_create_session_response(self) -> None:
        """CreateSessionResponse stores the ids and timestamp as given."""
        sid = UUID("11111111-1111-1111-1111-111111111111")
        inv_id = UUID("22222222-2222-2222-2222-222222222222")
        now = datetime.now(UTC)

        response = CreateSessionResponse(
            session_id=sid,
            investigation_id=inv_id,
            created_at=now,
        )

        assert response.session_id == sid
        assert response.investigation_id == inv_id
        assert response.created_at == now

    def test_session_summary(self) -> None:
        """SessionSummary exposes identity fields and counts unchanged."""
        sid = UUID("11111111-1111-1111-1111-111111111111")
        now = datetime.now(UTC)

        summary = SessionSummary(
            id=sid,
            title="Test Session",
            created_at=now,
            last_activity=now,
            message_count=5,
            token_count=1000,
        )

        assert summary.id == sid
        assert summary.title == "Test Session"
        assert summary.message_count == 5
        assert summary.token_count == 1000

    def test_list_sessions_response(self) -> None:
        """An empty session list round-trips through the response model."""
        assert ListSessionsResponse(sessions=[]).sessions == []

    def test_message_response(self) -> None:
        """MessageResponse defaults tool_calls to None."""
        mid = UUID("33333333-3333-3333-3333-333333333333")
        now = datetime.now(UTC)

        message = MessageResponse(
            id=mid,
            role=MessageRole.USER,
            content="Hello!",
            created_at=now,
            token_count=10,
        )

        assert message.id == mid
        assert message.role == MessageRole.USER
        assert message.content == "Hello!"
        assert message.tool_calls is None
        assert message.token_count == 10

    def test_message_response_with_tool_calls(self) -> None:
        """tool_calls are stored verbatim when provided."""
        mid = UUID("33333333-3333-3333-3333-333333333333")
        now = datetime.now(UTC)
        calls = [{"name": "read_file", "arguments": {"path": "/test"}}]

        message = MessageResponse(
            id=mid,
            role=MessageRole.ASSISTANT,
            content="Let me read that file.",
            tool_calls=calls,
            created_at=now,
        )

        assert message.tool_calls == calls

    def test_session_detail_response(self) -> None:
        """SessionDetailResponse defaults parent_investigation_id to None."""
        sid = UUID("11111111-1111-1111-1111-111111111111")
        inv_id = UUID("22222222-2222-2222-2222-222222222222")
        now = datetime.now(UTC)

        detail = SessionDetailResponse(
            id=sid,
            investigation_id=inv_id,
            title="Test",
            created_at=now,
            last_activity=now,
            token_count=0,
            messages=[],
        )

        assert detail.id == sid
        assert detail.investigation_id == inv_id
        assert detail.messages == []
        assert detail.parent_investigation_id is None

    def test_send_message_request(self) -> None:
        """Message content is stored as given."""
        request = SendMessageRequest(content="Hello, assistant!")

        assert request.content == "Hello, assistant!"

    def test_send_message_request_validation(self) -> None:
        """Empty content is rejected.

        pydantic's ValidationError subclasses ValueError, so this catch works
        for both plain-Python and pydantic validation.
        """
        with pytest.raises(ValueError):
            SendMessageRequest(content="")

    def test_send_message_response(self) -> None:
        """SendMessageResponse keeps message id and status."""
        mid = UUID("33333333-3333-3333-3333-333333333333")

        response = SendMessageResponse(message_id=mid, status="processing")

        assert response.message_id == mid
        assert response.status == "processing"
+
+
class TestMessageRole:
    """Tests for MessageRole enum."""

    def test_all_roles(self) -> None:
        """Every role maps to its lowercase wire value."""
        expected = {
            MessageRole.USER: "user",
            MessageRole.ASSISTANT: "assistant",
            MessageRole.SYSTEM: "system",
            MessageRole.TOOL: "tool",
        }
        for role, wire_value in expected.items():
            assert role.value == wire_value
+
+
class TestExportFormat:
    """Tests for ExportFormat enum."""

    def test_all_formats(self) -> None:
        """Both export formats expose their lowercase value."""
        expected = {
            ExportFormat.JSON: "json",
            ExportFormat.MARKDOWN: "markdown",
        }
        for export_format, value in expected.items():
            assert export_format.value == value
+
+
class TestRouterRegistration:
    """Tests for router configuration."""

    def test_router_prefix(self) -> None:
        """The assistant router is mounted under /assistant."""
        assert router.prefix == "/assistant"

    def test_router_tags(self) -> None:
        """The router carries the tag used for OpenAPI grouping."""
        assert "assistant" in router.tags
+
+
class TestExportSessionHelper:
    """Tests for export functionality.

    NOTE(review): this test constructs the "expected" Markdown locally and
    then asserts on that same local string; it never invokes the route's
    actual export helper, so it does not exercise production code. Consider
    calling the real exporter once its interface is test-accessible.
    """

    def test_markdown_format_structure(self) -> None:
        """Test Markdown export has correct structure."""
        session = SessionDetailResponse(
            id=UUID("11111111-1111-1111-1111-111111111111"),
            investigation_id=UUID("22222222-2222-2222-2222-222222222222"),
            title="Test",
            created_at=datetime.now(UTC),
            last_activity=datetime.now(UTC),
            token_count=0,
            messages=[
                MessageResponse(
                    id=UUID("33333333-3333-3333-3333-333333333333"),
                    role=MessageRole.USER,
                    content="Hello!",
                    created_at=datetime.now(UTC),
                ),
                MessageResponse(
                    id=UUID("44444444-4444-4444-4444-444444444444"),
                    role=MessageRole.ASSISTANT,
                    content="Hi there!",
                    created_at=datetime.now(UTC),
                ),
            ],
        )

        # Document header.
        parts: list[str] = [
            "# Assistant Session",
            "",
            f"**Session ID:** {session.id}",
            f"**Created:** {session.created_at.isoformat()}",
            f"**Messages:** {len(session.messages)}",
            "",
            "---",
            "",
        ]

        # One section per message, each closed with a horizontal rule.
        for message in session.messages:
            parts.extend([f"## {message.role.value.upper()}", "", message.content, ""])
            if message.tool_calls:
                parts.append("**Tool Calls:**")
                parts.extend(
                    f"- `{call.get('name', 'unknown')}`" for call in message.tool_calls
                )
                parts.append("")
            parts.extend(["---", ""])

        markdown = "\n".join(parts)

        # Verify structure.
        for fragment in (
            "# Assistant Session",
            "## USER",
            "## ASSISTANT",
            "Hello!",
            "Hi there!",
        ):
            assert fragment in markdown
+
+
class TestHelperFunctions:
    """Tests for helper functions."""

    @pytest.mark.asyncio
    async def test_create_investigation_for_session(self) -> None:
        """The helper returns the id of the freshly inserted investigation."""
        from dataing.entrypoints.api.routes.assistant import (
            create_investigation_for_session,
        )

        expected_id = UUID("12345678-1234-5678-1234-567812345678")
        db = AsyncMock()
        db.fetch_one.return_value = {"id": expected_id}

        created = await create_investigation_for_session(
            db,
            UUID("aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa"),
            UUID("bbbbbbbb-bbbb-bbbb-bbbb-bbbbbbbbbbbb"),
        )

        assert created == expected_id
        db.fetch_one.assert_called_once()

    @pytest.mark.asyncio
    async def test_create_investigation_for_session_failure(self) -> None:
        """A missing insert result surfaces as RuntimeError."""
        from dataing.entrypoints.api.routes.assistant import (
            create_investigation_for_session,
        )

        db = AsyncMock()
        db.fetch_one.return_value = None

        with pytest.raises(RuntimeError, match="Failed to create investigation"):
            await create_investigation_for_session(
                db,
                UUID("aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa"),
                UUID("bbbbbbbb-bbbb-bbbb-bbbb-bbbbbbbbbbbb"),
            )

    @pytest.mark.asyncio
    async def test_get_assistant(self) -> None:
        """get_assistant wires the api key, tenant, and model into DataingAssistant."""
        from dataing.entrypoints.api.routes.assistant import get_assistant

        auth = MagicMock()
        auth.tenant_id = UUID("aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa")
        db = AsyncMock()

        # Single parenthesized with-statement instead of nesting two patches.
        with (
            patch(
                "dataing.entrypoints.api.routes.assistant.DataingAssistant"
            ) as assistant_cls,
            patch("dataing.entrypoints.api.routes.assistant.settings") as mock_settings,
        ):
            mock_settings.anthropic_api_key = "test-key"
            mock_settings.llm_model = "claude-sonnet-4-20250514"

            await get_assistant(auth, db)

            assistant_cls.assert_called_once_with(
                api_key="test-key",
                tenant_id=auth.tenant_id,
                model="claude-sonnet-4-20250514",
            )
+
+
class TestSSEEventTypes:
    """Tests for SSE event type enum."""

    def test_all_event_types(self) -> None:
        """Each SSE event type maps to its wire name."""
        from dataing.entrypoints.api.routes.assistant import SSEEventType

        expected = {
            SSEEventType.TEXT: "text",
            SSEEventType.TOOL_CALL: "tool_call",
            SSEEventType.TOOL_RESULT: "tool_result",
            SSEEventType.COMPLETE: "complete",
            SSEEventType.ERROR: "error",
            SSEEventType.HEARTBEAT: "heartbeat",
        }
        for event_type, value in expected.items():
            assert event_type.value == value
+
+
class TestParentInvestigationLinking:
    """Tests for parent/child investigation linking."""

    # Shared fixture ids, hoisted out of the individual tests.
    SESSION_ID = UUID("11111111-1111-1111-1111-111111111111")
    TENANT_ID = UUID("aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa")
    PARENT_ID = UUID("22222222-2222-2222-2222-222222222222")

    @staticmethod
    def _loader():
        """Lazily import the private helper under test."""
        from dataing.entrypoints.api.routes.assistant import (
            _load_parent_investigation_context,
        )

        return _load_parent_investigation_context

    @pytest.mark.asyncio
    async def test_load_parent_investigation_context_no_parent(self) -> None:
        """No linked parent investigation yields None."""
        db = AsyncMock()
        db.fetch_one.return_value = {"parent_investigation_id": None}

        result = await self._loader()(db, self.SESSION_ID, self.TENANT_ID)

        assert result is None
        db.fetch_one.assert_called_once()

    @pytest.mark.asyncio
    async def test_load_parent_investigation_context_session_not_found(self) -> None:
        """An unknown session yields None."""
        db = AsyncMock()
        db.fetch_one.return_value = None

        result = await self._loader()(db, self.SESSION_ID, self.TENANT_ID)

        assert result is None

    @pytest.mark.asyncio
    async def test_load_parent_investigation_context_with_parent(self) -> None:
        """A linked parent investigation is summarized into the context dict."""
        db = AsyncMock()
        # First fetch resolves the session, second fetch the parent investigation.
        db.fetch_one.side_effect = [
            {"parent_investigation_id": self.PARENT_ID},
            {
                "id": self.PARENT_ID,
                "dataset_id": "test_dataset",
                "metric_name": "row_count",
                "status": "completed",
                "severity": "high",
                "expected_value": 1000.0,
                "actual_value": 500.0,
                "deviation_pct": -50.0,
                "anomaly_date": "2026-02-01",
                "finding": {"root_cause": "Data missing"},
                "events": [{"type": "step", "name": "analyze"}],
                "metadata": {"source": "test"},
            },
        ]

        result = await self._loader()(db, self.SESSION_ID, self.TENANT_ID)

        assert result is not None
        assert "parent_investigation" in result
        parent = result["parent_investigation"]
        assert parent["id"] == str(self.PARENT_ID)
        assert parent["dataset_id"] == "test_dataset"
        assert parent["metric_name"] == "row_count"
        assert parent["status"] == "completed"
        assert parent["finding"] == {"root_cause": "Data missing"}

    @pytest.mark.asyncio
    async def test_load_parent_investigation_context_parent_not_found(self) -> None:
        """A dangling parent link (investigation row missing) yields None."""
        db = AsyncMock()
        db.fetch_one.side_effect = [
            {"parent_investigation_id": self.PARENT_ID},
            None,  # Investigation not found
        ]

        result = await self._loader()(db, self.SESSION_ID, self.TENANT_ID)

        assert result is None
diff --git a/python-packages/dataing/uv.lock b/python-packages/dataing/uv.lock
index 04b29206a..a620b84ed 100644
--- a/python-packages/dataing/uv.lock
+++ b/python-packages/dataing/uv.lock
@@ -814,7 +814,7 @@ wheels = [
[[package]]
name = "dataing"
-version = "0.0.1"
+version = "1.20.0"
source = { editable = "." }
dependencies = [
{ name = "anthropic" },
@@ -822,6 +822,7 @@ dependencies = [
{ name = "bcrypt" },
{ name = "bond-agent" },
{ name = "cryptography" },
+ { name = "docker" },
{ name = "duckdb" },
{ name = "faker" },
{ name = "fastapi", extra = ["standard"] },
@@ -856,6 +857,9 @@ dev = [
{ name = "ruff" },
{ name = "testcontainers" },
]
+snapshot = [
+ { name = "pyarrow" },
+]
[package.dev-dependencies]
dev = [
@@ -871,6 +875,7 @@ requires-dist = [
{ name = "bcrypt", specifier = ">=5.0.0" },
{ name = "bond-agent", specifier = ">=0.1.2" },
{ name = "cryptography", specifier = ">=41.0.0" },
+ { name = "docker", specifier = ">=7.0.0" },
{ name = "duckdb", specifier = ">=0.9.0" },
{ name = "faker", specifier = ">=40.1.0" },
{ name = "fastapi", extras = ["standard"], specifier = ">=0.109.0" },
@@ -883,6 +888,7 @@ requires-dist = [
{ name = "opentelemetry-instrumentation-fastapi", specifier = ">=0.43b0" },
{ name = "opentelemetry-sdk", specifier = ">=1.22.0" },
{ name = "polars", specifier = ">=1.36.1" },
+ { name = "pyarrow", marker = "extra == 'snapshot'", specifier = ">=15.0.0" },
{ name = "pydantic", extras = ["email"], specifier = ">=2.5.0" },
{ name = "pydantic-ai", specifier = ">=0.0.14" },
{ name = "pyjwt", specifier = ">=2.10.1" },
@@ -901,7 +907,7 @@ requires-dist = [
{ name = "trino", specifier = ">=0.327.0" },
{ name = "uvicorn", extras = ["standard"], specifier = ">=0.27.0" },
]
-provides-extras = ["dev"]
+provides-extras = ["dev", "snapshot"]
[package.metadata.requires-dev]
dev = [
@@ -3347,6 +3353,56 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/51/e4/b8b0a03ece72f47dce2307d36e1c34725b7223d209fc679315ffe6a4e2c3/py_key_value_shared-0.3.0-py3-none-any.whl", hash = "sha256:5b0efba7ebca08bb158b1e93afc2f07d30b8f40c2fc12ce24a4c0d84f42f9298", size = 19560, upload-time = "2025-11-17T16:50:05.954Z" },
]
+[[package]]
+name = "pyarrow"
+version = "23.0.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/01/33/ffd9c3eb087fa41dd79c3cf20c4c0ae3cdb877c4f8e1107a446006344924/pyarrow-23.0.0.tar.gz", hash = "sha256:180e3150e7edfcd182d3d9afba72f7cf19839a497cc76555a8dce998a8f67615", size = 1167185, upload-time = "2026-01-18T16:19:42.218Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/aa/c0/57fe251102ca834fee0ef69a84ad33cc0ff9d5dfc50f50b466846356ecd7/pyarrow-23.0.0-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:5574d541923efcbfdf1294a2746ae3b8c2498a2dc6cd477882f6f4e7b1ac08d3", size = 34276762, upload-time = "2026-01-18T16:14:34.128Z" },
+ { url = "https://files.pythonhosted.org/packages/f8/4e/24130286548a5bc250cbed0b6bbf289a2775378a6e0e6f086ae8c68fc098/pyarrow-23.0.0-cp311-cp311-macosx_12_0_x86_64.whl", hash = "sha256:2ef0075c2488932e9d3c2eb3482f9459c4be629aa673b725d5e3cf18f777f8e4", size = 35821420, upload-time = "2026-01-18T16:14:40.699Z" },
+ { url = "https://files.pythonhosted.org/packages/ee/55/a869e8529d487aa2e842d6c8865eb1e2c9ec33ce2786eb91104d2c3e3f10/pyarrow-23.0.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:65666fc269669af1ef1c14478c52222a2aa5c907f28b68fb50a203c777e4f60c", size = 44457412, upload-time = "2026-01-18T16:14:49.051Z" },
+ { url = "https://files.pythonhosted.org/packages/36/81/1de4f0edfa9a483bbdf0082a05790bd6a20ed2169ea12a65039753be3a01/pyarrow-23.0.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:4d85cb6177198f3812db4788e394b757223f60d9a9f5ad6634b3e32be1525803", size = 47534285, upload-time = "2026-01-18T16:14:56.748Z" },
+ { url = "https://files.pythonhosted.org/packages/f2/04/464a052d673b5ece074518f27377861662449f3c1fdb39ce740d646fd098/pyarrow-23.0.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:1a9ff6fa4141c24a03a1a434c63c8fa97ce70f8f36bccabc18ebba905ddf0f17", size = 48157913, upload-time = "2026-01-18T16:15:05.114Z" },
+ { url = "https://files.pythonhosted.org/packages/f4/1b/32a4de9856ee6688c670ca2def588382e573cce45241a965af04c2f61687/pyarrow-23.0.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:84839d060a54ae734eb60a756aeacb62885244aaa282f3c968f5972ecc7b1ecc", size = 50582529, upload-time = "2026-01-18T16:15:12.846Z" },
+ { url = "https://files.pythonhosted.org/packages/db/c7/d6581f03e9b9e44ea60b52d1750ee1a7678c484c06f939f45365a45f7eef/pyarrow-23.0.0-cp311-cp311-win_amd64.whl", hash = "sha256:a149a647dbfe928ce8830a713612aa0b16e22c64feac9d1761529778e4d4eaa5", size = 27542646, upload-time = "2026-01-18T16:15:18.89Z" },
+ { url = "https://files.pythonhosted.org/packages/3d/bd/c861d020831ee57609b73ea721a617985ece817684dc82415b0bc3e03ac3/pyarrow-23.0.0-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:5961a9f646c232697c24f54d3419e69b4261ba8a8b66b0ac54a1851faffcbab8", size = 34189116, upload-time = "2026-01-18T16:15:28.054Z" },
+ { url = "https://files.pythonhosted.org/packages/8c/23/7725ad6cdcbaf6346221391e7b3eecd113684c805b0a95f32014e6fa0736/pyarrow-23.0.0-cp312-cp312-macosx_12_0_x86_64.whl", hash = "sha256:632b3e7c3d232f41d64e1a4a043fb82d44f8a349f339a1188c6a0dd9d2d47d8a", size = 35803831, upload-time = "2026-01-18T16:15:33.798Z" },
+ { url = "https://files.pythonhosted.org/packages/57/06/684a421543455cdc2944d6a0c2cc3425b028a4c6b90e34b35580c4899743/pyarrow-23.0.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:76242c846db1411f1d6c2cc3823be6b86b40567ee24493344f8226ba34a81333", size = 44436452, upload-time = "2026-01-18T16:15:41.598Z" },
+ { url = "https://files.pythonhosted.org/packages/c6/6f/8f9eb40c2328d66e8b097777ddcf38494115ff9f1b5bc9754ba46991191e/pyarrow-23.0.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:b73519f8b52ae28127000986bf228fda781e81d3095cd2d3ece76eb5cf760e1b", size = 47557396, upload-time = "2026-01-18T16:15:51.252Z" },
+ { url = "https://files.pythonhosted.org/packages/10/6e/f08075f1472e5159553501fde2cc7bc6700944bdabe49a03f8a035ee6ccd/pyarrow-23.0.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:068701f6823449b1b6469120f399a1239766b117d211c5d2519d4ed5861f75de", size = 48147129, upload-time = "2026-01-18T16:16:00.299Z" },
+ { url = "https://files.pythonhosted.org/packages/7d/82/d5a680cd507deed62d141cc7f07f7944a6766fc51019f7f118e4d8ad0fb8/pyarrow-23.0.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:1801ba947015d10e23bca9dd6ef5d0e9064a81569a89b6e9a63b59224fd060df", size = 50596642, upload-time = "2026-01-18T16:16:08.502Z" },
+ { url = "https://files.pythonhosted.org/packages/a9/26/4f29c61b3dce9fa7780303b86895ec6a0917c9af927101daaaf118fbe462/pyarrow-23.0.0-cp312-cp312-win_amd64.whl", hash = "sha256:52265266201ec25b6839bf6bd4ea918ca6d50f31d13e1cf200b4261cd11dc25c", size = 27660628, upload-time = "2026-01-18T16:16:15.28Z" },
+ { url = "https://files.pythonhosted.org/packages/66/34/564db447d083ec7ff93e0a883a597d2f214e552823bfc178a2d0b1f2c257/pyarrow-23.0.0-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:ad96a597547af7827342ffb3c503c8316e5043bb09b47a84885ce39394c96e00", size = 34184630, upload-time = "2026-01-18T16:16:22.141Z" },
+ { url = "https://files.pythonhosted.org/packages/aa/3a/3999daebcb5e6119690c92a621c4d78eef2ffba7a0a1b56386d2875fcd77/pyarrow-23.0.0-cp313-cp313-macosx_12_0_x86_64.whl", hash = "sha256:b9edf990df77c2901e79608f08c13fbde60202334a4fcadb15c1f57bf7afee43", size = 35796820, upload-time = "2026-01-18T16:16:29.441Z" },
+ { url = "https://files.pythonhosted.org/packages/ec/ee/39195233056c6a8d0976d7d1ac1cd4fe21fb0ec534eca76bc23ef3f60e11/pyarrow-23.0.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:36d1b5bc6ddcaff0083ceec7e2561ed61a51f49cce8be079ee8ed406acb6fdef", size = 44438735, upload-time = "2026-01-18T16:16:38.79Z" },
+ { url = "https://files.pythonhosted.org/packages/2c/41/6a7328ee493527e7afc0c88d105ecca69a3580e29f2faaeac29308369fd7/pyarrow-23.0.0-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:4292b889cd224f403304ddda8b63a36e60f92911f89927ec8d98021845ea21be", size = 47557263, upload-time = "2026-01-18T16:16:46.248Z" },
+ { url = "https://files.pythonhosted.org/packages/c6/ee/34e95b21ee84db494eae60083ddb4383477b31fb1fd19fd866d794881696/pyarrow-23.0.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:dfd9e133e60eaa847fd80530a1b89a052f09f695d0b9c34c235ea6b2e0924cf7", size = 48153529, upload-time = "2026-01-18T16:16:53.412Z" },
+ { url = "https://files.pythonhosted.org/packages/52/88/8a8d83cea30f4563efa1b7bf51d241331ee5cd1b185a7e063f5634eca415/pyarrow-23.0.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:832141cc09fac6aab1cd3719951d23301396968de87080c57c9a7634e0ecd068", size = 50598851, upload-time = "2026-01-18T16:17:01.133Z" },
+ { url = "https://files.pythonhosted.org/packages/c6/4c/2929c4be88723ba025e7b3453047dc67e491c9422965c141d24bab6b5962/pyarrow-23.0.0-cp313-cp313-win_amd64.whl", hash = "sha256:7a7d067c9a88faca655c71bcc30ee2782038d59c802d57950826a07f60d83c4c", size = 27577747, upload-time = "2026-01-18T16:18:02.413Z" },
+ { url = "https://files.pythonhosted.org/packages/64/52/564a61b0b82d72bd68ec3aef1adda1e3eba776f89134b9ebcb5af4b13cb6/pyarrow-23.0.0-cp313-cp313t-macosx_12_0_arm64.whl", hash = "sha256:ce9486e0535a843cf85d990e2ec5820a47918235183a5c7b8b97ed7e92c2d47d", size = 34446038, upload-time = "2026-01-18T16:17:07.861Z" },
+ { url = "https://files.pythonhosted.org/packages/cc/c9/232d4f9855fd1de0067c8a7808a363230d223c83aeee75e0fe6eab851ba9/pyarrow-23.0.0-cp313-cp313t-macosx_12_0_x86_64.whl", hash = "sha256:075c29aeaa685fd1182992a9ed2499c66f084ee54eea47da3eb76e125e06064c", size = 35921142, upload-time = "2026-01-18T16:17:15.401Z" },
+ { url = "https://files.pythonhosted.org/packages/96/f2/60af606a3748367b906bb82d41f0032e059f075444445d47e32a7ff1df62/pyarrow-23.0.0-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:799965a5379589510d888be3094c2296efd186a17ca1cef5b77703d4d5121f53", size = 44490374, upload-time = "2026-01-18T16:17:23.93Z" },
+ { url = "https://files.pythonhosted.org/packages/ff/2d/7731543050a678ea3a413955a2d5d80d2a642f270aa57a3cb7d5a86e3f46/pyarrow-23.0.0-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:ef7cac8fe6fccd8b9e7617bfac785b0371a7fe26af59463074e4882747145d40", size = 47527896, upload-time = "2026-01-18T16:17:33.393Z" },
+ { url = "https://files.pythonhosted.org/packages/5a/90/f3342553b7ac9879413aed46500f1637296f3c8222107523a43a1c08b42a/pyarrow-23.0.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:15a414f710dc927132dd67c361f78c194447479555af57317066ee5116b90e9e", size = 48210401, upload-time = "2026-01-18T16:17:42.012Z" },
+ { url = "https://files.pythonhosted.org/packages/f3/da/9862ade205ecc46c172b6ce5038a74b5151c7401e36255f15975a45878b2/pyarrow-23.0.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:3e0d2e6915eca7d786be6a77bf227fbc06d825a75b5b5fe9bcbef121dec32685", size = 50579677, upload-time = "2026-01-18T16:17:50.241Z" },
+ { url = "https://files.pythonhosted.org/packages/c2/4c/f11f371f5d4740a5dafc2e11c76bcf42d03dfdb2d68696da97de420b6963/pyarrow-23.0.0-cp313-cp313t-win_amd64.whl", hash = "sha256:4b317ea6e800b5704e5e5929acb6e2dc13e9276b708ea97a39eb8b345aa2658b", size = 27631889, upload-time = "2026-01-18T16:17:56.55Z" },
+ { url = "https://files.pythonhosted.org/packages/97/bb/15aec78bcf43a0c004067bd33eb5352836a29a49db8581fc56f2b6ca88b7/pyarrow-23.0.0-cp314-cp314-macosx_12_0_arm64.whl", hash = "sha256:20b187ed9550d233a872074159f765f52f9d92973191cd4b93f293a19efbe377", size = 34213265, upload-time = "2026-01-18T16:18:07.904Z" },
+ { url = "https://files.pythonhosted.org/packages/f6/6c/deb2c594bbba41c37c5d9aa82f510376998352aa69dfcb886cb4b18ad80f/pyarrow-23.0.0-cp314-cp314-macosx_12_0_x86_64.whl", hash = "sha256:18ec84e839b493c3886b9b5e06861962ab4adfaeb79b81c76afbd8d84c7d5fda", size = 35819211, upload-time = "2026-01-18T16:18:13.94Z" },
+ { url = "https://files.pythonhosted.org/packages/e0/e5/ee82af693cb7b5b2b74f6524cdfede0e6ace779d7720ebca24d68b57c36b/pyarrow-23.0.0-cp314-cp314-manylinux_2_28_aarch64.whl", hash = "sha256:e438dd3f33894e34fd02b26bd12a32d30d006f5852315f611aa4add6c7fab4bc", size = 44502313, upload-time = "2026-01-18T16:18:20.367Z" },
+ { url = "https://files.pythonhosted.org/packages/9c/86/95c61ad82236495f3c31987e85135926ba3ec7f3819296b70a68d8066b49/pyarrow-23.0.0-cp314-cp314-manylinux_2_28_x86_64.whl", hash = "sha256:a244279f240c81f135631be91146d7fa0e9e840e1dfed2aba8483eba25cd98e6", size = 47585886, upload-time = "2026-01-18T16:18:27.544Z" },
+ { url = "https://files.pythonhosted.org/packages/bb/6e/a72d901f305201802f016d015de1e05def7706fff68a1dedefef5dc7eff7/pyarrow-23.0.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:c4692e83e42438dba512a570c6eaa42be2f8b6c0f492aea27dec54bdc495103a", size = 48207055, upload-time = "2026-01-18T16:18:35.425Z" },
+ { url = "https://files.pythonhosted.org/packages/f9/e5/5de029c537630ca18828db45c30e2a78da03675a70ac6c3528203c416fe3/pyarrow-23.0.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:ae7f30f898dfe44ea69654a35c93e8da4cef6606dc4c72394068fd95f8e9f54a", size = 50619812, upload-time = "2026-01-18T16:18:43.553Z" },
+ { url = "https://files.pythonhosted.org/packages/59/8d/2af846cd2412e67a087f5bda4a8e23dfd4ebd570f777db2e8686615dafc1/pyarrow-23.0.0-cp314-cp314-win_amd64.whl", hash = "sha256:5b86bb649e4112fb0614294b7d0a175c7513738876b89655605ebb87c804f861", size = 28263851, upload-time = "2026-01-18T16:19:38.567Z" },
+ { url = "https://files.pythonhosted.org/packages/7b/7f/caab863e587041156f6786c52e64151b7386742c8c27140f637176e9230e/pyarrow-23.0.0-cp314-cp314t-macosx_12_0_arm64.whl", hash = "sha256:ebc017d765d71d80a3f8584ca0566b53e40464586585ac64176115baa0ada7d3", size = 34463240, upload-time = "2026-01-18T16:18:49.755Z" },
+ { url = "https://files.pythonhosted.org/packages/c9/fa/3a5b8c86c958e83622b40865e11af0857c48ec763c11d472c87cd518283d/pyarrow-23.0.0-cp314-cp314t-macosx_12_0_x86_64.whl", hash = "sha256:0800cc58a6d17d159df823f87ad66cefebf105b982493d4bad03ee7fab84b993", size = 35935712, upload-time = "2026-01-18T16:18:55.626Z" },
+ { url = "https://files.pythonhosted.org/packages/c5/08/17a62078fc1a53decb34a9aa79cf9009efc74d63d2422e5ade9fed2f99e3/pyarrow-23.0.0-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:3a7c68c722da9bb5b0f8c10e3eae71d9825a4b429b40b32709df5d1fa55beb3d", size = 44503523, upload-time = "2026-01-18T16:19:03.958Z" },
+ { url = "https://files.pythonhosted.org/packages/cc/70/84d45c74341e798aae0323d33b7c39194e23b1abc439ceaf60a68a7a969a/pyarrow-23.0.0-cp314-cp314t-manylinux_2_28_x86_64.whl", hash = "sha256:bd5556c24622df90551063ea41f559b714aa63ca953db884cfb958559087a14e", size = 47542490, upload-time = "2026-01-18T16:19:11.208Z" },
+ { url = "https://files.pythonhosted.org/packages/61/d9/d1274b0e6f19e235de17441e53224f4716574b2ca837022d55702f24d71d/pyarrow-23.0.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:54810f6e6afc4ffee7c2e0051b61722fbea9a4961b46192dcfae8ea12fa09059", size = 48233605, upload-time = "2026-01-18T16:19:19.544Z" },
+ { url = "https://files.pythonhosted.org/packages/39/07/e4e2d568cb57543d84482f61e510732820cddb0f47c4bb7df629abfed852/pyarrow-23.0.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:14de7d48052cf4b0ed174533eafa3cfe0711b8076ad70bede32cf59f744f0d7c", size = 50603979, upload-time = "2026-01-18T16:19:26.717Z" },
+ { url = "https://files.pythonhosted.org/packages/72/9c/47693463894b610f8439b2e970b82ef81e9599c757bf2049365e40ff963c/pyarrow-23.0.0-cp314-cp314t-win_amd64.whl", hash = "sha256:427deac1f535830a744a4f04a6ac183a64fcac4341b3f618e693c41b7b98d2b0", size = 28338905, upload-time = "2026-01-18T16:19:32.93Z" },
+]
+
[[package]]
name = "pyasn1"
version = "0.6.1"
diff --git a/scripts/concat_files.py b/scripts/concat_files.py
index 28e295523..03bf1bb8f 100755
--- a/scripts/concat_files.py
+++ b/scripts/concat_files.py
@@ -16,11 +16,11 @@
SEARCH_PREFIXES = [
"python-packages",
- "frontend/jupyterlab-dataing/src",
+ # "frontend/jupyterlab-dataing/src",
# "frontend",
# "bond",
# "maistro",
- # "docs/feedback",
+ # "docs",
]
INCLUDE_ROOT_FILES = [
@@ -73,6 +73,8 @@
"site",
"output",
"tests",
+ "test-quickstart.sh",
+ "./docs/plans"
}
ENCODING = "utf-8"
diff --git a/uv.lock b/uv.lock
index dbeb7c286..b975236d2 100644
--- a/uv.lock
+++ b/uv.lock
@@ -959,7 +959,7 @@ wheels = [
[[package]]
name = "dataing"
-version = "1.17.0"
+version = "1.20.0"
source = { editable = "." }
dependencies = [
{ name = "anthropic" },
@@ -1089,7 +1089,7 @@ dev = [
[[package]]
name = "dataing-cli"
-version = "1.17.0"
+version = "1.20.0"
source = { editable = "python-packages/dataing-cli" }
dependencies = [
{ name = "dataing-sdk" },
@@ -1119,7 +1119,7 @@ provides-extras = ["dev"]
[[package]]
name = "dataing-notebook"
-version = "1.17.0"
+version = "1.20.0"
source = { editable = "python-packages/dataing-notebook" }
dependencies = [
{ name = "dataing-sdk" },
@@ -1156,7 +1156,7 @@ provides-extras = ["dev", "rich", "server", "keyring", "graph"]
[[package]]
name = "dataing-sdk"
-version = "1.17.0"
+version = "1.20.0"
source = { editable = "python-packages/dataing-sdk" }
dependencies = [
{ name = "httpx" },