diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 000000000..fba4757f7 --- /dev/null +++ b/.dockerignore @@ -0,0 +1,62 @@ +# Git +.git +.gitignore + +# Python virtual environments (including nested) +**/.venv +**/venv +**/*.egg-info +**/dist +**/build + +# Python cache (including nested) +**/__pycache__ +**/*.pyc +**/*.pyo +**/.pytest_cache +**/.mypy_cache +**/.ruff_cache +**/.coverage +**/htmlcov + +# Node (including nested) +**/node_modules +**/.npm +**/.pnpm-store + +# IDE +.idea +.vscode +*.swp +*.swo + +# Local config +.env +.env.* +!.env.example + +# Flow tracking +.flow + +# Demo fixtures (large) +demo/fixtures + +# Documentation build +site +docs/_build + +# Test artifacts +.hypothesis + +# OS files +.DS_Store +Thumbs.db + +# Logs +*.log +logs + +# Temporary +tmp +temp +*.tmp diff --git a/.flow/epics/fn-56.json b/.flow/epics/fn-56.json new file mode 100644 index 000000000..d38ed890f --- /dev/null +++ b/.flow/epics/fn-56.json @@ -0,0 +1,13 @@ +{ + "branch_name": "fn-56", + "created_at": "2026-02-02T22:00:58.309446Z", + "depends_on_epics": [], + "id": "fn-56", + "next_task": 1, + "plan_review_status": "unknown", + "plan_reviewed_at": null, + "spec_path": ".flow/specs/fn-56.md", + "status": "open", + "title": "Self-Debugging Chat Widget (Dogfooding)", + "updated_at": "2026-02-02T22:43:29.882512Z" +} diff --git a/.flow/specs/fn-56.md b/.flow/specs/fn-56.md new file mode 100644 index 000000000..6f8252ca1 --- /dev/null +++ b/.flow/specs/fn-56.md @@ -0,0 +1,278 @@ +# Dataing Assistant (fn-56) + +A unified AI assistant for Dataing that handles infrastructure debugging, data questions, and investigation support. + +## Overview + +**Problem**: Users need help with various Dataing tasks - debugging infrastructure issues, understanding data quality problems, querying connected datasources, and getting context on investigations. Currently they must use external tools or ask for human help. 
+ +**Solution**: Persistent chat widget ("Dataing Assistant") that provides a unified AI assistant with access to: +- Local files, configs, and git history +- Docker container status and logs +- Connected datasources (reusing existing query tools) +- Investigation context and findings +- User's recent activity for contextual suggestions + +## Key Decisions (from interview) + +### Agent Configuration +- **LLM Model**: Claude Sonnet (fast, cost-effective) +- **Response time target**: First token under 3 seconds +- **Agent focus**: Balanced - explain root cause AND provide fix steps with code snippets +- **Out-of-scope handling**: Polite decline, redirect to docs +- **Tone**: Match existing Dataing UI voice + +### Tools & Capabilities (Priority Order) + +1. **File Access** + - Read any UTF-8 text file in allowlisted directories + - Smart chunking: request specific line ranges + - Grep-like search across files (max 100 results) + - Include logs, data samples (CSV/parquet first N rows) + - Centralized parsers in `core/parsing/` organized by file type + +2. **Git Access** + - Full read access via githunter tools + - blame_line, find_pr_discussion, get_file_experts + - Recent commits, branches, diffs + +3. **Docker Access** + - Container status via Docker API + - Log reading via pluggable LogProvider interface + - Auth: Configurable per deployment (socket, TCP+TLS, env auto-detect) + +4. **Log Providers** (pluggable interface) + - LocalFileLogProvider + - DockerLogProvider + - CloudWatchLogProvider (IAM role auth) + +5. **Datasource Access** + - Reuse existing query tools from investigation agents + - Full read access to connected datasources + - Unified tool registry for all capabilities + +6. 
**Environment Access** + - Read non-sensitive env vars (filter `*SECRET*`, `*KEY*`, `*PASSWORD*`, `*TOKEN*`) + - Compare current config with .env.example defaults + +### Security + +- **Path canonicalization** before allowlist check (prevent traversal) +- **Blocked patterns**: `.env`, `*.pem`, `*.key`, `*secret*`, `*credential*` +- **Security-blocked errors**: Suggest alternatives ("Can't read .env, but can check .env.example") +- **Security findings**: Alert immediately if exposed secrets discovered +- **Audit log**: Full log of every file read, search, and tool call +- **Tool indicators**: Show detailed progress ("Reading docker-compose.yml...") + +### Data Model + +**Debug chats are investigations** with parent/child relationships: +- Each chat session gets its own `investigation_id` +- Can be linked to existing investigations as parent OR child +- Child chats have full access to parent investigation context +- DebugChatSession model with FK to Investigation when linked + +**Storage**: Hybrid Redis/Postgres +- Recent sessions in Redis for fast access +- Old sessions archived to Postgres +- Retention: Configurable per tenant + +**Schema migration**: Add to existing migrations (013_dataing_assistant.sql) + +### User Experience + +- **Visibility**: All authenticated users (no restriction) +- **Widget position**: Fixed bottom-20 right-4 (above DemoToggle) +- **Panel width**: Resizable, remembers size per-user preference +- **Keyboard shortcut**: None for MVP +- **Markdown**: Full rendering (headers, lists, code blocks, links, tables) + +**Chat behavior**: +- Smart placeholder text with example questions +- Permanent history with session list (new sessions start fresh, can reopen old) +- Minimize to button (badge shows unread), preserves state +- Collapsible sections for long responses +- Copy code button always visible on code blocks +- Edit and resubmit previous messages + +**Streaming & errors**: +- Queue messages if user sends while a response is streaming +- Auto-retry 3x on 
errors before showing error +- Offline: Retry with exponential backoff + "Reconnecting..." indicator + +### Concurrency & Limits + +- **Message queueing**: Complete current response, then process next +- **Context limit**: Token-based, summarize when approaching model limit +- **Rate limiting**: Admin-set token budget per tenant +- **Limit exceeded**: Soft block with override for urgent issues +- **Usage display**: Always visible ("X of Y tokens used this month") + +### Context & Memory + +- **User context**: Full access to recent investigations, alerts, queries +- **Memory integration**: User confirms "This was helpful" to save to agent memory (fn-55) +- **Multi-tenancy**: Tenant isolation - each tenant gets isolated agent instance + +### Export + +- **Formats**: Both JSON and Markdown export +- **Sharing**: No sharing for MVP (export and send manually) + +### Testing & Telemetry + +- **Testing**: Unit tests with mocked LLM +- **Dry run**: No special mode, use real APIs in test environment +- **Telemetry**: Full integration with existing Dataing telemetry +- **Metrics**: Defer to later (analyze datasets first) +- **Analytics**: No query tracking (privacy-first) + +## Architecture + +### Backend Components + +``` +dataing/ + agents/ + assistant.py # DataingAssistant (was SelfDebugAgent) + tools/ + registry.py # Unified tool registry + local_files.py # File reading with safety + docker.py # Docker API access + log_providers/ + __init__.py # LogProvider protocol + local.py # LocalFileLogProvider + docker.py # DockerLogProvider + cloudwatch.py # CloudWatchLogProvider + core/ + parsing/ # Centralized file parsers + yaml_parser.py + json_parser.py + text_parser.py + log_parser.py + data_parser.py # CSV, parquet sampling + entrypoints/api/routes/ + assistant.py # API routes (was debug_chat.py) + models/ + assistant.py # DebugChatSession, DebugChatMessage +``` + +### Frontend Components + +``` +features/assistant/ + index.ts + AssistantWidget.tsx # Floating button + 
resizable panel + AssistantPanel.tsx # Chat interface + AssistantMessage.tsx # Message with collapsible sections + useAssistant.ts # State management hook + SessionList.tsx # Previous session selector +``` + +### Database Schema + +```sql +-- 013_dataing_assistant.sql + +CREATE TABLE assistant_sessions ( + id UUID PRIMARY KEY DEFAULT gen_random_uuid(), + investigation_id UUID NOT NULL, -- Each session IS an investigation + tenant_id UUID NOT NULL, + user_id UUID NOT NULL, + parent_investigation_id UUID REFERENCES investigations(id), + is_parent BOOLEAN DEFAULT false, + created_at TIMESTAMPTZ DEFAULT NOW(), + last_activity TIMESTAMPTZ DEFAULT NOW(), + token_count INTEGER DEFAULT 0, + metadata JSONB +); + +CREATE TABLE assistant_messages ( + id UUID PRIMARY KEY DEFAULT gen_random_uuid(), + session_id UUID REFERENCES assistant_sessions(id), + role TEXT NOT NULL, -- 'user', 'assistant', 'system', 'tool' + content TEXT NOT NULL, + tool_calls JSONB, -- For tool execution tracking + created_at TIMESTAMPTZ DEFAULT NOW(), + token_count INTEGER +); + +CREATE TABLE assistant_audit_log ( + id UUID PRIMARY KEY DEFAULT gen_random_uuid(), + session_id UUID REFERENCES assistant_sessions(id), + action TEXT NOT NULL, -- 'file_read', 'search', 'query', 'docker_status' + target TEXT NOT NULL, -- File path, query, etc. 
+ result_summary TEXT, + created_at TIMESTAMPTZ DEFAULT NOW() +); + +CREATE INDEX idx_assistant_sessions_tenant ON assistant_sessions(tenant_id); +CREATE INDEX idx_assistant_sessions_user ON assistant_sessions(user_id); +CREATE INDEX idx_assistant_messages_session ON assistant_messages(session_id); +``` + +## Quick Commands + +```bash +# Run backend +just dev-backend + +# Run frontend +just dev-frontend + +# Run tests +uv run pytest python-packages/dataing/tests/unit/agents/test_assistant.py -v + +# Generate OpenAPI client +just generate-client + +# Run migrations +just migrate +``` + +## Acceptance Criteria + +- [ ] Assistant widget visible on all authenticated pages +- [ ] Resizable panel that remembers size per-user +- [ ] Full markdown rendering with syntax-highlighted code blocks +- [ ] Copy code button on all code blocks +- [ ] Agent streams response in real-time with tool progress indicators +- [ ] Can read files from allowlisted directories with smart chunking +- [ ] Can search across files (grep-like) with result limits +- [ ] Can access git history via githunter tools +- [ ] Can check Docker container status via API +- [ ] Can read logs via pluggable LogProvider interface +- [ ] Can query connected datasources (reuses existing tools) +- [ ] Has full context of user's recent activity +- [ ] Sessions persist permanently with session history browser +- [ ] Parent/child investigation linking works +- [ ] Path traversal attempts rejected with helpful alternatives +- [ ] Security findings alert user immediately +- [ ] Full audit log of tool usage +- [ ] Token-based usage tracking with admin-set budgets +- [ ] Soft block on limit exceeded with override option +- [ ] Auto-retry 3x on errors +- [ ] "This was helpful" saves to agent memory +- [ ] Export to JSON and Markdown works + +## Tasks (Updated) + +1. **Create unified tool registry** - Central registry for all assistant tools +2. **Create centralized file parsers** - core/parsing/ module by file type +3. 
**Create DataingAssistant agent** - Main agent with unified tools +4. **Create log provider interface + implementations** - Pluggable log access +5. **Create Docker status tool** - Container status via Docker API +6. **Create assistant API routes** - Sessions, messages, streaming +7. **Create database migration** - 013_dataing_assistant.sql +8. **Create frontend AssistantWidget** - Resizable floating panel +9. **Create frontend AssistantPanel** - Chat UI with all features +10. **Integrate with existing query tools** - Datasource access +11. **Add investigation linking** - Parent/child relationships +12. **Add memory integration** - "This was helpful" feedback + +## References + +- Existing patterns: `agents/client.py`, `routes/investigations.py` +- Bond-agent tools: `/Users/bordumb/workspace/repositories/bond-agent/src/bond/tools/` +- SSE-starlette: https://pypi.org/project/sse-starlette/ +- shadcn/ui Sheet: https://ui.shadcn.com/docs/components/sheet diff --git a/.flow/tasks/fn-56.1.json b/.flow/tasks/fn-56.1.json new file mode 100644 index 000000000..0f13894e0 --- /dev/null +++ b/.flow/tasks/fn-56.1.json @@ -0,0 +1,28 @@ +{ + "assignee": "bordumbb@gmail.com", + "claim_note": "", + "claimed_at": "2026-02-02T23:38:26.263668Z", + "created_at": "2026-02-02T22:01:48.610812Z", + "depends_on": [ + "fn-56.7", + "fn-56.2", + "fn-56.9", + "fn-56.10" + ], + "epic": "fn-56", + "evidence": { + "files_created": [ + "python-packages/dataing/src/dataing/agents/assistant.py", + "python-packages/dataing/tests/unit/agents/test_assistant.py" + ], + "pre_commit_passed": true, + "tests_failed": 0, + "tests_passed": 22 + }, + "id": "fn-56.1", + "priority": null, + "spec_path": ".flow/tasks/fn-56.1.md", + "status": "done", + "title": "Create DataingAssistant agent (agents/assistant.py)", + "updated_at": "2026-02-02T23:41:27.015915Z" +} diff --git a/.flow/tasks/fn-56.1.md b/.flow/tasks/fn-56.1.md new file mode 100644 index 000000000..bc6f51617 --- /dev/null +++ b/.flow/tasks/fn-56.1.md 
@@ -0,0 +1,108 @@ +# fn-56.1 Create DataingAssistant agent + +## Description +Create the DataingAssistant agent that provides a unified AI assistant for Dataing with access to local files, Docker status, logs, git history, and connected datasources. + +## File to Create +`python-packages/dataing/src/dataing/agents/assistant.py` + +## Implementation + +```python +from bond import BondAgent, StreamHandlers +from bond.tools.github import github_toolset, GitHubAdapter +from bond.tools.githunter import githunter_toolset, GitHunterAdapter + +from dataing.agents.tools.registry import ToolRegistry, get_default_registry +from dataing.agents.tools.local_files import register_local_file_tools +from dataing.agents.tools.docker import register_docker_tools + +class DataingAssistant: + """Unified AI assistant for Dataing platform. + + Provides help with: + - Infrastructure debugging (Docker, logs, config files) + - Data questions via connected datasources + - Investigation context and findings + - Git history and code understanding + """ + + def __init__( + self, + tenant_id: str, + github_token: str | None = None, + repo_path: str = ".", + ): + # Setup tool registry with all available tools + # Create BondAgent with unified toolset + pass + + async def ask( + self, + question: str, + session_id: str | None = None, + handlers: StreamHandlers | None = None, + ) -> str: + """Ask the assistant a question.""" + pass +``` + +## Key Points +- Follow pattern from `agents/client.py:45-127` +- Use unified tool registry (`agents/tools/registry.py`) +- Include all tools: local files, Docker, logs, git +- System prompt should explain Dataing architecture (refer to CLAUDE.md content) +- Support `StreamHandlers` for real-time streaming output +- Gracefully degrade if optional dependencies unavailable +- LLM Model: Claude Sonnet (fast, cost-effective) +- Response time target: First token under 3 seconds + +## Tools to Include +1. Local file access (read, search, list) +2. 
Docker status (containers, health, stats) +3. Log providers (local, Docker, CloudWatch) +4. Git access (github_toolset, githunter_toolset) +5. Datasource queries (reuse from investigation agents) + +## References +- BondAgent pattern: `agents/client.py:45-127` +- Tool registry: `agents/tools/registry.py` +- Spec: `.flow/specs/fn-56.md` + +## Acceptance +- [ ] DataingAssistant class created in `agents/assistant.py` +- [ ] Uses unified tool registry with all tool categories +- [ ] System prompt includes Dataing architecture overview +- [ ] Supports StreamHandlers for real-time output +- [ ] Gracefully handles missing optional dependencies +- [ ] Unit test passes: `uv run pytest python-packages/dataing/tests/unit/agents/test_assistant.py -v` + +## Done summary +## Summary + +Created DataingAssistant agent - the main unified AI assistant for Dataing platform. + +### Features: +- **Multi-tool integration**: Local files, Docker, logs, git (bond-agent) +- **Streaming support**: Full StreamHandlers integration for real-time output +- **Multi-tenancy**: Tenant-isolated instances +- **Graceful degradation**: Works without optional dependencies (github token, etc.) + +### Tools included: +1. **File tools**: read_local_file, search_in_files, list_directory +2. **Docker tools**: list_containers, get_status, get_health, get_stats, find_unhealthy +3. **Log tools**: get_logs, search_logs, get_recent_errors +4. 
**Git tools**: githunter_toolset, github_toolset (from bond-agent) + +### System prompt: +- Explains Dataing platform capabilities +- Guides response format and approach +- Defines tool usage guidelines + +## Files Created +- `agents/assistant.py` - DataingAssistant class with all tools +- `tests/unit/agents/test_assistant.py` - 22 unit tests +## Evidence +- Commits: +- Tests: +- PRs: diff --git a/.flow/tasks/fn-56.10.json b/.flow/tasks/fn-56.10.json new file mode 100644 index 000000000..83f92c894 --- /dev/null +++ b/.flow/tasks/fn-56.10.json @@ -0,0 +1,25 @@ +{ + "assignee": "bordumbb@gmail.com", + "claim_note": "", + "claimed_at": "2026-02-02T23:16:17.741869Z", + "created_at": "2026-02-02T22:43:38.862577Z", + "depends_on": [ + "fn-56.7" + ], + "epic": "fn-56", + "evidence": { + "files_created": [ + "python-packages/dataing/src/dataing/agents/tools/docker.py", + "python-packages/dataing/tests/unit/agents/tools/test_docker.py" + ], + "pre_commit_passed": true, + "tests_failed": 0, + "tests_passed": 27 + }, + "id": "fn-56.10", + "priority": null, + "spec_path": ".flow/tasks/fn-56.10.md", + "status": "done", + "title": "Create Docker status tool", + "updated_at": "2026-02-02T23:22:09.352254Z" +} diff --git a/.flow/tasks/fn-56.10.md b/.flow/tasks/fn-56.10.md new file mode 100644 index 000000000..f40b9eb60 --- /dev/null +++ b/.flow/tasks/fn-56.10.md @@ -0,0 +1,43 @@ +# fn-56.10 Create Docker status tool + +## Description +TBD + +## Acceptance +- [ ] TBD + +## Done summary +## Summary + +Created Docker status tool for the Dataing Assistant with: + +1. **DockerStatusTool class** - Core functionality: + - `list_containers()` - List all containers with status + - `get_container_status()` - Detailed status for a container + - `get_container_health()` - Health check information + - `get_container_stats()` - Resource usage (CPU, memory, network) + - `find_unhealthy_containers()` - Find unhealthy/stopped containers + +2. 
**Agent tool functions** - Human-readable output: + - `list_docker_containers()` - Formatted container list with status indicators + - `get_docker_container_status()` - Detailed container info + - `get_docker_container_health()` - Health check results + - `get_docker_container_stats()` - Resource usage display + - `find_unhealthy_docker_containers()` - Unhealthy container report + +3. **Registry integration**: + - `register_docker_tools()` - Registers all tools with ToolRegistry + +Features: +- Async/await support for non-blocking Docker API calls +- Graceful error handling when Docker is unavailable +- Human-readable output formatting +- Emoji status indicators (🟢/🔴) for quick scanning + +## Files Created +- `agents/tools/docker.py` - Main implementation +- `tests/unit/agents/tools/test_docker.py` - 27 unit tests +## Evidence +- Commits: +- Tests: +- PRs: diff --git a/.flow/tasks/fn-56.11.json b/.flow/tasks/fn-56.11.json new file mode 100644 index 000000000..93730aca3 --- /dev/null +++ b/.flow/tasks/fn-56.11.json @@ -0,0 +1,23 @@ +{ + "assignee": "bordumbb@gmail.com", + "claim_note": "", + "claimed_at": "2026-02-02T23:43:10.542555Z", + "created_at": "2026-02-02T22:43:38.942802Z", + "depends_on": [], + "epic": "fn-56", + "evidence": { + "migration_file": "035_dataing_assistant.sql", + "tables_created": [ + "assistant_sessions", + "assistant_messages", + "assistant_audit_log" + ], + "tests_passed": true + }, + "id": "fn-56.11", + "priority": null, + "spec_path": ".flow/tasks/fn-56.11.md", + "status": "done", + "title": "Create database migration (035_dataing_assistant.sql)", + "updated_at": "2026-02-02T23:44:13.931632Z" +} diff --git a/.flow/tasks/fn-56.11.md b/.flow/tasks/fn-56.11.md new file mode 100644 index 000000000..9fa76ca4c --- /dev/null +++ b/.flow/tasks/fn-56.11.md @@ -0,0 +1,54 @@ +# fn-56.11 Create database migration (035_dataing_assistant.sql) + +## Description +Create database migration for Dataing Assistant tables: sessions, messages, and audit log. 
+ +## File Created +`python-packages/dataing/migrations/035_dataing_assistant.sql` + +## Schema + +### assistant_sessions +- Links each session to its own investigation (investigation_id) +- Supports parent/child investigation linking +- Tracks token usage and last activity +- Stores user preferences in metadata JSONB + +### assistant_messages +- Messages in sessions (user, assistant, system, tool roles) +- Tracks tool calls as JSONB array +- Per-message token count + +### assistant_audit_log +- Audit trail of all tool usage +- Tracks action type, target, and result summary +- Full metadata in JSONB + +## Acceptance +- [x] Migration file created at `migrations/035_dataing_assistant.sql` +- [x] assistant_sessions table with investigation linking +- [x] assistant_messages table with tool_calls JSONB +- [x] assistant_audit_log table for security audit +- [x] Proper indexes for tenant, user, and session queries +- [x] Trigger to auto-update last_activity on new messages +- [x] ON DELETE CASCADE for foreign keys + +## Done summary +## Summary + +Created database migration for Dataing Assistant (035_dataing_assistant.sql). + +### Tables: +1. **assistant_sessions** - Sessions linked to investigations with parent/child support +2. **assistant_messages** - Chat messages with tool call tracking +3. 
**assistant_audit_log** - Security audit log for all tool usage + +### Features: +- Proper foreign keys to investigations, tenants, users +- Indexes for fast tenant/user/session queries +- Auto-update trigger for last_activity +- ON DELETE CASCADE for cleanup +## Evidence +- Commits: +- Tests: +- PRs: diff --git a/.flow/tasks/fn-56.12.json b/.flow/tasks/fn-56.12.json new file mode 100644 index 000000000..1cea8dc70 --- /dev/null +++ b/.flow/tasks/fn-56.12.json @@ -0,0 +1,23 @@ +{ + "assignee": "bordumbb@gmail.com", + "claim_note": "", + "claimed_at": "2026-02-03T00:05:42.793766Z", + "created_at": "2026-02-02T22:43:39.025602Z", + "depends_on": [ + "fn-56.6" + ], + "epic": "fn-56", + "evidence": { + "commit": "6295da89", + "frontend_updated": true, + "new_endpoint_added": true, + "parent_context_loading": true, + "tests_passing": 24 + }, + "id": "fn-56.12", + "priority": null, + "spec_path": ".flow/tasks/fn-56.12.md", + "status": "done", + "title": "Add investigation parent/child linking", + "updated_at": "2026-02-03T00:10:36.386823Z" +} diff --git a/.flow/tasks/fn-56.12.md b/.flow/tasks/fn-56.12.md new file mode 100644 index 000000000..39c804331 --- /dev/null +++ b/.flow/tasks/fn-56.12.md @@ -0,0 +1,56 @@ +# fn-56.12 Add investigation parent/child linking + +## Description +Add parent/child investigation linking so chat sessions can be linked to existing investigations and have full access to parent investigation context. + +## What Was Done + +### 1. Backend: Load parent investigation context +Added `_load_parent_investigation_context()` function in `routes/assistant.py` that: +- Loads session to check for `parent_investigation_id` +- Fetches parent investigation data (findings, events, metadata) +- Returns context dict for the assistant + +Updated `_process_message()` to call this and merge context. + +### 2. 
Backend: Add endpoint to list sessions by investigation +Added `GET /assistant/investigations/{investigation_id}/sessions` endpoint that: +- Lists all assistant sessions linked to an investigation as parent +- Returns same `ListSessionsResponse` format as regular list + +### 3. Frontend: Support parentInvestigationId +Updated `useAssistant.ts`: +- Added `parentInvestigationId` to `AssistantSession` interface +- Added `parentInvestigationId` to `UseAssistantOptions` +- Updated `createSession()` to accept and pass `parentInvestigationId` +- Updated `loadSession()` to load `parent_investigation_id` from API + +### 4. Regenerated OpenAPI client +Ran `just generate-client` to update frontend types. + +### 5. Added unit tests +Added 4 new tests in `test_assistant.py`: +- `test_load_parent_investigation_context_no_parent` +- `test_load_parent_investigation_context_session_not_found` +- `test_load_parent_investigation_context_with_parent` +- `test_load_parent_investigation_context_parent_not_found` + +## Acceptance +- [x] Sessions can be created with parent_investigation_id +- [x] Parent investigation context loaded when processing messages +- [x] Endpoint to list sessions by parent investigation +- [x] Frontend hook supports parentInvestigationId +- [x] OpenAPI client regenerated +- [x] Unit tests added and passing (24 total) + +## Done summary +Added parent/child investigation linking: backend loads parent context for chat, added endpoint to list sessions by investigation, frontend supports parentInvestigationId. 
+ +### Files: +- entrypoints/api/routes/assistant.py - Added context loading + new endpoint +- features/assistant/useAssistant.ts - Added parentInvestigationId support +- tests/unit/entrypoints/api/routes/test_assistant.py - Added 4 tests +## Evidence +- Commits: +- Tests: +- PRs: diff --git a/.flow/tasks/fn-56.13.json b/.flow/tasks/fn-56.13.json new file mode 100644 index 000000000..ca39a001c --- /dev/null +++ b/.flow/tasks/fn-56.13.json @@ -0,0 +1,22 @@ +{ + "assignee": null, + "claim_note": "", + "claimed_at": null, + "created_at": "2026-02-02T22:43:39.106654Z", + "depends_on": [ + "fn-56.12" + ], + "epic": "fn-56", + "evidence": { + "commit": "0c918f6f", + "feedback_event_type_added": true, + "helpful_button_added": true, + "tests_passing": 24 + }, + "id": "fn-56.13", + "priority": null, + "spec_path": ".flow/tasks/fn-56.13.md", + "status": "done", + "title": "Add agent memory integration ('This was helpful')", + "updated_at": "2026-02-03T00:14:53.795942Z" +} diff --git a/.flow/tasks/fn-56.13.md b/.flow/tasks/fn-56.13.md new file mode 100644 index 000000000..f82fbf4fb --- /dev/null +++ b/.flow/tasks/fn-56.13.md @@ -0,0 +1,52 @@ +# fn-56.13 Add agent memory integration ('This was helpful') + +## Description +Add "This was helpful" button to assistant messages to enable agent memory integration. Users can mark helpful responses which are stored via the existing feedback system for future memory/fine-tuning use. + +## What Was Done + +### 1. Backend: Add feedback event type +- Added `FEEDBACK_ASSISTANT_MESSAGE = "feedback.assistant_message"` to EventType enum +- Added "assistant_message" target type to FeedbackCreate model +- Made investigation_id optional (assistant messages don't require one) + +### 2. Frontend: Update feedback API +- Added "assistant_message" to TargetType union +- Made investigation_id optional in FeedbackCreate interface +- Added `useSubmitFeedback()` hook for generic feedback submission + +### 3. 
Frontend: Add "This was helpful" button +- Updated AssistantMessage component to show thumbs up button on assistant messages +- Button only appears when message is not streaming and has content +- Shows loading state during submission +- Shows "Marked as helpful" confirmation after success + +### 4. Frontend: Wire up context +- Updated AssistantPanel to pass sessionInvestigationId to AssistantMessage +- Updated InvestigationFeedbackButtons REASON_OPTIONS with assistant_message entry + +### 5. Regenerated OpenAPI client + +## Acceptance +- [x] "This was helpful" button shows on assistant messages (not user/tool messages) +- [x] Button disabled during message streaming +- [x] Clicking button submits feedback via existing API +- [x] Shows confirmation after successful submission +- [x] Feedback stored in investigation_feedback_events table +- [x] OpenAPI client regenerated +- [x] TypeScript compilation passes +- [x] All tests pass (24) + +## Done summary +Added "This was helpful" button to assistant messages using existing feedback system. Feedback is stored and can be used for agent memory/fine-tuning. 
+ +### Files: +- adapters/investigation_feedback/types.py - Added FEEDBACK_ASSISTANT_MESSAGE +- entrypoints/api/routes/investigation_feedback.py - Added assistant_message target +- features/assistant/AssistantMessage.tsx - Added thumbs up button +- features/assistant/AssistantPanel.tsx - Pass sessionInvestigationId +- lib/api/investigation-feedback.ts - Added target type + generic hook +## Evidence +- Commits: +- Tests: +- PRs: diff --git a/.flow/tasks/fn-56.2.json b/.flow/tasks/fn-56.2.json new file mode 100644 index 000000000..14529fb52 --- /dev/null +++ b/.flow/tasks/fn-56.2.json @@ -0,0 +1,30 @@ +{ + "assignee": "bordumbb@gmail.com", + "claim_note": "", + "claimed_at": "2026-02-02T22:58:49.042925Z", + "created_at": "2026-02-02T22:01:48.692788Z", + "depends_on": [ + "fn-56.7", + "fn-56.8" + ], + "epic": "fn-56", + "evidence": { + "commits": [ + "7775de88" + ], + "files_created": [ + "python-packages/dataing/src/dataing/agents/tools/local_files.py", + "python-packages/dataing/tests/unit/agents/tools/test_local_files.py" + ], + "test_count": 27, + "tests": [ + "tests/unit/agents/tools/test_local_files.py" + ] + }, + "id": "fn-56.2", + "priority": null, + "spec_path": ".flow/tasks/fn-56.2.md", + "status": "done", + "title": "Create local file reader tool with safety", + "updated_at": "2026-02-02T23:02:12.393355Z" +} diff --git a/.flow/tasks/fn-56.2.md b/.flow/tasks/fn-56.2.md new file mode 100644 index 000000000..8068e1839 --- /dev/null +++ b/.flow/tasks/fn-56.2.md @@ -0,0 +1,97 @@ +# fn-56.2 Create local file reader tool with safety + +## Description +Create a safe local file reader tool for the DataingAssistant (formerly SelfDebugAgent) with path allowlist and traversal protection. 
+ +## File to Create +`python-packages/dataing/src/dataing/agents/tools/local_files.py` + +## Implementation + +```python +from pathlib import Path +from pydantic_ai import RunContext +from pydantic_ai.tools import Tool + +ALLOWED_DIRS = [ + "python-packages/", + "frontend/", + "demo/", + "docs/", +] + +ALLOWED_PATTERNS = [ + "docker-compose*.yml", + "*.md", + "justfile", + "pyproject.toml", + "package.json", +] + +BLOCKED_PATTERNS = [ + ".env", + "*.pem", + "*.key", + "*secret*", + "*credential*", + "*password*", +] + +async def read_local_file(ctx: RunContext[Path], file_path: str) -> str: + """Read a file from the Dataing repository. + + Args: + file_path: Path relative to repository root. + + Returns: + File contents (max 100KB) or error message. + """ + # 1. Canonicalize path + # 2. Check against allowlist + # 3. Check against blocklist + # 4. Read and return (truncate if >100KB) + pass + +local_files_toolset = [Tool(read_local_file)] +``` + +## Security Requirements +- MUST canonicalize paths before allowlist check (`Path.resolve()`) +- MUST reject any path that resolves outside the repository root (e.g. via `..` segments) +- MUST reject files matching blocked patterns +- MUST limit file size to 100KB +- MUST NOT follow symlinks outside allowed directories + +## References +- Path validation pattern from practice-scout findings +- SQL validator for pattern matching: `safety/validator.py` +## Acceptance +- [ ] `local_files.py` created with `read_local_file` tool +- [ ] Path canonicalization implemented (prevents `../` traversal) +- [ ] Allowlist enforced for directories +- [ ] Blocklist enforced for sensitive files (.env, keys, secrets) +- [ ] File size limit of 100KB +- [ ] Symlinks outside allowed dirs rejected +- [ ] Returns helpful error messages for blocked paths +- [ ] Unit tests cover traversal attempts +## Done summary +Created local file reader tool with comprehensive safety features: + +**Security Features:** +- Directory allowlist: python-packages/, frontend/, demo/, docs/ +- Root 
file patterns: docker-compose*.yml, *.md, pyproject.toml +- Blocked patterns: .env, *.pem, *.key, *secret*, *credential* +- Path traversal prevention via canonicalization +- Symlink target validation +- File size limit (100KB) + +**Tool Functions:** +- `read_local_file` - Read file with safety checks and line-range support +- `search_in_files` - Search pattern across repository files +- `list_directory` - List files in directory + +27 unit tests covering all security requirements. +## Evidence +- Commits: 7775de88 +- Tests: tests/unit/agents/tools/test_local_files.py +- PRs: diff --git a/.flow/tasks/fn-56.3.json b/.flow/tasks/fn-56.3.json new file mode 100644 index 000000000..3e3b00b42 --- /dev/null +++ b/.flow/tasks/fn-56.3.json @@ -0,0 +1,30 @@ +{ + "assignee": "bordumbb@gmail.com", + "claim_note": "", + "claimed_at": "2026-02-02T23:44:43.250984Z", + "created_at": "2026-02-02T22:01:48.771211Z", + "depends_on": [ + "fn-56.1", + "fn-56.11" + ], + "epic": "fn-56", + "evidence": { + "endpoints": [ + "POST /sessions", + "GET /sessions", + "GET /sessions/{id}", + "POST /sessions/{id}/messages", + "GET /sessions/{id}/stream", + "DELETE /sessions/{id}", + "POST /sessions/{id}/export" + ], + "tests_failed": 0, + "tests_passed": 20 + }, + "id": "fn-56.3", + "priority": null, + "spec_path": ".flow/tasks/fn-56.3.md", + "status": "done", + "title": "Create assistant API routes with SSE streaming (routes/assistant.py)", + "updated_at": "2026-02-02T23:48:17.959030Z" +} diff --git a/.flow/tasks/fn-56.3.md b/.flow/tasks/fn-56.3.md new file mode 100644 index 000000000..c67f366c2 --- /dev/null +++ b/.flow/tasks/fn-56.3.md @@ -0,0 +1,77 @@ +# fn-56.3 Create assistant API routes with SSE streaming + +## Description +Create assistant API routes with SSE streaming support for the Dataing Assistant. + +## File Created +`python-packages/dataing/src/dataing/entrypoints/api/routes/assistant.py` + +## Endpoints + +1. 
`POST /assistant/sessions` - Create new session + - Returns: `{session_id: str, investigation_id: str, created_at: datetime}` + +2. `GET /assistant/sessions` - List user's sessions + - Returns: `{sessions: [{id, created_at, last_activity, message_count}]}` + +3. `GET /assistant/sessions/{session_id}` - Get session details + - Returns: Full session with messages + +4. `POST /assistant/sessions/{session_id}/messages` - Send message + - Body: `{content: str}` + - Returns: `{message_id: str, status: "processing"}` + +5. `GET /assistant/sessions/{session_id}/stream` - SSE stream + - Query: `?last_event_id=N` for resumption + - Events: `text`, `tool_call`, `tool_result`, `complete`, `error` + - Heartbeat: 15 seconds + +6. `DELETE /assistant/sessions/{session_id}` - End session + +7. `POST /assistant/sessions/{session_id}/export` - Export session + - Query: `?format=json|markdown` + +## Implementation Details + +- Uses EventSourceResponse from sse-starlette +- Streaming handlers forward text and tool calls to SSE queue +- Background task processes messages asynchronously +- Heartbeat sent every 15 seconds +- Client disconnect detection via request.is_disconnected() +- Audit logging for all tool calls +- Pydantic models for all request/response schemas + +## Acceptance +- [x] `assistant.py` created with all endpoints +- [x] Sessions linked to investigations (each session IS an investigation) +- [x] SSE streaming works with EventSourceResponse +- [x] Heartbeat sent every 15 seconds +- [x] `X-Accel-Buffering: no` header set +- [x] Client disconnect detection via `request.is_disconnected()` +- [x] Auth required on all endpoints +- [x] Pydantic models for request/response schemas +- [x] Export to JSON and Markdown formats +- [x] Router registered in routes/__init__.py +- [x] Unit tests pass: 20 tests + +## Done summary +## Summary + +Created assistant API routes with full SSE streaming support. + +### Endpoints: +1. POST/GET/DELETE /sessions - Session management +2. 
POST /sessions/{id}/messages - Send messages +3. GET /sessions/{id}/stream - SSE streaming +4. POST /sessions/{id}/export - Export (JSON/Markdown) + +### Features: +- Real-time streaming via EventSourceResponse +- 15-second heartbeat, client disconnect detection +- Audit logging for all tool calls +- Full Pydantic model validation +- 20 unit tests covering models and helpers +## Evidence +- Commits: +- Tests: +- PRs: diff --git a/.flow/tasks/fn-56.4.json b/.flow/tasks/fn-56.4.json new file mode 100644 index 000000000..7e52814c8 --- /dev/null +++ b/.flow/tasks/fn-56.4.json @@ -0,0 +1,21 @@ +{ + "assignee": "bordumbb@gmail.com", + "claim_note": "", + "claimed_at": "2026-02-02T23:48:53.330056Z", + "created_at": "2026-02-02T22:01:48.851384Z", + "depends_on": [ + "fn-56.3" + ], + "epic": "fn-56", + "evidence": { + "completed_in": "fn-56.3", + "dependency_injection": "per-request with tenant isolation", + "routes_registered": true + }, + "id": "fn-56.4", + "priority": null, + "spec_path": ".flow/tasks/fn-56.4.md", + "status": "done", + "title": "Register routes and add dependencies", + "updated_at": "2026-02-02T23:49:23.117059Z" +} diff --git a/.flow/tasks/fn-56.4.md b/.flow/tasks/fn-56.4.md new file mode 100644 index 000000000..7e6d0c31b --- /dev/null +++ b/.flow/tasks/fn-56.4.md @@ -0,0 +1,52 @@ +# fn-56.4 Register routes and add dependencies + +## Description +Register assistant routes and add dependency injection for DataingAssistant. + +## Status +**Completed** - Routes registered in fn-56.3, dependencies implemented in assistant.py. 
+ +## What Was Done + +### Route Registration (`routes/__init__.py`) +```python +from dataing.entrypoints.api.routes.assistant import router as assistant_router + +api_router.include_router(assistant_router) # Dataing Assistant chat API +``` + +### Dependency Injection (`routes/assistant.py`) +The DataingAssistant is created per-request with tenant isolation: +```python +async def get_assistant( + auth: ApiKeyContext, + db: AppDatabase, +) -> DataingAssistant: + return DataingAssistant( + api_key=settings.anthropic_api_key, + tenant_id=auth.tenant_id, + model=settings.llm_model, + ) +``` + +**Note:** A singleton pattern is NOT appropriate for multi-tenancy. Each request creates +an assistant instance scoped to the authenticated tenant. This matches the pattern used +by other services like InvestigationService. + +## Acceptance +- [x] `assistant_router` imported and registered in `__init__.py` +- [x] `get_assistant()` creates per-request assistant with tenant isolation +- [x] Uses ANTHROPIC_API_KEY from settings +- [x] Uses LLM_MODEL from settings +- [x] Tenant-scoped (not singleton) for multi-tenancy + +## Done summary +## Summary + +Completed as part of fn-56.3. Routes registered in __init__.py, and get_assistant() +helper creates per-request assistants with tenant isolation. No singleton needed +for multi-tenant architecture. 
+## Evidence +- Commits: +- Tests: +- PRs: diff --git a/.flow/tasks/fn-56.5.json b/.flow/tasks/fn-56.5.json new file mode 100644 index 000000000..7e003c155 --- /dev/null +++ b/.flow/tasks/fn-56.5.json @@ -0,0 +1,25 @@ +{ + "assignee": "bordumbb@gmail.com", + "claim_note": "", + "claimed_at": "2026-02-02T23:49:52.928183Z", + "created_at": "2026-02-02T22:01:48.935587Z", + "depends_on": [], + "epic": "fn-56", + "evidence": { + "eslint": "pass", + "files_created": [ + "features/assistant/index.ts", + "features/assistant/AssistantWidget.tsx", + "features/assistant/AssistantPanel.tsx", + "features/assistant/AssistantMessage.tsx", + "features/assistant/useAssistant.ts" + ], + "typescript": "pass" + }, + "id": "fn-56.5", + "priority": null, + "spec_path": ".flow/tasks/fn-56.5.md", + "status": "done", + "title": "Create frontend chat widget components", + "updated_at": "2026-02-02T23:54:15.514766Z" +} diff --git a/.flow/tasks/fn-56.5.md b/.flow/tasks/fn-56.5.md new file mode 100644 index 000000000..2237c1070 --- /dev/null +++ b/.flow/tasks/fn-56.5.md @@ -0,0 +1,71 @@ +# fn-56.5 Create frontend chat widget components + +## Description +Create frontend chat widget components: floating button, slide-in panel, message list, and chat hook. + +## Files Created + +### `features/assistant/index.ts` +Exports all components and types. + +### `features/assistant/AssistantWidget.tsx` +Main widget with floating button (bottom-20 right-4 z-50) and Sheet panel. 
+ +### `features/assistant/AssistantPanel.tsx` +Chat interface with: +- Message history (auto-scroll to bottom) +- Streaming message display +- Textarea input with send button (Enter to send, Shift+Enter for newline) +- Quick question suggestions for empty state +- Error banner with "New Chat" option + +### `features/assistant/AssistantMessage.tsx` +Message component with: +- Avatar icons (User/Bot/Tool) +- Role-based styling +- Tool call indicators +- Streaming spinner + +### `features/assistant/useAssistant.ts` +React hook for chat state: +- Session management (create, load, clear) +- Session ID persisted in localStorage +- Message history state +- SSE subscription for streaming +- Text and tool_call event handling +- Error handling with retry + +## App Integration +Updated `App.tsx` to include `<AssistantWidget />` above the DemoToggle. + +## Acceptance +- [x] `features/assistant/` directory created with 5 files +- [x] Floating button positioned at `bottom-20 right-4 z-50` +- [x] Sheet opens on button click +- [x] Chat panel shows message history +- [x] Streaming messages display as they arrive +- [x] Textarea input with send button +- [x] Session ID persisted in localStorage +- [x] SSE subscription handles text, tool_call, complete events +- [x] TypeScript strict mode passes +- [x] ESLint passes +- [x] Prettier formatting applied + +## Done summary +## Summary + +Created Dataing Assistant frontend feature with floating chat widget. + +### Components: +1. **AssistantWidget** - Floating button + Sheet panel +2. **AssistantPanel** - Chat UI with input, messages, quick questions +3. **AssistantMessage** - Individual message display with streaming support +4. 
**useAssistant** - State hook with SSE streaming + +### Integration: +- Widget added to App.tsx +- Positioned above DemoToggle (bottom-20 right-4) +## Evidence +- Commits: +- Tests: +- PRs: diff --git a/.flow/tasks/fn-56.6.json b/.flow/tasks/fn-56.6.json new file mode 100644 index 000000000..a9399efde --- /dev/null +++ b/.flow/tasks/fn-56.6.json @@ -0,0 +1,23 @@ +{ + "assignee": "bordumbb@gmail.com", + "claim_note": "", + "claimed_at": "2026-02-02T23:56:34.348182Z", + "created_at": "2026-02-02T22:01:49.019252Z", + "depends_on": [ + "fn-56.4", + "fn-56.5" + ], + "epic": "fn-56", + "evidence": { + "api_wrapper_created": true, + "eslint_passes": true, + "openapi_generated": true, + "typescript_passes": true + }, + "id": "fn-56.6", + "priority": null, + "spec_path": ".flow/tasks/fn-56.6.md", + "status": "done", + "title": "Generate OpenAPI client and integrate widget into App", + "updated_at": "2026-02-03T00:00:16.475213Z" +} diff --git a/.flow/tasks/fn-56.6.md b/.flow/tasks/fn-56.6.md new file mode 100644 index 000000000..930a5e631 --- /dev/null +++ b/.flow/tasks/fn-56.6.md @@ -0,0 +1,55 @@ +# fn-56.6 Generate OpenAPI client and integrate widget into App + +## Description +Generate OpenAPI client and integrate AssistantWidget into App.tsx. + +## What Was Done + +### 1. Generated OpenAPI client +```bash +just generate-client +``` +This created `frontend/app/src/lib/api/generated/assistant/assistant.ts` with: +- `useCreateSessionApiV1AssistantSessionsPost` +- `useListSessionsApiV1AssistantSessionsGet` +- `useGetSessionApiV1AssistantSessionsSessionIdGet` +- `useDeleteSessionApiV1AssistantSessionsSessionIdDelete` +- `useSendMessageApiV1AssistantSessionsSessionIdMessagesPost` +- `useExportSessionApiV1AssistantSessionsSessionIdExportPost` + +### 2. Created API wrapper (`lib/api/assistant.ts`) +```typescript +export const useCreateAssistantSession = ... +export const useAssistantSessions = ... 
+export const assistantApi = { + createSession, getSession, sendMessage, getStreamUrl +} +``` + +### 3. Updated useAssistant hook +Refactored to use the generated API client instead of raw fetch calls. + +### 4. Widget integration (done in fn-56.5) +`<AssistantWidget />` is already rendered in App.tsx above DemoToggle. + +## Acceptance +- [x] OpenAPI client regenerated: `just generate-client` +- [x] `lib/api/assistant.ts` wrapper created +- [x] `AssistantWidget` imported and rendered in App.tsx (done in fn-56.5) +- [x] Widget visible on all authenticated pages +- [x] TypeScript compilation passes +- [x] ESLint passes with no errors + +## Done summary +## Summary + +Generated OpenAPI client for assistant endpoints and created API wrapper. +Updated useAssistant hook to use generated client for type-safe API calls. + +### Files: +- lib/api/assistant.ts - API wrapper with cleaner hook names +- lib/api/generated/assistant/assistant.ts - Generated client +## Evidence +- Commits: +- Tests: +- PRs: diff --git a/.flow/tasks/fn-56.7.json b/.flow/tasks/fn-56.7.json new file mode 100644 index 000000000..946e10024 --- /dev/null +++ b/.flow/tasks/fn-56.7.json @@ -0,0 +1,28 @@ +{ + "assignee": "bordumbb@gmail.com", + "claim_note": "", + "claimed_at": "2026-02-02T22:47:40.803413Z", + "created_at": "2026-02-02T22:43:38.619900Z", + "depends_on": [], + "epic": "fn-56", + "evidence": { + "commits": [ + "82fd221a" + ], + "files_created": [ + "python-packages/dataing/src/dataing/agents/tools/__init__.py", + "python-packages/dataing/src/dataing/agents/tools/registry.py", + "python-packages/dataing/tests/unit/agents/tools/__init__.py", + "python-packages/dataing/tests/unit/agents/tools/test_registry.py" + ], + "tests": [ + "python-packages/dataing/tests/unit/agents/tools/test_registry.py" + ] + }, + "id": "fn-56.7", + "priority": null, + "spec_path": ".flow/tasks/fn-56.7.md", + "status": "done", + "title": "Create unified tool registry", + "updated_at": "2026-02-02T22:51:20.897653Z" +} diff --git 
a/.flow/tasks/fn-56.7.md b/.flow/tasks/fn-56.7.md new file mode 100644 index 000000000..463557bf3 --- /dev/null +++ b/.flow/tasks/fn-56.7.md @@ -0,0 +1,21 @@ +# fn-56.7 Create unified tool registry + +## Description +TBD + +## Acceptance +- [ ] TBD + +## Done summary +Created unified tool registry for Dataing Assistant: + +- `ToolCategory` enum: FILES, GIT, DOCKER, LOGS, DATASOURCE, ENVIRONMENT +- `ToolConfig` dataclass for tool metadata (name, category, description, priority) +- `TenantToolConfig` for per-tenant enable/disable overrides +- `ToolRegistry` class with methods: register, get_tool, get_tools_by_category, is_tool_enabled, get_enabled_tools, enable_tool, disable_tool +- Singleton pattern via `get_default_registry()` +- 27 unit tests covering all functionality +## Evidence +- Commits: 82fd221a +- Tests: python-packages/dataing/tests/unit/agents/tools/test_registry.py +- PRs: diff --git a/.flow/tasks/fn-56.8.json b/.flow/tasks/fn-56.8.json new file mode 100644 index 000000000..1ff6e86fd --- /dev/null +++ b/.flow/tasks/fn-56.8.json @@ -0,0 +1,35 @@ +{ + "assignee": "bordumbb@gmail.com", + "claim_note": "", + "claimed_at": "2026-02-02T22:51:34.224400Z", + "created_at": "2026-02-02T22:43:38.701427Z", + "depends_on": [], + "epic": "fn-56", + "evidence": { + "commits": [ + "4cf6b939" + ], + "files_created": [ + "python-packages/dataing/src/dataing/core/parsing/__init__.py", + "python-packages/dataing/src/dataing/core/parsing/text_parser.py", + "python-packages/dataing/src/dataing/core/parsing/yaml_parser.py", + "python-packages/dataing/src/dataing/core/parsing/json_parser.py", + "python-packages/dataing/src/dataing/core/parsing/log_parser.py", + "python-packages/dataing/src/dataing/core/parsing/data_parser.py" + ], + "test_count": 58, + "tests": [ + "tests/unit/core/parsing/test_text_parser.py", + "tests/unit/core/parsing/test_yaml_parser.py", + "tests/unit/core/parsing/test_json_parser.py", + "tests/unit/core/parsing/test_log_parser.py", + 
"tests/unit/core/parsing/test_data_parser.py" + ] + }, + "id": "fn-56.8", + "priority": null, + "spec_path": ".flow/tasks/fn-56.8.md", + "status": "done", + "title": "Create centralized file parsers (core/parsing/)", + "updated_at": "2026-02-02T22:58:37.329162Z" +} diff --git a/.flow/tasks/fn-56.8.md b/.flow/tasks/fn-56.8.md new file mode 100644 index 000000000..73df221f4 --- /dev/null +++ b/.flow/tasks/fn-56.8.md @@ -0,0 +1,22 @@ +# fn-56.8 Create centralized file parsers (core/parsing/) + +## Description +TBD + +## Acceptance +- [ ] TBD + +## Done summary +Created centralized file parsers in core/parsing/: + +- **TextParser**: UTF-8 text files with line-range chunking, search, and encoding fallback +- **YamlParser**: Safe YAML loading with multi-document support and format_summary for LLMs +- **JsonParser**: JSON parsing with schema inference and formatted summaries +- **LogParser**: Log files with level detection, timestamp parsing, JSON log support +- **DataParser**: CSV/Parquet sampling without full memory load + +All parsers include size limits, consistent error handling, and helpful summaries for LLM consumption. 
+## Evidence +- Commits: 4cf6b939 +- Tests: tests/unit/core/parsing/test_text_parser.py, tests/unit/core/parsing/test_yaml_parser.py, tests/unit/core/parsing/test_json_parser.py, tests/unit/core/parsing/test_log_parser.py, tests/unit/core/parsing/test_data_parser.py +- PRs: diff --git a/.flow/tasks/fn-56.9.json b/.flow/tasks/fn-56.9.json new file mode 100644 index 000000000..4b094b131 --- /dev/null +++ b/.flow/tasks/fn-56.9.json @@ -0,0 +1,31 @@ +{ + "assignee": "bordumbb@gmail.com", + "claim_note": "", + "claimed_at": "2026-02-02T23:02:29.684094Z", + "created_at": "2026-02-02T22:43:38.781579Z", + "depends_on": [ + "fn-56.7" + ], + "epic": "fn-56", + "evidence": { + "files_created": [ + "python-packages/dataing/src/dataing/agents/tools/log_providers/base.py", + "python-packages/dataing/src/dataing/agents/tools/log_providers/local.py", + "python-packages/dataing/src/dataing/agents/tools/log_providers/docker.py", + "python-packages/dataing/src/dataing/agents/tools/log_providers/cloudwatch.py", + "python-packages/dataing/src/dataing/agents/tools/log_providers/__init__.py", + "python-packages/dataing/tests/unit/agents/tools/log_providers/__init__.py", + "python-packages/dataing/tests/unit/agents/tools/log_providers/test_base.py", + "python-packages/dataing/tests/unit/agents/tools/log_providers/test_local.py" + ], + "pre_commit_passed": true, + "tests_failed": 0, + "tests_passed": 24 + }, + "id": "fn-56.9", + "priority": null, + "spec_path": ".flow/tasks/fn-56.9.md", + "status": "done", + "title": "Create log provider interface and implementations", + "updated_at": "2026-02-02T23:10:38.660807Z" +} diff --git a/.flow/tasks/fn-56.9.md b/.flow/tasks/fn-56.9.md new file mode 100644 index 000000000..7e7d2f3db --- /dev/null +++ b/.flow/tasks/fn-56.9.md @@ -0,0 +1,46 @@ +# fn-56.9 Create log provider interface and implementations + +## Description +TBD + +## Acceptance +- [ ] TBD + +## Done summary +## Summary + +Implemented log provider interface with three provider 
implementations: + +1. **LocalFileLogProvider** - Reads logs from local filesystem with: + - Pattern filtering on message AND raw line + - Time-based filtering + - Pagination support with proper truncation logic + - Rotation detection + +2. **DockerLogProvider** - Reads logs from Docker containers with: + - Container listing and status + - Log level detection + - Timestamp parsing + +3. **CloudWatchLogProvider** - Optional provider for AWS CloudWatch Logs with: + - IAM role authentication + - Log group/stream listing + - Filter patterns + +Fixed bugs: +- Pattern filter now checks both message and raw line (fixes level-only searches like "ERROR") +- Truncation logic now tracks last processed line correctly +- Added `_matches_pattern` helper for consistent search behavior + +## Files Changed +- `agents/tools/log_providers/base.py` - Protocol, base class, helper method +- `agents/tools/log_providers/local.py` - Local file provider +- `agents/tools/log_providers/docker.py` - Docker provider +- `agents/tools/log_providers/cloudwatch.py` - CloudWatch provider +- `agents/tools/log_providers/__init__.py` - Re-exports +- `tests/unit/agents/tools/log_providers/test_base.py` - Base tests +- `tests/unit/agents/tools/log_providers/test_local.py` - Local provider tests +## Evidence +- Commits: +- Tests: +- PRs: diff --git a/demo/docker-compose.demo.yml b/demo/docker-compose.demo.yml index 2f7cc04f3..4abaa0764 100644 --- a/demo/docker-compose.demo.yml +++ b/demo/docker-compose.demo.yml @@ -13,6 +13,33 @@ # 3. 
Run investigation on the connected datasource services: + # Override API to mount repo files for assistant file reading + api: + volumes: + # Mount repo root for assistant file reading + # PWD is set by 'just demo' or defaults to current directory + - ${PWD:-.}/demo:/repo/demo:ro + - ${PWD:-.}/python-packages:/repo/python-packages:ro + - ${PWD:-.}/frontend:/repo/frontend:ro + - ${PWD:-.}/docs:/repo/docs:ro + # Docker socket for container introspection + - /var/run/docker.sock:/var/run/docker.sock:ro + environment: + DATAING_REPO_ROOT: /repo + + # Override worker to mount repo files for assistant file reading (Temporal agent) + worker: + volumes: + # Mount repo root for assistant file reading + # PWD is set by 'just demo' or defaults to current directory + - ${PWD:-.}/demo:/repo/demo:ro + - ${PWD:-.}/python-packages:/repo/python-packages:ro + - ${PWD:-.}/frontend:/repo/frontend:ro + - ${PWD:-.}/docs:/repo/docs:ro + # Docker socket for container introspection + - /var/run/docker.sock:/var/run/docker.sock:ro + environment: + DATAING_REPO_ROOT: /repo # Demo PostgreSQL with DuckDB integration for analytics # Uses pg_duckdb extension - real PostgreSQL with DuckDB query engine # https://github.com/duckdb/pg_duckdb diff --git a/demo/fixtures/baseline/manifest.json b/demo/fixtures/baseline/manifest.json index e97f6dec0..b93ae36e9 100644 --- a/demo/fixtures/baseline/manifest.json +++ b/demo/fixtures/baseline/manifest.json @@ -1,7 +1,7 @@ { "name": "baseline", "description": "Clean e-commerce data with no anomalies", - "created_at": "2026-02-01T22:23:30.928901Z", + "created_at": "2026-02-02T02:25:55.796747Z", "simulation_period": { "start": "2026-01-08", "end": "2026-01-14" diff --git a/demo/fixtures/duplicates/manifest.json b/demo/fixtures/duplicates/manifest.json index e2330f230..3bf8d1b63 100644 --- a/demo/fixtures/duplicates/manifest.json +++ b/demo/fixtures/duplicates/manifest.json @@ -1,7 +1,7 @@ { "name": "duplicates", "description": "Retry logic creates duplicate 
order_items", - "created_at": "2026-02-01T22:23:33.696510Z", + "created_at": "2026-02-02T02:26:03.807337Z", "simulation_period": { "start": "2026-01-08", "end": "2026-01-14" @@ -48,56 +48,56 @@ ], "ground_truth": { "affected_order_ids": [ - "b8442c42-598a-4114-bcf6-52889371384c", - "1b25227b-007d-4597-be6c-bc9b6a234bee", - "168efc49-004b-4b2b-a5bc-54b630cd6260", - "e24fd01b-b477-4150-84d4-5ace57da0316", - "cfd604f6-6d3b-4534-a4f8-d4df9661629d", - "bddd3d2c-2739-4a4a-998e-bd4743529b00", - "41a95582-b6aa-4a95-91cb-1b6258d0618d", - "49530e30-4ba2-47d6-a236-04beb7943892", - "55f9489e-1bc1-42a8-8922-5a542bde7271", - "667853c4-0228-420d-abe9-e9024436c1f5", - "f0ac51c5-32af-48c1-bdee-4b43ca8f440f", - "c87a91fe-3e91-4d4a-b12f-902f5bd84f8c", - "9a31b301-250c-4177-8004-54e071b15c0b", - "f95009b0-bba1-4745-8761-0fc836c85147", - "789d77aa-dee5-44a4-96f9-9ff53727b1c6", - "fdfd4efe-f0da-4497-a516-28481302d0c9", - "dde5ca8b-2f9c-4506-99ea-e2167979b3b0", - "2a9da0fe-3ca8-4637-b28f-3e14ad05a79b", - "31d6f307-564e-4e2b-80c0-018d379dbfea", - "2f90592e-3543-46d7-8963-c5acb5055819", - "f03969ad-724a-4bb0-a383-176b0284dcf7", - "26593879-2a51-4a8f-8545-fe0c36f509ff", - "f11b9e5e-63ef-4c15-ab01-adf72b185ebb", - "8681ddf9-2ab6-4de4-8ee5-d289b2dea07a", - "ffaf74f5-caa2-4ace-8aff-05b32b162f4a", - "8fa94019-cfc7-4c72-9dcb-d9f5d2dcbe54", - "e1ec0dbf-d79d-4181-bb37-0967ee8f8058", - "b18ecd6c-2297-49f5-ac3f-97a3c6256332", - "7ea0bf19-de82-4adc-9353-deb1c53648e1", - "58715e9e-3abc-430d-84d0-cfb110a434a4", - "9637c1f3-0803-4f78-ac70-de7ecdb72e5d", - "6e29ee88-342a-4392-a9be-b222635ef119", - "5f4f6c8b-6a3b-4d2f-a6c4-14399efb34bc", - "0ef5d034-ed48-452b-aa7b-3db063c14bbb", - "b07a0dc3-c597-4242-9a0e-0c10331966f8", - "35d6454a-d338-4e3a-a608-06fb020a8c8c", - "d150026d-dedd-4b24-9e27-c9d1da989ef4", - "11713dbf-2abd-4179-9fde-b0772276b7da", - "757b1afa-3f0b-4598-a056-92c04788fae8", - "9bc8d67a-9a0c-485f-a442-687b49450356", - "f8767eef-23d5-4d95-b5ad-d0dc9d67686c", - 
"48c70fd5-01af-430e-b669-8489f6914881", - "9a724d63-9a29-486d-8c5b-95d64bad7a6a", - "d8937102-640b-47ab-a2d2-33585fef82d2", - "161b6a51-17fb-4645-ba17-f61bf1c7d4f4", - "f6afa085-f0df-43dc-97af-9a506f1fde07", - "de22257d-104a-4ba8-aa70-19cf68cc4a4d", - "74430204-f467-47c4-ad93-f4988c2dc563", - "390a483a-7552-4603-9071-1f9f4456d093", - "414180c5-3f03-453f-b303-f0e52cd51572" + "210828a2-cf31-4732-b244-d118001b8671", + "b6f178bc-e934-4456-9e2f-4ea2221de71d", + "abd40727-116d-470e-9eb6-4a03fb5f8b81", + "bfa54f05-2890-426b-88ef-0db9381321a5", + "bb4a77b6-aa6b-4584-bfa2-cb2b654c6a49", + "da203267-7066-4486-b256-a789bd113637", + "e7899412-9880-4705-84c1-f0b328fc0ee2", + "77617e2d-94c4-4005-82bf-2d25819631df", + "00a6940e-2515-4c06-8d6a-a72bd74fa36d", + "c47d0cb5-0e8a-4c92-b5d4-16e41da15362", + "ab93e414-d5c3-4635-ad93-b1fa5af23b69", + "cb842dcf-4fff-4289-92ff-f71f0157f349", + "5e6f01cc-6c4c-44f3-8a28-909605eeeeb8", + "78e980dc-23a0-4b31-a3af-084dc6a656fa", + "95596b6d-f086-4e04-802c-34af0f689ff5", + "0d0bb801-7185-4b8e-8623-a83c923a37fa", + "c4274b7a-02f0-4806-b710-f7e3be2ca8c0", + "a64350ad-6f50-4926-a4e3-871312749ff7", + "47c3b420-c831-4b73-9038-640477590c7e", + "7d23cc3b-825a-45a2-97e2-41e2fe55dc92", + "141229c1-dfcb-4f06-8d01-db0108252395", + "4838c36f-a7e7-48dd-8f9e-cdd69ce344f1", + "1984c8c9-e7f0-4241-8df0-13cf5f5c9481", + "f9c372ee-8174-4b60-8661-d394d0f4b6f2", + "81ec17e1-83c1-4eb3-9bb3-9e5045535a4a", + "7d4c2f77-7b18-4474-828c-0120a272b415", + "77684a4d-6d03-4207-a4b6-e5075a8416e7", + "088413e2-04e5-4b75-ac86-15e09ccd0d74", + "2a57a25b-6681-4d10-b6dd-23b4f6557007", + "d26d2720-bb2c-4532-b640-40d84eb537a7", + "7c325af6-f815-468b-b531-ca8809b0d340", + "4b921b18-0031-4957-abd0-dd2724f9e043", + "0b23e3a5-d05d-4d0f-8eb7-3251921cc803", + "d1ec0fb4-5830-441c-b51c-db3560a10002", + "39ce1cdb-b977-494e-92dd-78d0d28116f4", + "d908cdbf-db64-40e9-ab70-e9374372f39e", + "fffb4922-0388-4247-bc1c-3dc6a7a05d3e", + "9f381026-a7e0-413a-bd4c-5ef6db25a6f8", + 
"b9dc9cbd-36f5-4dfe-8308-da1067f9ce54", + "21d35a2c-9b39-401b-aea4-9edb9c472887", + "f2a70a85-dc8e-4c63-8905-4ab39682657b", + "f1d357d8-4dfe-44f0-971b-155875bd8ad2", + "1a133370-f77a-4467-8889-ebce02435d28", + "4a60d818-eb9c-4d79-b6f5-50ae30571a4b", + "e4289200-a9e7-47f4-a196-369a51ef06f6", + "60f6a3e8-c696-4740-ab4a-9f895841d1f5", + "9aef6619-c372-402d-b144-159040e1fbee", + "059d0f48-35b7-4b6a-8b85-3596f497f8e7", + "8b3069e5-e88b-42a0-83e8-ad8e3df9aa0c", + "5bd4a4df-050a-4418-ba95-a7dc57994d6c" ], "affected_order_count": 81, "duplicate_items": 84 diff --git a/demo/fixtures/late_arriving/manifest.json b/demo/fixtures/late_arriving/manifest.json index dcbbea14a..4d91e01bc 100644 --- a/demo/fixtures/late_arriving/manifest.json +++ b/demo/fixtures/late_arriving/manifest.json @@ -1,7 +1,7 @@ { "name": "late_arriving", "description": "Mobile app queues events offline, batch uploaded later", - "created_at": "2026-02-01T22:23:35.087147Z", + "created_at": "2026-02-02T02:26:06.908134Z", "simulation_period": { "start": "2026-01-08", "end": "2026-01-14" diff --git a/demo/fixtures/null_spike/manifest.json b/demo/fixtures/null_spike/manifest.json index 8bd789ee7..c417358ce 100644 --- a/demo/fixtures/null_spike/manifest.json +++ b/demo/fixtures/null_spike/manifest.json @@ -1,7 +1,7 @@ { "name": "null_spike", "description": "Mobile app bug causes NULL user_id in orders", - "created_at": "2026-02-01T22:23:31.794763Z", + "created_at": "2026-02-02T02:25:57.814319Z", "simulation_period": { "start": "2026-01-08", "end": "2026-01-14" @@ -64,106 +64,106 @@ ], "ground_truth": { "affected_order_ids": [ - "b688ab17-46f9-4441-9714-ed69c62db62d", - "222a798c-b5a5-4d3d-bed6-d03ecbf88285", - "3c766d1d-fa1e-4118-8f59-aff2d9b1ffe5", - "27d1c43a-6243-40e0-9a86-4e1dc9eb807c", - "c9e1f49d-cae6-475a-8568-1d0f7acc8be5", - "09e35fe5-6b9c-43c8-a6f6-2149f5895992", - "e38eecc9-e24f-4b52-b1d2-e592328f3962", - "6324d705-110a-4d02-89ef-c8225a739ffc", - "6a48cd61-49c4-4ab2-9c06-c1ca5cb634e1", - 
"3ea2bcc5-e695-431b-871c-972083904ba6", - "ff9cbb73-c129-4b49-ae59-f3e4d0f074f6", - "e7899ea9-6527-42fd-86fb-7a519e276245", - "89c9a5af-7d5e-451d-8c0a-e37ac69d1c51", - "59e97154-9794-4b54-95ce-9bbc875ab4a0", - "48afa17a-70be-46c4-920c-d2ee8f7e78d0", - "7f3ae3aa-c108-4045-aa3b-28da46c0990d", - "dff5a6c5-6ffb-43bd-b5bd-c933d7a0abd5", - "4f684893-5ba3-4b54-b335-295450dc3e72", - "49384b3e-9a7e-403f-a671-b73a0c00696f", - "94025178-7389-4b9b-8c1f-782b0a79bf16", - "71018331-e4cc-40e1-97c5-6f8edaa25394", - "dd28754d-d3c5-4957-bde7-2038ff8900e5", - "00e73881-17ef-4459-9a0d-2ffa3f6c72ef", - "c4564a14-fb0c-4fa2-b9be-0b6409b89915", - "29c9f54a-8a89-4509-b130-553c2291ede8", - "bc12695b-e6ee-4aaf-9925-d07bf09466b1", - "8a7e3ae2-0d7c-48f5-b4bc-b46840ffc2ed", - "3aa27b1f-b7df-467a-bfe7-6d81e59ba722", - "ad05e20e-52b6-4883-a855-2f3430ea9ed1", - "76a5423c-04c4-4566-8978-8e920a8c623a", - "62cd2cea-3234-40fb-9813-f44712af540f", - "f2af95c4-5929-4d96-a341-64218f9d90fb", - "0f269ed5-95df-4d89-a891-f8058a24ad64", - "8908ce10-6900-4663-80f2-5582dbcdab0e", - "4f0adc62-a433-4812-ad2b-9ba2ba60b215", - "de8f8781-1e12-4f14-87df-948ae0e55eb3", - "1b5686e2-e7a6-4094-b44d-a91cb6423178", - "557eab97-80b1-4649-8426-a0a8abc7ad08", - "c4f3a4d4-7431-40d8-aeb5-029c212316b0", - "65c18152-0826-4acc-bbc1-9eb754d7b149", - "f235c3ef-8bc5-470b-98ff-97f6c9a2e540", - "e857bc39-17d0-45bb-96b6-dd7bcfdeac1f", - "424d79fb-5412-4c62-9c26-518b290bd627", - "edf4e95b-669c-438a-a568-5afb0f002e58", - "b8e19ef8-113b-4f86-8281-30f6656e0cc0", - "af723078-ecd2-4dfa-8648-93769e203728", - "b722e3a8-9a8c-47c3-9479-fe6fcb5ae39d", - "8527c3aa-dd32-4055-9360-5a452d08e953", - "6aa1ff4f-2920-44ab-bbed-f12ece875821", - "978fee78-489b-40b6-b198-bb582538bd02", - "69194003-e741-4964-915d-b8a84e805224", - "320449db-fd32-405a-a1ac-b5d0a801f959", - "116cfa9e-e96c-47e5-8674-315a8cd93e00", - "1ac0619c-a463-4302-b895-545743550634", - "0a8b3da0-8eaf-4457-9aa9-1683617e7eb5", - "2b43676b-b8e8-46e3-89d9-e2b623b4796c", - 
"569a5f66-aba7-40fc-b5bc-54efb28da82e", - "806e6135-579e-41fd-b1cd-5a05d5162a01", - "5d7ceaf1-0f26-46e2-b910-f9e88ddf514c", - "8c250f6e-16e1-41f6-87f9-ff9987aa0f4a", - "8ba2eab3-cfeb-44f6-b1c4-972919faeedb", - "0b2e78d1-e4cb-46a1-90b3-5371330ff500", - "eb657602-843b-4488-b04d-1f27f1a72854", - "6356d69c-57af-4edf-b493-616e53d26de6", - "55c3d05a-5896-4497-b192-69e77bc45e7a", - "df9d0556-647b-4acd-b51d-1e134d13be2e", - "19f4c128-b849-42a8-a0ef-2f0bc9140044", - "0223b211-6249-48d8-ae4e-ed4e6323818a", - "9bef1760-70de-43e4-94d1-f9768e0dbd3b", - "3742955d-9290-4464-8cf0-cbf8f9055e6c", - "67e6d006-f3c4-4a8f-b85b-51679e965680", - "1fa4f148-607f-4ff7-96a5-2616425e4d14", - "42790d83-f00a-42db-a485-c101030bfe45", - "b6162c76-943e-4361-85f6-228c6f77525f", - "293d37e0-7808-4bcd-a6ab-ee72c6e991c2", - "a29e6502-8952-4043-a5c2-9b8326415659", - "ca231126-041d-4289-bbad-c616d7a64b51", - "9231416a-46ec-4178-a02d-8273824055a8", - "3675185b-5116-4a0d-a29b-a54b7e436bf3", - "c613442c-0821-4681-8bc9-a65e0578b933", - "6ff71e5f-4ff4-4524-97fb-88b7ce8f8b29", - "c571f97e-7483-411f-a00d-b0dfd546eed1", - "83df91c2-ce90-4cd5-8ae3-330576dd6dea", - "a5c4c809-eb89-4e20-a989-0ba4989779e8", - "4d3bf484-0498-4b91-832f-400890defdf9", - "14d0d13f-b789-43c3-81a7-d3ebb942f35c", - "cd087048-1ab4-4411-8354-75d7ee75cc90", - "5d27fa58-25fe-4fe0-acf5-49e7ec42019d", - "3d9e434e-9b4b-4174-93b5-c31aadd98983", - "d385975e-5d63-4f5c-9ec9-53792718800c", - "296a2f2d-6f52-4461-8dbe-cb34fd6df659", - "64a97d60-9fe2-4433-b433-ad713a160980", - "908a2cc4-c75a-4566-ac61-6b98feafd4c8", - "21bbe94a-52e3-4a28-a7c8-22056b824a92", - "a2c1c751-e151-454b-87d2-09583e7da070", - "f2cf8ef5-4c48-4e8b-98f1-38ea3bcede86", - "a145f4b5-cac3-45fa-8634-dc2420f0bb42", - "da60fd06-934e-4e7b-82cf-6fe10993c3f4", - "9d004671-afb1-412f-a7cf-bc5c51029b15", - "74dcfdba-7ed3-4da6-8d74-247c7a091d5f" + "9bd62fb1-efe9-4672-94af-657a23deaffc", + "ef48fc1e-718b-42bc-998b-64958ce7ba68", + "0cf82c21-258f-44ef-a643-f1ed0c9accb9", + 
"5e2b078c-fea1-4a9c-bcb5-66133c93bad5", + "a8222d7a-bc0d-4e4d-97de-baad8222e4dc", + "86753645-6595-4106-9768-1901aa531583", + "122e39ca-0c7b-4a1a-a308-62c3de0610cb", + "1f3f39fa-fc36-4365-b5dc-63c7546af5f3", + "acb53253-e005-4b21-93e2-c77dc967d27e", + "65970a72-93b1-4723-a248-d8908a9b1ba5", + "fccec367-5a4e-4d13-830b-ae1c021d4ca5", + "68a396f2-80bf-4dd8-8d99-bceef1c5dee1", + "99fd2506-9ca6-42fa-b45a-31b702775806", + "6e3c1b5a-59f6-4994-8a1c-9127dbbb77d6", + "c1d13afa-2af0-426e-8852-d9a1558318a8", + "70f1f2fd-0ba7-4319-93f6-b8232758fe8d", + "8f52e185-64ad-40b5-bc11-1966a53f32cf", + "eff83042-5937-4147-a080-488c422c102f", + "6839db6f-8fad-4547-b53c-d4270bf6dc95", + "9be0b9d6-43a0-4251-b2d8-9f2d98d6ad80", + "654e583d-aaf9-4d08-868c-685a1d540b2b", + "ddc8e975-daa3-44a6-81a9-d81787d6a77c", + "c784495e-005b-485b-88e4-191b1e59a2cb", + "6742c4ed-edd7-45aa-a67a-8908b5e71604", + "51fbaeac-3504-46e6-9cbb-bbba57c452dd", + "2a40a796-4fd0-49ae-9321-d9368349d85e", + "239a5569-ca70-4966-bc8c-840b824a5763", + "37b186e4-2a3c-4bb9-8e8b-2da002cbc4b9", + "ae8b1506-e856-43dd-b798-b94bf56ed133", + "8afd203d-cd2e-48f3-bcd9-045e693763b5", + "3cd56a07-ad06-4a8c-a05b-fe913fe557b9", + "c052b5c4-169f-4a0a-84ae-e315e73f4f8d", + "f43debe2-2a66-4ca5-98f7-ec8921d481d8", + "d347a7bb-f83e-42e0-8b7f-43f74156c521", + "4900c072-2e53-49cb-b970-be6b051f2bd6", + "a3131b98-f020-455a-b859-b3a2c43ce51e", + "3ba52b84-7c59-4172-8f17-eeac832c3cf9", + "e570db42-32d5-492a-abb4-1b8322891b55", + "19aa9bf5-5727-4e07-85a5-5ded73062a7c", + "ade3d6b4-80ab-4608-934e-f00f5d1a79cc", + "cf743945-7a90-4c96-b539-6b7f9dfe163f", + "d0593a0f-b41d-4ba0-8f66-15e79136d608", + "848c22b7-1eae-41f8-8a41-db57ccbfd86f", + "97a8e835-c8cb-46a4-be64-1f89c40cdd0e", + "a4b7487c-8ca2-4a42-8c39-eba72bda48e6", + "18022d03-f0cf-4169-856a-ac4c11c92dc0", + "20dbc45a-532e-4770-aacb-5a1e545c6742", + "2ba9c0b7-32c2-46b4-bec5-b8ccd9cd9613", + "df16319d-ee71-428e-a85d-165c8f179dd2", + "60d83e70-cd35-4883-b232-e4cdda3258f3", + 
"2f4118be-8198-4cfe-8eb6-550e5093ba10", + "a6e85353-a741-4f9f-b598-65a17de11d1b", + "ed3e934a-a704-4a1e-a300-60950f4147b7", + "1555adfa-1155-462e-9c57-5d5124eebe47", + "84ebe0b1-a19e-496d-a30a-18604b83e5b8", + "54ab361a-6c89-41f7-9da5-45923214d881", + "13eb13b8-2f26-4d03-a68a-02193e6a8dd7", + "fe546de6-3a27-4255-bad8-ceb5097c6573", + "f9b815e9-c6c7-44e2-baf3-7785f4a83c4d", + "ad03898b-589b-4eeb-bd3f-9a3402821221", + "8c22d7f1-40f8-4a41-b77c-5905378331c8", + "e01f686e-db2d-4ad5-8916-355ed7dc32d4", + "56cdd4ec-9ad5-40e5-ba94-11faf13731dd", + "18e9f574-045b-442e-8731-f1ddb981ca02", + "eeeb2ed4-832e-48d8-b2f3-4231f7b57788", + "19150f30-6259-429a-8b60-961bcb28b524", + "c5e9553b-03d5-4cbb-925d-fc7bf7c9912f", + "aee23b63-5ff9-432c-b5d2-c71b6064a56a", + "b7631907-40b8-46d8-840c-53e45750bb11", + "8a2c9773-ea61-4ce6-934e-86a2ae9c0e27", + "acb8b7f2-411f-4b34-a96f-4c21d0f9bf1f", + "8c481c3b-1beb-4851-b266-9bdb053fdde7", + "2b9f6c73-4bda-4bab-bc9f-a00fb7b55176", + "d5b90899-654b-4262-8263-c9ddd075747c", + "438c46c3-13fb-488d-a4c5-1e4b7b5a4965", + "70954d3d-62ae-4e5f-8b78-e05ee2f04f78", + "4dd4e464-5d86-4875-ad5c-782b669c86d8", + "5d637246-b288-46c6-80c2-f4145d6a3e87", + "2adca569-6cee-40e9-930b-868560712462", + "e0314784-cb41-44b6-82f5-4a28edb313a6", + "9f2325f8-51bb-4466-8d4e-9d7561752fc6", + "11a76a1a-a2e3-4da8-837b-a249888ea05b", + "0ae5cb9f-89d0-4d1e-981c-20100692616d", + "8e3867bc-24aa-439a-8bfe-feb0ec9b70d0", + "809b331b-82c0-4470-9cc9-b925a3fecb02", + "cfb73e92-34d6-4ef9-942e-f3383f899e5d", + "37e411de-7851-4e1c-b572-e6b1b533f75b", + "5e495da2-f966-472c-8d29-80d818e6b42b", + "8e90f34c-b735-4754-a84a-c9ef7e9c9e8b", + "927720e4-80b1-484d-9e97-5f611af1bb05", + "5efc5a36-814d-454a-8f8c-3e0273e48227", + "1a018694-4f7d-4987-aa41-6de78e6c7250", + "0a7e999a-602c-4203-8d53-3779cc46e3e7", + "0082d202-ab71-452d-9fdd-3a6ba372ddd4", + "45e56db9-a4c9-41b3-9a58-db5926daefd2", + "8fb1b1fe-78b8-4687-bdec-e4b5889a0413", + "ac69f775-2cb7-4baa-b637-23f3e8dd6dd1", + 
"ed90fe9c-4c78-42c7-9102-bbdde5015f95", + "b96e5a21-73b7-487b-9148-85ebd775b374", + "1faff227-b173-4ff2-83cb-5c6e2bd49ad0" ], "affected_row_count": 304 } diff --git a/demo/fixtures/orphaned_records/manifest.json b/demo/fixtures/orphaned_records/manifest.json index 292e9423a..7ec95f2de 100644 --- a/demo/fixtures/orphaned_records/manifest.json +++ b/demo/fixtures/orphaned_records/manifest.json @@ -1,7 +1,7 @@ { "name": "orphaned_records", "description": "User deletion job ran before order archival", - "created_at": "2026-02-01T22:23:35.954937Z", + "created_at": "2026-02-02T02:26:09.522005Z", "simulation_period": { "start": "2026-01-08", "end": "2026-01-14" @@ -48,44 +48,44 @@ ], "ground_truth": { "affected_order_ids": [ - "da60fd06-934e-4e7b-82cf-6fe10993c3f4", - "f48e9c02-a154-44c4-be8d-83be2adf9791", - "088556ad-c2e2-43a9-91f5-98220167710b", - "0a7b4efb-933f-4690-8fa4-7e38b9e2523c", - "e894de0d-e0fd-4af6-b7a3-365d844a6bd9", - "8a9e718b-9745-4a6b-af57-9a9a6bb44c8f", - "69ca8fa6-715f-49ca-8f04-007cfe49d5f3", - "3eaecfe8-8946-4ecc-81d8-c84def48094b", - "07fb3b19-2463-4f4f-835d-e05280df408a", - "80fd10af-17c7-4f07-9cb9-e0586fe2e87f", - "6efb50a0-c622-4367-8b37-5de11a79ceb4", - "811e2c5f-5b3c-43a3-9f59-193005faf910", - "237d5d99-ce5a-4bf1-9740-3be74fe04498", - "cb06073d-fde6-4b66-b161-864d04ecf496", - "20770f6f-ea61-479d-93e1-7ecf877a653e", - "4b6bcabe-c247-4f19-8fba-ee1903228987", - "c4855fc4-21ae-4ae8-baeb-2888043f9280", - "f20f78f1-1d54-423a-8bf0-0e62d7de5acf", - "813b3b55-6f3a-4a11-b8e4-cab53331fb89", - "78c816c6-35a2-4ce9-9976-d3685de02518", - "dc27e472-458a-436f-9ca7-dc24ecbe3acc", - "cfd19557-6df9-427c-ba68-887c909f2e41", - "539161d8-bcce-4664-bca1-f6b150a1af09", - "6ed313ec-ea56-4c74-9c9a-fb0717454031", - "99bcc15c-ca5d-4656-8c5d-f949810204ea", - "fb682c38-f874-4528-8c60-e686b02fd782", - "27e72667-dbcf-4daf-8113-540c4607dc9c", - "18844ca9-124a-4ffe-b857-afe371f34d59", - "716defa3-7ad1-49ce-9d84-6f7f9e8f6ff3", - "c084db0b-71b8-4242-8c8a-d47ea031145a", - 
"b5dd14e0-1641-4664-8e77-adbbe151a8a3", - "2c59763e-d176-4b40-a376-a26ae9f0b86b", - "1ff349e3-8d74-4278-8424-a7d5f48e9842", - "88b0130b-03b2-4114-891b-754529210fd4", - "5b13771b-dd25-4afe-a23d-7cef201b87c3", - "91c0d63f-8250-4a1a-afa2-c0ad01d4bd5b", - "3d17bdea-8ef5-4152-acaa-22430762d7ab", - "23dd14a0-8e5e-42cd-96c6-eadde927f469" + "ed90fe9c-4c78-42c7-9102-bbdde5015f95", + "1e08ed9b-9a8b-4ff0-b1ec-e7d8b6743a92", + "2b5b9226-04b1-41ec-a185-40ba085c720c", + "4768cffb-b740-4673-9216-b6a932521bde", + "3452c22e-811a-465b-aa2f-046ae68fb674", + "350c8353-c387-4b3a-a048-1f9e9a4a6d14", + "5c14f1b6-e2d8-4b73-b30e-18322d6783a7", + "1d80e09e-69a2-4840-8f50-77ed05e75129", + "ccf07bfb-ed01-49d7-9238-0bfa9c57dc57", + "4e23b6ef-ec84-4aab-959f-75911f8c00d4", + "f837445b-2ae1-48b1-a0cb-1863d0dbfaf7", + "8c1f1264-53fd-4e40-b1d3-1d91bbb88fa0", + "660a8f4e-0750-4bb0-bef8-8ad54efb401f", + "bc16792c-a66f-4ff5-a157-41073423e9bb", + "81144d4b-2e57-4fda-a6f4-d6d65dd862d6", + "3c4d992c-817a-440d-b9af-5f2e6d974792", + "6473fd1f-e7f0-441f-a348-fb6392c4adc2", + "b7cba48e-7a6a-44f6-839a-f28de597cb47", + "fab90286-19cf-445a-b9e2-3c8782a7e55d", + "136b18f2-fb70-4654-8499-da3735531c04", + "d28a745f-776d-438e-bd15-255e96f6f106", + "b90b44b2-4ada-4761-a802-c198409da30e", + "1c51b1b7-0a18-4a34-baa1-f33c6dfa45a1", + "9a6a55f0-681f-4801-b182-d8a6e0806138", + "718db26a-9ed2-413e-b5d3-41995ccc56c5", + "217eb098-3a9b-49bd-8009-f9fad6ed8950", + "39989da1-5732-4549-8f8c-f09b368b606c", + "c710e945-d4a3-4c8b-8e02-bd4f437e44ab", + "8177ba9d-90f5-4767-a80c-7c8027105cd5", + "170d68c7-1ae2-4a46-9e78-4f23169f5076", + "cfb79fda-2ffc-43ca-92a3-ebcf1e8af35b", + "6934cb84-4506-4705-b516-80798e921184", + "204e8dbf-a15a-4c50-b6a3-5a5114c563ad", + "272f53af-4f7b-4a3c-96ef-032da6966e63", + "e5a8bbe4-3f81-418b-98a0-0ebaccb0c0df", + "b96ed613-c0cb-4bb3-8574-1f26515caf3d", + "720f6cee-7c95-47d9-8adb-3634af09f7d0", + "136ecc1d-804d-4acd-9458-c87004b99d3a" ], "orphaned_order_count": 38, "deleted_user_count": 38 diff --git 
a/demo/fixtures/schema_drift/manifest.json b/demo/fixtures/schema_drift/manifest.json index 283aaece7..86b5cc7fe 100644 --- a/demo/fixtures/schema_drift/manifest.json +++ b/demo/fixtures/schema_drift/manifest.json @@ -1,7 +1,7 @@ { "name": "schema_drift", "description": "New product import job inserts price as string with currency", - "created_at": "2026-02-01T22:23:32.857208Z", + "created_at": "2026-02-02T02:25:59.992159Z", "simulation_period": { "start": "2026-01-08", "end": "2026-01-14" diff --git a/demo/fixtures/volume_drop/manifest.json b/demo/fixtures/volume_drop/manifest.json index 93de0cf48..b03ebf300 100644 --- a/demo/fixtures/volume_drop/manifest.json +++ b/demo/fixtures/volume_drop/manifest.json @@ -1,7 +1,7 @@ { "name": "volume_drop", "description": "CDN misconfiguration blocked tracking pixel for EU users", - "created_at": "2026-02-01T22:23:32.823651Z", + "created_at": "2026-02-02T02:25:59.874196Z", "simulation_period": { "start": "2026-01-08", "end": "2026-01-14" diff --git a/docker-compose.yml b/docker-compose.yml index 3f50db7d7..e0f33baa3 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -174,6 +174,10 @@ services: REDIS_HOST: redis REDIS_PORT: 6379 LLM_MODEL: ${LLM_MODEL:-claude-sonnet-4-20250514} + DATAING_REPO_ROOT: /repo + volumes: + # Docker socket for container introspection (assistant tools) + - /var/run/docker.sock:/var/run/docker.sock:ro depends_on: db-migrate: condition: service_completed_successfully diff --git a/frontend/app/src/App.tsx b/frontend/app/src/App.tsx index 1e52c9331..503661b29 100644 --- a/frontend/app/src/App.tsx +++ b/frontend/app/src/App.tsx @@ -35,6 +35,8 @@ import { UsagePage } from "@/features/usage/usage-page"; import { NotificationsPage } from "@/features/notifications"; import { AdminPage } from "@/features/admin"; import { IssueList, IssueCreate, IssueWorkspace } from "@/features/issues"; +import { AssistantWidget } from "@/features/assistant"; +import { PageContextProvider } from 
"@/lib/assistant/page-context"; import { JwtLoginPage } from "@/features/auth/jwt-login-page"; import { SSOLoginPage } from "@/features/auth/sso-login-page"; import { SSOCallbackPage } from "@/features/auth/sso-callback-page"; @@ -113,6 +115,7 @@ function AppWithEntitlements() { return ( + {/* Public routes */} } /> @@ -252,6 +255,8 @@ function AppWithEntitlements() { } /> + {/* Assistant chat widget - bottom-right above DemoToggle */} + {/* CRITICAL: DO NOT REMOVE - Demo toggles for testing */} {/* Bottom-right: Plan tiers (free/pro/enterprise) */} @@ -262,6 +267,7 @@ function AppWithEntitlements() { onClose={handleCloseUpgradeModal} /> + ); } diff --git a/frontend/app/src/components/error-boundary.tsx b/frontend/app/src/components/error-boundary.tsx index 51c43a14e..d36859b5c 100644 --- a/frontend/app/src/components/error-boundary.tsx +++ b/frontend/app/src/components/error-boundary.tsx @@ -2,6 +2,7 @@ import { Component, ReactNode } from "react"; import { Button } from "@/components/ui/Button"; import { Card, CardContent, CardHeader, CardTitle } from "@/components/ui/Card"; import { AlertTriangle } from "lucide-react"; +import { emitReactError } from "@/lib/assistant/error-bus"; interface ErrorBoundaryProps { children: ReactNode; @@ -29,6 +30,7 @@ export class ErrorBoundary extends Component< componentDidCatch(error: Error, errorInfo: React.ErrorInfo): void { console.error("Error caught by boundary:", error, errorInfo); + emitReactError(error, errorInfo.componentStack ?? undefined); this.props.onError?.(error, errorInfo); } diff --git a/frontend/app/src/features/assistant/AssistantMessage.tsx b/frontend/app/src/features/assistant/AssistantMessage.tsx new file mode 100644 index 000000000..4bdb4d74c --- /dev/null +++ b/frontend/app/src/features/assistant/AssistantMessage.tsx @@ -0,0 +1,132 @@ +/** + * Message component for assistant chat. + * + * Renders user, assistant, and tool messages as plain text (no markdown parsing).
+ */ + +import { useState } from "react"; +import { User, Bot, Wrench, Loader2, ThumbsUp, Check } from "lucide-react"; +import { cn } from "@/lib/utils"; +import { Button } from "@/components/ui/Button"; +import { useSubmitFeedback } from "@/lib/api/investigation-feedback"; +import type { AssistantMessage as AssistantMessageType } from "./useAssistant"; + +interface AssistantMessageProps { + message: AssistantMessageType; + sessionInvestigationId?: string; +} + +export function AssistantMessage({ + message, + sessionInvestigationId, +}: AssistantMessageProps) { + const isUser = message.role === "user"; + const isAssistant = message.role === "assistant"; + const isTool = message.role === "tool"; + + const [isMarkedHelpful, setIsMarkedHelpful] = useState(false); + const submitFeedback = useSubmitFeedback(); + + const handleMarkHelpful = () => { + submitFeedback.mutate( + { + target_type: "assistant_message", + target_id: message.id, + investigation_id: sessionInvestigationId, + rating: 1, + }, + { + onSuccess: () => { + setIsMarkedHelpful(true); + }, + }, + ); + }; + + return ( +
+ {/* Avatar */} +
+ {isUser && } + {isAssistant && } + {isTool && } +
+ + {/* Content */} +
+ {/* Role label */} +
+ + {isUser && "You"} + {isAssistant && "Assistant"} + {isTool && "Tool"} + + {message.isStreaming && ( + + )} +
+ + {/* Message content */} +
+ {message.content || (message.isStreaming && "...")} +
+ + {/* Tool calls */} + {message.toolCalls && message.toolCalls.length > 0 && ( +
+ {message.toolCalls.map((tool, index) => ( +
+ + {tool.name} +
+ ))} +
+ )} + + {/* Mark as helpful button (assistant messages only, when not streaming) */} + {isAssistant && !message.isStreaming && message.content && ( +
+ {isMarkedHelpful ? ( + + + Marked as helpful + + ) : ( + + )} +
+ )} +
+
+ ); +} diff --git a/frontend/app/src/features/assistant/AssistantPanel.tsx b/frontend/app/src/features/assistant/AssistantPanel.tsx new file mode 100644 index 000000000..f96f90175 --- /dev/null +++ b/frontend/app/src/features/assistant/AssistantPanel.tsx @@ -0,0 +1,193 @@ +/** + * Chat panel component for the assistant widget. + * + * Contains message history, input field, and streaming indicators. + */ + +import { useState, useRef, useEffect } from "react"; +import { Send, Loader2, Plus, AlertCircle } from "lucide-react"; +import { Button } from "@/components/ui/Button"; +import { Textarea } from "@/components/ui/textarea"; +import { AssistantMessage } from "./AssistantMessage"; +import { useAssistant } from "./useAssistant"; + +// Example placeholder questions +const PLACEHOLDER_QUESTIONS = [ + "Why is my container unhealthy?", + "What caused the null spike in orders?", + "Show me recent errors in the logs", + "Explain the schema for customers table", +]; + +export function AssistantPanel() { + const [input, setInput] = useState(""); + const messagesEndRef = useRef(null); + const textareaRef = useRef(null); + + const { + messages, + session, + isLoading, + isStreaming, + error, + sendMessage, + createSession, + clearSession, + } = useAssistant({ + onError: (err) => console.error("Assistant error:", err), + }); + + // Auto-scroll to bottom on new messages + useEffect(() => { + messagesEndRef.current?.scrollIntoView({ behavior: "smooth" }); + }, [messages]); + + // Auto-create session if none exists + useEffect(() => { + if (!session && !isLoading) { + createSession(); + } + }, [session, isLoading, createSession]); + + const handleSubmit = async (e?: React.FormEvent) => { + e?.preventDefault(); + if (!input.trim() || isStreaming) return; + + const message = input; + setInput(""); + await sendMessage(message); + + // Reset textarea height + if (textareaRef.current) { + textareaRef.current.style.height = "auto"; + } + }; + + const handleKeyDown = (e: 
React.KeyboardEvent) => { + if (e.key === "Enter" && !e.shiftKey) { + e.preventDefault(); + handleSubmit(); + } + }; + + // Auto-resize textarea + const handleInputChange = (e: React.ChangeEvent) => { + setInput(e.target.value); + e.target.style.height = "auto"; + e.target.style.height = `${Math.min(e.target.scrollHeight, 150)}px`; + }; + + const handleQuickQuestion = (question: string) => { + setInput(question); + textareaRef.current?.focus(); + }; + + return ( +
+ {/* Messages area */} +
+ {/* Empty state */} + {messages.length === 0 && !isLoading && ( +
+
+

How can I help you today?

+

+ Ask about infrastructure, data issues, or investigations. +

+
+ + {/* Quick questions */} +
+ {PLACEHOLDER_QUESTIONS.map((question, index) => ( + + ))} +
+
+ )} + + {/* Loading state */} + {isLoading && messages.length === 0 && ( +
+ +
+ )} + + {/* Messages */} + {messages.map((message) => ( + + ))} + + {/* Scroll anchor */} +
+
+ + {/* Error banner */} + {error && ( +
+ + {error} + +
+ )} + + {/* Input area */} +
+
+
+