diff --git a/.claude/agents/reviewer.md b/.claude/agents/reviewer.md new file mode 100644 index 00000000..72cfe616 --- /dev/null +++ b/.claude/agents/reviewer.md @@ -0,0 +1,8 @@ +--- +name: reviewer +description: Read-only reviewer for code and architecture reviews. Cannot edit or write files. +tools: Read, Glob, Grep, Bash +--- + +You are a read-only reviewer. You can inspect code, run non-mutating +commands, and produce findings — but you must never modify files. diff --git a/.claude/skills/eval-skills/SKILL.md b/.claude/skills/eval-skills/SKILL.md index 3d7a0551..a04548cb 100644 --- a/.claude/skills/eval-skills/SKILL.md +++ b/.claude/skills/eval-skills/SKILL.md @@ -1,6 +1,7 @@ --- name: eval-skills description: Run structured agent-in-the-loop evaluations on skills to measure quality and track improvements. Use after modifying a SKILL.md, after changing `CLAUDE.md` or guidance docs that skills depend on, or periodically to benchmark skill quality and catch regressions. +argument-hint: "[skill-name ...] (e.g. local-code-review review-architecture)" --- # Eval Skills @@ -10,10 +11,17 @@ improvements across iterations. ## Steps -### 1. Pick the skill to evaluate +### 1. Determine which skills to evaluate -Identify which skill to eval. Each skill has test cases in -`.claude/skills//evals/evals.json`. +This skill accepts an optional list of skill names via `$ARGUMENTS`. + +- If skill names are provided (e.g. `/eval-skills local-code-review review-architecture`), + evaluate only those skills. +- If no arguments are provided, ask the user which skills they want to + evaluate. List all skills that have `evals/evals.json` files and let + the user pick. Accept "all" to evaluate every skill with evals. + +Each skill's test cases live in `.claude/skills//evals/evals.json`. ### 2. Create an iteration directory diff --git a/.claude/skills/local-code-review/SKILL.md b/.claude/skills/local-code-review/SKILL.md index 21bbf99c..696f6efd 100644 --- a/.claude/skills/local-code-review/SKILL.md +++ b/.claude/skills/local-code-review/SKILL.md @@ -1,18 +1,32 @@ --- name: local-code-review description: Review local code changes in Databao repositories before a commit or PR. Use when the user wants a review of staged or unstaged diffs, local branches, or pre-merge changes. Focus on correctness, regressions, missing tests, API/CLI behavior changes, executor or tooling changes, dependency or plugin-loading risks, and user-visible behavior changes. -compatibility: git must be installed and rg (ripgrep) is recommended. +argument-hint: "[scope: staged | branch | files:]" +context: fork +agent: reviewer --- # Local Code Review -This skill is for review, not implementation. Default to read-only inspection. Do not edit files unless the user explicitly switches from review to fixing issues. +You are reviewing code changes for the Databao CLI project. +You have NO prior context about why these changes were made — review +purely on merit. -If the user didn't request an explicit review, ask the user if a review is wanted before doing it. +## Scope + +Review scope: $ARGUMENTS + +If no scope was provided, default to `branch`. + +Accepted scopes: + +- `staged` — review only staged changes (`git diff --cached`) +- `branch` — review the branch diff against main (default) +- `files:` — review specific files or directories (e.g. `files:src/databao_cli/mcp/`) ## Review Goal -Find the highest-signal problems in the current local changes: +Find the highest-signal problems in the changes under review: - correctness bugs - regressions in user-visible behavior @@ -25,18 +39,21 @@ Find the highest-signal problems in the current local changes: Keep summaries brief. ## Steps + ### 1. Scope Discovery Start by identifying what changed: 1. Run `git status --short`. -2. Inspect both staged and unstaged changes with `git diff --stat` and `git diff --cached --stat`. +2. Inspect the relevant changes based on the scope you were given: + - **branch**: diff from the merge base with the main branch + - **staged**: `git diff --cached --stat` and `git diff --cached` + - **files**: read the specified files and their recent git history 3. Read the actual diffs for changed files before reading large surrounding files. -4. If the user asks for a branch review rather than just working tree changes, diff from the merge base with the main branch. Prefer `rg`, `git diff`, and targeted file reads over broad scans. -#### Databao Review Priorities +### Databao Review Priorities Pay extra attention to these repository-specific areas: @@ -133,7 +150,7 @@ Format the results using Markdown. ## Guardrails -- Do not rewrite the code during review mode. +- Include short code snippets to illustrate suggested fixes, but keep them conceptual — avoid pasting full rewrites or verbose replacement blocks. - Do not bury findings under a long summary. - Do not claim tests passed unless you ran them. - Do not over-index on style when behavior risks exist. diff --git a/.claude/skills/local-code-review/evals/evals.json b/.claude/skills/local-code-review/evals/evals.json index 93e9b75e..232c4057 100644 --- a/.claude/skills/local-code-review/evals/evals.json +++ b/.claude/skills/local-code-review/evals/evals.json @@ -3,7 +3,7 @@ "evals": [ { "id": 1, - "prompt": "Review the changes on my current branch before I open a PR. Keep this in review mode only and focus on correctness, regressions, and missing tests.", + "prompt": "/local-code-review branch", "expected_output": "Agent determines branch review scope from git, inspects the actual diff, and returns a findings-first review or an explicit no-findings verdict with file references and a testing note.", "assertions": [ "Agent determines scope from the current branch diff against the main branch or merge base", @@ -15,35 +15,36 @@ }, { "id": 2, - "prompt": "Please review my local changes before I commit. Do not fix anything. Focus on user-visible behavior, integration risks, and whether tests are missing.", - "expected_output": "Agent performs working-tree scope discovery from git, stays read-only, and returns severity-ordered findings tied to the local diff.", + "prompt": "/local-code-review staged", + "expected_output": "Agent performs staged-changes scope discovery from git, stays read-only, and returns severity-ordered findings tied to the staged diff.", "assertions": [ - "Agent runs `git status --short` to establish review scope", - "Agent inspects both `git diff --stat` and `git diff --cached --stat`", + "Agent inspects staged changes via git diff --cached", "Agent reads the diff for changed files before reading large surrounding files", "Output orders findings by severity and ties them to concrete behavior risks", - "Output includes open questions or assumptions" + "Output includes open questions or assumptions", + "Agent does NOT edit any files" ] }, { "id": 3, - "prompt": "I changed a few files in this repo. What do you think?", - "expected_output": "Agent asks whether the user wants an explicit code review before beginning repository inspection.", + "prompt": "/local-code-review files:src/databao_cli/mcp/", + "expected_output": "Agent reviews the specified files/directory, reading them and their recent git history.", "assertions": [ - "Agent asks a clarifying question about whether a review is wanted", - "Agent does NOT start with git inspection or a full review before that clarification", - "Agent keeps the response concise and framed as a clarification request" + "Agent focuses review on files under src/databao_cli/mcp/", + "Agent reads the actual file contents or diffs for the specified path", + "Output presents findings with file and line references", + "Agent does NOT edit any files" ] }, { "id": 4, - "prompt": "Review my current branch and run any safe validation that makes sense, but do not mutate files.", - "expected_output": "Agent chooses only non-mutating validation that exists in this repo, then reports findings and validation results.", + "prompt": "/local-code-review", + "expected_output": "Agent defaults to branch scope when no argument is provided and runs a branch review.", "assertions": [ + "Agent defaults to reviewing the branch diff against main", "Agent inspects the repo's available tooling before choosing validation commands", - "If validation is run, agent uses non-mutating commands such as targeted `uv run pytest` or check-only linters", - "Agent does NOT run mutating commands such as formatter fixes or broad wrapper commands without confirming they are non-mutating", - "Output reports what validation was run or why validation was skipped" + "If validation is run, agent uses non-mutating commands", + "Agent does NOT run mutating commands such as formatter fixes" ] } ] diff --git a/.claude/skills/review-architecture/SKILL.md b/.claude/skills/review-architecture/SKILL.md index b9320b9e..51fa4ad0 100755 --- a/.claude/skills/review-architecture/SKILL.md +++ b/.claude/skills/review-architecture/SKILL.md @@ -1,12 +1,28 @@ --- name: review-architecture description: Review architecture quality, maintainability, and developer experience before or after significant changes. Use when introducing a new CLI command or MCP tool, refactoring core module boundaries, diagnosing repeated dev friction, or preparing a PR with broad structural impact. +argument-hint: "[scope: branch | module: | full]" +context: fork +agent: reviewer --- # Review Architecture -Review architecture quality, maintainability, and developer experience (Dev UX) -before or after significant changes. +You are reviewing the architecture of the Databao CLI project. +You have NO prior context about why these changes were made — review +purely on merit. + +## Scope + +Review scope: $ARGUMENTS + +If no scope was provided, default to `branch`. + +Accepted scopes: + +- `branch` — review architecture of code changed on the current branch (default) +- `module:` — review a specific module (e.g. `module:src/databao_cli/mcp/`) +- `full` — review the full project architecture ## Primary sources of truth diff --git a/.claude/skills/review-architecture/evals/evals.json b/.claude/skills/review-architecture/evals/evals.json index 1071ddc2..d022bd5e 100644 --- a/.claude/skills/review-architecture/evals/evals.json +++ b/.claude/skills/review-architecture/evals/evals.json @@ -3,7 +3,7 @@ "evals": [ { "id": 1, - "prompt": "Review the architecture of the MCP server code.", + "prompt": "/review-architecture module:src/databao_cli/mcp/", "expected_output": "A structured report covering the MCP module with Current State, Risks/Gaps, Recommendations, Doc Sync, and Validation Plan.", "assertions": [ "Output contains a 'Current State' section with 3-6 bullets", @@ -15,14 +15,25 @@ }, { "id": 2, - "prompt": "I'm about to add a new CLI command for managing API keys. Review the architecture to see if there are concerns.", + "prompt": "/review-architecture module:src/databao_cli/commands/", "expected_output": "A focused review of the command structure and extension patterns.", "assertions": [ "Output examines src/databao_cli/commands/ structure", - "Output references the Click command registration pattern in __main__.py", - "Output provides actionable recommendations for the new command", + "Output references the Click command registration pattern", + "Output provides actionable recommendations", "Output separates immediate actions from longer-term improvements" ] + }, + { + "id": 3, + "prompt": "/review-architecture full", + "expected_output": "A comprehensive architecture review covering all major modules.", + "assertions": [ + "Output reviews multiple modules (commands, mcp, ui, project)", + "Output references primary sources of truth (docs/architecture.md, CLAUDE.md)", + "Output contains all five report sections (Current State, Risks/Gaps, Recommendations, Doc Sync, Validation Plan)", + "Agent does NOT edit any files" + ] } ] } diff --git a/CLAUDE.md b/CLAUDE.md index 669784f3..e566464a 100755 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -5,8 +5,12 @@ Claude Code entrypoint for agent instructions in this repository. ## Agent Delta (Claude Code) - Prefer concise updates with clear file/command references. -- Execute directly when safe; ask questions only if truly blocked. - YouTrack MCP must be configured (see DEVELOPMENT.md). Use get_issue / update_issue tools. +- **Pacing rule**: pause for user confirmation between phases (plan → + implement → validate → commit → PR). Present what you intend to do or + what you just did, then wait for a go-ahead before moving to the next + phase. Small, safe actions within a phase (running tests, reading files) + do not require a pause. ## References @@ -112,19 +116,21 @@ style issues; do not manually fix formatting. ## After Completing Work -1. **Implement** — write the code changes to satisfy the ticket - requirements. -2. **Write tests** — add or update unit tests covering the new behavior; - run `make test` to verify they pass. -3. **Test & lint** — run `make check` then `make test-cov-check`. Fix any +Each numbered step below is a **phase**. Present the outcome of each +phase and wait for user confirmation before starting the next one. + +1. **Plan** — outline the approach and list files you intend to change. +2. **Implement** — write the code changes to satisfy the ticket + requirements, including tests. Run `make test` to verify they pass. +3. **Validate** — run `make check` then `make test-cov-check`. Fix any failures before proceeding. -4. **Review the code** — review the code locally. Fix any high-severity - findings before proceeding. -5. **Architecture review** — review architecture quality of the changed - code. Fix any high-severity issues before proceeding. -6. **Branch** — use the `create-branch` skill. -7. **Commit & PR** — use the `create-pr` skill (stages, commits, pauses - for confirmation, pushes, and opens the PR). -8. **Update YouTrack** — move the ticket to **Review** state and add +4. **Review** — run `local-code-review` and `review-architecture` skills. + Both run in forked sub-agent context (no prior conversation state) + and can run **in parallel**. +5. **Branch & commit** — use the `create-branch` skill, then stage and + commit following **Commit Messages** conventions. +6. **PR** — use the `create-pr` skill (pushes and opens the PR). +7. **Update YouTrack** — move the ticket to **Review** state and add a comment with the PR URL. -9. Never commit directly to `main`. + +Never commit directly to `main`. diff --git a/tests/test_build.py b/tests/test_build.py index 3067e7b5..a624950c 100644 --- a/tests/test_build.py +++ b/tests/test_build.py @@ -8,8 +8,9 @@ def test_databao_build_duckdb_datasource(project_layout: ProjectLayout, tmp_path_factory: TempPathFactory) -> None: test_db = tmp_path_factory.mktemp("duckdb") / "test_db.duckdb" - duckdb.connect(str(test_db)) - duckdb.execute("CREATE TABLE t1 AS SELECT 1 AS i, 2 AS j;") + conn = duckdb.connect(str(test_db)) + conn.execute("CREATE TABLE t1 AS SELECT 1 AS i, 2 AS j;") + conn.close() dce_domain_manager = DatabaoContextDomainManager(domain_dir=project_layout.root_domain_dir) dce_domain_manager.create_datasource_config( diff --git a/tests/test_index.py b/tests/test_index.py index 2d2a3310..8399f0d5 100644 --- a/tests/test_index.py +++ b/tests/test_index.py @@ -8,8 +8,9 @@ def test_databao_index_duckdb_datasource(project_layout: ProjectLayout, tmp_path_factory: TempPathFactory) -> None: test_db = tmp_path_factory.mktemp("duckdb") / "test_db.duckdb" - duckdb.connect(str(test_db)) - duckdb.execute("CREATE TABLE t1 AS SELECT 1 AS i, 2 AS j;") + conn = duckdb.connect(str(test_db)) + conn.execute("CREATE TABLE t1 AS SELECT 1 AS i, 2 AS j;") + conn.close() dce_domain_manager = DatabaoContextDomainManager(domain_dir=project_layout.root_domain_dir) dce_domain_manager.create_datasource_config(