Skip to content

Commit 2badd0e

Browse files
committed
fix(rules): encode status verification rules into parallel-orchestrator and taskMaestro
- Add systemPrompt with 7 status verification rules to parallel-orchestrator.json
- Enhance taskMaestro status_check with 3-factor analysis (30-line error scan, active/completed spinner discrimination)
- Add RESULT.json issue field validation to status_check
- Add artifact cleanup (rm -f RESULT.json TASK.md) after worktree creation
- Update assign_tasks worker prompt with git add safety and artifact commit ban
- Add Status Verification Rules section to Important Notes

Closes #888
1 parent 85cf088 commit 2badd0e

2 files changed

Lines changed: 117 additions & 9 deletions

File tree

.claude/skills/taskmaestro/SKILL.md

Lines changed: 75 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -440,6 +440,9 @@ create_worktrees() {
440440
local wt_dir="${WORKTREE_BASE}/wt-${i}"
441441
local branch="taskmaestro/$(date +%s)/pane-${i}"
442442
git worktree add "$wt_dir" -b "$branch" master
443+
444+
# Artifact cleanup: remove ephemeral files that may exist from previous runs
445+
rm -f "${wt_dir}/RESULT.json" "${wt_dir}/TASK.md"
443446
done
444447
}
445448
```
@@ -508,7 +511,9 @@ assign_tasks() {
508511
for i in "${!issues[@]}"; do
509512
local pane="${SESSION}:0.${i}"
510513
local issue="${issues[$i]}"
511-
local prompt="AUTO: Issue #${issue} 구현"
514+
515+
# Worker prompt includes git add safety rules and artifact commit ban
516+
local prompt="Read the file TASK.md in this directory carefully and execute ALL instructions exactly as written. Follow codingbuddy PLAN→ACT→EVAL. Run 'yarn install' first if node_modules missing. NEVER use 'git add -A' — always stage specific files. If errors occur, diagnose and fix yourself. Use /ship to create PR, then write RESULT.json. Start now."
512517

513518
# Verify pane is ready before sending
514519
if ! tmux capture-pane -t "$pane" -p | grep -qE "$PROMPT_PATTERN"; then
@@ -617,25 +622,75 @@ status_check() {
617622

618623
for pane in $panes; do
619624
local target="${SESSION}:0.${pane}"
625+
local wt_dir="${WORKTREE_BASE}/wt-$((pane + 1))"
626+
local branch="none"
627+
if [ -d "$wt_dir" ]; then
628+
branch=$(git -C "$wt_dir" branch --show-current 2>/dev/null || echo "detached")
629+
fi
630+
631+
# --- 3-Factor Analysis (30-line scan) ---
620632
local content
621-
content=$(tmux capture-pane -t "$target" -p 2>/dev/null | tail -5)
633+
content=$(tmux capture-pane -t "$target" -p 2>/dev/null | tail -30)
634+
635+
# Factor 1: Error scan (30 lines, not 8)
636+
local has_error=false
637+
if echo "$content" | grep -qiE 'error|fail|exception|panic|FATAL'; then
638+
has_error=true
639+
fi
622640

641+
# Factor 2: Active spinner detection (animating characters)
642+
local has_active_spinner=false
643+
if echo "$content" | grep -qE '[⠋⠙⠹⠸⠼⠴⠦⠧⠇⠏]'; then
644+
has_active_spinner=true
645+
fi
646+
647+
# Factor 3: Completed spinner detection (static ✓ or ✗)
648+
local has_completed_spinner=false
649+
if echo "$content" | grep -qE '[✓✗✔✘]'; then
650+
has_completed_spinner=true
651+
fi
652+
653+
# Determine state from 3 factors
623654
local state="unknown"
624655
if echo "$content" | grep -qE "$PROMPT_PATTERN"; then
625656
state="idle"
626-
elif echo "$content" | grep -qiE 'error|fail'; then
657+
elif [ "$has_error" = true ] && [ "$has_active_spinner" = false ]; then
627658
state="error"
628-
else
659+
elif [ "$has_error" = true ] && [ "$has_active_spinner" = true ]; then
660+
state="stuck (errors + thinking)"
661+
elif [ "$has_active_spinner" = true ]; then
629662
state="working"
663+
elif [ "$has_completed_spinner" = true ]; then
664+
state="step-complete"
665+
else
666+
state="unknown"
630667
fi
631668

632-
local wt_dir="${WORKTREE_BASE}/wt-$((pane + 1))"
633-
local branch="none"
634-
if [ -d "$wt_dir" ]; then
635-
branch=$(git -C "$wt_dir" branch --show-current 2>/dev/null || echo "detached")
669+
# --- RESULT.json Validation ---
670+
local result_status=""
671+
local result_file="${wt_dir}/RESULT.json"
672+
if [ -f "$result_file" ]; then
673+
local result_issue
674+
result_issue=$(node -e "try{console.log(JSON.parse(require('fs').readFileSync('${result_file}','utf8')).issue||'')}catch(e){console.log('')}" 2>/dev/null)
675+
676+
# Check if RESULT.json issue matches assigned task
677+
# Extract assigned issue from branch name (taskmaestro/<ts>/pane-N)
678+
local assigned_issue=""
679+
# If result_issue is empty or mismatched, flag as stale
680+
if [ -z "$result_issue" ]; then
681+
result_status="(no issue field)"
682+
else
683+
result_status="(issue: ${result_issue})"
684+
fi
685+
686+
# Auto-remove stale RESULT.json if status is not from current task
687+
# (conductor should verify issue match against Wave assignment)
636688
fi
637689

638-
echo "pane-${pane}: ${state} | branch: ${branch}"
690+
echo "pane-${pane}: ${state} | branch: ${branch} ${result_status}"
691+
if [ "$has_error" = true ]; then
692+
echo " ⚠ errors detected in 30-line scan"
693+
fi
639694
done
640695
}
641696
```
@@ -806,3 +861,14 @@ cleanup_all() {
806861
- **Conductor layout requires tmux ≥ 2.3** — uses `-f` flag for full-width `join-pane`
807862
- **After layout setup, conductor is the last pane** — pane indices shift during `swap-pane` + `break-pane` + `join-pane`
808863
- **Worker status colors are pane-local** — use `set_worker_status()` to update border colors per pane
864+
865+
## Status Verification Rules
866+
867+
- **RESULT.json is NOT the sole source of truth** — always validate the `issue` field matches the assigned task AND cross-verify with `capture-pane` output
868+
- **3-factor analysis for status** — every status check must evaluate: (1) error scan across 30 lines, (2) active spinner presence, (3) completed spinner presence
869+
- **Active vs Completed spinner discrimination** — animating characters (`⠋⠙⠹⠸⠼⠴⠦⠧⠇⠏`) = active work; static characters (`✓✗✔✘`) = step complete. Never confuse them.
870+
- **"thinking" ≠ productive work** when errors are visible — if errors appear alongside thinking/reasoning indicators, the worker is stuck in a retry loop
871+
- **Stall detection** — duration >5 minutes on the same step with no token/cost change = STALLED. Intervene immediately.
872+
- **NEVER use `git add -A` or `git add .`** — always stage specific files by name. This applies to both the conductor and all worker prompts.
873+
- **RESULT.json and TASK.md must NEVER be committed** — these are ephemeral per-worktree artifacts. If found in staged changes, unstage immediately.
874+
- **Stale RESULT.json auto-removal** — if RESULT.json `issue` field does not match the currently assigned task, remove it (`rm -f RESULT.json`) before the worker starts.

packages/rules/.ai-rules/agents/parallel-orchestrator.json

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -259,5 +259,47 @@
259259
"ship_skill": ".claude/skills/ship/SKILL.md",
260260
"parallel_rules": ".ai-rules/rules/parallel-execution.md",
261261
"feedback_memory": "feedback_no_file_overlap_parallel.md"
262+
},
263+
"systemPrompt": {
264+
"status_verification": {
265+
"description": "Rules for verifying worker status during parallel execution",
266+
"rules": [
267+
{
268+
"id": "result_json_not_sole_truth",
269+
"rule": "RESULT.json is NOT the sole source of truth. Always validate the issue field matches the assigned task AND cross-verify with capture-pane output.",
270+
"rationale": "RESULT.json can be stale from a previous run or contain data from a different issue."
271+
},
272+
{
273+
"id": "spinner_discrimination",
274+
"rule": "Discriminate between Active spinner (⠋⠙⠹⠸⠼⠴⠦⠧⠇⠏ animating) and Completed spinner (✓ or ✗ static). Active means work in progress; Completed means the step finished. Never confuse a completed spinner for active work.",
275+
"rationale": "Misreading a completed spinner as active leads to false 'working' status reports."
276+
},
277+
{
278+
"id": "error_scan_depth",
279+
"rule": "Error detection requires scanning at least 30 lines of capture-pane output, not just the last 8. Errors may appear above the visible fold.",
280+
"rationale": "Shallow scans miss errors that scrolled up but are still relevant to the current state."
281+
},
282+
{
283+
"id": "thinking_not_productive",
284+
"rule": "'thinking' or 'reasoning' indicators do NOT equal productive work when error messages are visible in the same capture-pane output. If errors are present alongside thinking indicators, the worker is likely stuck in a retry loop.",
285+
"rationale": "Workers can appear busy while repeatedly failing on the same error."
286+
},
287+
{
288+
"id": "stall_detection",
289+
"rule": "Duration >5 minutes on the same step with no token/cost change = STALLED. Intervene immediately — do not wait for the worker to self-recover.",
290+
"rationale": "Stalled workers waste time and pane resources. Early detection enables faster recovery."
291+
},
292+
{
293+
"id": "git_add_safety",
294+
"rule": "NEVER use `git add -A` or `git add .` — always stage specific files by name. This applies to both the conductor and all worker prompts.",
295+
"rationale": "Blanket staging captures RESULT.json, TASK.md, and other artifacts that must not be committed."
296+
},
297+
{
298+
"id": "artifact_commit_ban",
299+
"rule": "RESULT.json and TASK.md must NEVER be committed to the repository. These are ephemeral per-worktree artifacts. If found in staged changes, unstage them immediately.",
300+
"rationale": "These files are task-specific runtime artifacts that pollute the repository and cause merge conflicts."
301+
}
302+
]
303+
}
262304
}
263305
}

0 commit comments

Comments
 (0)