Skip to content

Commit 690ce26

Browse files
sjarmak and claude committed
fix: make RepoQA large-repo tasks runnable with proper Dockerfiles and verifiers
Previous commit created task scaffolding but with non-functional stub Dockerfiles (bare python:3.11-slim with no repo clone or MCP config).

Fixes:
- ccb_understand baseline Dockerfiles now use ccb-repo-* base images (k8s, kafka, rust, envoy, sklearn, pandas) or clone-as-claude for repos without base images (firefox, vscode, grafana)
- Added Dockerfile.sg_only for all 14 ccb_understand tasks with SOURCEGRAPH_REPO_NAME, clone manifest, and sg_only_mode sentinel
- Added 4 Dockerfile variants for all 14 ccx-onboard-search tasks (Dockerfile, Dockerfile.sg_only, Dockerfile.artifact_only, Dockerfile.artifact_baseline)
- Added sgonly_verifier_wrapper.sh to all 28 tasks
- Added set -eo pipefail to all test.sh (ABC audit O.d fix)
- test.sh now sources sgonly_verifier_wrapper.sh for clone-at-verify
- Filled in sg-evals mirror names for sklearn, pandas, vscode, grafana
- Updated generator script with REPO_INFRA mapping and Dockerfile generation functions

ABC audit: Grade A (was D), 0 critical failures (was 1).
Ground truth: 14/14 functions verified against live Sourcegraph/GitHub.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent fe4bdec commit 690ce26

File tree

141 files changed

+7215
-304
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

141 files changed

+7215
-304
lines changed
Lines changed: 18 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,19 @@
1-
FROM python:3.11-slim
2-
RUN apt-get update && apt-get install -y git curl && rm -rf /var/lib/apt/lists/*
1+
FROM ubuntu:22.04
2+
ENV DEBIAN_FRONTEND=noninteractive
3+
RUN apt-get update && apt-get install -y --no-install-recommends \
4+
git ca-certificates curl python3 python3-pip ripgrep && \
5+
rm -rf /var/lib/apt/lists/*
36
RUN pip install --no-cache-dir numpy
4-
WORKDIR /app
5-
RUN mkdir -p /logs/agent /logs/verifier
7+
RUN adduser --disabled-password --gecos '' claude 2>/dev/null || true
8+
RUN mkdir -p /workspace /app /logs/agent /logs/verifier && \
9+
chown -R claude:claude /workspace /app /logs
10+
USER claude
11+
WORKDIR /workspace
12+
RUN git config --global user.email "agent@example.com" && \
13+
git config --global user.name "Agent" && \
14+
git config --global safe.directory '*'
15+
# Clone directly into the (still empty) WORKDIR. Cloning into a subdirectory
# and moving files up requires a `.*` glob (which also matches `.` and `..`)
# and error suppression, which lets a failed clone produce a "successful"
# image with an empty /workspace. A single clone fails the build on error.
RUN git clone --depth 1 https://github.com/sg-evals/kubernetes--8c9c67c0.git /workspace
18+
USER root
19+
ENTRYPOINT []
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
FROM ubuntu:22.04
2+
ENV DEBIAN_FRONTEND=noninteractive
3+
RUN apt-get update && apt-get install -y --no-install-recommends \
4+
git ca-certificates curl python3 python3-pip ripgrep && \
5+
rm -rf /var/lib/apt/lists/*
6+
RUN pip install --no-cache-dir numpy
7+
RUN adduser --disabled-password --gecos '' claude 2>/dev/null || true
8+
RUN mkdir -p /workspace /app /logs/agent /logs/verifier && \
9+
chown -R claude:claude /workspace /app /logs
10+
USER claude
11+
WORKDIR /workspace
12+
RUN git config --global user.email "agent@example.com" && \
13+
git config --global user.name "Agent" && \
14+
git config --global safe.directory '*'
15+
# Clone directly into the (still empty) WORKDIR. Cloning into a subdirectory
# and moving files up requires a `.*` glob (which also matches `.` and `..`)
# and error suppression, which lets a failed clone produce a "successful"
# image with an empty /workspace. A single clone fails the build on error.
RUN git clone --depth 1 https://github.com/sg-evals/kubernetes--8c9c67c0.git /workspace
18+
USER root
19+
RUN touch /tmp/.artifact_only_mode && echo '/workspace' > /tmp/.artifact_only_workdir
20+
ENTRYPOINT []
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
FROM ubuntu:22.04
2+
ENV SOURCEGRAPH_REPO_NAME=sg-evals/kubernetes--8c9c67c0
3+
ENV DEBIAN_FRONTEND=noninteractive
4+
RUN apt-get update && apt-get install -y --no-install-recommends \
5+
git curl python3 python3-pip ca-certificates && \
6+
rm -rf /var/lib/apt/lists/*
7+
RUN pip install --no-cache-dir numpy
8+
WORKDIR /workspace
9+
RUN git init && git config user.email "agent@example.com" && git config user.name "Agent"
10+
RUN mkdir -p /app /logs/agent /logs/verifier
11+
RUN touch /tmp/.artifact_only_mode && echo '/workspace' > /tmp/.artifact_only_workdir
12+
RUN (adduser --disabled-password --gecos '' claude 2>/dev/null || true) && \
13+
for d in /workspace /app /testbed /logs; do [ -d "$d" ] && chown -R claude:claude "$d"; done || true
14+
ENTRYPOINT []
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
FROM ubuntu:22.04
2+
ENV SOURCEGRAPH_REPO_NAME=sg-evals/kubernetes--8c9c67c0
3+
ENV DEBIAN_FRONTEND=noninteractive
4+
RUN apt-get update && apt-get install -y --no-install-recommends \
5+
git curl python3 python3-pip ripgrep ca-certificates && \
6+
rm -rf /var/lib/apt/lists/*
7+
RUN pip install --no-cache-dir numpy
8+
WORKDIR /workspace
9+
RUN git init && git config user.email "agent@example.com" && git config user.name "Agent"
10+
RUN mkdir -p /app /logs/agent /logs/verifier
11+
RUN echo '{"workdir": "/workspace", "repos": [{"mirror": "sg-evals/kubernetes--8c9c67c0", "target_dir": "."}]}' > /tmp/.sg_only_clone_manifest.json
12+
RUN touch /tmp/.sg_only_mode
13+
RUN (adduser --disabled-password --gecos '' claude 2>/dev/null || true) && \
14+
for d in /workspace /app /testbed /logs; do [ -d "$d" ] && chown -R claude:claude "$d"; done || true
15+
ENTRYPOINT []
Lines changed: 194 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,194 @@
1+
#!/bin/bash
2+
# SG-only verifier wrapper: restore full repo + overlay agent changes
3+
#
4+
# Source this at the TOP of test.sh for build-requiring tasks that use
5+
# sg_only_env mode. It detects /tmp/.sg_only_mode and:
6+
#
7+
# PRIMARY PATH (clone manifest):
8+
# 1. Reads clone manifest from /tmp/.sg_only_clone_manifest.json
9+
# 2. Backs up agent-written files (non-empty, non-git, non-test)
10+
# 3. Clones each mirror repo with --depth 1
11+
# 4. Re-runs inject_defects.sh if specified in manifest
12+
# 5. Overlays agent changes on top
13+
#
14+
# LEGACY FALLBACK (pre-v2 images):
15+
# If manifest is missing but /repo_full/ exists, restores from /repo_full/
16+
# as before. This ensures unregenerated images still work during rollout.
17+
#
18+
# For non-sg_only runs, this script is a no-op.
19+
#
20+
# Usage in test.sh:
21+
# #!/bin/bash
22+
# # Source the sg_only wrapper (no-op if not in sg_only mode)
23+
# if [ -f /tests/sgonly_verifier_wrapper.sh ]; then
24+
# source /tests/sgonly_verifier_wrapper.sh
25+
# fi
26+
# # ... rest of test.sh as normal ...
27+
28+
if [ ! -f /tmp/.sg_only_mode ]; then
29+
# Not in sg_only mode — nothing to do
30+
return 0 2>/dev/null || exit 0
31+
fi
32+
33+
# Idempotency guard: skip if already sourced (avoids double-clone when
34+
# test.sh sources this wrapper and then eval.sh sources it again)
35+
if [ -n "${_SG_ONLY_RESTORED:-}" ]; then
36+
return 0 2>/dev/null || exit 0
37+
fi
38+
export _SG_ONLY_RESTORED=1
39+
40+
echo "[sg_only_verifier] Detected sg_only mode, restoring full repo..."
41+
42+
# ---------------------------------------------------------------------------
43+
# Helper: back up agent-written files from a directory
44+
# ---------------------------------------------------------------------------
45+
# Copy every non-empty regular file under the given directory into
# /tmp/agent_work, preserving relative paths. Files under ./.git/, ./tests/
# and ./.claude/ are skipped.
#
# Arguments:
#   $1 - directory to back up; the function is a no-op if it does not exist.
# Returns:
#   0 on success or when the directory is missing; non-zero if the directory
#   cannot be entered.
backup_agent_files() {
    local srcdir="$1"
    local backup_root=/tmp/agent_work

    if [ ! -d "$srcdir" ]; then
        return
    fi
    mkdir -p "$backup_root"

    # Run in a subshell so the caller's working directory is left untouched,
    # and check the cd so a failure can never make find walk the wrong tree.
    (
        cd "$srcdir" || {
            echo "[sg_only_verifier] WARNING: Cannot enter $srcdir, skipping backup"
            exit 1
        }
        find . -type f -size +0 \
            ! -path './.git/*' \
            ! -path './tests/*' \
            ! -path './.claude/*' \
            -print0 | while IFS= read -r -d '' f; do
            mkdir -p "$backup_root/$(dirname "$f")"
            cp "$f" "$backup_root/$f"
        done
        # Copy failures keep their original semantics: fatal only when the
        # sourcing script enabled errexit, otherwise best-effort.
        exit 0
    )
    local rc=$?
    if [ "$rc" -ne 0 ]; then
        return "$rc"
    fi
    echo "[sg_only_verifier] Backed up agent-written files from $srcdir"
}
62+
63+
# ---------------------------------------------------------------------------
64+
# Helper: overlay agent-written files back onto a directory
65+
# ---------------------------------------------------------------------------
66+
# Copy every regular file saved under /tmp/agent_work onto the given
# directory at the same relative path, creating parent directories as needed.
#
# Arguments:
#   $1 - directory to receive the files.
# Returns:
#   0 on success or when /tmp/agent_work does not exist; non-zero if the
#   backup directory cannot be entered.
overlay_agent_files() {
    local targetdir="$1"
    local backup_root=/tmp/agent_work

    if [ ! -d "$backup_root" ]; then
        return
    fi

    # Run in a subshell so the caller's working directory is left untouched,
    # and check the cd so a failure can never copy files from the wrong tree.
    (
        cd "$backup_root" || {
            echo "[sg_only_verifier] WARNING: Cannot enter $backup_root, skipping overlay"
            exit 1
        }
        find . -type f -print0 | while IFS= read -r -d '' f; do
            target="${targetdir}/${f#./}"
            mkdir -p "$(dirname "$target")"
            cp "$f" "$target"
        done
        # Copy failures keep their original semantics: fatal only when the
        # sourcing script enabled errexit, otherwise best-effort.
        exit 0
    )
    local rc=$?
    if [ "$rc" -ne 0 ]; then
        return "$rc"
    fi
    echo "[sg_only_verifier] Overlaid agent changes onto $targetdir"
}
79+
80+
# ---------------------------------------------------------------------------
81+
# PRIMARY PATH: clone manifest
82+
# ---------------------------------------------------------------------------
83+
MANIFEST="/tmp/.sg_only_clone_manifest.json"
84+
85+
if [ -f "$MANIFEST" ]; then
86+
echo "[sg_only_verifier] Found clone manifest, using clone-at-verify strategy"
87+
88+
# Parse manifest with python3 (always available in our images)
89+
WORKDIR=$(python3 -c "import json; m=json.load(open('$MANIFEST')); print(m.get('workdir', '/workspace'))")
90+
echo "[sg_only_verifier] Working directory: $WORKDIR"
91+
92+
# 1. Back up agent-written files
93+
backup_agent_files "$WORKDIR"
94+
95+
# 2. Clone each mirror repo
96+
REPO_COUNT=$(python3 -c "import json; m=json.load(open('$MANIFEST')); print(len(m.get('repos', [])))")
97+
for i in $(seq 0 $((REPO_COUNT - 1))); do
98+
MIRROR=$(python3 -c "import json; m=json.load(open('$MANIFEST')); print(m['repos'][$i]['mirror'])")
99+
TARGET_DIR=$(python3 -c "import json; m=json.load(open('$MANIFEST')); print(m['repos'][$i].get('target_dir', '.'))")
100+
CLONE_URL="https://github.com/${MIRROR}.git"
101+
102+
if [ "$TARGET_DIR" = "." ]; then
103+
CLONE_TARGET="$WORKDIR"
104+
else
105+
CLONE_TARGET="${WORKDIR}/${TARGET_DIR}"
106+
fi
107+
108+
echo "[sg_only_verifier] Cloning $MIRROR -> $CLONE_TARGET"
109+
110+
# Remove existing directory contents (truncated files) but preserve .git
111+
# for target_dir="." we need to be careful with the working directory
112+
if [ "$TARGET_DIR" = "." ]; then
113+
# For root workspace: remove everything except .git, then clone into temp and move
114+
TMPCLONE=$(mktemp -d)
115+
if git clone --depth 1 "$CLONE_URL" "$TMPCLONE" 2>/dev/null; then
116+
# Remove old files (except .git and tests)
117+
find "$CLONE_TARGET" -mindepth 1 -maxdepth 1 \
118+
! -name '.git' ! -name 'tests' ! -name '.claude' \
119+
-exec rm -rf {} + 2>/dev/null || true
120+
# Copy cloned files (except .git)
121+
cd "$TMPCLONE"
122+
find . -mindepth 1 -maxdepth 1 ! -name '.git' -exec cp -a {} "$CLONE_TARGET/" \;
123+
# If workspace has no HEAD (bare git init), use mirror .git
124+
# so that git diff HEAD works for diff-based verifiers.
125+
if ! git -C "$CLONE_TARGET" rev-parse HEAD >/dev/null 2>&1; then
126+
rm -rf "$CLONE_TARGET/.git"
127+
cp -a "$TMPCLONE/.git" "$CLONE_TARGET/.git"
128+
echo "[sg_only_verifier] Replaced empty .git with mirror .git for diff baseline"
129+
fi
130+
cd /
131+
rm -rf "$TMPCLONE"
132+
echo "[sg_only_verifier] Restored $MIRROR to $CLONE_TARGET"
133+
else
134+
echo "[sg_only_verifier] WARNING: Failed to clone $CLONE_URL"
135+
rm -rf "$TMPCLONE"
136+
fi
137+
else
138+
# For subdirectory: remove and re-clone
139+
rm -rf "$CLONE_TARGET"
140+
if git clone --depth 1 "$CLONE_URL" "$CLONE_TARGET" 2>/dev/null; then
141+
echo "[sg_only_verifier] Restored $MIRROR to $CLONE_TARGET"
142+
else
143+
echo "[sg_only_verifier] WARNING: Failed to clone $CLONE_URL"
144+
fi
145+
fi
146+
done
147+
148+
# 3. Re-run inject_defects if specified
149+
INJECT_SCRIPT=$(python3 -c "import json; m=json.load(open('$MANIFEST')); print(m.get('inject_defects', ''))")
150+
if [ -n "$INJECT_SCRIPT" ] && [ -f "$INJECT_SCRIPT" ]; then
151+
echo "[sg_only_verifier] Running defect injection: $INJECT_SCRIPT"
152+
cd "$WORKDIR"
153+
chmod +x "$INJECT_SCRIPT"
154+
bash "$INJECT_SCRIPT"
155+
echo "[sg_only_verifier] Defect injection complete"
156+
fi
157+
158+
# 4. Overlay agent changes
159+
overlay_agent_files "$WORKDIR"
160+
161+
# Return to working directory
162+
cd "$WORKDIR"
163+
echo "[sg_only_verifier] Clone-at-verify restore complete, proceeding with tests"
164+
165+
return 0 2>/dev/null || exit 0
166+
fi
167+
168+
# ---------------------------------------------------------------------------
169+
# LEGACY FALLBACK: /repo_full/ restore (for pre-v2 images)
170+
# ---------------------------------------------------------------------------
171+
echo "[sg_only_verifier] No clone manifest found, trying legacy /repo_full/ restore..."
172+
173+
# Read the working directory
174+
WORKDIR="$(cat /tmp/.sg_only_workdir 2>/dev/null || echo '/app')"
175+
echo "[sg_only_verifier] Working directory: $WORKDIR"
176+
177+
if [ ! -d /repo_full ]; then
178+
echo "[sg_only_verifier] WARNING: /repo_full not found, cannot restore"
179+
return 0 2>/dev/null || exit 0
180+
fi
181+
182+
# 1. Find files the agent wrote (non-empty, non-git, non-test files)
183+
backup_agent_files "$WORKDIR"
184+
185+
# 2. Restore full repo from backup
186+
rsync -a --delete /repo_full/ "$WORKDIR/"
187+
echo "[sg_only_verifier] Restored full repo from /repo_full/"
188+
189+
# 3. Overlay agent's changes
190+
overlay_agent_files "$WORKDIR"
191+
192+
# Return to working directory
193+
cd "$WORKDIR"
194+
echo "[sg_only_verifier] Legacy restore complete, proceeding with tests"

benchmarks/ccb_mcp_onboarding/ccx-onboard-search-201/tests/test.sh

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,12 @@
11
#!/bin/bash
2+
set -eo pipefail
23
# RepoQA SR-QA Verification Script
4+
5+
# Source the sg_only wrapper (no-op if not in sg_only mode)
6+
if [ -f /tests/sgonly_verifier_wrapper.sh ]; then
7+
source /tests/sgonly_verifier_wrapper.sh
8+
fi
9+
310
echo "Starting RepoQA verifier..." 1>&2
411
cd /app || { echo "ERROR: Cannot cd to /app"; exit 1; }
512
mkdir -p /logs/verifier
Lines changed: 18 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,19 @@
1-
FROM python:3.11-slim
2-
RUN apt-get update && apt-get install -y git curl && rm -rf /var/lib/apt/lists/*
1+
FROM ubuntu:22.04
2+
ENV DEBIAN_FRONTEND=noninteractive
3+
RUN apt-get update && apt-get install -y --no-install-recommends \
4+
git ca-certificates curl python3 python3-pip ripgrep && \
5+
rm -rf /var/lib/apt/lists/*
36
RUN pip install --no-cache-dir numpy
4-
WORKDIR /app
5-
RUN mkdir -p /logs/agent /logs/verifier
7+
RUN adduser --disabled-password --gecos '' claude 2>/dev/null || true
8+
RUN mkdir -p /workspace /app /logs/agent /logs/verifier && \
9+
chown -R claude:claude /workspace /app /logs
10+
USER claude
11+
WORKDIR /workspace
12+
RUN git config --global user.email "agent@example.com" && \
13+
git config --global user.name "Agent" && \
14+
git config --global safe.directory '*'
15+
# Clone directly into the (still empty) WORKDIR. Cloning into a subdirectory
# and moving files up requires a `.*` glob (which also matches `.` and `..`)
# and error suppression, which lets a failed clone produce a "successful"
# image with an empty /workspace. A single clone fails the build on error.
RUN git clone --depth 1 https://github.com/sg-evals/kubernetes--8c9c67c0.git /workspace
18+
USER root
19+
ENTRYPOINT []
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
FROM ubuntu:22.04
2+
ENV DEBIAN_FRONTEND=noninteractive
3+
RUN apt-get update && apt-get install -y --no-install-recommends \
4+
git ca-certificates curl python3 python3-pip ripgrep && \
5+
rm -rf /var/lib/apt/lists/*
6+
RUN pip install --no-cache-dir numpy
7+
RUN adduser --disabled-password --gecos '' claude 2>/dev/null || true
8+
RUN mkdir -p /workspace /app /logs/agent /logs/verifier && \
9+
chown -R claude:claude /workspace /app /logs
10+
USER claude
11+
WORKDIR /workspace
12+
RUN git config --global user.email "agent@example.com" && \
13+
git config --global user.name "Agent" && \
14+
git config --global safe.directory '*'
15+
# Clone directly into the (still empty) WORKDIR. Cloning into a subdirectory
# and moving files up requires a `.*` glob (which also matches `.` and `..`)
# and error suppression, which lets a failed clone produce a "successful"
# image with an empty /workspace. A single clone fails the build on error.
RUN git clone --depth 1 https://github.com/sg-evals/kubernetes--8c9c67c0.git /workspace
18+
USER root
19+
RUN touch /tmp/.artifact_only_mode && echo '/workspace' > /tmp/.artifact_only_workdir
20+
ENTRYPOINT []
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
FROM ubuntu:22.04
2+
ENV SOURCEGRAPH_REPO_NAME=sg-evals/kubernetes--8c9c67c0
3+
ENV DEBIAN_FRONTEND=noninteractive
4+
RUN apt-get update && apt-get install -y --no-install-recommends \
5+
git curl python3 python3-pip ca-certificates && \
6+
rm -rf /var/lib/apt/lists/*
7+
RUN pip install --no-cache-dir numpy
8+
WORKDIR /workspace
9+
RUN git init && git config user.email "agent@example.com" && git config user.name "Agent"
10+
RUN mkdir -p /app /logs/agent /logs/verifier
11+
RUN touch /tmp/.artifact_only_mode && echo '/workspace' > /tmp/.artifact_only_workdir
12+
RUN (adduser --disabled-password --gecos '' claude 2>/dev/null || true) && \
13+
for d in /workspace /app /testbed /logs; do [ -d "$d" ] && chown -R claude:claude "$d"; done || true
14+
ENTRYPOINT []
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
FROM ubuntu:22.04
2+
ENV SOURCEGRAPH_REPO_NAME=sg-evals/kubernetes--8c9c67c0
3+
ENV DEBIAN_FRONTEND=noninteractive
4+
RUN apt-get update && apt-get install -y --no-install-recommends \
5+
git curl python3 python3-pip ripgrep ca-certificates && \
6+
rm -rf /var/lib/apt/lists/*
7+
RUN pip install --no-cache-dir numpy
8+
WORKDIR /workspace
9+
RUN git init && git config user.email "agent@example.com" && git config user.name "Agent"
10+
RUN mkdir -p /app /logs/agent /logs/verifier
11+
RUN echo '{"workdir": "/workspace", "repos": [{"mirror": "sg-evals/kubernetes--8c9c67c0", "target_dir": "."}]}' > /tmp/.sg_only_clone_manifest.json
12+
RUN touch /tmp/.sg_only_mode
13+
RUN (adduser --disabled-password --gecos '' claude 2>/dev/null || true) && \
14+
for d in /workspace /app /testbed /logs; do [ -d "$d" ] && chown -R claude:claude "$d"; done || true
15+
ENTRYPOINT []

0 commit comments

Comments
 (0)