Skip to content

Commit 4cd7eb9

Browse files
sjarmakclaude
andcommitted
feat: add SWEAP image migration script (jefzda → ghcr.io/sg-evals)
scripts/migrate_sweap_to_ghcr.py handles pull/retag/push of 11 images and updates 33 Dockerfiles. Requires manual GHCR auth first: run `docker login ghcr.io -u USER -p TOKEN`, then `python3 scripts/migrate_sweap_to_ghcr.py --push --update`. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent 4b457d6 commit 4cd7eb9

File tree

3 files changed

+141
-1
lines changed

3 files changed

+141
-1
lines changed

docs/ops/SCRIPT_INDEX.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -173,6 +173,7 @@ Generated from `scripts/registry.json` by `scripts/generate_script_index.py`.
173173

174174
- `scripts/migrate_dockerfiles_clone_as_claude.py` - Migration script for migrate dockerfiles clone as claude.
175175
- `scripts/migrate_dockerfiles_to_mirrors.py` - Migration script for migrate dockerfiles to mirrors.
176+
- `scripts/migrate_sweap_to_ghcr.py` - Migration script for migrate sweap to ghcr.
176177
- `scripts/migrate_to_sg_evals.sh` - Migration script for migrate to sg evals.
177178
- `scripts/migrate_to_sg_evals_batch2.sh` - Migration script for migrate to sg evals batch2.
178179
- `scripts/migrate_validation_result_sidecar.py` - Migration script for migrate validation result sidecar.

scripts/migrate_sweap_to_ghcr.py

Lines changed: 131 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,131 @@
1+
#!/usr/bin/env python3
2+
"""Migrate SWEAP images from jefzda/sweap-images to ghcr.io/sg-evals/sweap-images.
3+
4+
Two modes, plus an optional dry-run flag:
5+
--push Pull from jefzda, retag, push to GHCR, clean up (requires docker login to ghcr.io)
6+
--update Update Dockerfile FROM lines to use ghcr.io (no Docker required)
7+
--dry-run Show what would be done without making changes
8+
9+
Usage:
10+
# First, push images (requires GHCR write access):
11+
docker login ghcr.io -u USERNAME -p TOKEN
12+
python3 scripts/migrate_sweap_to_ghcr.py --push
13+
14+
# Then, update Dockerfiles:
15+
python3 scripts/migrate_sweap_to_ghcr.py --update
16+
17+
# Or do both:
18+
python3 scripts/migrate_sweap_to_ghcr.py --push --update
19+
"""
20+
21+
import argparse
22+
import glob
23+
import os
24+
import re
25+
import subprocess
26+
import sys
27+
28+
# Source image repository: push_images() pulls "<SRC_REGISTRY>:<tag>" from here,
# and update_dockerfiles() replaces this string in Dockerfiles.
SRC_REGISTRY = "jefzda/sweap-images"
29+
# Destination image repository: push_images() retags and pushes images here,
# and update_dockerfiles() writes this string in place of SRC_REGISTRY.
DST_REGISTRY = "ghcr.io/sg-evals/sweap-images"
30+
31+
def find_sweap_references(src_registry=None):
    """Find Dockerfiles whose FROM lines reference the source registry.

    Scans ``benchmarks/csb_*/*/environment/Dockerfile*`` relative to the
    current working directory and collects every ``FROM <registry>:<tag>``
    line, including the ``FROM --platform=... <registry>:<tag>`` form.

    Args:
        src_registry: Image repository to look for. Defaults to the
            module-level ``SRC_REGISTRY`` when omitted.

    Returns:
        A dict mapping each image tag to the list of Dockerfile paths that
        reference it. Paths appear in sorted order and at most once per tag,
        even if a multi-stage Dockerfile uses the same image several times.
    """
    if src_registry is None:
        src_registry = SRC_REGISTRY

    # Compile once; re.escape keeps characters in the registry name literal.
    from_pattern = re.compile(
        r"FROM\s+(?:--platform=\S+\s+)?" + re.escape(src_registry) + r":(\S+)"
    )

    tag_to_files = {}
    for path in sorted(glob.glob("benchmarks/csb_*/*/environment/Dockerfile*")):
        with open(path) as fh:
            for line in fh:
                m = from_pattern.match(line)
                if m is None:
                    continue
                files = tag_to_files.setdefault(m.group(1), [])
                # A multi-stage build may repeat the same base image; list the
                # file only once so per-tag file counts stay accurate.
                if path not in files:
                    files.append(path)
    return tag_to_files
43+
44+
45+
def push_images(tag_to_files, dry_run=False):
    """Pull each image from the source registry, retag it, and push it to GHCR.

    Images are processed one at a time, and the local copies are removed after
    every attempt (successful or not) so that disk usage stays bounded.

    Args:
        tag_to_files: Mapping of image tag -> Dockerfile paths; only the keys
            (tags) are used here.
        dry_run: When true, only print what would be done; docker is not run.

    Returns:
        True if every image migrated (or ``dry_run`` is set), False if at
        least one tag failed. Failed tags are listed on stdout.
    """
    tags = sorted(tag_to_files)
    print(f"Migrating {len(tags)} images to {DST_REGISTRY}...\n")

    failed = []
    for i, tag in enumerate(tags, 1):
        src = f"{SRC_REGISTRY}:{tag}"
        dst = f"{DST_REGISTRY}:{tag}"
        print(f"[{i}/{len(tags)}] {tag[:60]}...")

        if dry_run:
            print(f" DRY RUN: would pull {src}, tag as {dst}, push, clean\n")
            continue

        try:
            for cmd in (
                ["docker", "pull", src],
                ["docker", "tag", src, dst],
                ["docker", "push", dst],
            ):
                subprocess.run(cmd, check=True, capture_output=True, text=True)
            print(" OK\n")
        except subprocess.CalledProcessError as e:
            print(f" FAILED: {(e.stderr or '').strip()}\n")
            failed.append(tag)
        except OSError as e:
            # docker binary missing or not executable: report it like any
            # other failure instead of aborting with a traceback.
            print(f" FAILED: {e}\n")
            failed.append(tag)
        finally:
            # Clean up both references to save disk, also when tag/push failed
            # after a successful pull. Best effort: the exit status is ignored
            # because the images may not exist locally.
            try:
                subprocess.run(["docker", "rmi", src, dst], capture_output=True, text=True)
            except OSError:
                pass

    if failed:
        print(f"\n{len(failed)} images failed to migrate:")
        for t in failed:
            print(f" {t}")
        return False
    return True
77+
78+
79+
def update_dockerfiles(tag_to_files, dry_run=False):
    """Replace the source registry with the GHCR registry in all Dockerfiles.

    Every occurrence of ``SRC_REGISTRY`` in each referenced file is replaced
    with ``DST_REGISTRY``. Files are read and written with ``newline=""`` so
    existing line endings (LF or CRLF) are preserved.

    Args:
        tag_to_files: Mapping of image tag -> Dockerfile paths to rewrite.
        dry_run: When true, report which files would change without writing.

    Returns:
        The number of distinct files that were (or, in a dry run, would be)
        updated.
    """
    # A Dockerfile can be listed under several tags; process each path once so
    # the reported count is the same in dry-run and real runs.
    unique_files = dict.fromkeys(
        path for _tag, files in sorted(tag_to_files.items()) for path in files
    )

    total_files = 0
    for path in unique_files:
        with open(path, newline="") as fh:
            content = fh.read()
        new_content = content.replace(SRC_REGISTRY, DST_REGISTRY)
        if new_content == content:
            continue
        if dry_run:
            print(f" DRY RUN: would update {path}")
        else:
            with open(path, "w", newline="") as fh:
                fh.write(new_content)
        total_files += 1

    action = "Would update" if dry_run else "Updated"
    print(f"\n{action} {total_files} Dockerfiles ({SRC_REGISTRY} -> {DST_REGISTRY})")
    return total_files
98+
99+
100+
def main():
    """Command-line entry point: parse flags, locate references, run the modes.

    Exits with status 1 when neither --push nor --update is given, or when a
    real (non dry-run) push reports failures; exits with status 0 when no
    source-registry references are found.
    """
    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    for flag, help_text in (
        ("--push", "Pull/retag/push images to GHCR"),
        ("--update", "Update Dockerfile FROM lines"),
        ("--dry-run", "Show what would be done"),
    ):
        parser.add_argument(flag, action="store_true", help=help_text)
    args = parser.parse_args()

    # At least one mode must be selected.
    if not (args.push or args.update):
        parser.print_help()
        sys.exit(1)

    # The glob in find_sweap_references() is relative, so run from the parent
    # of the directory that contains this script.
    script_path = os.path.abspath(__file__)
    repo_root = os.path.dirname(os.path.dirname(script_path))
    os.chdir(repo_root)

    tag_to_files = find_sweap_references()
    if not tag_to_files:
        print("No jefzda/sweap-images references found. Migration may already be complete.")
        sys.exit(0)

    file_count = sum(len(files) for files in tag_to_files.values())
    print(f"Found {len(tag_to_files)} unique tags across {file_count} Dockerfiles\n")

    if args.push:
        pushed_ok = push_images(tag_to_files, dry_run=args.dry_run)
        # Do not rewrite Dockerfiles to point at images that failed to push.
        if not (pushed_ok or args.dry_run):
            print("\nSome pushes failed. Fix and rerun with --push before --update.")
            sys.exit(1)

    if args.update:
        update_dockerfiles(tag_to_files, dry_run=args.dry_run)
128+
129+
130+
if __name__ == "__main__":
131+
main()

scripts/registry.json

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1098,6 +1098,14 @@
10981098
"language": "python",
10991099
"summary": "Data/run management script for migrate results."
11001100
},
1101+
{
1102+
"name": "migrate_sweap_to_ghcr.py",
1103+
"path": "scripts/migrate_sweap_to_ghcr.py",
1104+
"category": "migration",
1105+
"status": "maintained",
1106+
"language": "python",
1107+
"summary": "Migration script for migrate sweap to ghcr."
1108+
},
11011109
{
11021110
"name": "migrate_to_sg_evals.sh",
11031111
"path": "scripts/migrate_to_sg_evals.sh",
@@ -1882,7 +1890,7 @@
18821890
"generation": 11,
18831891
"infra_mirrors": 23,
18841892
"library_helpers": 7,
1885-
"migration": 5,
1893+
"migration": 6,
18861894
"misc": 104,
18871895
"qa_quality": 10,
18881896
"submission_reporting": 7,

0 commit comments

Comments
 (0)