Skip to content

Commit ef4eba1

Browse files
committed
Add script to generate report of actual revert commits with autorevert annotations
1 parent db4893e commit ef4eba1

File tree

1 file changed

+223
-0
lines changed

1 file changed

+223
-0
lines changed
Lines changed: 223 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,223 @@
1+
"""
2+
Generate a report of actual revert commits in pytorch/pytorch over a given period
3+
and annotate whether each has a matching non-dry-run autorevert decision recorded
4+
by misc.autorevert_events_v2.
5+
6+
Columns:
7+
- revert_time (UTC)
8+
- original_sha (the commit being reverted)
9+
- category (from -c flag in the bot command comment, else from message, else 'uncategorized')
10+
- reason (short reason parsed from the revert commit message, if present)
11+
- author (GitHub login from the bot command 'on behalf of' attribution in the message)
12+
- comment_url (link to the bot command comment if present)
13+
- has_autorevert (yes/no) — whether misc.autorevert_events_v2 recorded a revert for original_sha
14+
15+
Usage examples:
16+
- python -m pytorch_auto_revert.testers.actual_reverts_report --start "2025-09-16 22:18:51" --end "2025-09-24 00:00:00"
17+
- python -m pytorch_auto_revert.testers.actual_reverts_report --start "2025-09-16 22:18:51" --format csv > reverts.csv
18+
19+
This script uses the ClickHouse client configuration from environment variables
20+
as done by the project entrypoint (CLICKHOUSE_HOST, CLICKHOUSE_PORT, CLICKHOUSE_USERNAME,
21+
CLICKHOUSE_PASSWORD, CLICKHOUSE_DATABASE). You may also use a .env file.
22+
"""
23+
24+
from __future__ import annotations
25+
26+
import argparse
27+
import csv
28+
import os
29+
from datetime import datetime, timezone
30+
from typing import Any, Iterable, List, Tuple
31+
32+
from dotenv import load_dotenv
33+
34+
from ..clickhouse_client_helper import CHCliFactory, ensure_utc_datetime
35+
36+
37+
def parse_utc(s: str) -> datetime:
    """Parse a timestamp string and return it normalized to UTC.

    Accepted inputs are ISO 8601 strings (with or without a UTC offset) and
    the plain ``YYYY-MM-DD HH:MM:SS`` form. Surrounding whitespace is ignored.
    A trailing ``Z``/``z`` designator is rewritten to ``+00:00`` because
    ``datetime.fromisoformat`` only understands it from Python 3.11 onwards.

    Args:
        s: The timestamp text, typically taken from the command line.

    Returns:
        The result of ``ensure_utc_datetime`` applied to the parsed value
        (naive inputs are intended to be interpreted as UTC).

    Raises:
        ValueError: If the text matches neither supported format.
    """
    text = s.strip()
    # Slicing (rather than indexing) keeps the empty string safe here.
    if text[-1:] in ("Z", "z"):
        text = text[:-1] + "+00:00"
    try:
        dt = datetime.fromisoformat(text)
    except ValueError:
        # Fallback kept so the error raised for bad input names the expected format.
        dt = datetime.strptime(text, "%Y-%m-%d %H:%M:%S")
    return ensure_utc_datetime(dt)
45+
46+
47+
def setup_ch_from_env() -> None:
    """Configure the ClickHouse client factory from ``CLICKHOUSE_*`` variables.

    Reads host, port, username, password and database from the environment,
    falling back to an empty string for host/username/password, ``8443`` for
    the port and ``default`` for the database, then passes them on to
    ``CHCliFactory.setup_client``.

    Raises:
        ValueError: If ``CLICKHOUSE_PORT`` is set to a non-integer value.
    """
    env = os.environ
    CHCliFactory.setup_client(
        env.get("CLICKHOUSE_HOST", ""),
        int(env.get("CLICKHOUSE_PORT", "8443")),
        env.get("CLICKHOUSE_USERNAME", ""),
        env.get("CLICKHOUSE_PASSWORD", ""),
        env.get("CLICKHOUSE_DATABASE", "default"),
    )
54+
55+
56+
def run_query(
    start: datetime, end: datetime
) -> Tuple[List[str], List[Tuple[Any, ...]]]:
    """Fetch the revert report for commits with ``start <= timestamp < end``.

    The query is built from four CTEs followed by a final projection:

    1. ``revert_by_sha``: one row per revert commit pushed to pytorch/pytorch
       in the window, with the reverted sha, command author, reason, comment
       URL/id and a category hint extracted from the commit message. Commits
       without a comment id or command author are dropped.
    2. ``revert_enriched``: left-joins ``default.issue_comment`` on the
       comment id to read the ``-c`` category from the comment body.
    3. ``per_original``: keeps the earliest revert per reverted sha and picks
       the category from the comment, then from the message, else
       ``'uncategorized'``.
    4. ``auto``: shas that have a non-dry-run ``revert`` action in
       ``misc.autorevert_events_v2``.

    Args:
        start: Inclusive lower bound, bound to the ``start`` query parameter.
        end: Exclusive upper bound, bound to the ``end`` query parameter.

    Returns:
        A ``(headers, rows)`` pair. ``headers`` lists the column names in the
        same order as the final SELECT; ``rows`` holds one tuple per result row.
    """
    column_names = [
        "revert_time",
        "original_sha",
        "category",
        "reason",
        "author",
        "comment_url",
        "has_autorevert",
    ]

    query = """
    WITH
        toDateTime64({start:DateTime64(9)}, 9) AS start_ts,
        toDateTime64({end:DateTime64(9)}, 9) AS end_ts

    -- 1) Per-revert-commit rows (only bot-driven commits with author + comment id)
    , revert_by_sha AS (
        SELECT
            commit.id AS revert_sha,
            min(commit.timestamp) AS revert_time,
            anyHeavy(commit.message) AS message,
            regexpExtract(message, '(?s)This reverts commit ([0-9a-fA-F]{40})', 1) AS original_sha,
            regexpExtract(message, '(?s)on behalf of https://github.com/([A-Za-z0-9-]+)', 1) AS command_author,
            nullIf(trim(BOTH ' ' FROM regexpExtract(message, '(?s)due to (.*?)(?: \\([[]comment|$)', 1)), '') AS reason,
            regexpExtract(message,
                '(?s)\\[comment\\]\\((https://github.com/pytorch/pytorch/pull/\\d+#issuecomment-\\d+)\\)', 1
            ) AS comment_url,
            toInt64OrNull(regexpExtract(message, '#issuecomment-(\\d+)', 1)) AS comment_id,
            regexpExtract(message, '-c\\s+(\\w+)', 1) AS category_hint
        FROM default.push
        ARRAY JOIN commits AS commit
        WHERE tupleElement(repository, 'full_name') = 'pytorch/pytorch'
            AND commit.timestamp >= start_ts AND commit.timestamp < end_ts
            AND match(commit.message, '(?s)This reverts commit [0-9a-fA-F]{40}')
        GROUP BY commit.id
        HAVING comment_id IS NOT NULL AND command_author != ''
    )

    -- 2) Join comment to get authoritative category from -c flag
    , revert_enriched AS (
        SELECT
            r.revert_sha,
            r.revert_time,
            r.original_sha,
            r.command_author,
            r.reason,
            r.comment_url,
            lowerUTF8(nullIf(ic.ic_body_category, '')) AS comment_category,
            lowerUTF8(nullIf(r.category_hint, '')) AS message_category
        FROM revert_by_sha AS r
        LEFT JOIN (
            SELECT id, regexpExtract(body, '-c\\s+(\\w+)', 1) AS ic_body_category
            FROM default.issue_comment
        ) AS ic
            ON ic.id = r.comment_id
    )

    -- 3) Aggregate to one row per original_sha (earliest revert attempt)
    , per_original AS (
        SELECT
            original_sha,
            argMin(tuple(
                revert_time,
                reason,
                command_author,
                comment_url,
                if(comment_category IN ('nosignal','ignoredsignal','landrace','weird','ghfirst'), comment_category,
                    if(message_category IN ('nosignal','ignoredsignal','landrace','weird','ghfirst'), message_category,
                        'uncategorized'))
            ), revert_time) AS fields
        FROM revert_enriched
        GROUP BY original_sha
    )

    -- 4) Autorevert decisions (non-dry-run) keyed by original sha
    , auto AS (
        SELECT commit_sha
        FROM misc.autorevert_events_v2
        WHERE dry_run = 0 AND action = 'revert'
        GROUP BY commit_sha
    )

    SELECT
        tupleElement(fields, 1) AS revert_time,
        original_sha,
        tupleElement(fields, 5) AS category,
        tupleElement(fields, 2) AS reason,
        tupleElement(fields, 3) AS author,
        tupleElement(fields, 4) AS comment_url,
        if(auto.commit_sha != '', 'yes', 'no') AS has_autorevert
    FROM per_original
    LEFT JOIN auto ON auto.commit_sha = per_original.original_sha
    ORDER BY revert_time
    """

    ch = CHCliFactory().client
    result = ch.query(query, parameters={"start": start, "end": end})
    # Normalize every result row to a plain tuple for the output writers.
    return column_names, list(map(tuple, result.result_rows))
159+
160+
161+
def print_table(
    headers: List[str],
    rows: List[Tuple[Any, ...]],
    *,
    max_widths: dict[str, int] | None = None,
) -> None:
    """Print ``rows`` to stdout as a pipe-separated, column-aligned table.

    Each cell is rendered with ``str`` (``None`` becomes an empty string).
    Columns named in ``max_widths`` are truncated to that many characters,
    with the last kept character replaced by an ellipsis. Cap names that do
    not appear in ``headers`` are ignored, so the function works for any
    header list rather than only the report's own columns.

    Args:
        headers: Column titles, one per column.
        rows: Data rows; each row is expected to have at most
            ``len(headers)`` values.
        max_widths: Optional mapping of column name to maximum cell width
            (values should be >= 1). Defaults to capping ``reason`` at 100
            and ``comment_url`` at 120 characters.
    """
    if max_widths is None:
        max_widths = {"reason": 100, "comment_url": 120}
    # Resolve caps by position once; absent column names simply get no cap.
    caps = {i: max_widths[name] for i, name in enumerate(headers) if name in max_widths}

    def render(vals: Iterable[Any]) -> List[str]:
        """Stringify one row, truncating capped columns."""
        cells: List[str] = []
        for i, val in enumerate(vals):
            text = "" if val is None else str(val)
            cap = caps.get(i)
            if cap is not None and len(text) > cap:
                text = text[: max(cap - 1, 0)] + "…"
            cells.append(text)
        return cells

    # Render each row exactly once; the same strings drive widths and output.
    rendered = [render(row) for row in rows]

    widths = [len(h) for h in headers]
    for cells in rendered:
        for i, text in enumerate(cells):
            widths[i] = max(widths[i], len(text))

    def join(cells: List[str]) -> str:
        """Pad every cell to its column width and join with separators."""
        return " | ".join(text.ljust(widths[i]) for i, text in enumerate(cells))

    print(join(render(headers)))
    print("-+-".join("-" * w for w in widths))
    for cells in rendered:
        print(join(cells))
186+
187+
188+
def write_csv(headers: List[str], rows: List[Tuple[Any, ...]], fp) -> None:
    """Write ``headers`` followed by every row in ``rows`` to ``fp`` as CSV.

    Args:
        headers: Column titles, emitted as the first record.
        rows: Data rows, emitted in order after the header.
        fp: Writable text file-like object handed to ``csv.writer``.
    """
    out = csv.writer(fp)
    out.writerow(headers)
    out.writerows(rows)
193+
194+
195+
def main(argv: List[str] | None = None) -> None:
    """Command-line entry point: parse arguments, run the query, print the report.

    Loads a ``.env`` file if present, configures the ClickHouse client from
    the environment, queries reverts in the requested window and writes them
    to stdout either as an aligned table or as CSV.

    Args:
        argv: Argument list to parse instead of ``sys.argv[1:]``. ``None``
            (the default) keeps the normal command-line behavior.

    Raises:
        SystemExit: Raised by argparse on invalid arguments, including
            timestamps that cannot be parsed.
    """
    # Imported here because the module header does not import sys; the
    # original reached it through the undocumented ``os.sys`` attribute.
    import sys

    load_dotenv()

    ap = argparse.ArgumentParser(
        description="Build actual reverts table for a date range (UTC)"
    )
    ap.add_argument(
        "--start", required=True, help="Start time UTC (e.g. '2025-09-16 22:18:51')"
    )
    ap.add_argument("--end", default=None, help="End time UTC (default: now)")
    ap.add_argument(
        "--format", choices=["table", "csv"], default="table", help="Output format"
    )
    args = ap.parse_args(argv)

    try:
        start = parse_utc(args.start)
        end = parse_utc(args.end) if args.end else datetime.now(timezone.utc)
    except ValueError as exc:
        # Report bad timestamps as a usage error rather than a traceback.
        ap.error(f"invalid timestamp: {exc}")

    setup_ch_from_env()
    headers, rows = run_query(start, end)

    if args.format == "csv":
        write_csv(headers, rows, fp=sys.stdout)
    else:
        print_table(headers, rows)
220+
221+
222+
# Run the report only when executed as a script or via ``python -m``;
# importing the module has no side effects.
if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)