Skip to content

Commit 985773f

Browse files
committed
[autorevert] enhance actual reverts report to support reverse view with autorevert decisions
1 parent ef4eba1 commit 985773f

File tree

1 file changed

+113
-10
lines changed

1 file changed

+113
-10
lines changed

aws/lambda/pytorch-auto-revert/pytorch_auto_revert/testers/actual_reverts_report.py

Lines changed: 113 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -1,9 +1,11 @@
11
"""
2-
Generate a report of actual revert commits in pytorch/pytorch over a given period
3-
and annotate whether each has a matching non-dry-run autorevert decision recorded
4-
by misc.autorevert_events_v2.
2+
Generate a report of:
3+
1) actual revert commits in pytorch/pytorch over a given period with a left join
4+
to autorevert decisions (default view), or
5+
2) autorevert decisions (reverts) over a given period with a left join to actual
6+
revert commits (reverse view).
57
6-
Columns:
8+
Default view columns (actual → autorevert):
79
- revert_time (UTC)
810
- original_sha (the commit being reverted)
911
- category (from -c flag in the bot command comment, else from message, else 'uncategorized')
@@ -12,9 +14,17 @@
1214
- comment_url (link to the bot command comment if present)
1315
- has_autorevert (yes/no) — whether misc.autorevert_events_v2 recorded a revert for original_sha
1416
17+
Reverse view columns (autorevert → actual):
18+
- ts (UTC)
19+
- sha
20+
- workflows
21+
- signal_keys
22+
- comment_url, revert_sha (the matching actual revert, when one exists)
23+
1524
Usage examples:
1625
- python -m pytorch_auto_revert.testers.actual_reverts_report --start "2025-09-16 22:18:51" --end "2025-09-24 00:00:00"
1726
- python -m pytorch_auto_revert.testers.actual_reverts_report --start "2025-09-16 22:18:51" --format csv > reverts.csv
27+
- python -m pytorch_auto_revert.testers.actual_reverts_report --start "2025-09-16 22:18:51" --mode auto-to-actual
1828
1929
This script uses the ClickHouse client configuration from environment variables
2030
as done by the project entrypoint (CLICKHOUSE_HOST, CLICKHOUSE_PORT, CLICKHOUSE_USERNAME,
@@ -53,10 +63,10 @@ def setup_ch_from_env() -> None:
5363
CHCliFactory.setup_client(host, port, username, password, database)
5464

5565

56-
def run_query(
66+
def run_query_actual_to_auto(
5767
start: datetime, end: datetime
5868
) -> Tuple[List[str], List[Tuple[Any, ...]]]:
59-
"""Run the ClickHouse query and return (headers, rows)."""
69+
"""Default view: actual reverts → left join autorevert decisions."""
6070
client = CHCliFactory().client
6171

6272
sql = """
@@ -158,11 +168,91 @@ def run_query(
158168
return headers, rows
159169

160170

171+
def run_query_auto_to_actual(
172+
start: datetime, end: datetime
173+
) -> Tuple[List[str], List[Tuple[Any, ...]]]:
174+
"""Reverse view: autorevert decisions (reverts) → left join actual reverts."""
175+
client = CHCliFactory().client
176+
177+
sql = """
178+
WITH
179+
toDateTime64({start:DateTime64(9)}, 9) AS start_ts,
180+
toDateTime64({end:DateTime64(9)}, 9) AS end_ts
181+
182+
-- A) Detect actual revert commits (bot-driven) within window
183+
, revert_by_sha AS (
184+
SELECT
185+
commit.id AS revert_sha,
186+
min(commit.timestamp) AS revert_time,
187+
anyHeavy(commit.message) AS message,
188+
regexpExtract(message, '(?s)This reverts commit ([0-9a-fA-F]{40})', 1) AS original_sha,
189+
toInt64OrNull(regexpExtract(message, '#issuecomment-(\\d+)', 1)) AS comment_id,
190+
regexpExtract(message, '(?s)on behalf of https://github.com/([A-Za-z0-9-]+)', 1) AS command_author,
191+
regexpExtract(message,
192+
'(?s)\\[comment\\]\\((https://github.com/pytorch/pytorch/pull/\\d+#issuecomment-\\d+)\\)', 1
193+
) AS comment_url
194+
FROM default.push
195+
ARRAY JOIN commits AS commit
196+
WHERE tupleElement(repository, 'full_name') = 'pytorch/pytorch'
197+
AND commit.timestamp >= start_ts AND commit.timestamp < end_ts
198+
AND match(commit.message, '(?s)This reverts commit [0-9a-fA-F]{40}')
199+
GROUP BY commit.id
200+
HAVING comment_id IS NOT NULL AND command_author != ''
201+
)
202+
203+
-- B) Map original_sha → earliest matching revert_sha within window
204+
, per_original AS (
205+
SELECT
206+
original_sha,
207+
argMin(revert_sha, revert_time) AS matching_revert_sha,
208+
argMin(comment_url, revert_time) AS matching_comment_url
209+
FROM revert_by_sha
210+
GROUP BY original_sha
211+
)
212+
213+
-- C) Autorevert decisions (non-dry-run) within window
214+
, auto AS (
215+
SELECT ts, commit_sha, workflows, source_signal_keys
216+
FROM misc.autorevert_events_v2
217+
WHERE repo = 'pytorch/pytorch'
218+
AND dry_run = 0 AND action = 'revert'
219+
AND ts >= start_ts AND ts < end_ts
220+
)
221+
222+
SELECT
223+
auto.ts AS ts,
224+
toString(auto.commit_sha) AS sha,
225+
auto.workflows AS workflows,
226+
auto.source_signal_keys AS signal_keys,
227+
per_original.matching_comment_url AS comment_url,
228+
toString(per_original.matching_revert_sha) AS revert_sha
229+
FROM auto
230+
LEFT JOIN per_original ON per_original.original_sha = auto.commit_sha
231+
ORDER BY ts
232+
"""
233+
234+
res = client.query(sql, parameters={"start": start, "end": end})
235+
headers = [
236+
"ts (utc)",
237+
"sha",
238+
"workflows",
239+
"signal keys",
240+
"comment_url",
241+
"revert sha",
242+
]
243+
rows = [tuple(row) for row in res.result_rows]
244+
return headers, rows
245+
246+
161247
def print_table(headers: List[str], rows: List[Tuple[Any, ...]]) -> None:
162248
# Pretty print with simple width calculation and trimming long cells
163249
widths = [len(h) for h in headers]
164-
# Cap for reason and URL columns to keep output readable
165-
caps = {headers.index("reason"): 100, headers.index("comment_url"): 120}
250+
# Cap for long columns when present to keep output readable
251+
caps = {}
252+
if "reason" in headers:
253+
caps[headers.index("reason")] = 100
254+
if "comment_url" in headers:
255+
caps[headers.index("comment_url")] = 120
166256
for row in rows:
167257
for i, val in enumerate(row):
168258
sval = "" if val is None else str(val)
@@ -196,12 +286,22 @@ def main() -> None:
196286
load_dotenv()
197287

198288
ap = argparse.ArgumentParser(
199-
description="Build actual reverts table for a date range (UTC)"
289+
description=(
290+
"Report reverts vs autorevert decisions for a date range (UTC).\n"
291+
"Default view: actual reverts → left join autorevert decisions.\n"
292+
"Reverse view: autorevert decisions → left join actual reverts."
293+
)
200294
)
201295
ap.add_argument(
202296
"--start", required=True, help="Start time UTC (e.g. '2025-09-16 22:18:51')"
203297
)
204298
ap.add_argument("--end", default=None, help="End time UTC (default: now)")
299+
ap.add_argument(
300+
"--mode",
301+
choices=["actual-to-auto", "auto-to-actual"],
302+
default="actual-to-auto",
303+
help="Which view to generate",
304+
)
205305
ap.add_argument(
206306
"--format", choices=["table", "csv"], default="table", help="Output format"
207307
)
@@ -211,7 +311,10 @@ def main() -> None:
211311
end = parse_utc(args.end) if args.end else datetime.now(timezone.utc)
212312

213313
setup_ch_from_env()
214-
headers, rows = run_query(start, end)
314+
if args.mode == "auto-to-actual":
315+
headers, rows = run_query_auto_to_actual(start, end)
316+
else:
317+
headers, rows = run_query_actual_to_auto(start, end)
215318

216319
if args.format == "csv":
217320
write_csv(headers, rows, fp=os.sys.stdout)

0 commit comments

Comments
 (0)