11"""
2- Generate a report of actual revert commits in pytorch/pytorch over a given period
3- and annotate whether each has a matching non-dry-run autorevert decision recorded
4- by misc.autorevert_events_v2.
2+ Generate a report of:
3+ 1) actual revert commits in pytorch/pytorch over a given period with a left join
4+ to autorevert decisions (default view), or
5+ 2) autorevert decisions (reverts) over a given period with a left join to actual
6+ revert commits (reverse view).
57
6- Columns :
8+ Default view columns (actual → autorevert) :
79- revert_time (UTC)
810- original_sha (the commit being reverted)
911- category (from -c flag in the bot command comment, else from message, else 'uncategorized')
1214- comment_url (link to the bot command comment if present)
1315- has_autorevert (yes/no) — whether misc.autorevert_events_v2 recorded a revert for original_sha
1416
17+ Reverse view columns (autorevert → actual):
18+ - ts (UTC)
19+ - sha
20+ - workflows
21+ - signal_keys
22+ - comment_url, revert sha (taken from the matching actual revert, if one exists)
23+
1524Usage examples:
1625- python -m pytorch_auto_revert.testers.actual_reverts_report --start "2025-09-16 22:18:51" --end "2025-09-24 00:00:00"
1726- python -m pytorch_auto_revert.testers.actual_reverts_report --start "2025-09-16 22:18:51" --format csv > reverts.csv
27+ - python -m pytorch_auto_revert.testers.actual_reverts_report --start "2025-09-16 22:18:51" --mode auto-to-actual
1828
1929This script uses the ClickHouse client configuration from environment variables
2030as done by the project entrypoint (CLICKHOUSE_HOST, CLICKHOUSE_PORT, CLICKHOUSE_USERNAME,
@@ -53,10 +63,10 @@ def setup_ch_from_env() -> None:
5363 CHCliFactory .setup_client (host , port , username , password , database )
5464
5565
56- def run_query (
66+ def run_query_actual_to_auto (
5767 start : datetime , end : datetime
5868) -> Tuple [List [str ], List [Tuple [Any , ...]]]:
59- """Run the ClickHouse query and return (headers, rows) ."""
69+ """Default view: actual reverts → left join autorevert decisions ."""
6070 client = CHCliFactory ().client
6171
6272 sql = """
@@ -158,11 +168,91 @@ def run_query(
158168 return headers , rows
159169
160170
def run_query_auto_to_actual(
    start: datetime, end: datetime
) -> Tuple[List[str], List[Tuple[Any, ...]]]:
    """Reverse view: autorevert decisions (reverts) → left join actual reverts.

    Selects non-dry-run ``revert`` rows from ``misc.autorevert_events_v2`` for
    ``pytorch/pytorch`` with ``start <= ts < end`` and left-joins each of them
    to the earliest actual revert commit (found in ``default.push``) whose
    message says it reverts the same SHA.

    Args:
        start: Inclusive lower bound of the reporting window.
        end: Exclusive upper bound of the reporting window.

    Returns:
        A ``(headers, rows)`` pair: ``headers`` holds the six column titles and
        ``rows`` holds one tuple per autorevert decision, ordered by ``ts``.
    """
    client = CHCliFactory().client

    # NOTE: actual reverts (CTE A) are searched in the same [start, end) window
    # as the autorevert decisions, so a decision whose actual revert commit was
    # pushed outside the window is reported without a match.
    #
    # The regex escapes below must be written as `\\d`, `\\[`, `\\(` ... with no
    # whitespace after the backslash; a stray space turns `\d` into "literal
    # space followed by 'd'", which never matches and silently drops every
    # revert through the HAVING clause.
    sql = """
    WITH
        toDateTime64({start:DateTime64(9)}, 9) AS start_ts,
        toDateTime64({end:DateTime64(9)}, 9) AS end_ts

    -- A) Detect actual revert commits (bot-driven) within window
    , revert_by_sha AS (
        SELECT
            commit.id AS revert_sha,
            min(commit.timestamp) AS revert_time,
            anyHeavy(commit.message) AS message,
            regexpExtract(message, '(?s)This reverts commit ([0-9a-fA-F]{40})', 1) AS original_sha,
            toInt64OrNull(regexpExtract(message, '#issuecomment-(\\d+)', 1)) AS comment_id,
            regexpExtract(message, '(?s)on behalf of https://github.com/([A-Za-z0-9-]+)', 1) AS command_author,
            regexpExtract(message,
                '(?s)\\[comment\\]\\((https://github.com/pytorch/pytorch/pull/\\d+#issuecomment-\\d+)\\)', 1
            ) AS comment_url
        FROM default.push
        ARRAY JOIN commits AS commit
        WHERE tupleElement(repository, 'full_name') = 'pytorch/pytorch'
            AND commit.timestamp >= start_ts AND commit.timestamp < end_ts
            AND match(commit.message, '(?s)This reverts commit [0-9a-fA-F]{40}')
        GROUP BY commit.id
        HAVING comment_id IS NOT NULL AND command_author != ''
    )

    -- B) Map original_sha → earliest matching revert_sha within window
    , per_original AS (
        SELECT
            original_sha,
            argMin(revert_sha, revert_time) AS matching_revert_sha,
            argMin(comment_url, revert_time) AS matching_comment_url
        FROM revert_by_sha
        GROUP BY original_sha
    )

    -- C) Autorevert decisions (non-dry-run) within window
    , auto AS (
        SELECT ts, commit_sha, workflows, source_signal_keys
        FROM misc.autorevert_events_v2
        WHERE repo = 'pytorch/pytorch'
            AND dry_run = 0 AND action = 'revert'
            AND ts >= start_ts AND ts < end_ts
    )

    SELECT
        auto.ts AS ts,
        toString(auto.commit_sha) AS sha,
        auto.workflows AS workflows,
        auto.source_signal_keys AS signal_keys,
        per_original.matching_comment_url AS comment_url,
        toString(per_original.matching_revert_sha) AS revert_sha
    FROM auto
    LEFT JOIN per_original ON per_original.original_sha = auto.commit_sha
    ORDER BY ts
    """

    res = client.query(sql, parameters={"start": start, "end": end})
    # Display titles, in the same order as the columns of the final SELECT.
    headers = [
        "ts (utc)",
        "sha",
        "workflows",
        "signal keys",
        "comment_url",
        "revert sha",
    ]
    rows = [tuple(row) for row in res.result_rows]
    return headers, rows
245+
246+
161247def print_table (headers : List [str ], rows : List [Tuple [Any , ...]]) -> None :
162248 # Pretty print with simple width calculation and trimming long cells
163249 widths = [len (h ) for h in headers ]
164- # Cap for reason and URL columns to keep output readable
165- caps = {headers .index ("reason" ): 100 , headers .index ("comment_url" ): 120 }
250+ # Cap for long columns when present to keep output readable
251+ caps = {}
252+ if "reason" in headers :
253+ caps [headers .index ("reason" )] = 100
254+ if "comment_url" in headers :
255+ caps [headers .index ("comment_url" )] = 120
166256 for row in rows :
167257 for i , val in enumerate (row ):
168258 sval = "" if val is None else str (val )
@@ -196,12 +286,22 @@ def main() -> None:
196286 load_dotenv ()
197287
198288 ap = argparse .ArgumentParser (
199- description = "Build actual reverts table for a date range (UTC)"
289+ description = (
290+ "Report reverts vs autorevert decisions for a date range (UTC).\n "
291+ "Default view: actual reverts → left join autorevert decisions.\n "
292+ "Reverse view: autorevert decisions → left join actual reverts."
293+ )
200294 )
201295 ap .add_argument (
202296 "--start" , required = True , help = "Start time UTC (e.g. '2025-09-16 22:18:51')"
203297 )
204298 ap .add_argument ("--end" , default = None , help = "End time UTC (default: now)" )
299+ ap .add_argument (
300+ "--mode" ,
301+ choices = ["actual-to-auto" , "auto-to-actual" ],
302+ default = "actual-to-auto" ,
303+ help = "Which view to generate" ,
304+ )
205305 ap .add_argument (
206306 "--format" , choices = ["table" , "csv" ], default = "table" , help = "Output format"
207307 )
@@ -211,7 +311,10 @@ def main() -> None:
211311 end = parse_utc (args .end ) if args .end else datetime .now (timezone .utc )
212312
213313 setup_ch_from_env ()
214- headers , rows = run_query (start , end )
314+ if args .mode == "auto-to-actual" :
315+ headers , rows = run_query_auto_to_actual (start , end )
316+ else :
317+ headers , rows = run_query_actual_to_auto (start , end )
215318
216319 if args .format == "csv" :
217320 write_csv (headers , rows , fp = os .sys .stdout )
0 commit comments