SEPARATOR = "\u2500" * 60


def extract_json_changepoints(json_data, max_prs=None):
    """
    Extract changepoint summaries from JSON data.

    Each reported changepoint produces a multi-line block with version info,
    regressed metrics, and the PRs introduced between nightlies.

    Entries flagged ``is_changepoint`` whose metrics all carry a zero (or
    missing) ``percentage_change`` are not reported and are excluded from the
    "Changepoint N of M" numbering, so the numbering always matches the
    blocks that are actually returned.

    :param json_data: List of changepoint records
    :param max_prs: Maximum PRs to display per changepoint (None = all)
    :return: list of formatted changepoint summary strings (one per entry)
    """
    # Pass 1: decide which entries are reportable *before* numbering them.
    reportable = []
    for entry in json_data or []:
        if not entry.get("is_changepoint", False):
            continue
        regressed = []
        # ``or {}`` / ``or 0`` tolerate keys that are present but null.
        for metric_name, metric_data in (entry.get("metrics") or {}).items():
            percentage = (metric_data or {}).get("percentage_change") or 0
            if percentage != 0:
                # Negative numbers already format with "-"; only add "+".
                sign = "+" if percentage > 0 else ""
                regressed.append(f"{metric_name}: {sign}{percentage:.2f}%")
        if regressed:
            reportable.append((entry, regressed))

    # Pass 2: format each reportable entry.
    total = len(reportable)
    changepoints = []
    for idx, (entry, regressed) in enumerate(reportable, 1):
        github_ctx = entry.get("github_context") or {}
        current_version = github_ctx.get(
            "current_version", entry.get("ocpVersion", "unknown")
        )
        previous_version = github_ctx.get("previous_version", "unknown")
        prs = entry.get("prs") or []

        regressed_summary = ", ".join(regressed)
        lines = [
            f"{SEPARATOR}",
            f" Changepoint {idx} of {total}: {regressed_summary}",
            f"{SEPARATOR}",
            f"Version: {current_version}",
            f"Previous: {previous_version}",
        ]

        if prs:
            display_prs = prs[:max_prs] if max_prs is not None else prs
            # The header always reports the full PR count, even when truncated.
            lines.append(f"\nPRs between nightlies ({len(prs)}):")
            lines.extend(f" {pr}" for pr in display_prs)
            if max_prs is not None and len(prs) > max_prs:
                lines.append(f" ... and {len(prs) - max_prs} more")

        changepoints.append("\n".join(lines))

    return changepoints
def construct_visualization_url(view_url, step_name):
    """
    Build a gcsweb URL pointing to the step's artifacts directory.

    The top-level folders under the job's ``artifacts/`` directory are listed
    and, for each one, ``<folder>/<step>/artifacts/`` is probed. The first
    candidate whose listing succeeds wins: if it contains an ``.html`` file
    the URL points at that file, otherwise at the directory itself.

    :param view_url: prow view URL (must contain ``view/gs/``)
    :param step_name: raw step name from junit_operator.xml
    :return: gcsweb URL string, or None if the log folder cannot be resolved
    """
    base = "https://gcsweb-ci.apps.ci.l2s4.p1.openshiftapps.com/gcs/"
    try:
        # Drop a trailing slash so the paths built below never contain "//".
        gcs_path = view_url.split("view/gs/")[1].rstrip("/")
        artifact_root = f"gs://{gcs_path}/artifacts/"
        top_folders = list_gcs_files(artifact_root)

        # Find the folder that actually contains the step as a subfolder.
        # The junit step_name often includes the log_folder as a prefix
        # (e.g. "payload-control-plane-6nodes-openshift-qe-orion-udn-density")
        # while the GCS folder is just "openshift-qe-orion-udn-density".
        for entry in top_folders:
            # Normalise whitespace once so the directory check and the
            # folder-name extraction look at the same string.
            entry = entry.strip()
            if not entry.endswith("/"):
                continue
            folder = entry.strip("/").split("/")[-1]
            if not folder:
                continue

            # Try with prefix stripped first, then the raw step_name
            candidates = [step_name]
            prefix = folder + "-"
            if step_name.startswith(prefix):
                candidates.insert(0, step_name[len(prefix) :])

            for candidate in candidates:
                step_artifacts = f"{artifact_root}{folder}/{candidate}/artifacts/"
                try:
                    files = list_gcs_files(step_artifacts)
                except Exception:
                    # Best-effort probe: a failed listing just means this
                    # candidate path is not the right one.
                    continue
                artifacts_path = (
                    f"{gcs_path}/artifacts/{folder}/{candidate}/artifacts/"
                )
                html_name = next(
                    (
                        name.strip("/").split("/")[-1]
                        for name in files
                        if name.endswith(".html")
                    ),
                    None,
                )
                if html_name:
                    return f"{base}{artifacts_path}{html_name}"
                # No HTML report found: link to the artifacts directory.
                return f"{base}{artifacts_path}"

        return None
    except Exception as e:
        # Covers malformed view URLs (IndexError), a None step_name/view_url,
        # and failures listing the artifact root; the link is optional.
        logger.error("Failed to construct visualization URL: %s", e)
        return None
class ProwAnalysisResult(NamedTuple):
    """Result of analyzing prow artifacts for a failed job.

    Behaves like a plain tuple, so callers may unpack it positionally in
    field order. The two trailing fields default to ``None`` so that legacy
    four-value construction keeps working.
    """

    # Error strings to show in the preview; None when no job could be resolved.
    errors: Optional[list]
    # Human-readable failure category (e.g. "<phase> phase: <step> failure").
    categorization_message: Optional[str]
    # True when the errors still need LLM filtering/analysis.
    requires_llm: Optional[bool]
    # True for maintenance/install-type failures.
    is_install_issue: Optional[bool]
    # Raw failing step name parsed from junit_operator.xml, when available.
    step_name: Optional[str] = None
    # Untruncated error content intended for file upload, when it differs
    # from the preview in ``errors``.
    full_errors_for_file: Optional[list] = None
:param directory_path: directory path for the artifacts - :return: list of errors + :return: tuple of (preview_results, full_results) where preview has + truncated PRs and full has all PRs """ base_dir = Path(f"{directory_path}/orion") json_files = base_dir.glob("*.json") + preview_results = [] + full_results = [] for json_file in json_files: - json_content = summarize_orion_json(json_file) - if json_content != "": - return [json_content] - return [] + try: + with open(json_file, "r") as f: + json_data = json.load(f) + if isinstance(json_data, list): + full = extract_json_changepoints(json_data) + preview = extract_json_changepoints(json_data, max_prs=5) + full_results.extend(full) + preview_results.extend(preview) + except (json.JSONDecodeError, OSError) as e: + logger.warning("Failed to parse orion JSON '%s': %s", json_file, e) + return preview_results, full_results + + +def _trim_job_prefix(step_name, job_name): + """ + Remove the redundant job test-identifier prefix from the step name. + + The step name often starts with the job's test identifier (a suffix of the + job name). E.g. job "...-aws-4.22-nightly-x86-payload-control-plane-6nodes" + produces step "payload-control-plane-6nodes-openshift-qe-orion-udn-l3". + This function strips the overlapping prefix to yield "openshift-qe-orion-udn-l3". + + :param step_name: lowered step name + :param job_name: full job name + :return: trimmed step name + """ + if not job_name: + return step_name + job_parts = job_name.lower().split("-") + for i in range(len(job_parts)): + suffix = "-".join(job_parts[i:]) + if step_name.startswith(suffix + "-"): + trimmed = step_name[len(suffix) + 1 :] + if trimmed: + return trimmed + break + return step_name -def categorize_prow_failure(step_name, step_phase): +def categorize_prow_failure(step_name, step_phase, job_name=""): """ Categorize prow failures. 
:param step_name: step name :param step_phase: step phase + :param job_name: full job name used to strip redundant prefixes :return: categorized preview tag message """ - step_name = step_name.lower() + step_name = _trim_job_prefix(step_name.lower(), job_name) + step_name = re.sub(r"-?[Xx]{3,}-?", "-", step_name).strip("-") for keyword, (_, description) in FAILURE_KEYWORDS.items(): if keyword in step_name: - return f"{step_phase} phase: {description}" + short_name = step_name[step_name.index(keyword) :] + if len(short_name) > len(keyword) + 1: + return f"{step_phase} phase: {short_name} failure" + return f"{step_phase} phase: {step_name} failure" return f"{step_phase} phase: {step_name} step failure" @@ -83,7 +137,8 @@ def analyze_prow_artifacts(directory_path, job_name): :param directory_path: directory path for the artifacts :param job_name: job name to base line with - :return: tuple of (list of errors, categorization_message, requires_llm, is_install_issue) + :return: ProwAnalysisResult with errors, categorization, and optional + full_errors_for_file (untruncated PR data for file upload) """ step_summary = "" categorization_message = "" @@ -93,11 +148,15 @@ def analyze_prow_artifacts(directory_path, job_name): ) build_file_path = os.path.join(directory_path, "build-log.txt") if not os.path.isfile(build_file_path): - return ( - ["Prow maintanence issues, couldn't even find the build-log.txt file"], - MAINTENANCE_ISSUE, - False, - True, + return ProwAnalysisResult( + errors=[ + "Prow maintanence issues, couldn't even find the build-log.txt file" + ], + categorization_message=MAINTENANCE_ISSUE, + requires_llm=False, + is_install_issue=True, + step_name=None, + full_errors_for_file=None, ) with open(build_file_path, "r", errors="replace", encoding="utf-8") as f: matched_line = next( @@ -112,7 +171,14 @@ def analyze_prow_artifacts(directory_path, job_name): matched_line = ( "Couldn't identify the failure step, likely a maintanence issue" ) - return [matched_line], 
MAINTENANCE_ISSUE, False, True + return ProwAnalysisResult( + errors=[matched_line], + categorization_message=MAINTENANCE_ISSUE, + requires_llm=False, + is_install_issue=True, + step_name=None, + full_errors_for_file=None, + ) junit_operator_file_path = os.path.join(directory_path, "junit_operator.xml") # Defaults in case XML parsing yields no values step_phase, step_name, step_summary = None, None, "" @@ -130,45 +196,57 @@ def analyze_prow_artifacts(directory_path, job_name): e, ) if step_name and step_phase: - categorization_message = categorize_prow_failure(step_name, step_phase) + categorization_message = categorize_prow_failure( + step_name, step_phase, job_name + ) else: - categorization_message = categorize_prow_failure(matched_line, "unknown") + categorization_message = categorize_prow_failure( + matched_line, "unknown", job_name + ) step_summary = "" cluster_operators_file_path = os.path.join(directory_path, "clusteroperators.json") if not os.path.isfile(cluster_operators_file_path): with open(build_file_path, "r", errors="replace", encoding="utf-8") as f: build_log_content = list(deque(f, maxlen=BUILD_LOG_TAIL)) - return ( - [ + return ProwAnalysisResult( + errors=[ "\n Somehow couldn't find clusteroperators.json file", matched_line + "\n", (step_summary or "") + "\n".join(build_log_content), ], - categorization_message, - False, - False, + categorization_message=categorization_message, + requires_llm=False, + is_install_issue=False, + step_name=step_name, + full_errors_for_file=None, ) cluster_operator_errors = get_cluster_operator_errors(directory_path) if len(cluster_operator_errors) == 0: - orion_errors = scan_orion_jsons(directory_path) - if len(orion_errors) == 0: - return ( - [matched_line] + orion_preview, orion_full = scan_orion_jsons(directory_path) + if len(orion_preview) == 0: + return ProwAnalysisResult( + errors=[matched_line] + [step_summary or ""] + search_prow_errors(directory_path, job_name), - categorization_message, - True, - False, + 
categorization_message=categorization_message, + requires_llm=True, + is_install_issue=False, + step_name=step_name, + full_errors_for_file=None, ) - return ( - [matched_line + "\n"] + orion_errors, - categorization_message, - False, - False, + return ProwAnalysisResult( + errors=[matched_line + "\n"] + orion_preview, + categorization_message=categorization_message, + requires_llm=False, + is_install_issue=False, + step_name=step_name, + full_errors_for_file=[matched_line + "\n"] + orion_full, ) - return ( - [matched_line + "\n"] + cluster_operator_errors, - categorization_message, - False, - False, + return ProwAnalysisResult( + errors=[matched_line + "\n"] + cluster_operator_errors, + categorization_message=categorization_message, + requires_llm=False, + is_install_issue=False, + step_name=step_name, + full_errors_for_file=None, ) diff --git a/bugzooka/integrations/slack_fetcher.py b/bugzooka/integrations/slack_fetcher.py index e4b4bd1..6247821 100644 --- a/bugzooka/integrations/slack_fetcher.py +++ b/bugzooka/integrations/slack_fetcher.py @@ -1,4 +1,3 @@ -import io import time import re import os @@ -9,10 +8,7 @@ JEDI_BOT_SLACK_USER_ID, SUMMARY_LOOKBACK_SECONDS, ) -from bugzooka.core.constants import ( - MAX_CONTEXT_SIZE, - MAX_PREVIEW_CONTENT, -) +from bugzooka.core.constants import MAX_PREVIEW_CONTENT from bugzooka.analysis.log_analyzer import ( download_and_analyze_logs, filter_errors_with_llm, @@ -21,6 +17,7 @@ from bugzooka.analysis.log_summarizer import ( classify_failure_type, build_summary_sections, + construct_visualization_url, ) from bugzooka.analysis.prompts import RAG_AWARE_PROMPT from bugzooka.integrations.inference_client import ( @@ -203,42 +200,53 @@ def _filter_new_messages(self, messages): ) return new_messages + def _get_failure_desc(self, categorization_message): + """Extract the failure description from a categorization message for display.""" + display_tag = re.sub(r"openshift-qe[\s-]?", "", categorization_message) + parts = 
display_tag.split(" phase: ", 1) + return parts[1].strip() if len(parts) == 2 else display_tag + def _send_error_logs_preview( - self, errors_list, categorization_message, max_ts, is_install_issue=False + self, + errors_list, + categorization_message, + max_ts, + is_install_issue=False, + full_errors_for_file=None, + viz_url=None, ): """Send error logs preview to Slack (either as message or file).""" - errors_log_preview = "\n".join(errors_list or [])[:MAX_PREVIEW_CONTENT] - errors_list_string = "\n".join(errors_list or [])[:MAX_CONTEXT_SIZE] - - if len(errors_list_string) > MAX_PREVIEW_CONTENT: - preview_message = ( - f":checking: *Error Logs Preview ({categorization_message})*\n" - "Here are the first few lines of the error log:\n" - f"```{errors_log_preview.strip()}```\n" - "_(Log preview truncated. Full log attached below.)_" - ) - self.logger.info("📤 Uploading full error log with preview message") - log_bytes = io.BytesIO(errors_list_string.strip().encode("utf-8")) - self.client.files_upload_v2( - channel=self.channel_id, - file=log_bytes, - filename="full_errors.log", - title="Full Error Log", - thread_ts=max_ts, - initial_comment=preview_message, - ) - else: - self.logger.info("📤 Trying to just send the preview message") - message_block = self.get_slack_message_blocks( - markdown_header=f":checking: *Error Logs Preview ({categorization_message})*\n", - content_text=f"{errors_log_preview.strip()}", - ) - self.client.chat_postMessage( - channel=self.channel_id, - text="Error Logs Preview", - blocks=message_block, - thread_ts=max_ts, - ) + errors_preview = "\n".join(errors_list or []) + is_changepoint = full_errors_for_file is not None + preview_limit = 2048 if is_changepoint else MAX_PREVIEW_CONTENT + errors_log_preview = errors_preview[:preview_limit] + # Use full untruncated content for file upload when available + errors_for_file = ( + "\n".join(full_errors_for_file) if full_errors_for_file else errors_preview + ) + failure_desc = 
def _upload_full_error_log(self, content, max_ts):
    """Attach the complete error log to the Slack thread as a text file.

    :param content: text to upload as the file body
    :param max_ts: timestamp of the thread the file is posted into
    """
    self.logger.info("Uploading full error log file")
    upload_request = {
        "channel": self.channel_id,
        "content": content,
        "filename": "full_errors.txt",
        "title": "Full Error Log",
        "thread_ts": max_ts,
    }
    self.client.files_upload_v2(**upload_request)
    # Give the upload a moment to finish so the next message posted to the
    # thread appears after the file rather than before it.
    time.sleep(1)