From e5dbe30a827344563612fd0f5c5a60340ae03d87 Mon Sep 17 00:00:00 2001 From: Ivan Podkidyshev Date: Sun, 22 Mar 2026 18:04:02 +0100 Subject: [PATCH 01/30] initial implementation --- src/cloudai/reporter.py | 527 ++++++++++++++++++- src/cloudai/util/general-report.jinja2 | 214 ++++++++ src/cloudai/util/general-slurm-report.jinja2 | 214 ++++++++ tests/test_reporter.py | 208 +++++++- 4 files changed, 1134 insertions(+), 29 deletions(-) diff --git a/src/cloudai/reporter.py b/src/cloudai/reporter.py index 9f5b44110..514428d12 100644 --- a/src/cloudai/reporter.py +++ b/src/cloudai/reporter.py @@ -14,12 +14,14 @@ # See the License for the specific language governing permissions and # limitations under the License. +import ast import contextlib +import io import logging import tarfile -from dataclasses import dataclass +from dataclasses import dataclass, field from pathlib import Path -from typing import Optional +from typing import Any, Optional import jinja2 import toml @@ -32,6 +34,155 @@ from .core import CommandGenStrategy, Reporter, TestRun, case_name from .models.scenario import TestRunDetails from .systems.slurm import SlurmSystem, SlurmSystemMetadata +from .systems.slurm.slurm_metadata import SlurmJobMetadata + +GPU_HOURLY_COST_USD = { + "H100": 4.50, + "B200": 8.00, + "GB200": 10.00, + "GB300": 12.00, +} + + +def _safe_literal_eval(raw: Any, default: Any) -> Any: + if isinstance(raw, str): + with contextlib.suppress(SyntaxError, ValueError): + return ast.literal_eval(raw) + return default + + +def _format_scalar(value: Any) -> str: + if isinstance(value, float): + return f"{value:.4f}".rstrip("0").rstrip(".") + return str(value) + + +def _format_duration(seconds: float | None) -> str: + if seconds is None: + return "n/a" + + seconds = max(float(seconds), 0.0) + if seconds < 60: + return f"{seconds:.1f}s" + + minutes, sec = divmod(round(seconds), 60) + hours, minutes = divmod(minutes, 60) + parts = [] + if hours: + parts.append(f"{hours}h") + if minutes: + 
parts.append(f"{minutes}m") + if sec or not parts: + parts.append(f"{sec}s") + return " ".join(parts) + + +def _format_float(value: float | None, precision: int = 2) -> str: + if value is None: + return "n/a" + return f"{value:.{precision}f}" + + +def _format_percent(value: float | None) -> str: + if value is None: + return "n/a" + return f"{value:.2f}%" + + +def _format_money(value: float | None) -> str: + if value is None: + return "n/a" + return f"${value:,.2f}" + + +def _normalize_gpu_family(gpu_name: str | None) -> str | None: + if not gpu_name: + return None + + upper = gpu_name.upper() + for family in GPU_HOURLY_COST_USD: + if family in upper: + return family + return None + + +def _build_running_best(points: list[tuple[int, float]]) -> list[tuple[int, float]]: + running_best: list[tuple[int, float]] = [] + best = None + for step, reward in points: + best = reward if best is None else max(best, reward) + running_best.append((step, best)) + return running_best + + +def _chart_points(points: list[tuple[int, float]], width: int, height: int, padding: int) -> list[tuple[float, float]]: + if not points: + return [] + + x_vals = [step for step, _ in points] + y_vals = [reward for _, reward in points] + min_x, max_x = min(x_vals), max(x_vals) + min_y, max_y = min(y_vals), max(y_vals) + + x_span = max(max_x - min_x, 1) + y_span = max(max_y - min_y, 1e-9) + inner_width = width - 2 * padding + inner_height = height - 2 * padding + + result = [] + for step, reward in points: + x = padding + ((step - min_x) / x_span) * inner_width + y = height - padding - ((reward - min_y) / y_span) * inner_height + result.append((x, y)) + return result + + +def _polyline(points: list[tuple[float, float]]) -> str: + return " ".join(f"{x:.2f},{y:.2f}" for x, y in points) + + +def _build_reward_chart_svg(steps: list["DSEStepData"]) -> str | None: + if not steps: + return None + + width, height, padding = 720, 260, 34 + reward_points = [(step.step, step.reward) for step in steps] + 
running_best = _build_running_best(reward_points) + reward_coords = _chart_points(reward_points, width, height, padding) + best_coords = _chart_points(running_best, width, height, padding) + + reward_line = _polyline(reward_coords) + best_line = _polyline(best_coords) + y_vals = [reward for _, reward in reward_points] + y_min, y_max = min(y_vals), max(y_vals) + + circles = [] + for step_data, (x, y) in zip(steps, reward_coords, strict=True): + tooltip = ( + f"Step {step_data.step} | Reward: {_format_float(step_data.reward, 4)}" + f" | Observation: {step_data.observation_display}" + ) + circles.append(f'{tooltip}') + + return "\n".join( + [ + f'', + f'', + f'', + f'', + f'', + *circles, + f'Step', + f'Reward', + "", + ] + ) @dataclass @@ -100,6 +251,88 @@ def from_test_runs(cls, test_runs: list[TestRun], results_root: Path) -> list["S return report_items +@dataclass +class DSEStepData: + """DSE step data.""" + + step: int + reward: float + observation: list[Any] + observation_display: str + action: dict[str, Any] + elapsed_time_sec: float | None = None + is_successful: bool = False + + +@dataclass +class DSEParameterRow: + """DSE parameter row.""" + + name: str + values: list[str] + best_value: str + + +@dataclass +class DSESummary: + """DSE summary report.""" + + name: str + description: str + iteration: int + output_root: Path + output_root_rel_path: str + total_space: int + executed_steps: int + skipped_steps: int + coverage_percent: float | None + best_step: int | None + best_reward: float | None + best_observation_display: str + avg_step_duration_sec: float | None + total_runtime_sec: float | None + projected_runtime_sec: float | None + saved_runtime_sec: float | None + success_count: int + failure_count: int + gpu_arch_label: str | None + gpu_arch_family: str | None + gpus_per_node: int | None + num_nodes: int | None + total_gpu_hours: float | None + projected_gpu_hours: float | None + saved_gpu_hours: float | None + estimated_saved_cost_usd: float | None + 
best_config_rel_path: str | None + best_scenario_rel_path: str | None + best_scenario_toml: str | None + analysis_rel_path: str | None + parameter_rows: list[DSEParameterRow] = field(default_factory=list) + chart_svg: str | None = None + + @property + def display_name(self) -> str: + if self.iteration == 0: + return self.name + return f"{self.name} iter={self.iteration}" + + @property + def status_text(self) -> str: + if self.failure_count == 0: + return "PASSED" + if self.success_count == 0: + return "FAILED" + return "PARTIAL" + + @property + def status_style(self) -> str: + return { + "PASSED": "[green]PASSED[/green]", + "FAILED": "[red]FAILED[/red]", + "PARTIAL": "[yellow]PARTIAL[/yellow]", + }[self.status_text] + + class PerTestReporter(Reporter): """Generates reports per test using test-specific reporting strategies.""" @@ -125,6 +358,10 @@ def generate(self) -> None: class StatusReporter(Reporter): """Generates HTML status reports with system-specific templates.""" + def __init__(self, system, test_scenario, results_root, config): + super().__init__(system, test_scenario, results_root, config) + self.dse_summaries: list[DSESummary] = [] + @property def template_file_path(self) -> Path: return Path(__file__).parent / "util" @@ -138,10 +375,13 @@ def template_file(self) -> str: def best_dse_config_file_name(self, tr: TestRun) -> str: return f"{tr.name}.toml" + def best_dse_scenario_file_name(self, tr: TestRun) -> str: + return f"{tr.name}-best-in-scenario.toml" + def generate(self) -> None: self.load_test_runs() - self.generate_scenario_report() self.report_best_dse_config() + self.generate_scenario_report() self.print_summary() def generate_scenario_report(self) -> None: @@ -154,7 +394,15 @@ def generate_scenario_report(self) -> None: if isinstance(self.system, SlurmSystem) else ReportItem.from_test_runs(self.trs, self.results_root) ) - report = template.render(name=self.test_scenario.name, report_items=report_items) + report = template.render( + 
name=self.test_scenario.name, + report_items=report_items, + dse_summaries=self.dse_summaries, + format_duration=_format_duration, + format_float=_format_float, + format_percent=_format_percent, + format_money=_format_money, + ) report_path = self.results_root / f"{self.test_scenario.name}.html" with report_path.open("w") as f: f.write(report) @@ -162,26 +410,239 @@ def generate_scenario_report(self) -> None: logging.info(f"Generated scenario report at {report_path}") def report_best_dse_config(self): + self.dse_summaries = [] for tr in self.test_scenario.test_runs: - if not tr.test.is_dse_job: + if not tr.is_dse_job: continue - tr_root = self.results_root / tr.name / f"{tr.current_iteration}" - trajectory_file = tr_root / "trajectory.csv" - if not trajectory_file.exists(): - logging.warning(f"No trajectory file found for {tr.name} at {trajectory_file}") + self.dse_summaries.extend(self._build_dse_summaries(tr)) + + def _build_dse_summaries(self, original_tr: TestRun) -> list[DSESummary]: + summaries: list[DSESummary] = [] + tr_base_dir = self.results_root / original_tr.name + if not tr_base_dir.exists(): + return summaries + + grouped_trs: dict[int, list[TestRun]] = {} + for tr in self.trs: + if tr.name != original_tr.name: continue + grouped_trs.setdefault(tr.current_iteration, []).append(tr) + iteration_dirs = sorted((d for d in tr_base_dir.iterdir() if d.is_dir()), key=lambda p: int(p.name)) + for iter_dir in iteration_dirs: + iteration = int(iter_dir.name) + summary = self._build_dse_summary_for_iteration( + original_tr, iteration, iter_dir, grouped_trs.get(iteration, []) + ) + if summary is not None: + summaries.append(summary) + return summaries + + def _build_dse_summary_for_iteration( + self, original_tr: TestRun, iteration: int, iter_dir: Path, step_trs: list[TestRun] + ) -> DSESummary | None: + trajectory_file = iter_dir / "trajectory.csv" + if not trajectory_file.exists(): + logging.warning(f"No trajectory file found for {original_tr.name} at 
{trajectory_file}") + return None + + df = lazy.pd.read_csv(trajectory_file) + if df.empty: + return None + + steps_by_number = {tr.step: tr for tr in step_trs} + steps: list[DSEStepData] = [] + for row in df.to_dict(orient="records"): + step_no = int(row["step"]) + action = _safe_literal_eval(row.get("action"), {}) + if not isinstance(action, dict): + action = {} + observation = _safe_literal_eval(row.get("observation"), []) + if not isinstance(observation, list): + observation = [observation] + elapsed_time = self._step_elapsed_time(iter_dir / str(step_no)) + tr = steps_by_number.get(step_no) + is_successful = tr.test.was_run_successful(tr).is_successful if tr is not None else False + steps.append( + DSEStepData( + step=step_no, + reward=float(row["reward"]), + observation=observation, + observation_display=", ".join(_format_scalar(v) for v in observation) if observation else "n/a", + action=action, + elapsed_time_sec=elapsed_time, + is_successful=is_successful, + ) + ) + + if not steps: + return None + + steps.sort(key=lambda step: step.step) + best_step_data = max(steps, key=lambda step: step.reward) + best_step_dir = iter_dir / str(best_step_data.step) + best_step_details = best_step_dir / CommandGenStrategy.TEST_RUN_DUMP_FILE_NAME + if not best_step_details.exists(): + logging.warning(f"No test run dump found for best DSE step at {best_step_details}") + return None + + with best_step_details.open() as f: + trd = TestRunDetails.model_validate(toml.load(f)) + + best_config_path = iter_dir / self.best_dse_config_file_name(original_tr) + logging.info(f"Writing best config for {original_tr.name} to {best_config_path}") + with best_config_path.open("w") as f: + toml.dump(trd.test_definition.model_dump(), f) + + best_scenario_content = self._build_best_scenario_toml(original_tr, trd) + best_scenario_path = iter_dir / self.best_dse_scenario_file_name(original_tr) + with best_scenario_path.open("w") as f: + f.write(best_scenario_content) + + elapsed_times = 
[step.elapsed_time_sec for step in steps if step.elapsed_time_sec is not None] + avg_step_duration_sec = sum(elapsed_times) / len(elapsed_times) if elapsed_times else None + total_runtime_sec = sum(elapsed_times) if elapsed_times else None + total_space = len(original_tr.all_combinations) + executed_steps = len(steps) + skipped_steps = max(total_space - executed_steps, 0) + coverage_percent = (executed_steps / total_space * 100.0) if total_space else None + projected_runtime_sec = avg_step_duration_sec * total_space if avg_step_duration_sec is not None else None + saved_runtime_sec = ( + max(projected_runtime_sec - total_runtime_sec, 0.0) + if projected_runtime_sec is not None and total_runtime_sec is not None + else None + ) + + metadata = self._best_available_metadata(iter_dir, best_step_data.step) + gpu_arch_label = metadata.system.gpu_arch_type if metadata else None + gpu_arch_family = _normalize_gpu_family(gpu_arch_label) + num_nodes = trd.nnodes + gpus_per_node = getattr(self.system, "gpus_per_node", None) + total_gpu_hours = ( + (total_runtime_sec / 3600.0) * num_nodes * gpus_per_node + if total_runtime_sec is not None and gpus_per_node is not None + else None + ) + projected_gpu_hours = ( + (projected_runtime_sec / 3600.0) * num_nodes * gpus_per_node + if projected_runtime_sec is not None and gpus_per_node is not None + else None + ) + saved_gpu_hours = ( + max(projected_gpu_hours - total_gpu_hours, 0.0) + if projected_gpu_hours is not None and total_gpu_hours is not None + else None + ) + estimated_saved_cost_usd = ( + saved_gpu_hours * GPU_HOURLY_COST_USD[gpu_arch_family] + if saved_gpu_hours is not None and gpu_arch_family in GPU_HOURLY_COST_USD + else None + ) + + success_count = sum(1 for step in steps if step.is_successful) + failure_count = len(steps) - success_count + best_action = best_step_data.action + parameter_rows = [ + DSEParameterRow( + name=name, + values=[_format_scalar(value) for value in values], + 
best_value=_format_scalar(best_action.get(name, "n/a")), + ) + for name, values in original_tr.param_space.items() + ] + analysis_file = iter_dir / "analysis.csv" + + return DSESummary( + name=original_tr.name, + description=original_tr.test.description, + iteration=iteration, + output_root=iter_dir, + output_root_rel_path=f"./{iter_dir.relative_to(self.results_root)}", + total_space=total_space, + executed_steps=executed_steps, + skipped_steps=skipped_steps, + coverage_percent=coverage_percent, + best_step=best_step_data.step, + best_reward=best_step_data.reward, + best_observation_display=best_step_data.observation_display, + avg_step_duration_sec=avg_step_duration_sec, + total_runtime_sec=total_runtime_sec, + projected_runtime_sec=projected_runtime_sec, + saved_runtime_sec=saved_runtime_sec, + success_count=success_count, + failure_count=failure_count, + gpu_arch_label=gpu_arch_label, + gpu_arch_family=gpu_arch_family, + gpus_per_node=gpus_per_node, + num_nodes=num_nodes, + total_gpu_hours=total_gpu_hours, + projected_gpu_hours=projected_gpu_hours, + saved_gpu_hours=saved_gpu_hours, + estimated_saved_cost_usd=estimated_saved_cost_usd, + best_config_rel_path=f"./{best_config_path.relative_to(self.results_root)}", + best_scenario_rel_path=f"./{best_scenario_path.relative_to(self.results_root)}", + best_scenario_toml=best_scenario_content, + analysis_rel_path=f"./{analysis_file.relative_to(self.results_root)}" if analysis_file.exists() else None, + parameter_rows=parameter_rows, + chart_svg=_build_reward_chart_svg(steps), + ) - df = lazy.pd.read_csv(trajectory_file) - best_step = df.loc[df["reward"].idxmax()]["step"] - best_step_details = tr_root / f"{best_step}" / CommandGenStrategy.TEST_RUN_DUMP_FILE_NAME - with best_step_details.open() as f: - trd = TestRunDetails.model_validate(toml.load(f)) + def _build_best_scenario_toml(self, original_tr: TestRun, best_trd: TestRunDetails) -> str: + tdef = best_trd.test_definition.model_copy(deep=True) + tdef.agent = None + 
tdef.agent_steps = None + tdef.agent_reward_function = None + tdef.agent_config = None + tdef.agent_metrics = ["default"] + + test_block: dict[str, Any] = { + "id": original_tr.name, + "num_nodes": best_trd.nnodes, + "name": tdef.name, + "description": tdef.description, + "test_template_name": tdef.test_template_name, + "cmd_args": tdef.cmd_args.model_dump(by_alias=True), + } + if original_tr.time_limit: + test_block["time_limit"] = original_tr.time_limit + if original_tr.nodes: + test_block["nodes"] = original_tr.nodes + if original_tr.exclude_nodes: + test_block["exclude_nodes"] = original_tr.exclude_nodes + if tdef.extra_env_vars: + test_block["extra_env_vars"] = tdef.extra_env_vars + if tdef.extra_container_mounts: + test_block["extra_container_mounts"] = tdef.extra_container_mounts + if tdef.git_repos: + test_block["git_repos"] = [repo.model_dump() for repo in tdef.git_repos] + if tdef.nsys: + test_block["nsys"] = tdef.nsys.model_dump(exclude_unset=True) + if original_tr.extra_srun_args: + test_block["extra_srun_args"] = original_tr.extra_srun_args + + scenario_dict = { + "name": f"{best_trd.test_definition.name}_best_config", + "Tests": [test_block], + } + buffer = io.StringIO() + toml.dump(scenario_dict, buffer) + return buffer.getvalue() + + @staticmethod + def _step_elapsed_time(step_dir: Path) -> float | None: + slurm_job_path = step_dir / "slurm-job.toml" + if not slurm_job_path.exists(): + return None - best_config_path = tr_root / self.best_dse_config_file_name(tr) - logging.info(f"Writing best config for {tr.name} to {best_config_path}") - with best_config_path.open("w") as f: - toml.dump(trd.test_definition.model_dump(), f) + with slurm_job_path.open() as f: + metadata = SlurmJobMetadata.model_validate(toml.load(f)) + return float(metadata.elapsed_time_sec) + + def _best_available_metadata(self, iter_dir: Path, best_step: int) -> SlurmSystemMetadata | None: + if not isinstance(self.system, SlurmSystem): + return None + best_step_dir = iter_dir / 
str(best_step) + return SlurmReportItem.get_metadata(best_step_dir, self.results_root) def print_summary(self) -> None: if not self.trs: @@ -192,15 +653,27 @@ def print_summary(self) -> None: for col in ["Case", "Status", "Details"]: table.add_column(col, overflow="fold") - for tr in self.trs: - tr_status = tr.test.was_run_successful(tr) - sts_text = f"[bold]{'[green]PASSED[/green]' if tr_status.is_successful else '[red]FAILED[/red]'}[/bold]" - display_path = str(tr.output_path.absolute()) - with contextlib.suppress(ValueError): - display_path = str(tr.output_path.absolute().relative_to(Path.cwd())) - details_text = f"\n{tr_status.error_message}" if tr_status.error_message else "" - columns = [tr.name, sts_text, f"{display_path}{details_text}"] - table.add_row(*columns) + if self.dse_summaries: + for summary in self.dse_summaries: + details = [ + f"steps={summary.executed_steps}/{summary.total_space}", + f"best_step={summary.best_step}", + f"best_reward={_format_float(summary.best_reward, 4)}", + f"failures={summary.failure_count}", + ] + if summary.best_scenario_rel_path: + details.append(summary.best_scenario_rel_path) + table.add_row(summary.display_name, f"[bold]{summary.status_style}[/bold]", "\n".join(details)) + else: + for tr in self.trs: + tr_status = tr.test.was_run_successful(tr) + sts_text = f"[bold]{'[green]PASSED[/green]' if tr_status.is_successful else '[red]FAILED[/red]'}[/bold]" + display_path = str(tr.output_path.absolute()) + with contextlib.suppress(ValueError): + display_path = str(tr.output_path.absolute().relative_to(Path.cwd())) + details_text = f"\n{tr_status.error_message}" if tr_status.error_message else "" + columns = [tr.name, sts_text, f"{display_path}{details_text}"] + table.add_row(*columns) console = Console() with console.capture() as capture: diff --git a/src/cloudai/util/general-report.jinja2 b/src/cloudai/util/general-report.jinja2 index 696e6dcda..13585812c 100644 --- a/src/cloudai/util/general-report.jinja2 +++ 
b/src/cloudai/util/general-report.jinja2 @@ -1,6 +1,219 @@ {% extends "base-report.jinja2" %} +{% block extra_head %} + +{% endblock %} + {% block content %} +{% if dse_summaries %} + {% for summary in dse_summaries %} +
+

{{ summary.display_name }}

+

{{ summary.description }}

+ +
+
+
Space
+
{{ summary.total_space }}
+
+
+
Ran
+
{{ summary.executed_steps }}
+
+
+
Skipped
+
{{ summary.skipped_steps }}
+
+
+
Coverage
+
{{ format_percent(summary.coverage_percent) }}
+
+
+
Saved Time
+
{{ format_duration(summary.saved_runtime_sec) }}
+
+
+
Saved GPU-Hours
+
{{ format_float(summary.saved_gpu_hours, 2) }}
+
+
+
Estimated $ Saved
+
{{ format_money(summary.estimated_saved_cost_usd) }}
+
+
+ +
+

Execution Context

+
+
Status{{ summary.status_text }}
+
GPU Family{{ summary.gpu_arch_family or "unknown" }}
+
GPU Label{{ summary.gpu_arch_label or "unknown" }}
+
GPUs Per Node{{ summary.gpus_per_node or "unknown" }}
+
Nodes{{ summary.num_nodes or "unknown" }}
+
Step Success/Failure{{ summary.success_count }}/{{ summary.failure_count }}
+
Average Step Runtime{{ format_duration(summary.avg_step_duration_sec) }}
+
Observed Runtime{{ format_duration(summary.total_runtime_sec) }}
+
Projected Full-Space Runtime{{ format_duration(summary.projected_runtime_sec) }}
+
+
+ +
+
+
+

Best Step

+
+
Best Step{{ summary.best_step }}
+
Best Reward{{ format_float(summary.best_reward, 4) }}
+
Best Observation{{ summary.best_observation_display }}
+
Run Folderopen
+
+ + {% if summary.best_scenario_toml %} +
+ Show best scenario TOML +
{{ summary.best_scenario_toml }}
+
+ {% endif %} +
+ +
+

Exploration Space

+

Each row shows the allowed values for a swept parameter and the selected best value.

+ + + + + + + {% for row in summary.parameter_rows %} + + + + + + {% endfor %} +
ParameterAllowed ValuesBest
{{ row.name }}{{ row.values | join(", ") }}{{ row.best_value }}
+
+
+ +
+

Reward Over Steps

+

Blue shows observed reward per trial. Red dashed shows the best-so-far curve.

+ {% if summary.chart_svg %} + {{ summary.chart_svg | safe }} + {% else %} +

No reward data available.

+ {% endif %} +
+
+
+ {% endfor %} +{% else %} @@ -19,4 +232,5 @@ {% endfor %}
Test
+{% endif %} {% endblock %} diff --git a/src/cloudai/util/general-slurm-report.jinja2 b/src/cloudai/util/general-slurm-report.jinja2 index c37b0aa0f..9298be928 100644 --- a/src/cloudai/util/general-slurm-report.jinja2 +++ b/src/cloudai/util/general-slurm-report.jinja2 @@ -1,6 +1,219 @@ {% extends "base-report.jinja2" %} +{% block extra_head %} + +{% endblock %} + {% block content %} +{% if dse_summaries %} + {% for summary in dse_summaries %} +
+

{{ summary.display_name }}

+

{{ summary.description }}

+ +
+
+
Space
+
{{ summary.total_space }}
+
+
+
Ran
+
{{ summary.executed_steps }}
+
+
+
Skipped
+
{{ summary.skipped_steps }}
+
+
+
Coverage
+
{{ format_percent(summary.coverage_percent) }}
+
+
+
Saved Time
+
{{ format_duration(summary.saved_runtime_sec) }}
+
+
+
Saved GPU-Hours
+
{{ format_float(summary.saved_gpu_hours, 2) }}
+
+
+
Estimated $ Saved
+
{{ format_money(summary.estimated_saved_cost_usd) }}
+
+
+ +
+

Execution Context

+
+
Status{{ summary.status_text }}
+
GPU Family{{ summary.gpu_arch_family or "unknown" }}
+
GPU Label{{ summary.gpu_arch_label or "unknown" }}
+
GPUs Per Node{{ summary.gpus_per_node or "unknown" }}
+
Nodes{{ summary.num_nodes or "unknown" }}
+
Step Success/Failure{{ summary.success_count }}/{{ summary.failure_count }}
+
Average Step Runtime{{ format_duration(summary.avg_step_duration_sec) }}
+
Observed Runtime{{ format_duration(summary.total_runtime_sec) }}
+
Projected Full-Space Runtime{{ format_duration(summary.projected_runtime_sec) }}
+
+
+ +
+
+
+

Best Step

+
+
Best Step{{ summary.best_step }}
+
Best Reward{{ format_float(summary.best_reward, 4) }}
+
Best Observation{{ summary.best_observation_display }}
+
Run Folderopen
+
+ + {% if summary.best_scenario_toml %} +
+ Show best scenario TOML +
{{ summary.best_scenario_toml }}
+
+ {% endif %} +
+ +
+

Exploration Space

+

Each row shows the allowed values for a swept parameter and the selected best value.

+ + + + + + + {% for row in summary.parameter_rows %} + + + + + + {% endfor %} +
ParameterAllowed ValuesBest
{{ row.name }}{{ row.values | join(", ") }}{{ row.best_value }}
+
+
+ +
+

Reward Over Steps

+

Blue shows observed reward per trial. Red dashed shows the best-so-far curve.

+ {% if summary.chart_svg %} + {{ summary.chart_svg | safe }} + {% else %} +

No reward data available.

+ {% endif %} +
+
+
+ {% endfor %} +{% else %} @@ -21,4 +234,5 @@ {% endfor %}
Test
+{% endif %} {% endblock %} diff --git a/tests/test_reporter.py b/tests/test_reporter.py index 547c588c7..50d815b3c 100644 --- a/tests/test_reporter.py +++ b/tests/test_reporter.py @@ -15,6 +15,7 @@ # limitations under the License. import copy +import csv import tarfile from pathlib import Path @@ -23,8 +24,8 @@ from cloudai import TestRun, TestScenario from cloudai.cli.handlers import generate_reports -from cloudai.core import Registry, Reporter, System -from cloudai.models.scenario import ReportConfig +from cloudai.core import CommandGenStrategy, Registry, Reporter, System +from cloudai.models.scenario import ReportConfig, TestRunDetails from cloudai.reporter import PerTestReporter, SlurmReportItem, StatusReporter, TarballReporter from cloudai.systems.slurm.slurm_metadata import ( MetadataCUDA, @@ -40,6 +41,97 @@ from cloudai.workloads.nccl_test import NCCLCmdArgs, NCCLTestDefinition +def _write_successful_nccl_stdout(step_dir: Path) -> None: + (step_dir / "stdout.txt").write_text("# Out of bounds values\n# Avg bus bandwidth\n") + + +def _write_slurm_job_metadata(step_dir: Path, elapsed_time_sec: int) -> None: + slurm_job = { + "job_id": 123456, + "name": "test-job", + "state": "COMPLETED", + "start_time": "2026-03-21T15:00:00", + "end_time": "2026-03-21T15:05:00", + "elapsed_time_sec": elapsed_time_sec, + "exit_code": "0:0", + "srun_cmd": "srun echo test", + "test_cmd": "echo test", + "is_single_sbatch": False, + "job_root": str(step_dir), + "job_steps": [], + } + with (step_dir / "slurm-job.toml").open("w") as f: + toml.dump(slurm_job, f) + + +def _write_step_metadata(step_dir: Path, metadata: SlurmSystemMetadata) -> None: + metadata_dir = step_dir / "metadata" + metadata_dir.mkdir(parents=True, exist_ok=True) + with (metadata_dir / "node-0.toml").open("w") as f: + toml.dump(metadata.model_dump(), f) + + +def _create_dse_report_fixture( + slurm_system: SlurmSystem, + slurm_metadata: SlurmSystemMetadata, + gpu_name: str = "NVIDIA H100 80GB HBM3", +) -> TestRun: 
+ test_definition = NCCLTestDefinition( + name="dse-nccl", + description="DSE summary sample", + test_template_name="NcclTest", + cmd_args=NCCLCmdArgs( + docker_image_url="fake://url/nccl", + subtest_name="all_reduce_perf_mpi", + nthreads=[1, 2], + datatype=["float", "uint8"], + blocking=[0, 1], + ), + agent_steps=3, + ) + tr = TestRun( + name="dse-report", + test=test_definition, + num_nodes=2, + nodes=["node1", "node2"], + time_limit="00:05:00", + ) + iter_dir = slurm_system.output_path / tr.name / "0" + iter_dir.mkdir(parents=True, exist_ok=True) + + rows = [ + (1, {"nthreads": 1, "datatype": "float", "blocking": 0}, 1.5, [2.5], 10), + (2, {"nthreads": 2, "datatype": "uint8", "blocking": 1}, 3.0, [1.2], 20), + (3, {"nthreads": 2, "datatype": "float", "blocking": 1}, 2.0, [1.8], 30), + ] + + with (iter_dir / "trajectory.csv").open("w", newline="") as f: + writer = csv.writer(f) + writer.writerow(["step", "action", "reward", "observation"]) + for step, action, reward, observation, _elapsed in rows: + writer.writerow([step, action, reward, observation]) + + for step, action, _reward, _observation, elapsed in rows: + step_dir = iter_dir / str(step) + step_dir.mkdir(parents=True, exist_ok=True) + step_tr = tr.apply_params_set(action) + step_tr.step = step + step_tr.output_path = step_dir + + with (step_dir / CommandGenStrategy.TEST_RUN_DUMP_FILE_NAME).open("w") as f: + toml.dump(TestRunDetails.from_test_run(step_tr, "", "").model_dump(), f) + + _write_successful_nccl_stdout(step_dir) + _write_slurm_job_metadata(step_dir, elapsed) + + metadata = slurm_metadata.model_copy(deep=True) + metadata.system.gpu_arch_type = gpu_name + _write_step_metadata(iter_dir / "2", metadata) + (iter_dir / "analysis.csv").write_text("parameter,sensitivity,importance\nblocking,0.5,0.8\n") + + return tr + + class TestLoadTestTuns: def test_load_test_runs_behcnmark_sorted(self, slurm_system: SlurmSystem, benchmark_tr: TestRun) -> None: reporter = PerTestReporter( @@ -303,3 +395,115 @@ def 
test_report_order() -> None: assert reports[0][0] == "per_test" assert reports[-2][0] == "status" assert reports[-1][0] == "tarball" + + +def test_dse_summary_and_best_scenario_artifacts( + slurm_system: SlurmSystem, slurm_metadata: SlurmSystemMetadata +) -> None: + dse_tr = _create_dse_report_fixture(slurm_system, slurm_metadata) + reporter = StatusReporter( + slurm_system, + TestScenario(name="dse_scenario", test_runs=[dse_tr]), + slurm_system.output_path, + ReportConfig(), + ) + + reporter.load_test_runs() + reporter.report_best_dse_config() + + assert len(reporter.dse_summaries) == 1 + summary = reporter.dse_summaries[0] + assert summary.total_space == 8 + assert summary.executed_steps == 3 + assert summary.skipped_steps == 5 + assert summary.coverage_percent == pytest.approx(37.5) + assert summary.best_step == 2 + assert summary.best_reward == pytest.approx(3.0) + assert summary.best_observation_display == "1.2" + assert summary.avg_step_duration_sec == pytest.approx(20.0) + assert summary.total_runtime_sec == pytest.approx(60.0) + assert summary.projected_runtime_sec == pytest.approx(160.0) + assert summary.saved_runtime_sec == pytest.approx(100.0) + assert summary.saved_gpu_hours == pytest.approx((100.0 / 3600.0) * 16) + assert summary.estimated_saved_cost_usd == pytest.approx((summary.saved_gpu_hours or 0) * 4.5) + assert summary.gpu_arch_family == "H100" + assert summary.analysis_rel_path is not None + + best_config_path = slurm_system.output_path / dse_tr.name / "0" / reporter.best_dse_config_file_name(dse_tr) + best_scenario_path = slurm_system.output_path / dse_tr.name / "0" / reporter.best_dse_scenario_file_name(dse_tr) + assert best_config_path.exists() + assert best_scenario_path.exists() + + old_best = toml.load(best_config_path) + assert old_best["agent_steps"] == 3 + + best_scenario = toml.load(best_scenario_path) + assert best_scenario["Tests"][0]["cmd_args"]["datatype"] == "uint8" + assert best_scenario["Tests"][0]["cmd_args"]["blocking"] == 1 + 
assert best_scenario["Tests"][0]["cmd_args"]["nthreads"] == 2 + assert best_scenario["Tests"][0]["num_nodes"] == 2 + assert "agent" not in best_scenario["Tests"][0] + assert "agent_steps" not in best_scenario["Tests"][0] + + +def test_dse_generate_scenario_report_renders_html( + slurm_system: SlurmSystem, slurm_metadata: SlurmSystemMetadata +) -> None: + dse_tr = _create_dse_report_fixture(slurm_system, slurm_metadata) + reporter = StatusReporter( + slurm_system, + TestScenario(name="dse_scenario", test_runs=[dse_tr]), + slurm_system.output_path, + ReportConfig(), + ) + + reporter.generate() + + report_path = slurm_system.output_path / "dse_scenario.html" + html = report_path.read_text() + assert "Saved GPU-Hours" in html + assert "Reward Over Steps" in html + assert "Best Scenario TOML" in html + assert "BO Analysis" in html + assert "dse-report-best-in-scenario.toml" in html + assert " None: + dse_tr = _create_dse_report_fixture(slurm_system, slurm_metadata) + reporter = StatusReporter( + slurm_system, + TestScenario(name="dse_scenario", test_runs=[dse_tr]), + slurm_system.output_path, + ReportConfig(), + ) + + reporter.load_test_runs() + reporter.report_best_dse_config() + with caplog.at_level("INFO"): + reporter.print_summary() + + assert "steps=3/8" in caplog.text + assert "best_step=2" in caplog.text + assert "dse-report-best-in-scenario.toml" in caplog.text + assert "step=1" not in caplog.text + + +def test_unknown_gpu_family_omits_estimated_cost( + slurm_system: SlurmSystem, slurm_metadata: SlurmSystemMetadata +) -> None: + dse_tr = _create_dse_report_fixture(slurm_system, slurm_metadata, gpu_name="Mystery GPU") + reporter = StatusReporter( + slurm_system, + TestScenario(name="dse_scenario", test_runs=[dse_tr]), + slurm_system.output_path, + ReportConfig(), + ) + + reporter.load_test_runs() + reporter.report_best_dse_config() + + assert reporter.dse_summaries[0].gpu_arch_family is None + assert reporter.dse_summaries[0].estimated_saved_cost_usd is None From 
62fe3b87bb33f8fdece411e7972d45db9ec45a6c Mon Sep 17 00:00:00 2001 From: Ivan Podkidyshev Date: Sun, 22 Mar 2026 19:35:20 +0100 Subject: [PATCH 02/30] refactored ai code --- pyproject.toml | 4 +- src/cloudai/report_generator/status_report.py | 507 ++++++++++++++ src/cloudai/reporter.py | 652 ++---------------- src/cloudai/util/general-report.jinja2 | 15 +- src/cloudai/util/general-slurm-report.jinja2 | 238 ------- tests/test_reporter.py | 140 ++-- 6 files changed, 652 insertions(+), 904 deletions(-) create mode 100644 src/cloudai/report_generator/status_report.py delete mode 100644 src/cloudai/util/general-slurm-report.jinja2 diff --git a/pyproject.toml b/pyproject.toml index fa80670bb..4e14aa151 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -145,8 +145,8 @@ root_package = "cloudai" [[tool.importlinter.contracts]] name = "Report generator is leaf dependency" type = "forbidden" - forbidden_modules = ["cloudai.systems", "cloudai.workloads", "cloudai.cli"] - allow_indirect_imports = true # allow "from cloudai.core import ..." + forbidden_modules = ["cloudai.workloads", "cloudai.cli"] + allow_indirect_imports = true # allow "from cloudai.core import ..." source_modules = ["cloudai.report_generator"] [[tool.importlinter.contracts]] diff --git a/src/cloudai/report_generator/status_report.py b/src/cloudai/report_generator/status_report.py new file mode 100644 index 000000000..cfb732e38 --- /dev/null +++ b/src/cloudai/report_generator/status_report.py @@ -0,0 +1,507 @@ +# SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES +# Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import annotations + +import ast +import contextlib +import logging +from dataclasses import dataclass, field +from pathlib import Path +from typing import Any, Optional + +import toml + +from cloudai.core import CommandGenStrategy, TestRun, case_name +from cloudai.models.scenario import TestRunDetails +from cloudai.util.lazy_imports import lazy + +from ..systems.slurm import SlurmJobMetadata, SlurmSystemMetadata + +GPU_HOURLY_COST_USD = { + "H100": 4.50, + "B200": 8.00, + "GB200": 10.00, + "GB300": 12.00, +} + + +def format_duration(seconds: float | None) -> str: + if seconds is None: + return "n/a" + + seconds = max(float(seconds), 0.0) + if seconds < 60: + return f"{seconds:.1f}s" + + minutes, sec = divmod(round(seconds), 60) + hours, minutes = divmod(minutes, 60) + parts = [] + if hours: + parts.append(f"{hours}h") + if minutes: + parts.append(f"{minutes}m") + if sec or not parts: + parts.append(f"{sec}s") + return " ".join(parts) + + +def format_float(value: float | None, precision: int = 2) -> str: + if value is None: + return "n/a" + return f"{value:.{precision}f}" + + +def format_percent(value: float | None) -> str: + if value is None: + return "n/a" + return f"{value:.2f}%" + + +def format_money(value: float | None) -> str: + if value is None: + return "n/a" + return f"${value:,.2f}" + + +def _safe_literal_eval(raw: Any, default: Any) -> Any: + if isinstance(raw, str): + with contextlib.suppress(SyntaxError, ValueError): + return ast.literal_eval(raw) + return default + + +def _format_scalar(value: Any) -> str: + if 
isinstance(value, float): + return f"{value:.4f}".rstrip("0").rstrip(".") + return str(value) + + +def _normalize_gpu_family(gpu_name: str | None) -> str | None: + if not gpu_name: + return None + + upper = gpu_name.upper() + for family in GPU_HOURLY_COST_USD: + if family in upper: + return family + return None + + +def _build_running_best(points: list[tuple[int, float]]) -> list[tuple[int, float]]: + running_best: list[tuple[int, float]] = [] + best = None + for step, reward in points: + best = reward if best is None else max(best, reward) + running_best.append((step, best)) + return running_best + + +def _chart_points(points: list[tuple[int, float]], width: int, height: int, padding: int) -> list[tuple[float, float]]: + if not points: + return [] + + x_vals = [step for step, _ in points] + y_vals = [reward for _, reward in points] + min_x, max_x = min(x_vals), max(x_vals) + min_y, max_y = min(y_vals), max(y_vals) + + x_span = max(max_x - min_x, 1) + y_span = max(max_y - min_y, 1e-9) + inner_width = width - 2 * padding + inner_height = height - 2 * padding + + result = [] + for step, reward in points: + x = padding + ((step - min_x) / x_span) * inner_width + y = height - padding - ((reward - min_y) / y_span) * inner_height + result.append((x, y)) + return result + + +def _polyline(points: list[tuple[float, float]]) -> str: + return " ".join(f"{x:.2f},{y:.2f}" for x, y in points) + + +def _build_reward_chart_svg(steps: list["DSEStepData"]) -> str | None: + if not steps: + return None + + width, height, padding = 720, 260, 34 + reward_points = [(step.step, step.reward) for step in steps] + running_best = _build_running_best(reward_points) + reward_coords = _chart_points(reward_points, width, height, padding) + best_coords = _chart_points(running_best, width, height, padding) + + reward_line = _polyline(reward_coords) + best_line = _polyline(best_coords) + y_vals = [reward for _, reward in reward_points] + y_min, y_max = min(y_vals), max(y_vals) + + circles = [] + 
for step_data, (x, y) in zip(steps, reward_coords, strict=True): + tooltip = ( + f"Step {step_data.step} | Reward: {format_float(step_data.reward, 4)}" + f" | Observation: {step_data.observation_display}" + ) + circles.append(f'{tooltip}') + + return "\n".join( + [ + f'', + f'', + f'', + f'', + f'', + *circles, + f'Step', + f'Reward', + "", + ] + ) + + +def load_system_metadata(run_dir: Path, results_root: Path) -> SlurmSystemMetadata | None: + """Load system metadata from run_dir. At the moment it supports only Slurm.""" + metadata_path = run_dir / "metadata" + if not metadata_path.exists(): + logging.debug(f"No metadata folder found in {run_dir=}") + if not (results_root / "metadata").exists(): + logging.debug(f"No metadata folder found in {results_root=}") + return None + metadata_path = results_root / "metadata" + + node_files = list(metadata_path.glob("node-*.toml")) + if not node_files: + logging.debug(f"No node files found in {metadata_path}") + return None + + node_file = node_files[0] + with node_file.open() as f: + try: + return SlurmSystemMetadata.model_validate(toml.load(f)) + except Exception as e: + logging.debug(f"Error validating metadata for {node_file}: {e}") + + return None + + +@dataclass +class ReportItem: + """Basic report item for general systems.""" + + name: str + description: str + logs_path: Optional[str] = None + nodes: Optional[SlurmSystemMetadata] = None + + @classmethod + def from_test_runs(cls, test_runs: list[TestRun], results_root: Path) -> list["ReportItem"]: + report_items: list[ReportItem] = [] + for tr in test_runs: + report_items.append( + ReportItem( + name=case_name(tr), + description=tr.test.description, + logs_path=f"./{tr.output_path.relative_to(results_root)}" if tr.output_path.exists() else None, + nodes=load_system_metadata(tr.output_path, results_root), + ) + ) + return report_items + + +@dataclass +class DSEStepData: + """DSE step data.""" + + step: int + reward: float + observation: list[Any] + observation_display: 
str + action: dict[str, Any] + elapsed_time_sec: int | None = None + is_successful: bool = False + + +@dataclass +class DSEParameterRow: + """DSE parameter row.""" + + name: str + values: list[str] + best_value: str + + +@dataclass +class DSESummary: + """Summary of a DSE iteration.""" + + name: str + description: str + iteration: int + output_root: Path + output_root_rel_path: str + total_space: int + executed_steps: int + skipped_steps: int + coverage_percent: float | None + best_step: int | None + best_reward: float | None + best_observation_display: str + avg_step_duration_sec: float | None + total_runtime_sec: float | None + projected_runtime_sec: float | None + saved_runtime_sec: float | None + success_count: int + failure_count: int + gpu_arch_label: str | None + gpu_arch_family: str | None + gpus_per_node: int | None + num_nodes: int | None + total_gpu_hours: float | None + projected_gpu_hours: float | None + saved_gpu_hours: float | None + estimated_saved_cost_usd: float | None + best_config_rel_path: str | None + best_config_toml: str | None + analysis_rel_path: str | None + parameter_rows: list[DSEParameterRow] = field(default_factory=list) + chart_svg: str | None = None + + @property + def display_name(self) -> str: + if self.iteration == 0: + return self.name + return f"{self.name} iter={self.iteration}" + + @property + def status_text(self) -> str: + if self.failure_count == 0: + return "PASSED" + if self.success_count == 0: + return "FAILED" + return "PARTIAL" + + @property + def status_style(self) -> str: + return { + "PASSED": "[green]PASSED[/green]", + "FAILED": "[red]FAILED[/red]", + "PARTIAL": "[yellow]PARTIAL[/yellow]", + }[self.status_text] + + +class DSEReportBuilder: + """Build DSE summaries and best-config artifacts from generated results.""" + + def __init__(self, system: Any, results_root: Path, loaded_test_runs: list[TestRun]): + self.system = system + self.results_root = results_root + self.loaded_test_runs = loaded_test_runs + + 
@staticmethod + def best_config_file_name(tr: TestRun) -> str: + return f"{tr.name}.toml" + + def build(self, original_test_runs: list[TestRun]) -> list[DSESummary]: + summaries: list[DSESummary] = [] + for tr in original_test_runs: + if not tr.is_dse_job: + continue + summaries.extend(self._build_for_test_run(tr)) + return summaries + + def _build_for_test_run(self, original_tr: TestRun) -> list[DSESummary]: + summaries: list[DSESummary] = [] + tr_base_dir = self.results_root / original_tr.name + if not tr_base_dir.exists(): + return summaries + + grouped_trs: dict[int, list[TestRun]] = {} + for tr in self.loaded_test_runs: + if tr.name != original_tr.name: + continue + grouped_trs.setdefault(tr.current_iteration, []).append(tr) + + iteration_dirs = sorted((d for d in tr_base_dir.iterdir() if d.is_dir()), key=lambda p: int(p.name)) + for iter_dir in iteration_dirs: + iteration = int(iter_dir.name) + summary = self._build_iteration_summary(original_tr, iteration, iter_dir, grouped_trs.get(iteration, [])) + if summary is not None: + summaries.append(summary) + return summaries + + def _build_iteration_summary( + self, + original_tr: TestRun, + iteration: int, + iter_dir: Path, + step_trs: list[TestRun], + ) -> DSESummary | None: + trajectory_file = iter_dir / "trajectory.csv" + if not trajectory_file.exists(): + logging.warning(f"No trajectory file found for {original_tr.name} at {trajectory_file}") + return None + + df = lazy.pd.read_csv(trajectory_file) + if df.empty: + return None + + steps_by_number = {tr.step: tr for tr in step_trs} + steps: list[DSEStepData] = [] + for row in df.to_dict(orient="records"): + step_no = int(row["step"]) + action = _safe_literal_eval(row.get("action"), {}) + if not isinstance(action, dict): + action = {} + observation = _safe_literal_eval(row.get("observation"), []) + if not isinstance(observation, list): + observation = [observation] + tr = steps_by_number.get(step_no) + is_successful = 
tr.test.was_run_successful(tr).is_successful if tr is not None else False + steps.append( + DSEStepData( + step=step_no, + reward=float(row["reward"]), + observation=observation, + observation_display=", ".join(_format_scalar(v) for v in observation) if observation else "n/a", + action=action, + elapsed_time_sec=self._step_elapsed_time(iter_dir / str(step_no)), + is_successful=is_successful, + ) + ) + + if not steps: + return None + + steps.sort(key=lambda step: step.step) + best_step_data = max(steps, key=lambda step: step.reward) + best_step_dir = iter_dir / str(best_step_data.step) + best_step_details = best_step_dir / CommandGenStrategy.TEST_RUN_DUMP_FILE_NAME + if not best_step_details.exists(): + logging.warning(f"No test run dump found for best DSE step at {best_step_details}") + return None + + with best_step_details.open() as f: + trd = TestRunDetails.model_validate(toml.load(f)) + + best_config_path = iter_dir / self.best_config_file_name(original_tr) + with best_config_path.open("w") as f: + toml.dump(trd.test_definition.model_dump(), f) + best_config_toml = toml.dumps(trd.test_definition.model_dump()) + + elapsed_times = [step.elapsed_time_sec for step in steps if step.elapsed_time_sec is not None] + avg_step_duration_sec = sum(elapsed_times) / len(elapsed_times) if elapsed_times else None + total_runtime_sec = sum(elapsed_times) if elapsed_times else None + total_space = len(original_tr.all_combinations) + executed_steps = len(steps) + skipped_steps = max(total_space - executed_steps, 0) + coverage_percent = (executed_steps / total_space * 100.0) if total_space else None + projected_runtime_sec = avg_step_duration_sec * total_space if avg_step_duration_sec is not None else None + saved_runtime_sec = ( + max(projected_runtime_sec - total_runtime_sec, 0.0) + if projected_runtime_sec is not None and total_runtime_sec is not None + else None + ) + + metadata = load_system_metadata(iter_dir / str(best_step_data.step), self.results_root) + gpu_arch_label = 
metadata.system.gpu_arch_type if metadata else None + gpu_arch_family = _normalize_gpu_family(gpu_arch_label) + num_nodes = trd.nnodes + gpus_per_node = getattr(self.system, "gpus_per_node", None) + total_gpu_hours = ( + (total_runtime_sec / 3600.0) * num_nodes * gpus_per_node + if total_runtime_sec is not None and gpus_per_node is not None + else None + ) + projected_gpu_hours = ( + (projected_runtime_sec / 3600.0) * num_nodes * gpus_per_node + if projected_runtime_sec is not None and gpus_per_node is not None + else None + ) + saved_gpu_hours = ( + max(projected_gpu_hours - total_gpu_hours, 0.0) + if projected_gpu_hours is not None and total_gpu_hours is not None + else None + ) + estimated_saved_cost_usd = ( + saved_gpu_hours * GPU_HOURLY_COST_USD[gpu_arch_family] + if saved_gpu_hours is not None and gpu_arch_family in GPU_HOURLY_COST_USD + else None + ) + + success_count = sum(1 for step in steps if step.is_successful) + failure_count = len(steps) - success_count + best_action = best_step_data.action + parameter_rows = [ + DSEParameterRow( + name=name, + values=[_format_scalar(value) for value in values], + best_value=_format_scalar(best_action.get(name, "n/a")), + ) + for name, values in original_tr.param_space.items() + ] + analysis_file = iter_dir / "analysis.csv" + + return DSESummary( + name=original_tr.name, + description=original_tr.test.description, + iteration=iteration, + output_root=iter_dir, + output_root_rel_path=f"./{iter_dir.relative_to(self.results_root)}", + total_space=total_space, + executed_steps=executed_steps, + skipped_steps=skipped_steps, + coverage_percent=coverage_percent, + best_step=best_step_data.step, + best_reward=best_step_data.reward, + best_observation_display=best_step_data.observation_display, + avg_step_duration_sec=avg_step_duration_sec, + total_runtime_sec=total_runtime_sec, + projected_runtime_sec=projected_runtime_sec, + saved_runtime_sec=saved_runtime_sec, + success_count=success_count, + failure_count=failure_count, + 
gpu_arch_label=gpu_arch_label, + gpu_arch_family=gpu_arch_family, + gpus_per_node=gpus_per_node, + num_nodes=num_nodes, + total_gpu_hours=total_gpu_hours, + projected_gpu_hours=projected_gpu_hours, + saved_gpu_hours=saved_gpu_hours, + estimated_saved_cost_usd=estimated_saved_cost_usd, + best_config_rel_path=f"./{best_config_path.relative_to(self.results_root)}", + best_config_toml=best_config_toml, + analysis_rel_path=f"./{analysis_file.relative_to(self.results_root)}" if analysis_file.exists() else None, + parameter_rows=parameter_rows, + chart_svg=_build_reward_chart_svg(steps), + ) + + @staticmethod + def _step_elapsed_time(step_dir: Path) -> int | None: + slurm_job_path = step_dir / "slurm-job.toml" + if not slurm_job_path.exists(): + return None + + with slurm_job_path.open() as f: + metadata = SlurmJobMetadata.model_validate(toml.load(f)) + return metadata.elapsed_time_sec diff --git a/src/cloudai/reporter.py b/src/cloudai/reporter.py index 514428d12..56ee450ae 100644 --- a/src/cloudai/reporter.py +++ b/src/cloudai/reporter.py @@ -14,323 +14,26 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import ast import contextlib -import io import logging import tarfile -from dataclasses import dataclass, field from pathlib import Path -from typing import Any, Optional import jinja2 -import toml from rich import box from rich.console import Console from rich.table import Table -from cloudai.util.lazy_imports import lazy - -from .core import CommandGenStrategy, Reporter, TestRun, case_name -from .models.scenario import TestRunDetails -from .systems.slurm import SlurmSystem, SlurmSystemMetadata -from .systems.slurm.slurm_metadata import SlurmJobMetadata - -GPU_HOURLY_COST_USD = { - "H100": 4.50, - "B200": 8.00, - "GB200": 10.00, - "GB300": 12.00, -} - - -def _safe_literal_eval(raw: Any, default: Any) -> Any: - if isinstance(raw, str): - with contextlib.suppress(SyntaxError, ValueError): - return ast.literal_eval(raw) - return default - - -def _format_scalar(value: Any) -> str: - if isinstance(value, float): - return f"{value:.4f}".rstrip("0").rstrip(".") - return str(value) - - -def _format_duration(seconds: float | None) -> str: - if seconds is None: - return "n/a" - - seconds = max(float(seconds), 0.0) - if seconds < 60: - return f"{seconds:.1f}s" - - minutes, sec = divmod(round(seconds), 60) - hours, minutes = divmod(minutes, 60) - parts = [] - if hours: - parts.append(f"{hours}h") - if minutes: - parts.append(f"{minutes}m") - if sec or not parts: - parts.append(f"{sec}s") - return " ".join(parts) - - -def _format_float(value: float | None, precision: int = 2) -> str: - if value is None: - return "n/a" - return f"{value:.{precision}f}" - - -def _format_percent(value: float | None) -> str: - if value is None: - return "n/a" - return f"{value:.2f}%" - - -def _format_money(value: float | None) -> str: - if value is None: - return "n/a" - return f"${value:,.2f}" - - -def _normalize_gpu_family(gpu_name: str | None) -> str | None: - if not gpu_name: - return None - - upper = gpu_name.upper() - for family in GPU_HOURLY_COST_USD: - if family in upper: - return family 
- return None - - -def _build_running_best(points: list[tuple[int, float]]) -> list[tuple[int, float]]: - running_best: list[tuple[int, float]] = [] - best = None - for step, reward in points: - best = reward if best is None else max(best, reward) - running_best.append((step, best)) - return running_best - - -def _chart_points(points: list[tuple[int, float]], width: int, height: int, padding: int) -> list[tuple[float, float]]: - if not points: - return [] - - x_vals = [step for step, _ in points] - y_vals = [reward for _, reward in points] - min_x, max_x = min(x_vals), max(x_vals) - min_y, max_y = min(y_vals), max(y_vals) - - x_span = max(max_x - min_x, 1) - y_span = max(max_y - min_y, 1e-9) - inner_width = width - 2 * padding - inner_height = height - 2 * padding - - result = [] - for step, reward in points: - x = padding + ((step - min_x) / x_span) * inner_width - y = height - padding - ((reward - min_y) / y_span) * inner_height - result.append((x, y)) - return result - - -def _polyline(points: list[tuple[float, float]]) -> str: - return " ".join(f"{x:.2f},{y:.2f}" for x, y in points) - - -def _build_reward_chart_svg(steps: list["DSEStepData"]) -> str | None: - if not steps: - return None - - width, height, padding = 720, 260, 34 - reward_points = [(step.step, step.reward) for step in steps] - running_best = _build_running_best(reward_points) - reward_coords = _chart_points(reward_points, width, height, padding) - best_coords = _chart_points(running_best, width, height, padding) - - reward_line = _polyline(reward_coords) - best_line = _polyline(best_coords) - y_vals = [reward for _, reward in reward_points] - y_min, y_max = min(y_vals), max(y_vals) - - circles = [] - for step_data, (x, y) in zip(steps, reward_coords, strict=True): - tooltip = ( - f"Step {step_data.step} | Reward: {_format_float(step_data.reward, 4)}" - f" | Observation: {step_data.observation_display}" - ) - circles.append(f'{tooltip}') - - return "\n".join( - [ - f'', - f'', - f'', - f'', - f'', 
- *circles, - f'Step', - f'Reward', - "", - ] - ) - - -@dataclass -class ReportItem: - """Basic report item for general systems.""" - - name: str - description: str - logs_path: Optional[str] = None - - @classmethod - def from_test_runs(cls, test_runs: list[TestRun], results_root: Path) -> list["ReportItem"]: - report_items: list[ReportItem] = [] - for tr in test_runs: - report_items.append(ReportItem(case_name(tr), tr.test.description)) - if tr.output_path.exists(): - report_items[-1].logs_path = f"./{tr.output_path.relative_to(results_root)}" - return report_items - - -@dataclass -class SlurmReportItem: - """Enhanced report item for Slurm systems with node information.""" - - name: str - description: str - logs_path: Optional[str] = None - nodes: Optional[str] = None - - @classmethod - def get_metadata(cls, run_dir: Path, results_root: Path) -> Optional[SlurmSystemMetadata]: - metadata_path = run_dir / "metadata" - if not metadata_path.exists(): - logging.debug(f"No metadata folder found in {run_dir=}") - if not (results_root / "metadata").exists(): - logging.debug(f"No metadata folder found in {results_root=}") - return None - else: # single-sbatch case - metadata_path = results_root / "metadata" - - node_files = list(metadata_path.glob("node-*.toml")) - if not node_files: - logging.debug(f"No node files found in {metadata_path}") - return None - - node_file = node_files[0] - with node_file.open() as f: - try: - return SlurmSystemMetadata.model_validate(toml.load(f)) - except Exception as e: - logging.debug(f"Error validating metadata for {node_file}: {e}") - - return None - - @classmethod - def from_test_runs(cls, test_runs: list[TestRun], results_root: Path) -> list["SlurmReportItem"]: - report_items: list[SlurmReportItem] = [] - for tr in test_runs: - ri = SlurmReportItem(case_name(tr), tr.test.description) - if tr.output_path.exists(): - ri.logs_path = f"./{tr.output_path.relative_to(results_root)}" - if metadata := cls.get_metadata(tr.output_path, 
results_root): - ri.nodes = metadata.slurm.node_list - report_items.append(ri) - - return report_items - - -@dataclass -class DSEStepData: - """DSE step data.""" - - step: int - reward: float - observation: list[Any] - observation_display: str - action: dict[str, Any] - elapsed_time_sec: float | None = None - is_successful: bool = False - - -@dataclass -class DSEParameterRow: - """DSE parameter row.""" - - name: str - values: list[str] - best_value: str - - -@dataclass -class DSESummary: - """DSE summary report.""" - - name: str - description: str - iteration: int - output_root: Path - output_root_rel_path: str - total_space: int - executed_steps: int - skipped_steps: int - coverage_percent: float | None - best_step: int | None - best_reward: float | None - best_observation_display: str - avg_step_duration_sec: float | None - total_runtime_sec: float | None - projected_runtime_sec: float | None - saved_runtime_sec: float | None - success_count: int - failure_count: int - gpu_arch_label: str | None - gpu_arch_family: str | None - gpus_per_node: int | None - num_nodes: int | None - total_gpu_hours: float | None - projected_gpu_hours: float | None - saved_gpu_hours: float | None - estimated_saved_cost_usd: float | None - best_config_rel_path: str | None - best_scenario_rel_path: str | None - best_scenario_toml: str | None - analysis_rel_path: str | None - parameter_rows: list[DSEParameterRow] = field(default_factory=list) - chart_svg: str | None = None - - @property - def display_name(self) -> str: - if self.iteration == 0: - return self.name - return f"{self.name} iter={self.iteration}" - - @property - def status_text(self) -> str: - if self.failure_count == 0: - return "PASSED" - if self.success_count == 0: - return "FAILED" - return "PARTIAL" - - @property - def status_style(self) -> str: - return { - "PASSED": "[green]PASSED[/green]", - "FAILED": "[red]FAILED[/red]", - "PARTIAL": "[yellow]PARTIAL[/yellow]", - }[self.status_text] +from cloudai.core import Reporter, 
TestRun +from cloudai.report_generator.status_report import ( + DSEReportBuilder, + DSESummary, + ReportItem, + format_duration, + format_float, + format_money, + format_percent, +) class PerTestReporter(Reporter): @@ -358,50 +61,32 @@ def generate(self) -> None: class StatusReporter(Reporter): """Generates HTML status reports with system-specific templates.""" - def __init__(self, system, test_scenario, results_root, config): - super().__init__(system, test_scenario, results_root, config) - self.dse_summaries: list[DSESummary] = [] - @property - def template_file_path(self) -> Path: + def templates_dir(self) -> Path: return Path(__file__).parent / "util" - @property - def template_file(self) -> str: - if isinstance(self.system, SlurmSystem): - return "general-slurm-report.jinja2" - return "general-report.jinja2" - - def best_dse_config_file_name(self, tr: TestRun) -> str: - return f"{tr.name}.toml" - - def best_dse_scenario_file_name(self, tr: TestRun) -> str: - return f"{tr.name}-best-in-scenario.toml" - def generate(self) -> None: self.load_test_runs() - self.report_best_dse_config() - self.generate_scenario_report() - self.print_summary() - def generate_scenario_report(self) -> None: - template = jinja2.Environment(loader=jinja2.FileSystemLoader(self.template_file_path)).get_template( - self.template_file - ) + dse_builder = DSEReportBuilder(self.system, self.results_root, self.trs) + dse_summaries = dse_builder.build(self.test_scenario.test_runs) - report_items = ( - SlurmReportItem.from_test_runs(self.trs, self.results_root) - if isinstance(self.system, SlurmSystem) - else ReportItem.from_test_runs(self.trs, self.results_root) - ) + self.to_html(dse_summaries) + self.to_console(dse_summaries) + + def to_html(self, dse_summaries: list[DSESummary]) -> None: + jinja_env = jinja2.Environment(loader=jinja2.FileSystemLoader(self.templates_dir)) + template = jinja_env.get_template("general-report.jinja2") + + report_items = ReportItem.from_test_runs(self.trs, 
self.results_root) report = template.render( name=self.test_scenario.name, report_items=report_items, - dse_summaries=self.dse_summaries, - format_duration=_format_duration, - format_float=_format_float, - format_percent=_format_percent, - format_money=_format_money, + dse_summaries=dse_summaries, + format_duration=format_duration, + format_float=format_float, + format_percent=format_percent, + format_money=format_money, ) report_path = self.results_root / f"{self.test_scenario.name}.html" with report_path.open("w") as f: @@ -409,242 +94,7 @@ def generate_scenario_report(self) -> None: logging.info(f"Generated scenario report at {report_path}") - def report_best_dse_config(self): - self.dse_summaries = [] - for tr in self.test_scenario.test_runs: - if not tr.is_dse_job: - continue - - self.dse_summaries.extend(self._build_dse_summaries(tr)) - - def _build_dse_summaries(self, original_tr: TestRun) -> list[DSESummary]: - summaries: list[DSESummary] = [] - tr_base_dir = self.results_root / original_tr.name - if not tr_base_dir.exists(): - return summaries - - grouped_trs: dict[int, list[TestRun]] = {} - for tr in self.trs: - if tr.name != original_tr.name: - continue - grouped_trs.setdefault(tr.current_iteration, []).append(tr) - iteration_dirs = sorted((d for d in tr_base_dir.iterdir() if d.is_dir()), key=lambda p: int(p.name)) - for iter_dir in iteration_dirs: - iteration = int(iter_dir.name) - summary = self._build_dse_summary_for_iteration( - original_tr, iteration, iter_dir, grouped_trs.get(iteration, []) - ) - if summary is not None: - summaries.append(summary) - return summaries - - def _build_dse_summary_for_iteration( - self, original_tr: TestRun, iteration: int, iter_dir: Path, step_trs: list[TestRun] - ) -> DSESummary | None: - trajectory_file = iter_dir / "trajectory.csv" - if not trajectory_file.exists(): - logging.warning(f"No trajectory file found for {original_tr.name} at {trajectory_file}") - return None - - df = lazy.pd.read_csv(trajectory_file) - if 
df.empty: - return None - - steps_by_number = {tr.step: tr for tr in step_trs} - steps: list[DSEStepData] = [] - for row in df.to_dict(orient="records"): - step_no = int(row["step"]) - action = _safe_literal_eval(row.get("action"), {}) - if not isinstance(action, dict): - action = {} - observation = _safe_literal_eval(row.get("observation"), []) - if not isinstance(observation, list): - observation = [observation] - elapsed_time = self._step_elapsed_time(iter_dir / str(step_no)) - tr = steps_by_number.get(step_no) - is_successful = tr.test.was_run_successful(tr).is_successful if tr is not None else False - steps.append( - DSEStepData( - step=step_no, - reward=float(row["reward"]), - observation=observation, - observation_display=", ".join(_format_scalar(v) for v in observation) if observation else "n/a", - action=action, - elapsed_time_sec=elapsed_time, - is_successful=is_successful, - ) - ) - - if not steps: - return None - - steps.sort(key=lambda step: step.step) - best_step_data = max(steps, key=lambda step: step.reward) - best_step_dir = iter_dir / str(best_step_data.step) - best_step_details = best_step_dir / CommandGenStrategy.TEST_RUN_DUMP_FILE_NAME - if not best_step_details.exists(): - logging.warning(f"No test run dump found for best DSE step at {best_step_details}") - return None - - with best_step_details.open() as f: - trd = TestRunDetails.model_validate(toml.load(f)) - - best_config_path = iter_dir / self.best_dse_config_file_name(original_tr) - logging.info(f"Writing best config for {original_tr.name} to {best_config_path}") - with best_config_path.open("w") as f: - toml.dump(trd.test_definition.model_dump(), f) - - best_scenario_content = self._build_best_scenario_toml(original_tr, trd) - best_scenario_path = iter_dir / self.best_dse_scenario_file_name(original_tr) - with best_scenario_path.open("w") as f: - f.write(best_scenario_content) - - elapsed_times = [step.elapsed_time_sec for step in steps if step.elapsed_time_sec is not None] - 
avg_step_duration_sec = sum(elapsed_times) / len(elapsed_times) if elapsed_times else None - total_runtime_sec = sum(elapsed_times) if elapsed_times else None - total_space = len(original_tr.all_combinations) - executed_steps = len(steps) - skipped_steps = max(total_space - executed_steps, 0) - coverage_percent = (executed_steps / total_space * 100.0) if total_space else None - projected_runtime_sec = avg_step_duration_sec * total_space if avg_step_duration_sec is not None else None - saved_runtime_sec = ( - max(projected_runtime_sec - total_runtime_sec, 0.0) - if projected_runtime_sec is not None and total_runtime_sec is not None - else None - ) - - metadata = self._best_available_metadata(iter_dir, best_step_data.step) - gpu_arch_label = metadata.system.gpu_arch_type if metadata else None - gpu_arch_family = _normalize_gpu_family(gpu_arch_label) - num_nodes = trd.nnodes - gpus_per_node = getattr(self.system, "gpus_per_node", None) - total_gpu_hours = ( - (total_runtime_sec / 3600.0) * num_nodes * gpus_per_node - if total_runtime_sec is not None and gpus_per_node is not None - else None - ) - projected_gpu_hours = ( - (projected_runtime_sec / 3600.0) * num_nodes * gpus_per_node - if projected_runtime_sec is not None and gpus_per_node is not None - else None - ) - saved_gpu_hours = ( - max(projected_gpu_hours - total_gpu_hours, 0.0) - if projected_gpu_hours is not None and total_gpu_hours is not None - else None - ) - estimated_saved_cost_usd = ( - saved_gpu_hours * GPU_HOURLY_COST_USD[gpu_arch_family] - if saved_gpu_hours is not None and gpu_arch_family in GPU_HOURLY_COST_USD - else None - ) - - success_count = sum(1 for step in steps if step.is_successful) - failure_count = len(steps) - success_count - best_action = best_step_data.action - parameter_rows = [ - DSEParameterRow( - name=name, - values=[_format_scalar(value) for value in values], - best_value=_format_scalar(best_action.get(name, "n/a")), - ) - for name, values in original_tr.param_space.items() - ] - 
analysis_file = iter_dir / "analysis.csv" - - return DSESummary( - name=original_tr.name, - description=original_tr.test.description, - iteration=iteration, - output_root=iter_dir, - output_root_rel_path=f"./{iter_dir.relative_to(self.results_root)}", - total_space=total_space, - executed_steps=executed_steps, - skipped_steps=skipped_steps, - coverage_percent=coverage_percent, - best_step=best_step_data.step, - best_reward=best_step_data.reward, - best_observation_display=best_step_data.observation_display, - avg_step_duration_sec=avg_step_duration_sec, - total_runtime_sec=total_runtime_sec, - projected_runtime_sec=projected_runtime_sec, - saved_runtime_sec=saved_runtime_sec, - success_count=success_count, - failure_count=failure_count, - gpu_arch_label=gpu_arch_label, - gpu_arch_family=gpu_arch_family, - gpus_per_node=gpus_per_node, - num_nodes=num_nodes, - total_gpu_hours=total_gpu_hours, - projected_gpu_hours=projected_gpu_hours, - saved_gpu_hours=saved_gpu_hours, - estimated_saved_cost_usd=estimated_saved_cost_usd, - best_config_rel_path=f"./{best_config_path.relative_to(self.results_root)}", - best_scenario_rel_path=f"./{best_scenario_path.relative_to(self.results_root)}", - best_scenario_toml=best_scenario_content, - analysis_rel_path=f"./{analysis_file.relative_to(self.results_root)}" if analysis_file.exists() else None, - parameter_rows=parameter_rows, - chart_svg=_build_reward_chart_svg(steps), - ) - - def _build_best_scenario_toml(self, original_tr: TestRun, best_trd: TestRunDetails) -> str: - tdef = best_trd.test_definition.model_copy(deep=True) - tdef.agent = None - tdef.agent_steps = None - tdef.agent_reward_function = None - tdef.agent_config = None - tdef.agent_metrics = ["default"] - - test_block: dict[str, Any] = { - "id": original_tr.name, - "num_nodes": best_trd.nnodes, - "name": tdef.name, - "description": tdef.description, - "test_template_name": tdef.test_template_name, - "cmd_args": tdef.cmd_args.model_dump(by_alias=True), - } - if 
original_tr.time_limit: - test_block["time_limit"] = original_tr.time_limit - if original_tr.nodes: - test_block["nodes"] = original_tr.nodes - if original_tr.exclude_nodes: - test_block["exclude_nodes"] = original_tr.exclude_nodes - if tdef.extra_env_vars: - test_block["extra_env_vars"] = tdef.extra_env_vars - if tdef.extra_container_mounts: - test_block["extra_container_mounts"] = tdef.extra_container_mounts - if tdef.git_repos: - test_block["git_repos"] = [repo.model_dump() for repo in tdef.git_repos] - if tdef.nsys: - test_block["nsys"] = tdef.nsys.model_dump(exclude_unset=True) - if original_tr.extra_srun_args: - test_block["extra_srun_args"] = original_tr.extra_srun_args - - scenario_dict = { - "name": f"{best_trd.test_definition.name}_best_config", - "Tests": [test_block], - } - buffer = io.StringIO() - toml.dump(scenario_dict, buffer) - return buffer.getvalue() - - @staticmethod - def _step_elapsed_time(step_dir: Path) -> float | None: - slurm_job_path = step_dir / "slurm-job.toml" - if not slurm_job_path.exists(): - return None - - with slurm_job_path.open() as f: - metadata = SlurmJobMetadata.model_validate(toml.load(f)) - return float(metadata.elapsed_time_sec) - - def _best_available_metadata(self, iter_dir: Path, best_step: int) -> SlurmSystemMetadata | None: - if not isinstance(self.system, SlurmSystem): - return None - best_step_dir = iter_dir / str(best_step) - return SlurmReportItem.get_metadata(best_step_dir, self.results_root) - - def print_summary(self) -> None: + def to_console(self, dse_summaries: list[DSESummary]): if not self.trs: logging.debug("No test runs found, skipping summary.") return @@ -653,34 +103,40 @@ def print_summary(self) -> None: for col in ["Case", "Status", "Details"]: table.add_column(col, overflow="fold") - if self.dse_summaries: - for summary in self.dse_summaries: - details = [ - f"steps={summary.executed_steps}/{summary.total_space}", - f"best_step={summary.best_step}", - 
f"best_reward={_format_float(summary.best_reward, 4)}", - f"failures={summary.failure_count}", - ] - if summary.best_scenario_rel_path: - details.append(summary.best_scenario_rel_path) - table.add_row(summary.display_name, f"[bold]{summary.status_style}[/bold]", "\n".join(details)) + if dse_summaries: + self._add_dse_rows(dse_summaries, table) else: - for tr in self.trs: - tr_status = tr.test.was_run_successful(tr) - sts_text = f"[bold]{'[green]PASSED[/green]' if tr_status.is_successful else '[red]FAILED[/red]'}[/bold]" - display_path = str(tr.output_path.absolute()) - with contextlib.suppress(ValueError): - display_path = str(tr.output_path.absolute().relative_to(Path.cwd())) - details_text = f"\n{tr_status.error_message}" if tr_status.error_message else "" - columns = [tr.name, sts_text, f"{display_path}{details_text}"] - table.add_row(*columns) + self._add_standard_rows(table) console = Console() with console.capture() as capture: - console.print(table) # doesn't print to stdout, captures only + console.print(table) logging.info(capture.get()) + @staticmethod + def _add_dse_rows(dse_summaries: list[DSESummary], table: Table): + for summary in dse_summaries: + details = [ + f"steps={summary.executed_steps}/{summary.total_space}", + f"best_step={summary.best_step}", + f"best_reward={format_float(summary.best_reward, 4)}", + f"failures={summary.failure_count}", + ] + if summary.best_config_rel_path: + details.append(summary.best_config_rel_path) + table.add_row(summary.display_name, f"[bold]{summary.status_style}[/bold]", "\n".join(details)) + + def _add_standard_rows(self, table: Table): + for tr in self.trs: + tr_status = tr.test.was_run_successful(tr) + sts_text = f"[bold]{'[green]PASSED[/green]' if tr_status.is_successful else '[red]FAILED[/red]'}[/bold]" + display_path = str(tr.output_path.absolute()) + with contextlib.suppress(ValueError): + display_path = str(tr.output_path.absolute().relative_to(Path.cwd())) + details_text = f"\n{tr_status.error_message}" 
if tr_status.error_message else "" + table.add_row(tr.name, sts_text, f"{display_path}{details_text}") + class TarballReporter(Reporter): """Creates tarballs of results for failed test runs.""" diff --git a/src/cloudai/util/general-report.jinja2 b/src/cloudai/util/general-report.jinja2 index 13585812c..55d3396d9 100644 --- a/src/cloudai/util/general-report.jinja2 +++ b/src/cloudai/util/general-report.jinja2 @@ -170,13 +170,12 @@ - {% if summary.best_scenario_toml %} + {% if summary.best_config_toml %}
- Show best scenario TOML -
{{ summary.best_scenario_toml }}
+ Show best config TOML +
{{ summary.best_config_toml }}
{% endif %} @@ -219,6 +218,9 @@ Test Description Results + {% if report_items | selectattr('nodes') | first is not none %} + Nodes + {% endif %} {% for item in report_items %} @@ -229,6 +231,11 @@ {% else %} no logs {% endif %} + {% if item.nodes is not none %} + {{ item.nodes }} + {% else %} + no nodes information + {% endif %} {% endfor %} diff --git a/src/cloudai/util/general-slurm-report.jinja2 b/src/cloudai/util/general-slurm-report.jinja2 deleted file mode 100644 index 9298be928..000000000 --- a/src/cloudai/util/general-slurm-report.jinja2 +++ /dev/null @@ -1,238 +0,0 @@ -{% extends "base-report.jinja2" %} - -{% block extra_head %} - -{% endblock %} - -{% block content %} -{% if dse_summaries %} - {% for summary in dse_summaries %} -
-

{{ summary.display_name }}

-

{{ summary.description }}

- -
-
-
Space
-
{{ summary.total_space }}
-
-
-
Ran
-
{{ summary.executed_steps }}
-
-
-
Skipped
-
{{ summary.skipped_steps }}
-
-
-
Coverage
-
{{ format_percent(summary.coverage_percent) }}
-
-
-
Saved Time
-
{{ format_duration(summary.saved_runtime_sec) }}
-
-
-
Saved GPU-Hours
-
{{ format_float(summary.saved_gpu_hours, 2) }}
-
-
-
Estimated $ Saved
-
{{ format_money(summary.estimated_saved_cost_usd) }}
-
-
- -
-

Execution Context

-
-
Status{{ summary.status_text }}
-
GPU Family{{ summary.gpu_arch_family or "unknown" }}
-
GPU Label{{ summary.gpu_arch_label or "unknown" }}
-
GPUs Per Node{{ summary.gpus_per_node or "unknown" }}
-
Nodes{{ summary.num_nodes or "unknown" }}
-
Step Success/Failure{{ summary.success_count }}/{{ summary.failure_count }}
-
Average Step Runtime{{ format_duration(summary.avg_step_duration_sec) }}
-
Observed Runtime{{ format_duration(summary.total_runtime_sec) }}
-
Projected Full-Space Runtime{{ format_duration(summary.projected_runtime_sec) }}
-
-
- -
-
-
-

Best Step

-
-
Best Step{{ summary.best_step }}
-
Best Reward{{ format_float(summary.best_reward, 4) }}
-
Best Observation{{ summary.best_observation_display }}
-
Run Folderopen
-
- - {% if summary.best_scenario_toml %} -
- Show best scenario TOML -
{{ summary.best_scenario_toml }}
-
- {% endif %} -
- -
-

Exploration Space

-

Each row shows the allowed values for a swept parameter and the selected best value.

- - - - - - - {% for row in summary.parameter_rows %} - - - - - - {% endfor %} -
ParameterAllowed ValuesBest
{{ row.name }}{{ row.values | join(", ") }}{{ row.best_value }}
-
-
- -
-

Reward Over Steps

-

Blue shows observed reward per trial. Red dashed shows the best-so-far curve.

- {% if summary.chart_svg %} - {{ summary.chart_svg | safe }} - {% else %} -

No reward data available.

- {% endif %} -
-
-
- {% endfor %} -{% else %} - - - - - - - - {% for item in report_items %} - - - - {% if item.logs_path %} - - {% else %} - - {% endif %} - - - {% endfor %} -
TestDescriptionResultsNodes
{{ item.name }}{{ item.description }}logsno logs{{ item.nodes }}
-{% endif %} -{% endblock %} diff --git a/tests/test_reporter.py b/tests/test_reporter.py index 50d815b3c..71511fab9 100644 --- a/tests/test_reporter.py +++ b/tests/test_reporter.py @@ -26,7 +26,8 @@ from cloudai.cli.handlers import generate_reports from cloudai.core import CommandGenStrategy, Registry, Reporter, System from cloudai.models.scenario import ReportConfig, TestRunDetails -from cloudai.reporter import PerTestReporter, SlurmReportItem, StatusReporter, TarballReporter +from cloudai.report_generator.status_report import DSEReportBuilder, ReportItem, load_system_metadata +from cloudai.reporter import PerTestReporter, StatusReporter, TarballReporter from cloudai.systems.slurm.slurm_metadata import ( MetadataCUDA, MetadataMPI, @@ -132,6 +133,22 @@ def _create_dse_report_fixture( return tr +def _build_dse_summaries( + slurm_system: SlurmSystem, + dse_tr: TestRun, + scenario_name: str = "dse_scenario", +) -> tuple[StatusReporter, list]: + reporter = StatusReporter( + slurm_system, + TestScenario(name=scenario_name, test_runs=[dse_tr]), + slurm_system.output_path, + ReportConfig(), + ) + reporter.load_test_runs() + summaries = DSEReportBuilder(slurm_system, slurm_system.output_path, reporter.trs).build([dse_tr]) + return reporter, summaries + + class TestLoadTestTuns: def test_load_test_runs_behcnmark_sorted(self, slurm_system: SlurmSystem, benchmark_tr: TestRun) -> None: reporter = PerTestReporter( @@ -185,12 +202,13 @@ def test_create_tarball_preserves_full_name(tmp_path: Path, slurm_system: SlurmS def test_best_dse_config(dse_tr: TestRun, slurm_system: SlurmSystem) -> None: - reporter = StatusReporter( - slurm_system, TestScenario(name="test_scenario", test_runs=[dse_tr]), slurm_system.output_path, ReportConfig() - ) - reporter.report_best_dse_config() + reporter, summaries = _build_dse_summaries(slurm_system, dse_tr, scenario_name="test_scenario") + assert len(summaries) == dse_tr.iterations best_config_path = ( - reporter.results_root / dse_tr.name / 
f"{dse_tr.current_iteration}" / reporter.best_dse_config_file_name(dse_tr) + reporter.results_root + / dse_tr.name + / f"{dse_tr.current_iteration}" + / DSEReportBuilder.best_config_file_name(dse_tr) ) assert best_config_path.exists() nccl = NCCLTestDefinition.model_validate(toml.load(best_config_path)) @@ -209,7 +227,7 @@ def test_template_file_path(system: System) -> None: reporter = StatusReporter( system, TestScenario(name="test_scenario", test_runs=[]), system.output_path, ReportConfig() ) - assert (reporter.template_file_path / reporter.template_file).exists() + assert (reporter.templates_dir / "general-report.jinja2").exists() MY_REPORT_CALLED = 0 @@ -353,19 +371,19 @@ def slurm_metadata() -> SlurmSystemMetadata: ) -class TestSlurmReportItem: +class TestLoadSystemMetadata: def test_no_metadata_folder(self, slurm_system: SlurmSystem) -> None: run_dir = slurm_system.output_path / "run_dir" run_dir.mkdir(parents=True, exist_ok=True) - meta = SlurmReportItem.get_metadata(run_dir, slurm_system.output_path) + meta = load_system_metadata(run_dir, slurm_system.output_path) assert meta is None def test_no_metadata_files(self, slurm_system: SlurmSystem) -> None: run_dir = slurm_system.output_path / "run_dir" (run_dir / "metadata").mkdir(parents=True, exist_ok=True) - meta = SlurmReportItem.get_metadata(run_dir, slurm_system.output_path) + meta = load_system_metadata(run_dir, slurm_system.output_path) assert meta is None def test_metadata_file_in_run_dir(self, slurm_system: SlurmSystem, slurm_metadata: SlurmSystemMetadata) -> None: @@ -374,7 +392,7 @@ def test_metadata_file_in_run_dir(self, slurm_system: SlurmSystem, slurm_metadat with open(run_dir / "metadata" / "node-0.toml", "w") as f: toml.dump(slurm_metadata.model_dump(), f) - meta = SlurmReportItem.get_metadata(run_dir, slurm_system.output_path) + meta = load_system_metadata(run_dir, slurm_system.output_path) assert meta is not None assert meta.slurm.node_list == slurm_metadata.slurm.node_list @@ -385,11 +403,29 
@@ def test_metadata_for_single_sbatch(self, slurm_system: SlurmSystem, slurm_metad with open(slurm_system.output_path / "metadata" / "node-0.toml", "w") as f: toml.dump(slurm_metadata.model_dump(), f) - meta = SlurmReportItem.get_metadata(run_dir, slurm_system.output_path) + meta = load_system_metadata(run_dir, slurm_system.output_path) assert meta is not None assert meta.slurm.node_list == slurm_metadata.slurm.node_list +def test_report_item_from_test_runs_includes_logs_and_metadata( + slurm_system: SlurmSystem, benchmark_tr: TestRun, slurm_metadata: SlurmSystemMetadata +) -> None: + run_dir = slurm_system.output_path / benchmark_tr.name / "0" + metadata_dir = run_dir / "metadata" + metadata_dir.mkdir(parents=True, exist_ok=True) + with open(metadata_dir / "node-0.toml", "w") as f: + toml.dump(slurm_metadata.model_dump(), f) + + benchmark_tr.output_path = run_dir + items = ReportItem.from_test_runs([benchmark_tr], slurm_system.output_path) + + assert len(items) == 1 + assert items[0].logs_path == f"./{benchmark_tr.name}/0" + assert items[0].nodes is not None + assert items[0].nodes.slurm.node_list == slurm_metadata.slurm.node_list + + def test_report_order() -> None: reports = Registry().ordered_scenario_reports() assert reports[0][0] == "per_test" @@ -397,22 +433,12 @@ def test_report_order() -> None: assert reports[-1][0] == "tarball" -def test_dse_summary_and_best_scenario_artifacts( - slurm_system: SlurmSystem, slurm_metadata: SlurmSystemMetadata -) -> None: +def test_dse_summary_and_best_config_artifacts(slurm_system: SlurmSystem, slurm_metadata: SlurmSystemMetadata) -> None: dse_tr = _create_dse_report_fixture(slurm_system, slurm_metadata) - reporter = StatusReporter( - slurm_system, - TestScenario(name="dse_scenario", test_runs=[dse_tr]), - slurm_system.output_path, - ReportConfig(), - ) + _, summaries = _build_dse_summaries(slurm_system, dse_tr) - reporter.load_test_runs() - reporter.report_best_dse_config() - - assert len(reporter.dse_summaries) == 1 - 
summary = reporter.dse_summaries[0] + assert len(summaries) == 1 + summary = summaries[0] assert summary.total_space == 8 assert summary.executed_steps == 3 assert summary.skipped_steps == 5 @@ -428,22 +454,27 @@ def test_dse_summary_and_best_scenario_artifacts( assert summary.estimated_saved_cost_usd == pytest.approx((summary.saved_gpu_hours or 0) * 4.5) assert summary.gpu_arch_family == "H100" assert summary.analysis_rel_path is not None + assert summary.best_config_rel_path == f"./{dse_tr.name}/0/{dse_tr.name}.toml" + assert summary.chart_svg is not None + + best_values = {row.name: row.best_value for row in summary.parameter_rows} + assert best_values["nthreads"] == "2" + assert best_values["datatype"] == "uint8" + assert best_values["blocking"] == "1" - best_config_path = slurm_system.output_path / dse_tr.name / "0" / reporter.best_dse_config_file_name(dse_tr) - best_scenario_path = slurm_system.output_path / dse_tr.name / "0" / reporter.best_dse_scenario_file_name(dse_tr) + best_config_path = slurm_system.output_path / dse_tr.name / "0" / DSEReportBuilder.best_config_file_name(dse_tr) assert best_config_path.exists() - assert best_scenario_path.exists() - old_best = toml.load(best_config_path) - assert old_best["agent_steps"] == 3 + best_config = toml.load(best_config_path) + assert best_config["agent_steps"] == 3 + assert best_config["cmd_args"]["datatype"] == "uint8" + assert best_config["cmd_args"]["blocking"] == 1 + assert best_config["cmd_args"]["nthreads"] == 2 - best_scenario = toml.load(best_scenario_path) - assert best_scenario["Tests"][0]["cmd_args"]["datatype"] == "uint8" - assert best_scenario["Tests"][0]["cmd_args"]["blocking"] == 1 - assert best_scenario["Tests"][0]["cmd_args"]["nthreads"] == 2 - assert best_scenario["Tests"][0]["num_nodes"] == 2 - assert "agent" not in best_scenario["Tests"][0] - assert "agent_steps" not in best_scenario["Tests"][0] + inline_best_config = toml.loads(summary.best_config_toml or "") + assert 
inline_best_config["cmd_args"]["datatype"] == "uint8" + assert inline_best_config["cmd_args"]["blocking"] == 1 + assert inline_best_config["cmd_args"]["nthreads"] == 2 def test_dse_generate_scenario_report_renders_html( @@ -463,9 +494,10 @@ def test_dse_generate_scenario_report_renders_html( html = report_path.read_text() assert "Saved GPU-Hours" in html assert "Reward Over Steps" in html - assert "Best Scenario TOML" in html + assert "Best Test TOML" in html + assert "Show best config TOML" in html assert "BO Analysis" in html - assert "dse-report-best-in-scenario.toml" in html + assert "dse-report.toml" in html assert " None: dse_tr = _create_dse_report_fixture(slurm_system, slurm_metadata) - reporter = StatusReporter( - slurm_system, - TestScenario(name="dse_scenario", test_runs=[dse_tr]), - slurm_system.output_path, - ReportConfig(), - ) - - reporter.load_test_runs() - reporter.report_best_dse_config() + reporter, summaries = _build_dse_summaries(slurm_system, dse_tr) with caplog.at_level("INFO"): - reporter.print_summary() + reporter.to_console(summaries) assert "steps=3/8" in caplog.text assert "best_step=2" in caplog.text - assert "dse-report-best-in-scenario.toml" in caplog.text + assert "dse-report.toml" in caplog.text assert "step=1" not in caplog.text @@ -495,15 +519,7 @@ def test_unknown_gpu_family_omits_estimated_cost( slurm_system: SlurmSystem, slurm_metadata: SlurmSystemMetadata ) -> None: dse_tr = _create_dse_report_fixture(slurm_system, slurm_metadata, gpu_name="Mystery GPU") - reporter = StatusReporter( - slurm_system, - TestScenario(name="dse_scenario", test_runs=[dse_tr]), - slurm_system.output_path, - ReportConfig(), - ) - - reporter.load_test_runs() - reporter.report_best_dse_config() + _reporter, summaries = _build_dse_summaries(slurm_system, dse_tr) - assert reporter.dse_summaries[0].gpu_arch_family is None - assert reporter.dse_summaries[0].estimated_saved_cost_usd is None + assert summaries[0].gpu_arch_family is None + assert 
summaries[0].estimated_saved_cost_usd is None From e1babd5041146f0f6a155c866a0a7e66f7b52568 Mon Sep 17 00:00:00 2001 From: Ivan Podkidyshev Date: Sun, 22 Mar 2026 20:05:38 +0100 Subject: [PATCH 03/30] better visuals --- src/cloudai/report_generator/status_report.py | 96 +--- src/cloudai/util/base-report.jinja2 | 75 ++- src/cloudai/util/general-report.jinja2 | 517 +++++++++++++----- tests/test_reporter.py | 14 +- 4 files changed, 481 insertions(+), 221 deletions(-) diff --git a/src/cloudai/report_generator/status_report.py b/src/cloudai/report_generator/status_report.py index cfb732e38..733071301 100644 --- a/src/cloudai/report_generator/status_report.py +++ b/src/cloudai/report_generator/status_report.py @@ -24,13 +24,12 @@ from typing import Any, Optional import toml +from pydantic import BaseModel from cloudai.core import CommandGenStrategy, TestRun, case_name from cloudai.models.scenario import TestRunDetails from cloudai.util.lazy_imports import lazy -from ..systems.slurm import SlurmJobMetadata, SlurmSystemMetadata - GPU_HOURLY_COST_USD = { "H100": 4.50, "B200": 8.00, @@ -110,77 +109,38 @@ def _build_running_best(points: list[tuple[int, float]]) -> list[tuple[int, floa return running_best -def _chart_points(points: list[tuple[int, float]], width: int, height: int, padding: int) -> list[tuple[float, float]]: - if not points: - return [] +def _build_reward_chart_data(steps: list["DSEStepData"]) -> dict[str, list[Any]] | None: + if not steps: + return None - x_vals = [step for step, _ in points] - y_vals = [reward for _, reward in points] - min_x, max_x = min(x_vals), max(x_vals) - min_y, max_y = min(y_vals), max(y_vals) + reward_points = [(step.step, step.reward) for step in steps] + running_best = _build_running_best(reward_points) + return { + "labels": [step.step for step in steps], + "rewards": [step.reward for step in steps], + "running_best": [reward for _, reward in running_best], + "observations": [step.observation_display for step in steps], + } - 
x_span = max(max_x - min_x, 1) - y_span = max(max_y - min_y, 1e-9) - inner_width = width - 2 * padding - inner_height = height - 2 * padding - result = [] - for step, reward in points: - x = padding + ((step - min_x) / x_span) * inner_width - y = height - padding - ((reward - min_y) / y_span) * inner_height - result.append((x, y)) - return result +class _ReportMetadataSystem(BaseModel): + gpu_arch_type: str -def _polyline(points: list[tuple[float, float]]) -> str: - return " ".join(f"{x:.2f},{y:.2f}" for x, y in points) +class _ReportMetadataSlurm(BaseModel): + node_list: str -def _build_reward_chart_svg(steps: list["DSEStepData"]) -> str | None: - if not steps: - return None +class _ReportSystemMetadata(BaseModel): + system: _ReportMetadataSystem + slurm: _ReportMetadataSlurm - width, height, padding = 720, 260, 34 - reward_points = [(step.step, step.reward) for step in steps] - running_best = _build_running_best(reward_points) - reward_coords = _chart_points(reward_points, width, height, padding) - best_coords = _chart_points(running_best, width, height, padding) - - reward_line = _polyline(reward_coords) - best_line = _polyline(best_coords) - y_vals = [reward for _, reward in reward_points] - y_min, y_max = min(y_vals), max(y_vals) - - circles = [] - for step_data, (x, y) in zip(steps, reward_coords, strict=True): - tooltip = ( - f"Step {step_data.step} | Reward: {format_float(step_data.reward, 4)}" - f" | Observation: {step_data.observation_display}" - ) - circles.append(f'{tooltip}') - - return "\n".join( - [ - f'', - f'', - f'', - f'', - f'', - *circles, - f'Step', - f'Reward', - "", - ] - ) + +class _ReportJobMetadata(BaseModel): + elapsed_time_sec: int -def load_system_metadata(run_dir: Path, results_root: Path) -> SlurmSystemMetadata | None: +def load_system_metadata(run_dir: Path, results_root: Path) -> _ReportSystemMetadata | None: """Load system metadata from run_dir. 
At the moment it supports only Slurm.""" metadata_path = run_dir / "metadata" if not metadata_path.exists(): @@ -198,7 +158,7 @@ def load_system_metadata(run_dir: Path, results_root: Path) -> SlurmSystemMetada node_file = node_files[0] with node_file.open() as f: try: - return SlurmSystemMetadata.model_validate(toml.load(f)) + return _ReportSystemMetadata.model_validate(toml.load(f)) except Exception as e: logging.debug(f"Error validating metadata for {node_file}: {e}") @@ -212,7 +172,7 @@ class ReportItem: name: str description: str logs_path: Optional[str] = None - nodes: Optional[SlurmSystemMetadata] = None + nodes: Optional[_ReportSystemMetadata] = None @classmethod def from_test_runs(cls, test_runs: list[TestRun], results_root: Path) -> list["ReportItem"]: @@ -285,7 +245,7 @@ class DSESummary: best_config_toml: str | None analysis_rel_path: str | None parameter_rows: list[DSEParameterRow] = field(default_factory=list) - chart_svg: str | None = None + reward_chart_data: dict[str, list[Any]] | None = None @property def display_name(self) -> str: @@ -493,7 +453,7 @@ def _build_iteration_summary( best_config_toml=best_config_toml, analysis_rel_path=f"./{analysis_file.relative_to(self.results_root)}" if analysis_file.exists() else None, parameter_rows=parameter_rows, - chart_svg=_build_reward_chart_svg(steps), + reward_chart_data=_build_reward_chart_data(steps), ) @staticmethod @@ -503,5 +463,5 @@ def _step_elapsed_time(step_dir: Path) -> int | None: return None with slurm_job_path.open() as f: - metadata = SlurmJobMetadata.model_validate(toml.load(f)) + metadata = _ReportJobMetadata.model_validate(toml.load(f)) return metadata.elapsed_time_sec diff --git a/src/cloudai/util/base-report.jinja2 b/src/cloudai/util/base-report.jinja2 index 80cff35e2..87b28de82 100644 --- a/src/cloudai/util/base-report.jinja2 +++ b/src/cloudai/util/base-report.jinja2 @@ -3,50 +3,95 @@ {{ name }} {% block extra_head %}{% endblock %} diff --git a/src/cloudai/util/general-report.jinja2 
b/src/cloudai/util/general-report.jinja2 index 55d3396d9..9c00a7ea4 100644 --- a/src/cloudai/util/general-report.jinja2 +++ b/src/cloudai/util/general-report.jinja2 @@ -1,214 +1,459 @@ {% extends "base-report.jinja2" %} {% block extra_head %} +{% if dse_summaries %} + +{% endif %} +{% if dse_summaries %} + +{% endif %} {% endblock %} {% block content %} {% if dse_summaries %} {% for summary in dse_summaries %}
-

{{ summary.display_name }}

-

{{ summary.description }}

- -
-
-
Space
-
{{ summary.total_space }}
-
-
-
Ran
-
{{ summary.executed_steps }}
-
-
-
Skipped
-
{{ summary.skipped_steps }}
-
-
-
Coverage
-
{{ format_percent(summary.coverage_percent) }}
-
-
-
Saved Time
-
{{ format_duration(summary.saved_runtime_sec) }}
+
+
+

{{ summary.display_name }}

+ {{ summary.status_text }} + + {{ summary.executed_steps }} explored out of {{ summary.total_space }} combinations + {% if summary.best_step is not none %} • best step {{ summary.best_step }}{% endif %} + {% if summary.best_reward is not none %} • reward {{ format_float(summary.best_reward, 4) }}{% endif %} +
-
-
Saved GPU-Hours
-
{{ format_float(summary.saved_gpu_hours, 2) }}
-
-
-
Estimated $ Saved
-
{{ format_money(summary.estimated_saved_cost_usd) }}
+

{{ summary.description }}

+ +
+
+
Saved Time
+
{{ format_duration(summary.saved_runtime_sec) }}
+
Projected full search minus observed runtime
+
+
+
Saved GPU-Hours
+
{{ format_float(summary.saved_gpu_hours, 2) }}
+
Estimated from nodes, GPUs per node, and observed timings
+
+
+
Estimated $ Saved
+
{{ format_money(summary.estimated_saved_cost_usd) }}
+
Approximate savings using GPU-family hourly assumptions
+
+
+
Space
+
{{ summary.total_space }}
+
+
+
Ran
+
{{ summary.executed_steps }}
+
+
+
Skipped
+
{{ summary.skipped_steps }}
+
+
+
Coverage
+
{{ format_percent(summary.coverage_percent) }}
+

Execution Context

-
Status{{ summary.status_text }}
GPU Family{{ summary.gpu_arch_family or "unknown" }}
GPU Label{{ summary.gpu_arch_label or "unknown" }}
GPUs Per Node{{ summary.gpus_per_node or "unknown" }}
Nodes{{ summary.num_nodes or "unknown" }}
-
Step Success/Failure{{ summary.success_count }}/{{ summary.failure_count }}
+
Step Success / Failure{{ summary.success_count }} / {{ summary.failure_count }}
Average Step Runtime{{ format_duration(summary.avg_step_duration_sec) }}
Observed Runtime{{ format_duration(summary.total_runtime_sec) }}
Projected Full-Space Runtime{{ format_duration(summary.projected_runtime_sec) }}
-
-
-
-

Best Step

-
-
Best Step{{ summary.best_step }}
-
Best Reward{{ format_float(summary.best_reward, 4) }}
-
Best Observation{{ summary.best_observation_display }}
-
Run Folderopen
-
- - {% if summary.best_config_toml %} -
- Show best config TOML -
{{ summary.best_config_toml }}
-
- {% endif %} -
- -
-

Exploration Space

-

Each row shows the allowed values for a swept parameter and the selected best value.

- - - - - - - {% for row in summary.parameter_rows %} - - - - - - {% endfor %} -
ParameterAllowed ValuesBest
{{ row.name }}{{ row.values | join(", ") }}{{ row.best_value }}
-
+
+

Best Step

+
+
Best Step{{ summary.best_step }}
+
Best Reward{{ format_float(summary.best_reward, 4) }}
+
Best Observation{{ summary.best_observation_display }}
+
Run Folderopen
- -
-

Reward Over Steps

-

Blue shows observed reward per trial. Red dashed shows the best-so-far curve.

- {% if summary.chart_svg %} - {{ summary.chart_svg | safe }} - {% else %} -

No reward data available.

- {% endif %} + + {% if summary.best_config_toml %} +
+ Show best config TOML +
{{ summary.best_config_toml }}
+
+ {% endif %} +
+ +
+

Exploration Space

+

Each row shows the allowed values for a swept parameter and the selected best value.

+ + + + + + + + + + {% for row in summary.parameter_rows %} + + + + + + {% endfor %} + +
ParameterAllowed ValuesBest
{{ row.name }}{{ row.values | join(", ") }}{{ row.best_value }}
+
+ +
+

Reward Over Steps

+

Observed reward is shown as the dark line; the NVIDIA-green dashed line tracks the best-so-far trajectory.

+ {% if summary.reward_chart_data %} +
+ +
+ +

Interactive chart unavailable. Step count, best reward, and summary metrics remain available above.

+ + {% else %} +

No reward data available.

+ {% endif %}
{% endfor %} diff --git a/tests/test_reporter.py b/tests/test_reporter.py index 71511fab9..64bb97b7c 100644 --- a/tests/test_reporter.py +++ b/tests/test_reporter.py @@ -455,7 +455,11 @@ def test_dse_summary_and_best_config_artifacts(slurm_system: SlurmSystem, slurm_ assert summary.gpu_arch_family == "H100" assert summary.analysis_rel_path is not None assert summary.best_config_rel_path == f"./{dse_tr.name}/0/{dse_tr.name}.toml" - assert summary.chart_svg is not None + assert summary.reward_chart_data is not None + assert summary.reward_chart_data["labels"] == [1, 2, 3] + assert summary.reward_chart_data["rewards"] == pytest.approx([1.5, 3.0, 2.0]) + assert summary.reward_chart_data["running_best"] == pytest.approx([1.5, 3.0, 3.0]) + assert summary.reward_chart_data["observations"] == ["2.5", "1.2", "1.8"] best_values = {row.name: row.best_value for row in summary.parameter_rows} assert best_values["nthreads"] == "2" @@ -492,13 +496,19 @@ def test_dse_generate_scenario_report_renders_html( report_path = slurm_system.output_path / "dse_scenario.html" html = report_path.read_text() + assert "cdn.jsdelivr.net/npm/chart.js" in html assert "Saved GPU-Hours" in html assert "Reward Over Steps" in html assert "Best Test TOML" in html assert "Show best config TOML" in html assert "BO Analysis" in html assert "dse-report.toml" in html - assert " Date: Mon, 23 Mar 2026 15:31:23 +0100 Subject: [PATCH 04/30] always render statuses table --- src/cloudai/util/general-report.jinja2 | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/cloudai/util/general-report.jinja2 b/src/cloudai/util/general-report.jinja2 index 9c00a7ea4..d41c0bbf4 100644 --- a/src/cloudai/util/general-report.jinja2 +++ b/src/cloudai/util/general-report.jinja2 @@ -330,7 +330,7 @@ {% endblock %} {% block content %} -{% if dse_summaries %} + {% for summary in dse_summaries %}
@@ -457,7 +457,7 @@
{% endfor %} -{% else %} + @@ -484,5 +484,5 @@ {% endfor %}
Test
-{% endif %} + {% endblock %} From e099bc96208a599d854e60ff11468ce5b0749c7b Mon Sep 17 00:00:00 2001 From: Ivan Podkidyshev Date: Tue, 24 Mar 2026 14:03:10 +0100 Subject: [PATCH 05/30] cleaner look of the report --- src/cloudai/report_generator/status_report.py | 45 +- src/cloudai/util/general-report.jinja2 | 429 +++++++++++++----- tests/test_reporter.py | 23 +- 3 files changed, 346 insertions(+), 151 deletions(-) diff --git a/src/cloudai/report_generator/status_report.py b/src/cloudai/report_generator/status_report.py index 733071301..15e929d7e 100644 --- a/src/cloudai/report_generator/status_report.py +++ b/src/cloudai/report_generator/status_report.py @@ -21,7 +21,7 @@ import logging from dataclasses import dataclass, field from pathlib import Path -from typing import Any, Optional +from typing import Any import toml from pydantic import BaseModel @@ -123,6 +123,20 @@ def _build_reward_chart_data(steps: list["DSEStepData"]) -> dict[str, list[Any]] } +def _build_effort_chart_data(executed_steps: int, total_space: int) -> dict[str, Any] | None: + if total_space <= 0: + return None + + explored_ratio = min(max(executed_steps / total_space, 0.0), 1.0) + explored_display_percent = 100.0 if explored_ratio >= 1.0 else min(max(explored_ratio * 100.0, 14.0), 62.0) + + return { + "explored_ratio": explored_ratio, + "explored_display_percent": explored_display_percent, + "remainder_display_percent": max(100.0 - explored_display_percent, 0.0), + } + + class _ReportMetadataSystem(BaseModel): gpu_arch_type: str @@ -171,19 +185,25 @@ class ReportItem: name: str description: str - logs_path: Optional[str] = None - nodes: Optional[_ReportSystemMetadata] = None + logs_path: str | None + nodes: _ReportSystemMetadata | None + status_text: str + status_class: str @classmethod def from_test_runs(cls, test_runs: list[TestRun], results_root: Path) -> list["ReportItem"]: report_items: list[ReportItem] = [] for tr in test_runs: + tr_status = tr.test.was_run_successful(tr) + status_text = 
"PASSED" if tr_status.is_successful else "FAILED" report_items.append( ReportItem( name=case_name(tr), description=tr.test.description, logs_path=f"./{tr.output_path.relative_to(results_root)}" if tr.output_path.exists() else None, nodes=load_system_metadata(tr.output_path, results_root), + status_text=status_text, + status_class=status_text.lower(), ) ) return report_items @@ -223,22 +243,14 @@ class DSESummary: total_space: int executed_steps: int skipped_steps: int - coverage_percent: float | None best_step: int | None best_reward: float | None - best_observation_display: str avg_step_duration_sec: float | None total_runtime_sec: float | None - projected_runtime_sec: float | None saved_runtime_sec: float | None success_count: int failure_count: int gpu_arch_label: str | None - gpu_arch_family: str | None - gpus_per_node: int | None - num_nodes: int | None - total_gpu_hours: float | None - projected_gpu_hours: float | None saved_gpu_hours: float | None estimated_saved_cost_usd: float | None best_config_rel_path: str | None @@ -246,6 +258,7 @@ class DSESummary: analysis_rel_path: str | None parameter_rows: list[DSEParameterRow] = field(default_factory=list) reward_chart_data: dict[str, list[Any]] | None = None + effort_chart_data: dict[str, Any] | None = None @property def display_name(self) -> str: @@ -375,7 +388,6 @@ def _build_iteration_summary( total_space = len(original_tr.all_combinations) executed_steps = len(steps) skipped_steps = max(total_space - executed_steps, 0) - coverage_percent = (executed_steps / total_space * 100.0) if total_space else None projected_runtime_sec = avg_step_duration_sec * total_space if avg_step_duration_sec is not None else None saved_runtime_sec = ( max(projected_runtime_sec - total_runtime_sec, 0.0) @@ -431,22 +443,14 @@ def _build_iteration_summary( total_space=total_space, executed_steps=executed_steps, skipped_steps=skipped_steps, - coverage_percent=coverage_percent, best_step=best_step_data.step, 
best_reward=best_step_data.reward, - best_observation_display=best_step_data.observation_display, avg_step_duration_sec=avg_step_duration_sec, total_runtime_sec=total_runtime_sec, - projected_runtime_sec=projected_runtime_sec, saved_runtime_sec=saved_runtime_sec, success_count=success_count, failure_count=failure_count, gpu_arch_label=gpu_arch_label, - gpu_arch_family=gpu_arch_family, - gpus_per_node=gpus_per_node, - num_nodes=num_nodes, - total_gpu_hours=total_gpu_hours, - projected_gpu_hours=projected_gpu_hours, saved_gpu_hours=saved_gpu_hours, estimated_saved_cost_usd=estimated_saved_cost_usd, best_config_rel_path=f"./{best_config_path.relative_to(self.results_root)}", @@ -454,6 +458,7 @@ def _build_iteration_summary( analysis_rel_path=f"./{analysis_file.relative_to(self.results_root)}" if analysis_file.exists() else None, parameter_rows=parameter_rows, reward_chart_data=_build_reward_chart_data(steps), + effort_chart_data=_build_effort_chart_data(executed_steps, total_space), ) @staticmethod diff --git a/src/cloudai/util/general-report.jinja2 b/src/cloudai/util/general-report.jinja2 index d41c0bbf4..30f4025ef 100644 --- a/src/cloudai/util/general-report.jinja2 +++ b/src/cloudai/util/general-report.jinja2 @@ -8,17 +8,18 @@ .dse-summary { margin: 2rem 0 3rem; } + .overview-board, + .dse-box { + border: 1px solid var(--nv-border); + border-radius: var(--radius-lg); + background: linear-gradient(180deg, rgba(255, 255, 255, 0.98), rgba(247, 252, 233, 0.98)); + box-shadow: var(--nv-shadow); + } .overview-board { position: relative; margin-bottom: 1.5rem; padding: 1.6rem; - border: 1px solid rgba(118, 185, 0, 0.28); - border-radius: var(--radius-lg); - background: - linear-gradient(140deg, rgba(118, 185, 0, 0.10), transparent 36%), - linear-gradient(180deg, rgba(255, 255, 255, 0.98), rgba(247, 252, 233, 0.98)); - box-shadow: var(--nv-shadow); - color: var(--nv-text); + overflow: hidden; } .overview-board::before { content: ""; @@ -36,7 +37,7 @@ flex-wrap: wrap; 
align-items: center; gap: 0.8rem 1rem; - margin-bottom: 0.8rem; + margin-bottom: 0.85rem; } .overview-board h2 { margin: 0; @@ -77,10 +78,11 @@ color: var(--nv-muted); font-size: 0.95rem; } - .overview-card-grid { + .summary-stat-grid { display: grid; - grid-template-columns: repeat(auto-fit, minmax(160px, 1fr)); + grid-template-columns: repeat(auto-fit, minmax(170px, 1fr)); gap: 0.9rem; + margin-bottom: 1rem; } .metric-card { padding: 1rem 1.05rem; @@ -93,10 +95,13 @@ border-color: rgba(118, 185, 0, 0.34); box-shadow: inset 0 4px 0 var(--nv-green); } - .metric-card--muted { - background: rgba(255, 255, 255, 0.92); + .metric-card--neutral { + background: rgba(255, 255, 255, 0.94); } .metric-label { + display: flex; + align-items: center; + gap: 0.45rem; color: var(--nv-muted); font-size: 0.77rem; text-transform: uppercase; @@ -110,27 +115,17 @@ line-height: 1.1; letter-spacing: -0.03em; } - .metric-note { - margin-top: 0.35rem; - color: var(--nv-muted); - font-size: 0.84rem; - } - .dse-box { - margin-top: 1.15rem; - padding: 1.2rem 1.3rem; - border: 1px solid var(--nv-border); - border-radius: var(--radius-md); - background: linear-gradient(180deg, rgba(255, 255, 255, 0.96), rgba(244, 248, 240, 0.96)); - box-shadow: 0 12px 34px rgba(17, 17, 17, 0.05); - } - .context-list { + .context-strip { display: grid; grid-template-columns: repeat(auto-fit, minmax(180px, 1fr)); - gap: 0.85rem 1rem; - margin: 0.75rem 0 0; + gap: 0.8rem 1rem; + margin-bottom: 1rem; } - .context-list div { - font-size: 0.95rem; + .context-item { + padding: 0.85rem 0.95rem; + border-radius: var(--radius-md); + border: 1px solid rgba(17, 17, 17, 0.06); + background: rgba(255, 255, 255, 0.90); } .label { color: var(--nv-muted); @@ -143,19 +138,119 @@ color: var(--nv-ink); font-weight: 650; } - .best-link-list { + .effort-block { + padding: 1rem 1.05rem; + border-radius: var(--radius-md); + border: 1px solid rgba(118, 185, 0, 0.22); + background: linear-gradient(180deg, rgba(118, 185, 0, 0.08), 
rgba(255, 255, 255, 0.92)); + } + .effort-head { display: flex; flex-wrap: wrap; - gap: 0.85rem 1.2rem; - margin-top: 1rem; + align-items: baseline; + justify-content: space-between; + gap: 0.5rem 1rem; + margin-bottom: 0.65rem; + } + .effort-head h3 { + margin: 0; } - .best-link-list a { + .effort-caption { + color: var(--nv-muted); + font-size: 0.92rem; + } + .effort-bar { + display: flex; + align-items: stretch; + width: 100%; + min-height: 56px; + margin: 0.7rem 0 0.6rem; + overflow: hidden; + border-radius: 999px; + border: 1px solid rgba(17, 17, 17, 0.08); + background: rgba(255, 255, 255, 0.86); + } + .effort-bar__explored { + display: flex; + align-items: center; + justify-content: center; + min-width: 86px; + background: linear-gradient(90deg, var(--nv-green), var(--nv-green-bright)); + color: #ffffff; + font-weight: 700; + font-size: 0.9rem; + } + .effort-bar__break { + display: flex; + align-items: center; + justify-content: center; + width: 62px; + color: var(--nv-muted); + font-weight: 900; + letter-spacing: 0.08em; + background: + repeating-linear-gradient( + -55deg, + rgba(17, 17, 17, 0.06), + rgba(17, 17, 17, 0.06) 6px, + rgba(255, 255, 255, 0.75) 6px, + rgba(255, 255, 255, 0.75) 12px + ); + } + .effort-bar__remaining { + display: flex; + align-items: center; + justify-content: flex-end; + padding-right: 1rem; + color: var(--nv-muted); + font-weight: 700; + background: rgba(17, 17, 17, 0.08); + } + .effort-scale { + display: flex; + justify-content: space-between; + gap: 1rem; + color: var(--nv-muted); + font-size: 0.9rem; + } + .dse-box { + margin-top: 1.15rem; + padding: 1.2rem 1.3rem; + } + .section-topline { + display: flex; + flex-wrap: wrap; + align-items: center; + justify-content: space-between; + gap: 0.8rem 1rem; + margin-bottom: 0.9rem; + } + .section-topline h3 { + margin: 0; + } + .best-config-actions { + display: flex; + flex-wrap: wrap; + gap: 0.75rem; + align-items: center; + } + .action-button { display: inline-flex; align-items: 
center; gap: 0.4rem; + padding: 0.55rem 0.9rem; + border-radius: 999px; + border: 1px solid rgba(118, 185, 0, 0.28); + background: #ffffff; + color: var(--nv-green-strong); + font-weight: 700; + cursor: pointer; + } + .action-button:hover { + background: var(--nv-green-soft); + text-decoration: none; } .best-config-preview { - margin-top: 1rem; border-radius: var(--radius-sm); overflow: hidden; border: 1px solid rgba(118, 185, 0, 0.22); @@ -177,14 +272,30 @@ background: #fbfdf8; border-top: 1px solid rgba(118, 185, 0, 0.16); } - .best-value { - display: inline-block; + .space-table th, + .steps-table th { + background: #f0f9de; + } + .value-cloud { + display: flex; + flex-wrap: wrap; + gap: 0.45rem; + } + .value-pill { + display: inline-flex; + align-items: center; padding: 0.22rem 0.62rem; border-radius: 999px; + background: rgba(255, 255, 255, 0.92); + border: 1px solid rgba(17, 17, 17, 0.08); + color: var(--nv-text); + font-weight: 650; + white-space: nowrap; + } + .value-pill--selected { background: var(--nv-green-soft); + border-color: rgba(118, 185, 0, 0.30); color: var(--nv-green-strong); - font-weight: 750; - white-space: nowrap; } .small-note { color: var(--nv-muted); @@ -211,30 +322,55 @@ .chart-shell.is-enhanced ~ .chart-fallback { display: none; } - .space-table th { - background: #f0f9de; - } + .steps-table tbody tr:nth-child(even), .space-table tbody tr:nth-child(even) { background: rgba(118, 185, 0, 0.05); } + .steps-table tbody tr:hover, .space-table tbody tr:hover { background: rgba(118, 185, 0, 0.10); } @media (max-width: 640px) { - .overview-board { - padding: 1.2rem; + .overview-board, + .dse-box { + padding: 1.1rem; } .metric-value { font-size: 1.35rem; } - .context-list { + .context-strip { grid-template-columns: 1fr; } + .effort-bar__break { + width: 46px; + } } {% if dse_summaries %} {% endif %} {% endblock %} {% block content %} - +{% if dse_summaries %} {% for summary in dse_summaries %}
@@ -338,99 +473,103 @@

{{ summary.display_name }}

{{ summary.status_text }} - {{ summary.executed_steps }} explored out of {{ summary.total_space }} combinations - {% if summary.best_step is not none %} • best step {{ summary.best_step }}{% endif %} - {% if summary.best_reward is not none %} • reward {{ format_float(summary.best_reward, 4) }}{% endif %} + {{ "{:,}".format(summary.executed_steps) }} explored out of {{ "{:,}".format(summary.total_space) }} combinations

{{ summary.description }}

-
+
+
+
Search Space
+
{{ "{:,}".format(summary.total_space) }}
+
+
+
Explored Steps
+
{{ "{:,}".format(summary.executed_steps) }}
+
Saved Time
{{ format_duration(summary.saved_runtime_sec) }}
-
Projected full search minus observed runtime
Saved GPU-Hours
{{ format_float(summary.saved_gpu_hours, 2) }}
-
Estimated from nodes, GPUs per node, and observed timings
Estimated $ Saved
{{ format_money(summary.estimated_saved_cost_usd) }}
-
Approximate savings using GPU-family hourly assumptions
-
-
Space
-
{{ summary.total_space }}
-
-
-
Ran
-
{{ summary.executed_steps }}
+
+ +
+
GPU Label{{ summary.gpu_arch_label or "unknown" }}
+
Step Success / Failure{{ summary.success_count }} / {{ summary.failure_count }}
+
Average Step Runtime{{ format_duration(summary.avg_step_duration_sec) }}
+
Observed Runtime{{ format_duration(summary.total_runtime_sec) }}
+
+ + {% if summary.effort_chart_data %} +
+
+

DSE Savings

+ Axis break emphasizes the gap between explored trials and full-space search.
-
-
Skipped
-
{{ summary.skipped_steps }}
+
+
+ {{ "{:,}".format(summary.executed_steps) }} +
+ {% if summary.skipped_steps > 0 %} +
//
+
+ {{ "{:,}".format(summary.total_space) }} +
+ {% endif %}
-
-
Coverage
-
{{ format_percent(summary.coverage_percent) }}
+
+ Explored + Full space
+ {% endif %}
-

Execution Context

-
-
GPU Family{{ summary.gpu_arch_family or "unknown" }}
-
GPU Label{{ summary.gpu_arch_label or "unknown" }}
-
GPUs Per Node{{ summary.gpus_per_node or "unknown" }}
-
Nodes{{ summary.num_nodes or "unknown" }}
-
Step Success / Failure{{ summary.success_count }} / {{ summary.failure_count }}
-
Average Step Runtime{{ format_duration(summary.avg_step_duration_sec) }}
-
Observed Runtime{{ format_duration(summary.total_runtime_sec) }}
-
Projected Full-Space Runtime{{ format_duration(summary.projected_runtime_sec) }}
-
-
- -
-

Best Step

-
-
Best Step{{ summary.best_step }}
-
Best Reward{{ format_float(summary.best_reward, 4) }}
-
Best Observation{{ summary.best_observation_display }}
-
Run Folderopen
-
-

Exploration Space

-

Each row shows the allowed values for a swept parameter and the selected best value.

- {% for row in summary.parameter_rows %} - - + {% endfor %} @@ -449,7 +588,7 @@ > -

Interactive chart unavailable. Step count, best reward, and summary metrics remain available above.

+

Interactive chart unavailable. Step count and summary metrics remain available above.

{% else %}

No reward data available.

@@ -458,31 +597,73 @@ {% endfor %} -
Parameter Allowed ValuesBest
{{ row.name }}{{ row.values | join(", ") }}{{ row.best_value }} +
+ {% for value in row.values %} + {{ value }} + {% endfor %} +
+
- - - - - {% if report_items | selectattr('nodes') | first is not none %} - - {% endif %} - - {% for item in report_items %} - - - - {% if item.logs_path %} - - {% else %} - - {% endif %} - {% if item.nodes is not none %} - - {% else %} - - {% endif %} - - {% endfor %} -
TestDescriptionResultsNodes
{{ item.name }}{{ item.description }}logsno logs{{ item.nodes }}no nodes information
- +
+

All Steps

+ + + + + + + + + + + {% for item in report_items %} + + + + + + + {% endfor %} + +
CaseStatusResultsNodes
{{ item.name }} + {% if item.status_text %} + {{ item.status_text }} + {% else %} + unknown + {% endif %} + + {% if item.logs_path %} + logs + {% else %} + no logs + {% endif %} + + {% if item.nodes is not none %} + {{ item.nodes.slurm.node_list }} + {% else %} + no nodes information + {% endif %} +
+
+{% else %} + + + + + + {% if report_items | selectattr('nodes') | first is not none %} + + {% endif %} + + {% for item in report_items %} + + + + {% if item.logs_path %} + + {% else %} + + {% endif %} + {% if item.nodes is not none %} + + {% else %} + + {% endif %} + + {% endfor %} +
TestDescriptionResultsNodes
{{ item.name }}{{ item.description }}logsno logs{{ item.nodes }}no nodes information
+{% endif %} {% endblock %} diff --git a/tests/test_reporter.py b/tests/test_reporter.py index 64bb97b7c..d1f0c977e 100644 --- a/tests/test_reporter.py +++ b/tests/test_reporter.py @@ -424,6 +424,8 @@ def test_report_item_from_test_runs_includes_logs_and_metadata( assert items[0].logs_path == f"./{benchmark_tr.name}/0" assert items[0].nodes is not None assert items[0].nodes.slurm.node_list == slurm_metadata.slurm.node_list + assert items[0].status_text == "FAILED" + assert items[0].status_class == "failed" def test_report_order() -> None: @@ -442,17 +444,13 @@ def test_dse_summary_and_best_config_artifacts(slurm_system: SlurmSystem, slurm_ assert summary.total_space == 8 assert summary.executed_steps == 3 assert summary.skipped_steps == 5 - assert summary.coverage_percent == pytest.approx(37.5) assert summary.best_step == 2 assert summary.best_reward == pytest.approx(3.0) - assert summary.best_observation_display == "1.2" assert summary.avg_step_duration_sec == pytest.approx(20.0) assert summary.total_runtime_sec == pytest.approx(60.0) - assert summary.projected_runtime_sec == pytest.approx(160.0) assert summary.saved_runtime_sec == pytest.approx(100.0) assert summary.saved_gpu_hours == pytest.approx((100.0 / 3600.0) * 16) assert summary.estimated_saved_cost_usd == pytest.approx((summary.saved_gpu_hours or 0) * 4.5) - assert summary.gpu_arch_family == "H100" assert summary.analysis_rel_path is not None assert summary.best_config_rel_path == f"./{dse_tr.name}/0/{dse_tr.name}.toml" assert summary.reward_chart_data is not None @@ -460,6 +458,8 @@ def test_dse_summary_and_best_config_artifacts(slurm_system: SlurmSystem, slurm_ assert summary.reward_chart_data["rewards"] == pytest.approx([1.5, 3.0, 2.0]) assert summary.reward_chart_data["running_best"] == pytest.approx([1.5, 3.0, 3.0]) assert summary.reward_chart_data["observations"] == ["2.5", "1.2", "1.8"] + assert summary.effort_chart_data is not None + assert summary.effort_chart_data["explored_ratio"] == 
pytest.approx(3 / 8) best_values = {row.name: row.best_value for row in summary.parameter_rows} assert best_values["nthreads"] == "2" @@ -498,16 +498,26 @@ def test_dse_generate_scenario_report_renders_html( html = report_path.read_text() assert "cdn.jsdelivr.net/npm/chart.js" in html assert "Saved GPU-Hours" in html + assert "Search Space" in html + assert "Explored Steps" in html + assert "DSE Savings" in html assert "Reward Over Steps" in html assert "Best Test TOML" in html assert "Show best config TOML" in html + assert "Copy TOML" in html assert "BO Analysis" in html + assert "All Steps" in html assert "dse-report.toml" in html assert "js-reward-chart" in html assert "chart-shell" in html - assert "dse-section-grid" not in html + assert 'class="value-pill value-pill--selected"' in html + assert "Execution Context" not in html assert "Exploration Mix" not in html - assert "37.50%" in html + assert "Skipped" not in html + assert "Coverage" not in html + assert "GPU Family" not in html + assert "Best" not in html + assert "status-pill--passed" in html assert "1m 40s" in html @@ -531,5 +541,4 @@ def test_unknown_gpu_family_omits_estimated_cost( dse_tr = _create_dse_report_fixture(slurm_system, slurm_metadata, gpu_name="Mystery GPU") _reporter, summaries = _build_dse_summaries(slurm_system, dse_tr) - assert summaries[0].gpu_arch_family is None assert summaries[0].estimated_saved_cost_usd is None From d36b0e276f76a0c6e8c89fa281c02ada30f104e5 Mon Sep 17 00:00:00 2001 From: Ivan Podkidyshev Date: Tue, 24 Mar 2026 14:30:00 +0100 Subject: [PATCH 06/30] cleaner look of the report --- src/cloudai/report_generator/status_report.py | 14 +- src/cloudai/util/general-report.jinja2 | 248 ++++++++++-------- tests/test_reporter.py | 4 + 3 files changed, 148 insertions(+), 118 deletions(-) diff --git a/src/cloudai/report_generator/status_report.py b/src/cloudai/report_generator/status_report.py index 15e929d7e..763feedb5 100644 --- a/src/cloudai/report_generator/status_report.py 
+++ b/src/cloudai/report_generator/status_report.py @@ -128,12 +128,11 @@ def _build_effort_chart_data(executed_steps: int, total_space: int) -> dict[str, return None explored_ratio = min(max(executed_steps / total_space, 0.0), 1.0) - explored_display_percent = 100.0 if explored_ratio >= 1.0 else min(max(explored_ratio * 100.0, 14.0), 62.0) - return { "explored_ratio": explored_ratio, - "explored_display_percent": explored_display_percent, - "remainder_display_percent": max(100.0 - explored_display_percent, 0.0), + "labels": ["Explored", "Full Space"], + "values": [executed_steps, total_space], + "use_log_scale": total_space / max(executed_steps, 1) >= 20, } @@ -248,7 +247,6 @@ class DSESummary: avg_step_duration_sec: float | None total_runtime_sec: float | None saved_runtime_sec: float | None - success_count: int failure_count: int gpu_arch_label: str | None saved_gpu_hours: float | None @@ -270,7 +268,7 @@ def display_name(self) -> str: def status_text(self) -> str: if self.failure_count == 0: return "PASSED" - if self.success_count == 0: + if self.failure_count == self.executed_steps: return "FAILED" return "PARTIAL" @@ -421,8 +419,7 @@ def _build_iteration_summary( else None ) - success_count = sum(1 for step in steps if step.is_successful) - failure_count = len(steps) - success_count + failure_count = sum(1 for step in steps if not step.is_successful) best_action = best_step_data.action parameter_rows = [ DSEParameterRow( @@ -448,7 +445,6 @@ def _build_iteration_summary( avg_step_duration_sec=avg_step_duration_sec, total_runtime_sec=total_runtime_sec, saved_runtime_sec=saved_runtime_sec, - success_count=success_count, failure_count=failure_count, gpu_arch_label=gpu_arch_label, saved_gpu_hours=saved_gpu_hours, diff --git a/src/cloudai/util/general-report.jinja2 b/src/cloudai/util/general-report.jinja2 index 30f4025ef..aac379eab 100644 --- a/src/cloudai/util/general-report.jinja2 +++ b/src/cloudai/util/general-report.jinja2 @@ -78,11 +78,21 @@ color: 
var(--nv-muted); font-size: 0.95rem; } - .summary-stat-grid { + .overview-layout { display: grid; - grid-template-columns: repeat(auto-fit, minmax(170px, 1fr)); + grid-template-columns: minmax(0, 1fr) minmax(320px, 0.92fr); + gap: 1rem; + align-items: stretch; + } + .overview-left { + display: flex; + flex-direction: column; + gap: 0.9rem; + } + .summary-stat-grid { + display: flex; + flex-direction: column; gap: 0.9rem; - margin-bottom: 1rem; } .metric-card { padding: 1rem 1.05rem; @@ -116,10 +126,9 @@ letter-spacing: -0.03em; } .context-strip { - display: grid; - grid-template-columns: repeat(auto-fit, minmax(180px, 1fr)); - gap: 0.8rem 1rem; - margin-bottom: 1rem; + display: flex; + flex-direction: column; + gap: 0.8rem; } .context-item { padding: 0.85rem 0.95rem; @@ -139,6 +148,9 @@ font-weight: 650; } .effort-block { + display: flex; + flex-direction: column; + height: 100%; padding: 1rem 1.05rem; border-radius: var(--radius-md); border: 1px solid rgba(118, 185, 0, 0.22); @@ -159,59 +171,19 @@ color: var(--nv-muted); font-size: 0.92rem; } - .effort-bar { - display: flex; - align-items: stretch; - width: 100%; - min-height: 56px; - margin: 0.7rem 0 0.6rem; - overflow: hidden; - border-radius: 999px; + .effort-chart-shell { + position: relative; + flex: 1; + min-height: 360px; + margin-top: 0.6rem; + border-radius: var(--radius-md); border: 1px solid rgba(17, 17, 17, 0.08); - background: rgba(255, 255, 255, 0.86); - } - .effort-bar__explored { - display: flex; - align-items: center; - justify-content: center; - min-width: 86px; - background: linear-gradient(90deg, var(--nv-green), var(--nv-green-bright)); - color: #ffffff; - font-weight: 700; - font-size: 0.9rem; - } - .effort-bar__break { - display: flex; - align-items: center; - justify-content: center; - width: 62px; - color: var(--nv-muted); - font-weight: 900; - letter-spacing: 0.08em; - background: - repeating-linear-gradient( - -55deg, - rgba(17, 17, 17, 0.06), - rgba(17, 17, 17, 0.06) 6px, - rgba(255, 
255, 255, 0.75) 6px, - rgba(255, 255, 255, 0.75) 12px - ); - } - .effort-bar__remaining { - display: flex; - align-items: center; - justify-content: flex-end; - padding-right: 1rem; - color: var(--nv-muted); - font-weight: 700; - background: rgba(17, 17, 17, 0.08); + background: rgba(255, 255, 255, 0.88); + padding: 0.85rem; } - .effort-scale { - display: flex; - justify-content: space-between; - gap: 1rem; - color: var(--nv-muted); - font-size: 0.9rem; + .effort-chart-shell canvas { + width: 100% !important; + height: 100% !important; } .dse-box { margin-top: 1.15rem; @@ -338,12 +310,9 @@ .metric-value { font-size: 1.35rem; } - .context-strip { + .overview-layout { grid-template-columns: 1fr; } - .effort-bar__break { - width: 46px; - } } {% if dse_summaries %} @@ -459,6 +428,64 @@ }); canvas.parentElement.classList.add("is-enhanced"); }); + + const effortCanvases = document.querySelectorAll(".js-effort-chart"); + effortCanvases.forEach((canvas) => { + const dataNode = document.getElementById(canvas.dataset.chartDataId); + if (!dataNode) { + return; + } + const chartData = JSON.parse(dataNode.textContent); + new Chart(canvas, { + type: "bar", + data: { + labels: chartData.labels, + datasets: [ + { + data: chartData.values, + backgroundColor: ["#76b900", "rgba(57, 66, 78, 0.24)"], + borderColor: ["#76b900", "rgba(57, 66, 78, 0.50)"], + borderWidth: 1, + borderRadius: 14, + maxBarThickness: 90 + } + ] + }, + options: { + responsive: true, + maintainAspectRatio: false, + plugins: { + legend: { + display: false + }, + tooltip: { + callbacks: { + label: function (context) { + return context.raw.toLocaleString(); + } + } + } + }, + scales: { + x: { + grid: { + display: false + } + }, + y: { + type: chartData.use_log_scale ? 
"logarithmic" : "linear", + beginAtZero: !chartData.use_log_scale, + ticks: { + callback: function (value) { + return Number(value).toLocaleString(); + } + } + } + } + } + }); + canvas.parentElement.classList.add("is-enhanced"); + }); }); {% endif %} @@ -478,59 +505,62 @@

{{ summary.description }}

-
-
-
Search Space
-
{{ "{:,}".format(summary.total_space) }}
-
-
-
Explored Steps
-
{{ "{:,}".format(summary.executed_steps) }}
-
-
-
Saved Time
-
{{ format_duration(summary.saved_runtime_sec) }}
-
-
-
Saved GPU-Hours
-
{{ format_float(summary.saved_gpu_hours, 2) }}
-
-
-
Estimated $ Saved
-
{{ format_money(summary.estimated_saved_cost_usd) }}
-
-
- -
-
GPU Label{{ summary.gpu_arch_label or "unknown" }}
-
Step Success / Failure{{ summary.success_count }} / {{ summary.failure_count }}
-
Average Step Runtime{{ format_duration(summary.avg_step_duration_sec) }}
-
Observed Runtime{{ format_duration(summary.total_runtime_sec) }}
-
+
+
+
+
+
Search Space
+
{{ "{:,}".format(summary.total_space) }}
+
+
+
Explored Steps
+
{{ "{:,}".format(summary.executed_steps) }}
+
+
+
Saved Time
+
{{ format_duration(summary.saved_runtime_sec) }}
+
+
+
Saved GPU-Hours
+
{{ format_float(summary.saved_gpu_hours, 2) }}
+
+
+
Estimated $ Saved
+
{{ format_money(summary.estimated_saved_cost_usd) }}
+
+
- {% if summary.effort_chart_data %} -
-
-

DSE Savings

- Axis break emphasizes the gap between explored trials and full-space search. +
+
GPU Label{{ summary.gpu_arch_label or "unknown" }}
+
Average Step Runtime{{ format_duration(summary.avg_step_duration_sec) }}
+
Observed Runtime{{ format_duration(summary.total_runtime_sec) }}
+
-
-
- {{ "{:,}".format(summary.executed_steps) }} + + {% if summary.effort_chart_data %} +
+
+

DSE Savings

+ + {% if summary.effort_chart_data.use_log_scale %} + Log scale keeps both bars legible across a large search-space gap. + {% else %} + Side-by-side comparison of explored steps versus full search space. + {% endif %} +
- {% if summary.skipped_steps > 0 %} -
//
-
- {{ "{:,}".format(summary.total_space) }} +
+
- {% endif %} -
-
- Explored - Full space + +

Interactive chart unavailable. The totals above remain available as text.

+ {% endif %}
- {% endif %}
diff --git a/tests/test_reporter.py b/tests/test_reporter.py index d1f0c977e..fb6712596 100644 --- a/tests/test_reporter.py +++ b/tests/test_reporter.py @@ -460,6 +460,9 @@ def test_dse_summary_and_best_config_artifacts(slurm_system: SlurmSystem, slurm_ assert summary.reward_chart_data["observations"] == ["2.5", "1.2", "1.8"] assert summary.effort_chart_data is not None assert summary.effort_chart_data["explored_ratio"] == pytest.approx(3 / 8) + assert summary.effort_chart_data["labels"] == ["Explored", "Full Space"] + assert summary.effort_chart_data["values"] == [3, 8] + assert summary.effort_chart_data["use_log_scale"] is False best_values = {row.name: row.best_value for row in summary.parameter_rows} assert best_values["nthreads"] == "2" @@ -508,6 +511,7 @@ def test_dse_generate_scenario_report_renders_html( assert "BO Analysis" in html assert "All Steps" in html assert "dse-report.toml" in html + assert "js-effort-chart" in html assert "js-reward-chart" in html assert "chart-shell" in html assert 'class="value-pill value-pill--selected"' in html From 0c7267ab55a42f404b02ee8110634435db12e398 Mon Sep 17 00:00:00 2001 From: Ivan Podkidyshev Date: Tue, 24 Mar 2026 14:58:52 +0100 Subject: [PATCH 07/30] almost there --- src/cloudai/report_generator/status_report.py | 6 -- src/cloudai/reporter.py | 2 +- src/cloudai/util/general-report.jinja2 | 100 ++++++++++-------- tests/test_reporter.py | 4 +- 4 files changed, 58 insertions(+), 54 deletions(-) diff --git a/src/cloudai/report_generator/status_report.py b/src/cloudai/report_generator/status_report.py index 763feedb5..2fcc2aa0a 100644 --- a/src/cloudai/report_generator/status_report.py +++ b/src/cloudai/report_generator/status_report.py @@ -258,12 +258,6 @@ class DSESummary: reward_chart_data: dict[str, list[Any]] | None = None effort_chart_data: dict[str, Any] | None = None - @property - def display_name(self) -> str: - if self.iteration == 0: - return self.name - return f"{self.name} iter={self.iteration}" - 
@property def status_text(self) -> str: if self.failure_count == 0: diff --git a/src/cloudai/reporter.py b/src/cloudai/reporter.py index 56ee450ae..6f6111bfe 100644 --- a/src/cloudai/reporter.py +++ b/src/cloudai/reporter.py @@ -125,7 +125,7 @@ def _add_dse_rows(dse_summaries: list[DSESummary], table: Table): ] if summary.best_config_rel_path: details.append(summary.best_config_rel_path) - table.add_row(summary.display_name, f"[bold]{summary.status_style}[/bold]", "\n".join(details)) + table.add_row(summary.description, f"[bold]{summary.status_style}[/bold]", "\n".join(details)) def _add_standard_rows(self, table: Table): for tr in self.trs: diff --git a/src/cloudai/util/general-report.jinja2 b/src/cloudai/util/general-report.jinja2 index aac379eab..fbbf01be5 100644 --- a/src/cloudai/util/general-report.jinja2 +++ b/src/cloudai/util/general-report.jinja2 @@ -87,7 +87,18 @@ .overview-left { display: flex; flex-direction: column; - gap: 0.9rem; + gap: 1rem; + } + .impact-stat-grid, + .context-strip { + display: grid; + gap: 0.8rem; + } + .impact-stat-grid { + grid-template-columns: repeat(2, minmax(0, 1fr)); + } + .context-strip { + grid-template-columns: repeat(2, minmax(0, 1fr)); } .summary-stat-grid { display: flex; @@ -95,7 +106,7 @@ gap: 0.9rem; } .metric-card { - padding: 1rem 1.05rem; + padding: 0.95rem 1rem; border-radius: var(--radius-md); border: 1px solid rgba(17, 17, 17, 0.06); background: rgba(255, 255, 255, 0.96); @@ -120,16 +131,11 @@ .metric-value { margin-top: 0.3rem; color: var(--nv-ink); - font-size: 1.6rem; + font-size: 1.45rem; font-weight: 750; line-height: 1.1; letter-spacing: -0.03em; } - .context-strip { - display: flex; - flex-direction: column; - gap: 0.8rem; - } .context-item { padding: 0.85rem 0.95rem; border-radius: var(--radius-md); @@ -173,8 +179,9 @@ } .effort-chart-shell { position: relative; - flex: 1; + flex: none; min-height: 360px; + height: 360px; margin-top: 0.6rem; border-radius: var(--radius-md); border: 1px solid rgba(17, 
17, 17, 0.08); @@ -182,8 +189,8 @@ padding: 0.85rem; } .effort-chart-shell canvas { + display: block; width: 100% !important; - height: 100% !important; } .dse-box { margin-top: 1.15rem; @@ -280,10 +287,11 @@ .chart-shell { position: relative; min-height: 260px; + height: 320px; } .chart-shell canvas { + display: block; width: 100% !important; - height: 100% !important; } .chart-fallback { margin-top: 0.75rem; @@ -313,6 +321,17 @@ .overview-layout { grid-template-columns: 1fr; } + .impact-stat-grid, + .context-strip { + grid-template-columns: 1fr; + } + .chart-shell { + height: 280px; + } + .effort-chart-shell { + height: 280px; + min-height: 280px; + } } {% if dse_summaries %} @@ -497,55 +516,48 @@
-

{{ summary.display_name }}

+

DSE: overview

{{ summary.status_text }} - - {{ "{:,}".format(summary.executed_steps) }} explored out of {{ "{:,}".format(summary.total_space) }} combinations -
-

{{ summary.description }}

-
-
Search Space
-
{{ "{:,}".format(summary.total_space) }}
-
-
-
Explored Steps
-
{{ "{:,}".format(summary.executed_steps) }}
-
-
-
Saved Time
-
{{ format_duration(summary.saved_runtime_sec) }}
-
-
-
Saved GPU-Hours
-
{{ format_float(summary.saved_gpu_hours, 2) }}
+
+
+
Saved Time
+
{{ format_duration(summary.saved_runtime_sec) }}
+
+
+
Saved GPU-Hours
+
{{ format_float(summary.saved_gpu_hours, 2) }}
+
-
-
Estimated $ Saved
-
{{ format_money(summary.estimated_saved_cost_usd) }}
+ +
+
+
Estimated Savings
+
{{ format_money(summary.estimated_saved_cost_usd) }}
+
+
GPU Label{{ summary.gpu_arch_label or "unknown" }}
-
-
-
GPU Label{{ summary.gpu_arch_label or "unknown" }}
-
Average Step Runtime{{ format_duration(summary.avg_step_duration_sec) }}
-
Observed Runtime{{ format_duration(summary.total_runtime_sec) }}
+
+
Average Step Runtime{{ format_duration(summary.avg_step_duration_sec) }}
+
Observed Runtime{{ format_duration(summary.total_runtime_sec) }}
+
{% if summary.effort_chart_data %}
-

DSE Savings

+

DSE Savings: {{ "{:,}".format(summary.executed_steps) }} vs {{ "{:,}".format(summary.total_space) }}

{% if summary.effort_chart_data.use_log_scale %} Log scale keeps both bars legible across a large search-space gap. {% else %} - Side-by-side comparison of explored steps versus full search space. + Compared with the full search space. {% endif %}
@@ -565,7 +577,7 @@
-

Best Config

+

DSE: Best Config

{% if summary.best_config_rel_path %}Best Test TOML{% endif %} {% if summary.analysis_rel_path %}BO Analysis{% endif %} @@ -581,7 +593,7 @@
-

Exploration Space

+

DSE: Exploration Space

@@ -607,7 +619,7 @@
-

Reward Over Steps

+

DSE: Reward Over Steps

Observed reward is shown as the dark line; the NVIDIA-green dashed line tracks the best-so-far trajectory.

{% if summary.reward_chart_data %}
diff --git a/tests/test_reporter.py b/tests/test_reporter.py index fb6712596..0adb7064e 100644 --- a/tests/test_reporter.py +++ b/tests/test_reporter.py @@ -501,9 +501,7 @@ def test_dse_generate_scenario_report_renders_html( html = report_path.read_text() assert "cdn.jsdelivr.net/npm/chart.js" in html assert "Saved GPU-Hours" in html - assert "Search Space" in html - assert "Explored Steps" in html - assert "DSE Savings" in html + assert "DSE Savings: 3 vs 8" in html assert "Reward Over Steps" in html assert "Best Test TOML" in html assert "Show best config TOML" in html From 86741ef8341af145401f061134b3acbe694222d2 Mon Sep 17 00:00:00 2001 From: Ivan Podkidyshev Date: Tue, 24 Mar 2026 16:24:30 +0100 Subject: [PATCH 08/30] we are somewhere --- src/cloudai/report_generator/status_report.py | 8 +- src/cloudai/util/general-report.jinja2 | 168 ++++++++++-------- tests/test_reporter.py | 17 +- 3 files changed, 118 insertions(+), 75 deletions(-) diff --git a/src/cloudai/report_generator/status_report.py b/src/cloudai/report_generator/status_report.py index 2fcc2aa0a..0ad04cc68 100644 --- a/src/cloudai/report_generator/status_report.py +++ b/src/cloudai/report_generator/status_report.py @@ -19,6 +19,7 @@ import ast import contextlib import logging +import math from dataclasses import dataclass, field from pathlib import Path from typing import Any @@ -128,11 +129,16 @@ def _build_effort_chart_data(executed_steps: int, total_space: int) -> dict[str, return None explored_ratio = min(max(executed_steps / total_space, 0.0), 1.0) + show_break = total_space / max(executed_steps, 1) >= 12 + explored_height_pct = max(12.0, math.sqrt(explored_ratio) * 100) if explored_ratio > 0 else 12.0 + explored_height_pct = min(explored_height_pct, 100.0) return { "explored_ratio": explored_ratio, "labels": ["Explored", "Full Space"], "values": [executed_steps, total_space], - "use_log_scale": total_space / max(executed_steps, 1) >= 20, + "explored_height_pct": explored_height_pct, + 
"full_height_pct": 100.0, + "show_break": show_break, } diff --git a/src/cloudai/util/general-report.jinja2 b/src/cloudai/util/general-report.jinja2 index fbbf01be5..b6bbe52f1 100644 --- a/src/cloudai/util/general-report.jinja2 +++ b/src/cloudai/util/general-report.jinja2 @@ -178,7 +178,9 @@ font-size: 0.92rem; } .effort-chart-shell { - position: relative; + display: flex; + align-items: stretch; + justify-content: center; flex: none; min-height: 360px; height: 360px; @@ -186,11 +188,74 @@ border-radius: var(--radius-md); border: 1px solid rgba(17, 17, 17, 0.08); background: rgba(255, 255, 255, 0.88); - padding: 0.85rem; + padding: 1rem 1.1rem 0.9rem; } - .effort-chart-shell canvas { - display: block; - width: 100% !important; + .effort-compare { + display: grid; + grid-template-columns: repeat(2, minmax(120px, 1fr)); + gap: 1.25rem; + align-items: end; + width: 100%; + } + .effort-bar-card { + display: flex; + flex-direction: column; + align-items: center; + gap: 0.65rem; + min-width: 0; + } + .effort-value { + color: var(--nv-ink); + font-size: 1.1rem; + font-weight: 750; + letter-spacing: -0.02em; + } + .effort-stage { + position: relative; + display: flex; + align-items: end; + justify-content: center; + width: 100%; + height: 238px; + padding: 0 0.9rem; + border-bottom: 1px solid rgba(17, 17, 17, 0.10); + } + .effort-bar { + position: relative; + width: min(96px, 100%); + min-width: 54px; + border-radius: 18px 18px 6px 6px; + border: 1px solid rgba(17, 17, 17, 0.08); + box-shadow: inset 0 1px 0 rgba(255, 255, 255, 0.65); + } + .effort-bar--explored { + background: linear-gradient(180deg, var(--nv-green-bright), var(--nv-green)); + border-color: rgba(118, 185, 0, 0.38); + } + .effort-bar--full { + background: linear-gradient(180deg, rgba(57, 66, 78, 0.30), rgba(57, 66, 78, 0.18)); + border-color: rgba(57, 66, 78, 0.18); + } + .effort-bar--break::after { + content: ""; + position: absolute; + left: 8%; + right: 8%; + top: 42%; + height: 18px; + border-top: 4px 
solid rgba(255, 255, 255, 0.98); + border-bottom: 4px solid rgba(255, 255, 255, 0.98); + background: rgba(255, 255, 255, 0.98); + clip-path: polygon(0 28%, 22% 0, 50% 28%, 78% 0, 100% 28%, 100% 72%, 78% 100%, 50% 72%, 22% 100%, 0 72%); + box-shadow: 0 0 0 1px rgba(57, 66, 78, 0.04); + } + .effort-bar-label { + color: var(--nv-muted); + font-size: 0.8rem; + font-weight: 700; + letter-spacing: 0.08em; + text-transform: uppercase; + text-align: center; } .dse-box { margin-top: 1.15rem; @@ -332,6 +397,9 @@ height: 280px; min-height: 280px; } + .effort-stage { + height: 180px; + } } {% if dse_summaries %} @@ -448,63 +516,6 @@ canvas.parentElement.classList.add("is-enhanced"); }); - const effortCanvases = document.querySelectorAll(".js-effort-chart"); - effortCanvases.forEach((canvas) => { - const dataNode = document.getElementById(canvas.dataset.chartDataId); - if (!dataNode) { - return; - } - const chartData = JSON.parse(dataNode.textContent); - new Chart(canvas, { - type: "bar", - data: { - labels: chartData.labels, - datasets: [ - { - data: chartData.values, - backgroundColor: ["#76b900", "rgba(57, 66, 78, 0.24)"], - borderColor: ["#76b900", "rgba(57, 66, 78, 0.50)"], - borderWidth: 1, - borderRadius: 14, - maxBarThickness: 90 - } - ] - }, - options: { - responsive: true, - maintainAspectRatio: false, - plugins: { - legend: { - display: false - }, - tooltip: { - callbacks: { - label: function (context) { - return context.raw.toLocaleString(); - } - } - } - }, - scales: { - x: { - grid: { - display: false - } - }, - y: { - type: chartData.use_log_scale ? "logarithmic" : "linear", - beginAtZero: !chartData.use_log_scale, - ticks: { - callback: function (value) { - return Number(value).toLocaleString(); - } - } - } - } - } - }); - canvas.parentElement.classList.add("is-enhanced"); - }); }); {% endif %} @@ -554,22 +565,37 @@

DSE Savings: {{ "{:,}".format(summary.executed_steps) }} vs {{ "{:,}".format(summary.total_space) }}

- {% if summary.effort_chart_data.use_log_scale %} - Log scale keeps both bars legible across a large search-space gap. + {% if summary.effort_chart_data.show_break %} + Full-space bar uses a visual break so both bars stay readable. {% else %} Compared with the full search space. {% endif %}
- +
+
+
{{ "{:,}".format(summary.effort_chart_data["values"][0]) }}
+
+
+
+
{{ summary.effort_chart_data["labels"][0] }}
+
+
+
{{ "{:,}".format(summary.effort_chart_data["values"][1]) }}
+
+
+
+
{{ summary.effort_chart_data["labels"][1] }}
+
+
- -

Interactive chart unavailable. The totals above remain available as text.

{% endif %}
diff --git a/tests/test_reporter.py b/tests/test_reporter.py index 0adb7064e..cada48daf 100644 --- a/tests/test_reporter.py +++ b/tests/test_reporter.py @@ -26,7 +26,7 @@ from cloudai.cli.handlers import generate_reports from cloudai.core import CommandGenStrategy, Registry, Reporter, System from cloudai.models.scenario import ReportConfig, TestRunDetails -from cloudai.report_generator.status_report import DSEReportBuilder, ReportItem, load_system_metadata +from cloudai.report_generator.status_report import DSEReportBuilder, ReportItem, _build_effort_chart_data, load_system_metadata from cloudai.reporter import PerTestReporter, StatusReporter, TarballReporter from cloudai.systems.slurm.slurm_metadata import ( MetadataCUDA, @@ -462,7 +462,9 @@ def test_dse_summary_and_best_config_artifacts(slurm_system: SlurmSystem, slurm_ assert summary.effort_chart_data["explored_ratio"] == pytest.approx(3 / 8) assert summary.effort_chart_data["labels"] == ["Explored", "Full Space"] assert summary.effort_chart_data["values"] == [3, 8] - assert summary.effort_chart_data["use_log_scale"] is False + assert summary.effort_chart_data["explored_height_pct"] == pytest.approx((3 / 8) ** 0.5 * 100) + assert summary.effort_chart_data["full_height_pct"] == pytest.approx(100.0) + assert summary.effort_chart_data["show_break"] is False best_values = {row.name: row.best_value for row in summary.parameter_rows} assert best_values["nthreads"] == "2" @@ -509,7 +511,8 @@ def test_dse_generate_scenario_report_renders_html( assert "BO Analysis" in html assert "All Steps" in html assert "dse-report.toml" in html - assert "js-effort-chart" in html + assert "effort-bar--explored" in html + assert "effort-bar--full" in html assert "js-reward-chart" in html assert "chart-shell" in html assert 'class="value-pill value-pill--selected"' in html @@ -523,6 +526,14 @@ def test_dse_generate_scenario_report_renders_html( assert "1m 40s" in html +def test_effort_chart_uses_break_for_large_search_space() -> None: + 
chart_data = _build_effort_chart_data(30, 100_000) + + assert chart_data is not None + assert chart_data["show_break"] is True + assert chart_data["explored_height_pct"] == pytest.approx(12.0) + + def test_dse_console_summary_is_compact( slurm_system: SlurmSystem, slurm_metadata: SlurmSystemMetadata, caplog: pytest.LogCaptureFixture ) -> None: From 9d335ba980ee7d8490c4d5ff4bc43557f47ce124 Mon Sep 17 00:00:00 2001 From: Ivan Podkidyshev Date: Tue, 24 Mar 2026 17:35:52 +0100 Subject: [PATCH 09/30] looks nice --- src/cloudai/report_generator/status_report.py | 15 +- src/cloudai/util/general-report.jinja2 | 162 +++++++----------- tests/test_reporter.py | 22 +-- 3 files changed, 76 insertions(+), 123 deletions(-) diff --git a/src/cloudai/report_generator/status_report.py b/src/cloudai/report_generator/status_report.py index 0ad04cc68..d54e1535b 100644 --- a/src/cloudai/report_generator/status_report.py +++ b/src/cloudai/report_generator/status_report.py @@ -19,7 +19,6 @@ import ast import contextlib import logging -import math from dataclasses import dataclass, field from pathlib import Path from typing import Any @@ -129,16 +128,14 @@ def _build_effort_chart_data(executed_steps: int, total_space: int) -> dict[str, return None explored_ratio = min(max(executed_steps / total_space, 0.0), 1.0) - show_break = total_space / max(executed_steps, 1) >= 12 - explored_height_pct = max(12.0, math.sqrt(explored_ratio) * 100) if explored_ratio > 0 else 12.0 - explored_height_pct = min(explored_height_pct, 100.0) + reduction_factor = total_space / max(executed_steps, 1) return { "explored_ratio": explored_ratio, - "labels": ["Explored", "Full Space"], - "values": [executed_steps, total_space], - "explored_height_pct": explored_height_pct, - "full_height_pct": 100.0, - "show_break": show_break, + "explored_percent": explored_ratio * 100.0, + "avoided_percent": max((1.0 - explored_ratio) * 100.0, 0.0), + "reduction_factor": reduction_factor, + "executed_steps": executed_steps, + 
"total_space": total_space, } diff --git a/src/cloudai/util/general-report.jinja2 b/src/cloudai/util/general-report.jinja2 index b6bbe52f1..7eacebb80 100644 --- a/src/cloudai/util/general-report.jinja2 +++ b/src/cloudai/util/general-report.jinja2 @@ -82,7 +82,7 @@ display: grid; grid-template-columns: minmax(0, 1fr) minmax(320px, 0.92fr); gap: 1rem; - align-items: stretch; + align-items: start; } .overview-left { display: flex; @@ -116,6 +116,15 @@ border-color: rgba(118, 185, 0, 0.34); box-shadow: inset 0 4px 0 var(--nv-green); } + .metric-card--accent { + background: linear-gradient(180deg, rgba(118, 185, 0, 0.20), rgba(255, 255, 255, 0.99)); + border-color: rgba(118, 185, 0, 0.42); + box-shadow: inset 0 4px 0 var(--nv-green-bright); + } + .metric-card--accent .metric-value { + font-size: 1.8rem; + color: var(--nv-green-strong); + } .metric-card--neutral { background: rgba(255, 255, 255, 0.94); } @@ -142,6 +151,9 @@ border: 1px solid rgba(17, 17, 17, 0.06); background: rgba(255, 255, 255, 0.90); } + .context-item--runtime { + background: rgba(249, 251, 247, 0.96); + } .label { color: var(--nv-muted); display: block; @@ -156,7 +168,7 @@ .effort-block { display: flex; flex-direction: column; - height: 100%; + align-self: start; padding: 1rem 1.05rem; border-radius: var(--radius-md); border: 1px solid rgba(118, 185, 0, 0.22); @@ -179,83 +191,45 @@ } .effort-chart-shell { display: flex; - align-items: stretch; - justify-content: center; + align-items: center; flex: none; - min-height: 360px; - height: 360px; - margin-top: 0.6rem; + min-height: 0; + height: auto; + margin-top: 0.45rem; border-radius: var(--radius-md); border: 1px solid rgba(17, 17, 17, 0.08); background: rgba(255, 255, 255, 0.88); - padding: 1rem 1.1rem 0.9rem; - } - .effort-compare { - display: grid; - grid-template-columns: repeat(2, minmax(120px, 1fr)); - gap: 1.25rem; - align-items: end; - width: 100%; + padding: 0.95rem 1rem; } - .effort-bar-card { + .efficiency-panel { display: flex; 
flex-direction: column; - align-items: center; - gap: 0.65rem; - min-width: 0; - } - .effort-value { - color: var(--nv-ink); - font-size: 1.1rem; - font-weight: 750; - letter-spacing: -0.02em; - } - .effort-stage { - position: relative; - display: flex; - align-items: end; - justify-content: center; + gap: 0.8rem; width: 100%; - height: 238px; - padding: 0 0.9rem; - border-bottom: 1px solid rgba(17, 17, 17, 0.10); } - .effort-bar { - position: relative; - width: min(96px, 100%); - min-width: 54px; - border-radius: 18px 18px 6px 6px; - border: 1px solid rgba(17, 17, 17, 0.08); - box-shadow: inset 0 1px 0 rgba(255, 255, 255, 0.65); + .efficiency-steps { + color: var(--nv-muted); + font-size: 0.95rem; + font-weight: 650; } - .effort-bar--explored { - background: linear-gradient(180deg, var(--nv-green-bright), var(--nv-green)); - border-color: rgba(118, 185, 0, 0.38); + .efficiency-callout { + display: flex; + align-items: baseline; + gap: 0.55rem; + flex-wrap: wrap; + padding: 0; } - .effort-bar--full { - background: linear-gradient(180deg, rgba(57, 66, 78, 0.30), rgba(57, 66, 78, 0.18)); - border-color: rgba(57, 66, 78, 0.18); + .efficiency-ratio { + color: var(--nv-green-strong); + font-size: 2rem; + font-weight: 800; + letter-spacing: -0.04em; } - .effort-bar--break::after { - content: ""; - position: absolute; - left: 8%; - right: 8%; - top: 42%; - height: 18px; - border-top: 4px solid rgba(255, 255, 255, 0.98); - border-bottom: 4px solid rgba(255, 255, 255, 0.98); - background: rgba(255, 255, 255, 0.98); - clip-path: polygon(0 28%, 22% 0, 50% 28%, 78% 0, 100% 28%, 100% 72%, 78% 100%, 50% 72%, 22% 100%, 0 72%); - box-shadow: 0 0 0 1px rgba(57, 66, 78, 0.04); - } - .effort-bar-label { - color: var(--nv-muted); - font-size: 0.8rem; - font-weight: 700; - letter-spacing: 0.08em; - text-transform: uppercase; - text-align: center; + .efficiency-ratio-note { + color: var(--nv-ink); + font-size: 1rem; + font-weight: 650; + line-height: 1.2; } .dse-box { margin-top: 
1.15rem; @@ -394,11 +368,12 @@ height: 280px; } .effort-chart-shell { - height: 280px; - min-height: 280px; + height: auto; + min-height: 0; } - .effort-stage { - height: 180px; + .metric-card--accent .metric-value, + .efficiency-ratio { + font-size: 1.45rem; } } @@ -546,7 +521,7 @@
-
+
Estimated Savings
{{ format_money(summary.estimated_saved_cost_usd) }}
@@ -554,8 +529,8 @@
-
Average Step Runtime{{ format_duration(summary.avg_step_duration_sec) }}
-
Observed Runtime{{ format_duration(summary.total_runtime_sec) }}
+
Avg Step Runtime{{ format_duration(summary.avg_step_duration_sec) }}
+
Observed Runtime{{ format_duration(summary.total_runtime_sec) }}
@@ -563,38 +538,17 @@ {% if summary.effort_chart_data %}
-

DSE Savings: {{ "{:,}".format(summary.executed_steps) }} vs {{ "{:,}".format(summary.total_space) }}

- - {% if summary.effort_chart_data.show_break %} - Full-space bar uses a visual break so both bars stay readable. - {% else %} - Compared with the full search space. - {% endif %} - +

Exploration Efficiency

+ {{ format_percent(summary.effort_chart_data["explored_percent"]) }} explored
-
-
-
-
{{ "{:,}".format(summary.effort_chart_data["values"][0]) }}
-
-
-
-
{{ summary.effort_chart_data["labels"][0] }}
-
-
-
{{ "{:,}".format(summary.effort_chart_data["values"][1]) }}
-
-
-
-
{{ summary.effort_chart_data["labels"][1] }}
-
+
+
+
+
~{{ format_float(summary.effort_chart_data["reduction_factor"], 1) }}x
+
reduction in search space
+
{{ "{:,}".format(summary.effort_chart_data["executed_steps"]) }} / {{ "{:,}".format(summary.effort_chart_data["total_space"]) }} steps
+
{% endif %} diff --git a/tests/test_reporter.py b/tests/test_reporter.py index cada48daf..25e067aa3 100644 --- a/tests/test_reporter.py +++ b/tests/test_reporter.py @@ -460,11 +460,11 @@ def test_dse_summary_and_best_config_artifacts(slurm_system: SlurmSystem, slurm_ assert summary.reward_chart_data["observations"] == ["2.5", "1.2", "1.8"] assert summary.effort_chart_data is not None assert summary.effort_chart_data["explored_ratio"] == pytest.approx(3 / 8) - assert summary.effort_chart_data["labels"] == ["Explored", "Full Space"] - assert summary.effort_chart_data["values"] == [3, 8] - assert summary.effort_chart_data["explored_height_pct"] == pytest.approx((3 / 8) ** 0.5 * 100) - assert summary.effort_chart_data["full_height_pct"] == pytest.approx(100.0) - assert summary.effort_chart_data["show_break"] is False + assert summary.effort_chart_data["explored_percent"] == pytest.approx(37.5) + assert summary.effort_chart_data["avoided_percent"] == pytest.approx(62.5) + assert summary.effort_chart_data["reduction_factor"] == pytest.approx(8 / 3) + assert summary.effort_chart_data["executed_steps"] == 3 + assert summary.effort_chart_data["total_space"] == 8 best_values = {row.name: row.best_value for row in summary.parameter_rows} assert best_values["nthreads"] == "2" @@ -503,7 +503,9 @@ def test_dse_generate_scenario_report_renders_html( html = report_path.read_text() assert "cdn.jsdelivr.net/npm/chart.js" in html assert "Saved GPU-Hours" in html - assert "DSE Savings: 3 vs 8" in html + assert "Exploration Efficiency" in html + assert "3 / 8 steps" in html + assert "reduction in search space" in html assert "Reward Over Steps" in html assert "Best Test TOML" in html assert "Show best config TOML" in html @@ -511,8 +513,7 @@ def test_dse_generate_scenario_report_renders_html( assert "BO Analysis" in html assert "All Steps" in html assert "dse-report.toml" in html - assert "effort-bar--explored" in html - assert "effort-bar--full" in html + assert "efficiency-ratio" in 
html assert "js-reward-chart" in html assert "chart-shell" in html assert 'class="value-pill value-pill--selected"' in html @@ -530,8 +531,9 @@ def test_effort_chart_uses_break_for_large_search_space() -> None: chart_data = _build_effort_chart_data(30, 100_000) assert chart_data is not None - assert chart_data["show_break"] is True - assert chart_data["explored_height_pct"] == pytest.approx(12.0) + assert chart_data["explored_percent"] == pytest.approx(0.03) + assert chart_data["avoided_percent"] == pytest.approx(99.97) + assert chart_data["reduction_factor"] == pytest.approx(100_000 / 30) def test_dse_console_summary_is_compact( From dbd7ebec7d8cd0a19ba063708eb9be23f0c382f2 Mon Sep 17 00:00:00 2001 From: Ivan Podkidyshev Date: Tue, 24 Mar 2026 17:49:10 +0100 Subject: [PATCH 10/30] top block looks solid --- src/cloudai/util/general-report.jinja2 | 67 +++++++++++++++----------- tests/test_reporter.py | 4 +- 2 files changed, 40 insertions(+), 31 deletions(-) diff --git a/src/cloudai/util/general-report.jinja2 b/src/cloudai/util/general-report.jinja2 index 7eacebb80..bc3c3ff03 100644 --- a/src/cloudai/util/general-report.jinja2 +++ b/src/cloudai/util/general-report.jinja2 @@ -190,16 +190,7 @@ font-size: 0.92rem; } .effort-chart-shell { - display: flex; - align-items: center; - flex: none; - min-height: 0; - height: auto; margin-top: 0.45rem; - border-radius: var(--radius-md); - border: 1px solid rgba(17, 17, 17, 0.08); - background: rgba(255, 255, 255, 0.88); - padding: 0.95rem 1rem; } .efficiency-panel { display: flex; @@ -273,14 +264,34 @@ overflow: hidden; border: 1px solid rgba(118, 185, 0, 0.22); background: linear-gradient(180deg, #ffffff, #f8fbf4); + margin-top: 1.15rem; } .best-config-preview summary { + display: flex; + align-items: center; + justify-content: space-between; + gap: 0.75rem; + list-style: none; cursor: pointer; padding: 0.9rem 1rem; color: var(--nv-ink); font-weight: 650; background: linear-gradient(180deg, rgba(118, 185, 0, 0.18), rgba(255, 
255, 255, 0.72)); } + .best-config-preview summary::-webkit-details-marker { + display: none; + } + .best-config-summary-title { + display: inline-flex; + align-items: center; + gap: 0.55rem; + } + .best-config-summary-actions { + display: inline-flex; + align-items: center; + gap: 0.6rem; + margin-left: auto; + } .best-config-preview pre { max-height: 360px; overflow: auto; @@ -375,6 +386,13 @@ .efficiency-ratio { font-size: 1.45rem; } + .best-config-preview summary { + align-items: flex-start; + flex-direction: column; + } + .best-config-summary-actions { + margin-left: 0; + } } {% if dse_summaries %} @@ -539,34 +557,27 @@

Exploration Efficiency

- {{ format_percent(summary.effort_chart_data["explored_percent"]) }} explored
-
-
-
-
~{{ format_float(summary.effort_chart_data["reduction_factor"], 1) }}x
-
reduction in search space
-
-
{{ "{:,}".format(summary.effort_chart_data["executed_steps"]) }} / {{ "{:,}".format(summary.effort_chart_data["total_space"]) }} steps
+
+
+
~{{ format_float(summary.effort_chart_data["reduction_factor"], 1) }}x
+
reduction in search space
+
{{ "{:,}".format(summary.effort_chart_data["executed_steps"]) }} / {{ "{:,}".format(summary.effort_chart_data["total_space"]) }} steps
{% endif %}
-
-
-
-

DSE: Best Config

-
- {% if summary.best_config_rel_path %}Best Test TOML{% endif %} - {% if summary.analysis_rel_path %}BO Analysis{% endif %} - {% if summary.best_config_toml %}{% endif %} -
-
{% if summary.best_config_toml %}
- Show best config TOML + + Best Config TOML + + {% if summary.analysis_rel_path %}BO Analysis{% endif %} + + +
{{ summary.best_config_toml }}
{% endif %} diff --git a/tests/test_reporter.py b/tests/test_reporter.py index 25e067aa3..e3f3868e6 100644 --- a/tests/test_reporter.py +++ b/tests/test_reporter.py @@ -507,12 +507,10 @@ def test_dse_generate_scenario_report_renders_html( assert "3 / 8 steps" in html assert "reduction in search space" in html assert "Reward Over Steps" in html - assert "Best Test TOML" in html - assert "Show best config TOML" in html + assert "Best Config TOML" in html assert "Copy TOML" in html assert "BO Analysis" in html assert "All Steps" in html - assert "dse-report.toml" in html assert "efficiency-ratio" in html assert "js-reward-chart" in html assert "chart-shell" in html From e93eb005487a889dc30e2d2b3bec773ca76df7ee Mon Sep 17 00:00:00 2001 From: Ivan Podkidyshev Date: Tue, 24 Mar 2026 18:10:55 +0100 Subject: [PATCH 11/30] graph looks good --- src/cloudai/report_generator/status_report.py | 14 +------ src/cloudai/util/general-report.jinja2 | 41 +++++++++---------- tests/test_reporter.py | 2 +- 3 files changed, 22 insertions(+), 35 deletions(-) diff --git a/src/cloudai/report_generator/status_report.py b/src/cloudai/report_generator/status_report.py index d54e1535b..032d1e1de 100644 --- a/src/cloudai/report_generator/status_report.py +++ b/src/cloudai/report_generator/status_report.py @@ -100,26 +100,16 @@ def _normalize_gpu_family(gpu_name: str | None) -> str | None: return None -def _build_running_best(points: list[tuple[int, float]]) -> list[tuple[int, float]]: - running_best: list[tuple[int, float]] = [] - best = None - for step, reward in points: - best = reward if best is None else max(best, reward) - running_best.append((step, best)) - return running_best - - def _build_reward_chart_data(steps: list["DSEStepData"]) -> dict[str, list[Any]] | None: if not steps: return None - reward_points = [(step.step, step.reward) for step in steps] - running_best = _build_running_best(reward_points) + best_index = max(range(len(steps)), key=lambda idx: steps[idx].reward) return { 
"labels": [step.step for step in steps], "rewards": [step.reward for step in steps], - "running_best": [reward for _, reward in running_best], "observations": [step.observation_display for step in steps], + "best_index": best_index, } diff --git a/src/cloudai/util/general-report.jinja2 b/src/cloudai/util/general-report.jinja2 index bc3c3ff03..ea1d6c1c8 100644 --- a/src/cloudai/util/general-report.jinja2 +++ b/src/cloudai/util/general-report.jinja2 @@ -435,6 +435,15 @@ return; } const chartData = JSON.parse(dataNode.textContent); + const pointBackgroundColors = chartData.rewards.map((_, idx) => + idx === chartData.best_index ? "#76b900" : "#39424e" + ); + const pointBorderColors = chartData.rewards.map((_, idx) => + idx === chartData.best_index ? "#76b900" : "#39424e" + ); + const pointRadii = chartData.rewards.map((_, idx) => + idx === chartData.best_index ? 5 : 3 + ); new Chart(canvas, { type: "line", data: { @@ -445,25 +454,13 @@ data: chartData.rewards, borderColor: "#39424e", backgroundColor: "rgba(57, 66, 78, 0.14)", - pointBackgroundColor: "#39424e", - pointRadius: 3, - pointHoverRadius: 4, + pointBackgroundColor: pointBackgroundColors, + pointBorderColor: pointBorderColors, + pointRadius: pointRadii, + pointHoverRadius: 6, borderWidth: 2, tension: 0.25, fill: true - }, - { - label: "Best so far", - data: chartData.running_best, - borderColor: "#76b900", - backgroundColor: "rgba(118, 185, 0, 0.18)", - pointBackgroundColor: "#76b900", - pointRadius: 0, - pointHoverRadius: 3, - borderWidth: 3, - tension: 0.18, - borderDash: [8, 6], - fill: false } ] }, @@ -476,16 +473,17 @@ }, plugins: { legend: { - labels: { - usePointStyle: true, - boxWidth: 8 - } + display: false }, tooltip: { callbacks: { afterBody: function (context) { const idx = context[0].dataIndex; - return "Observation: " + (chartData.observations[idx] || "n/a"); + const lines = ["Observation: " + (chartData.observations[idx] || "n/a")]; + if (idx === chartData.best_index) { + lines.push("Best step"); 
+ } + return lines; } } } @@ -611,7 +609,6 @@

DSE: Reward Over Steps

-

Observed reward is shown as the dark line; the NVIDIA-green dashed line tracks the best-so-far trajectory.

{% if summary.reward_chart_data %}
Date: Tue, 24 Mar 2026 18:15:29 +0100 Subject: [PATCH 12/30] better colors --- src/cloudai/util/general-report.jinja2 | 44 +++++++++++++------------- 1 file changed, 22 insertions(+), 22 deletions(-) diff --git a/src/cloudai/util/general-report.jinja2 b/src/cloudai/util/general-report.jinja2 index ea1d6c1c8..4162875c7 100644 --- a/src/cloudai/util/general-report.jinja2 +++ b/src/cloudai/util/general-report.jinja2 @@ -12,7 +12,7 @@ .dse-box { border: 1px solid var(--nv-border); border-radius: var(--radius-lg); - background: linear-gradient(180deg, rgba(255, 255, 255, 0.98), rgba(247, 252, 233, 0.98)); + background: rgba(255, 255, 255, 0.98); box-shadow: var(--nv-shadow); } .overview-board { @@ -63,8 +63,8 @@ letter-spacing: 0.06em; } .status-pill--passed { - background: rgba(118, 185, 0, 0.14); - border-color: rgba(118, 185, 0, 0.32); + background: rgba(118, 185, 0, 0.10); + border-color: rgba(118, 185, 0, 0.22); } .status-pill--partial { background: rgba(250, 204, 21, 0.16); @@ -108,18 +108,18 @@ .metric-card { padding: 0.95rem 1rem; border-radius: var(--radius-md); - border: 1px solid rgba(17, 17, 17, 0.06); - background: rgba(255, 255, 255, 0.96); + border: 1px solid rgba(17, 17, 17, 0.07); + background: #ffffff; } .metric-card--hero { - background: linear-gradient(180deg, rgba(118, 185, 0, 0.16), rgba(255, 255, 255, 0.98)); - border-color: rgba(118, 185, 0, 0.34); - box-shadow: inset 0 4px 0 var(--nv-green); + background: linear-gradient(180deg, rgba(248, 251, 244, 1), rgba(255, 255, 255, 1)); + border-color: rgba(118, 185, 0, 0.24); + box-shadow: inset 0 3px 0 rgba(118, 185, 0, 0.88); } .metric-card--accent { - background: linear-gradient(180deg, rgba(118, 185, 0, 0.20), rgba(255, 255, 255, 0.99)); - border-color: rgba(118, 185, 0, 0.42); - box-shadow: inset 0 4px 0 var(--nv-green-bright); + background: linear-gradient(180deg, rgba(245, 250, 238, 1), rgba(255, 255, 255, 1)); + border-color: rgba(118, 185, 0, 0.28); + box-shadow: inset 0 3px 0 rgba(118, 185, 0, 
0.92); } .metric-card--accent .metric-value { font-size: 1.8rem; @@ -148,11 +148,11 @@ .context-item { padding: 0.85rem 0.95rem; border-radius: var(--radius-md); - border: 1px solid rgba(17, 17, 17, 0.06); - background: rgba(255, 255, 255, 0.90); + border: 1px solid rgba(17, 17, 17, 0.07); + background: #ffffff; } .context-item--runtime { - background: rgba(249, 251, 247, 0.96); + background: #fbfcfa; } .label { color: var(--nv-muted); @@ -171,8 +171,8 @@ align-self: start; padding: 1rem 1.05rem; border-radius: var(--radius-md); - border: 1px solid rgba(118, 185, 0, 0.22); - background: linear-gradient(180deg, rgba(118, 185, 0, 0.08), rgba(255, 255, 255, 0.92)); + border: 1px solid rgba(17, 17, 17, 0.07); + background: linear-gradient(180deg, #fcfdfb, #ffffff); } .effort-head { display: flex; @@ -262,8 +262,8 @@ .best-config-preview { border-radius: var(--radius-sm); overflow: hidden; - border: 1px solid rgba(118, 185, 0, 0.22); - background: linear-gradient(180deg, #ffffff, #f8fbf4); + border: 1px solid rgba(17, 17, 17, 0.08); + background: #ffffff; margin-top: 1.15rem; } .best-config-preview summary { @@ -276,7 +276,7 @@ padding: 0.9rem 1rem; color: var(--nv-ink); font-weight: 650; - background: linear-gradient(180deg, rgba(118, 185, 0, 0.18), rgba(255, 255, 255, 0.72)); + background: #fbfcfa; } .best-config-preview summary::-webkit-details-marker { display: none; @@ -298,8 +298,8 @@ margin: 0; padding: 1rem; color: #1f2933; - background: #fbfdf8; - border-top: 1px solid rgba(118, 185, 0, 0.16); + background: #ffffff; + border-top: 1px solid rgba(17, 17, 17, 0.06); } .space-table th, .steps-table th { @@ -332,7 +332,7 @@ margin-top: 0; } .viz-card { - background: linear-gradient(180deg, rgba(255, 255, 255, 0.98), rgba(247, 250, 243, 0.98)); + background: rgba(255, 255, 255, 0.98); } .chart-shell { position: relative; From 3dd3b4921ff548401ce09f199301156052acf71c Mon Sep 17 00:00:00 2001 From: Ivan Podkidyshev Date: Tue, 24 Mar 2026 18:33:25 +0100 Subject: [PATCH 
13/30] support for multiple dse cases --- src/cloudai/report_generator/status_report.py | 4 + src/cloudai/reporter.py | 34 +++++++- src/cloudai/util/general-report.jinja2 | 87 +++++++++++++++++-- tests/test_reporter.py | 33 ++++++- 4 files changed, 149 insertions(+), 9 deletions(-) diff --git a/src/cloudai/report_generator/status_report.py b/src/cloudai/report_generator/status_report.py index 032d1e1de..3ce0c4943 100644 --- a/src/cloudai/report_generator/status_report.py +++ b/src/cloudai/report_generator/status_report.py @@ -175,12 +175,14 @@ def load_system_metadata(run_dir: Path, results_root: Path) -> _ReportSystemMeta class ReportItem: """Basic report item for general systems.""" + group_name: str name: str description: str logs_path: str | None nodes: _ReportSystemMetadata | None status_text: str status_class: str + is_dse: bool @classmethod def from_test_runs(cls, test_runs: list[TestRun], results_root: Path) -> list["ReportItem"]: @@ -190,12 +192,14 @@ def from_test_runs(cls, test_runs: list[TestRun], results_root: Path) -> list["R status_text = "PASSED" if tr_status.is_successful else "FAILED" report_items.append( ReportItem( + group_name=tr.name, name=case_name(tr), description=tr.test.description, logs_path=f"./{tr.output_path.relative_to(results_root)}" if tr.output_path.exists() else None, nodes=load_system_metadata(tr.output_path, results_root), status_text=status_text, status_class=status_text.lower(), + is_dse=tr.is_dse_job, ) ) return report_items diff --git a/src/cloudai/reporter.py b/src/cloudai/reporter.py index 6f6111bfe..cd76c6a95 100644 --- a/src/cloudai/reporter.py +++ b/src/cloudai/reporter.py @@ -17,6 +17,7 @@ import contextlib import logging import tarfile +from collections import defaultdict from pathlib import Path import jinja2 @@ -79,10 +80,16 @@ def to_html(self, dse_summaries: list[DSESummary]) -> None: template = jinja_env.get_template("general-report.jinja2") report_items = ReportItem.from_test_runs(self.trs, self.results_root) + 
dse_cases = self._build_dse_cases(dse_summaries, report_items) + dse_case_names = {case["name"] for case in dse_cases} + dse_report_items = [item for item in report_items if item.group_name in dse_case_names] + standard_report_items = [item for item in report_items if item.group_name not in dse_case_names] report = template.render( name=self.test_scenario.name, - report_items=report_items, + report_items=standard_report_items, dse_summaries=dse_summaries, + dse_cases=dse_cases, + dse_report_items=dse_report_items, format_duration=format_duration, format_float=format_float, format_percent=format_percent, @@ -94,6 +101,31 @@ def to_html(self, dse_summaries: list[DSESummary]) -> None: logging.info(f"Generated scenario report at {report_path}") + def _build_dse_cases(self, dse_summaries: list[DSESummary], report_items: list[ReportItem]) -> list[dict[str, object]]: + summaries_by_name: dict[str, list[DSESummary]] = defaultdict(list) + for summary in dse_summaries: + summaries_by_name[summary.name].append(summary) + + items_by_name: dict[str, list[ReportItem]] = defaultdict(list) + for item in report_items: + if item.is_dse: + items_by_name[item.group_name].append(item) + + dse_case_names = [] + for tr in self.test_scenario.test_runs: + if tr.is_dse_job and tr.name not in dse_case_names: + dse_case_names.append(tr.name) + + return [ + { + "name": case_name, + "summaries": summaries_by_name.get(case_name, []), + "report_items": items_by_name.get(case_name, []), + } + for case_name in dse_case_names + if summaries_by_name.get(case_name) + ] + def to_console(self, dse_summaries: list[DSESummary]): if not self.trs: logging.debug("No test runs found, skipping summary.") diff --git a/src/cloudai/util/general-report.jinja2 b/src/cloudai/util/general-report.jinja2 index 4162875c7..afbfa52bc 100644 --- a/src/cloudai/util/general-report.jinja2 +++ b/src/cloudai/util/general-report.jinja2 @@ -8,6 +8,41 @@ .dse-summary { margin: 2rem 0 3rem; } + .dse-tabs { + margin: 1.25rem 0 2rem; 
+ } + .dse-tab-list { + display: flex; + flex-wrap: wrap; + gap: 0.7rem; + margin-bottom: 1.15rem; + } + .dse-tab-button { + display: inline-flex; + align-items: center; + gap: 0.45rem; + padding: 0.58rem 0.95rem; + border-radius: 999px; + border: 1px solid rgba(17, 17, 17, 0.08); + background: #ffffff; + color: var(--nv-ink); + font-weight: 650; + cursor: pointer; + } + .dse-tab-button:hover { + background: #fbfcfa; + } + .dse-tab-button.is-active { + border-color: rgba(118, 185, 0, 0.28); + background: rgba(118, 185, 0, 0.10); + color: var(--nv-green-strong); + } + .dse-tab-panel { + display: none; + } + .dse-tab-panel.is-active { + display: block; + } .overview-board, .dse-box { border: 1px solid var(--nv-border); @@ -420,6 +455,18 @@ }); }); + document.querySelectorAll(".js-dse-tabs").forEach((root) => { + const buttons = root.querySelectorAll(".js-dse-tab-button"); + const panels = root.querySelectorAll(".js-dse-tab-panel"); + buttons.forEach((button) => { + button.addEventListener("click", function () { + const target = button.dataset.tabTarget; + buttons.forEach((candidate) => candidate.classList.toggle("is-active", candidate === button)); + panels.forEach((panel) => panel.classList.toggle("is-active", panel.dataset.tabPanel === target)); + }); + }); + }); + if (typeof Chart === "undefined") { return; } @@ -513,9 +560,29 @@ {% endblock %} {% block content %} -{% if dse_summaries %} - {% for summary in dse_summaries %} -
+{% if dse_cases %} +
+
+ {% for case in dse_cases %} + + {% endfor %} +
+ + {% for case in dse_cases %} +
+ {% for summary in case.summaries %} +

DSE: overview

@@ -625,8 +692,14 @@ {% endif %}
- {% endfor %} + {% endfor %} +
+ {% endfor %} +
+{% endif %} + +{% if dse_report_items %}

All Steps

@@ -639,7 +712,7 @@ - {% for item in report_items %} + {% for item in dse_report_items %}
{{ item.name }} @@ -668,7 +741,9 @@
-{% else %} +{% endif %} + +{% if report_items %} diff --git a/tests/test_reporter.py b/tests/test_reporter.py index a9fb6e5a0..8a33a7374 100644 --- a/tests/test_reporter.py +++ b/tests/test_reporter.py @@ -76,10 +76,12 @@ def _create_dse_report_fixture( slurm_system: SlurmSystem, slurm_metadata: SlurmSystemMetadata, gpu_name: str = "NVIDIA H100 80GB HBM3", + name: str = "dse-report", + description: str = "DSE summary sample", ) -> TestRun: test_definition = NCCLTestDefinition( name="dse-nccl", - description="DSE summary sample", + description=description, test_template_name="NcclTest", cmd_args=NCCLCmdArgs( docker_image_url="fake://url/nccl", @@ -91,7 +93,7 @@ def _create_dse_report_fixture( agent_steps=3, ) tr = TestRun( - name="dse-report", + name=name, test=test_definition, num_nodes=2, nodes=["node1", "node2"], @@ -502,6 +504,7 @@ def test_dse_generate_scenario_report_renders_html( report_path = slurm_system.output_path / "dse_scenario.html" html = report_path.read_text() assert "cdn.jsdelivr.net/npm/chart.js" in html + assert "js-dse-tab-button" in html assert "Saved GPU-Hours" in html assert "Exploration Efficiency" in html assert "3 / 8 steps" in html @@ -525,6 +528,32 @@ def test_dse_generate_scenario_report_renders_html( assert "1m 40s" in html +def test_mixed_scenario_renders_dse_tabs_and_standard_table( + slurm_system: SlurmSystem, slurm_metadata: SlurmSystemMetadata, benchmark_tr: TestRun +) -> None: + dse_tr_a = _create_dse_report_fixture(slurm_system, slurm_metadata, name="dse-report-a", description="DSE A") + dse_tr_b = _create_dse_report_fixture(slurm_system, slurm_metadata, name="dse-report-b", description="DSE B") + + benchmark_dir = slurm_system.output_path / benchmark_tr.name / "0" + benchmark_dir.mkdir(parents=True, exist_ok=True) + + reporter = StatusReporter( + slurm_system, + TestScenario(name="mixed_scenario", test_runs=[dse_tr_a, benchmark_tr, dse_tr_b]), + slurm_system.output_path, + ReportConfig(), + ) + + reporter.generate() + + html = 
(slurm_system.output_path / "mixed_scenario.html").read_text() + assert "dse-report-a" in html + assert "dse-report-b" in html + assert html.count('data-tab-target="dse-case-') == 2 + assert "All Steps" in html + assert benchmark_tr.name in html + + def test_effort_chart_uses_break_for_large_search_space() -> None: chart_data = _build_effort_chart_data(30, 100_000) From 2a9ec7ca16f6ae6dd78c11d3fe948bade6c414d4 Mon Sep 17 00:00:00 2001 From: Ivan Podkidyshev Date: Tue, 24 Mar 2026 18:40:59 +0100 Subject: [PATCH 14/30] visuals completed --- src/cloudai/util/general-report.jinja2 | 88 ++++++++++++++++++-------- tests/test_reporter.py | 3 + 2 files changed, 63 insertions(+), 28 deletions(-) diff --git a/src/cloudai/util/general-report.jinja2 b/src/cloudai/util/general-report.jinja2 index afbfa52bc..60ed51c8c 100644 --- a/src/cloudai/util/general-report.jinja2 +++ b/src/cloudai/util/general-report.jinja2 @@ -6,16 +6,41 @@ {% endif %} -{% if dse_summaries %} +{% if dse_cases %} -

Interactive chart unavailable. Step count and summary metrics remain available above.

- - {% else %} -

No reward data available.

- {% endif %} - + +

Interactive chart unavailable. Step count and summary metrics remain available above.

+ + {% else %} +

No reward data available.

+ {% endif %} + {% endfor %} diff --git a/tests/test_reporter.py b/tests/test_reporter.py index 57122637c..a38718a72 100644 --- a/tests/test_reporter.py +++ b/tests/test_reporter.py @@ -553,6 +553,8 @@ def test_mixed_scenario_renders_dse_tabs_and_standard_table( assert "dse-report-a" in html assert "dse-report-b" in html assert html.count('data-tab-target="dse-case-') == 2 + assert 'id="reward-chart-data-0-0"' in html + assert 'id="reward-chart-data-1-0"' in html assert "All Steps" in html assert benchmark_tr.name in html From 9109bb15f4e5d20f576f7f5e38cabc2f63a3b2a4 Mon Sep 17 00:00:00 2001 From: Ivan Podkidyshev Date: Tue, 24 Mar 2026 22:08:20 +0100 Subject: [PATCH 16/30] huge refactor --- src/cloudai/registration.py | 3 +- src/cloudai/report_generator/dse_report.py | 328 ++++++++ src/cloudai/report_generator/status_report.py | 459 ---------- src/cloudai/report_generator/util.py | 29 + src/cloudai/reporter.py | 185 ++-- src/cloudai/util/dse-report.jinja2 | 415 +++++++++ src/cloudai/util/general-report.jinja2 | 788 +----------------- tests/test_reporter.py | 3 +- 8 files changed, 901 insertions(+), 1309 deletions(-) create mode 100644 src/cloudai/report_generator/dse_report.py delete mode 100644 src/cloudai/report_generator/status_report.py create mode 100644 src/cloudai/util/dse-report.jinja2 diff --git a/src/cloudai/registration.py b/src/cloudai/registration.py index 866baa945..c49fd0b56 100644 --- a/src/cloudai/registration.py +++ b/src/cloudai/registration.py @@ -46,7 +46,7 @@ def register_all(): ) from cloudai.core import Registry from cloudai.models.scenario import ReportConfig - from cloudai.reporter import PerTestReporter, StatusReporter, TarballReporter + from cloudai.reporter import DSEReporter, PerTestReporter, StatusReporter, TarballReporter # Import systems from cloudai.systems.kubernetes import KubernetesInstaller, KubernetesRunner, KubernetesSystem @@ -295,6 +295,7 @@ def register_all(): Registry().add_scenario_report("per_test", PerTestReporter, 
ReportConfig(enable=True)) Registry().add_scenario_report("status", StatusReporter, ReportConfig(enable=True)) + Registry().add_scenario_report("dse", DSEReporter, ReportConfig(enable=True)) Registry().add_scenario_report("tarball", TarballReporter, ReportConfig(enable=True)) Registry().add_scenario_report( "nixl_bench_summary", diff --git a/src/cloudai/report_generator/dse_report.py b/src/cloudai/report_generator/dse_report.py new file mode 100644 index 000000000..22f981a14 --- /dev/null +++ b/src/cloudai/report_generator/dse_report.py @@ -0,0 +1,328 @@ +# SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES +# Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from __future__ import annotations + +import ast +import contextlib +import logging +from dataclasses import dataclass +from pathlib import Path +from typing import Any + +import toml + +from cloudai.core import CommandGenStrategy, System, TestRun +from cloudai.models.scenario import TestRunDetails +from cloudai.systems.slurm import SlurmJobMetadata +from cloudai.util.lazy_imports import lazy +from .util import load_system_metadata + +GPU_HOURLY_COST_USD = { + "H100": 4.50, + "B200": 8.00, + "GB200": 10.00, + "GB300": 12.00, +} + + +@dataclass(frozen=True) +class DSEParameterValue: + text: str + is_best: bool + + +@dataclass(frozen=True) +class DSEParameterRow: + name: str + values: list[DSEParameterValue] + + +@dataclass(frozen=True) +class DSEStepRow: + name: str + status_text: str + status_class: str + logs_path: str | None + nodes_text: str + + +@dataclass(frozen=True) +class DSERunSummary: + name: str + saved_time: str + saved_gpu_hours: str + estimated_savings: str + gpu_label: str + avg_step_runtime: str + observed_runtime: str + efficiency_ratio: str + efficiency_steps: str + best_config_toml: str + parameter_rows: list[DSEParameterRow] + reward_chart_data: dict[str, Any] | None + + +@dataclass(frozen=True) +class _StepComputation: + step: int + reward: float + observation_text: str + action: dict[str, Any] + elapsed_time_sec: int | None + is_successful: bool + + +def format_duration(seconds: float | None) -> str: + if seconds is None: + return "n/a" + + seconds = max(float(seconds), 0.0) + if seconds < 60: + return f"{seconds:.1f}s" + + minutes, sec = divmod(round(seconds), 60) + hours, minutes = divmod(minutes, 60) + parts = [] + if hours: + parts.append(f"{hours}h") + if minutes: + parts.append(f"{minutes}m") + if sec or not parts: + parts.append(f"{sec}s") + return " ".join(parts) + + +def format_float(value: float | None, precision: int = 2) -> str: + if value is None: + return "n/a" + return f"{value:.{precision}f}" + + +def format_money(value: 
float | None) -> str: + if value is None: + return "n/a" + return f"${value:,.2f}" + + +def _safe_literal_eval(raw: Any, default: Any) -> Any: + if isinstance(raw, str): + with contextlib.suppress(SyntaxError, ValueError): + return ast.literal_eval(raw) + return default + + +def _format_scalar(value: Any) -> str: + if isinstance(value, float): + return f"{value:.4f}".rstrip("0").rstrip(".") + return str(value) + + +def _normalize_gpu_family(gpu_name: str | None) -> str | None: + if not gpu_name: + return None + upper = gpu_name.upper() + for family in GPU_HOURLY_COST_USD: + if family in upper: + return family + return None + + +def _step_elapsed_time(step_dir: Path) -> int | None: + slurm_job_path = step_dir / "slurm-job.toml" + if not slurm_job_path.exists(): + return None + + with slurm_job_path.open() as f: + metadata = SlurmJobMetadata.model_validate(toml.load(f)) + return metadata.elapsed_time_sec + + +def _build_reward_chart_data(steps: list[_StepComputation]) -> dict[str, Any] | None: + if not steps: + return None + + best_index = max(range(len(steps)), key=lambda idx: steps[idx].reward) + return { + "labels": [step.step for step in steps], + "rewards": [step.reward for step in steps], + "observations": [step.observation_text for step in steps], + "best_index": best_index, + } + + +def _build_parameter_rows(param_space: dict[str, list[Any]], best_action: dict[str, Any]) -> list[DSEParameterRow]: + rows: list[DSEParameterRow] = [] + for name, values in param_space.items(): + best_value = _format_scalar(best_action.get(name, "n/a")) + rows.append( + DSEParameterRow( + name=name, + values=[DSEParameterValue(text=_format_scalar(value), is_best=_format_scalar(value) == best_value) for value in values], + ) + ) + return rows + + +def _build_iteration_summary( + system: System, + results_root: Path, + test_case: TestRun, + iteration: int, + iteration_dir: Path, + test_runs: list[TestRun], +) -> DSERunSummary | None: + trajectory_file = iteration_dir / 
"trajectory.csv" + if not trajectory_file.is_file(): + logging.warning(f"No trajectory file found for {test_case.name} at {trajectory_file}") + return None + + df = lazy.pd.read_csv(trajectory_file) + if df.empty: + logging.warning(f"No trajectory data found for {test_case.name} at {trajectory_file}") + return None + + runs_by_step = {test_run.step: test_run for test_run in test_runs} + steps: list[_StepComputation] = [] + for row in df.to_dict(orient="records"): + step_no = int(row["step"]) + action = _safe_literal_eval(row.get("action"), {}) + if not isinstance(action, dict): + action = {} + observation = _safe_literal_eval(row.get("observation"), []) + if not isinstance(observation, list): + observation = [observation] + step_run = runs_by_step.get(step_no) + steps.append( + _StepComputation( + step=step_no, + reward=float(row["reward"]), + observation_text=", ".join(_format_scalar(value) for value in observation) if observation else "n/a", + action=action, + elapsed_time_sec=_step_elapsed_time(iteration_dir / str(step_no)), + is_successful=step_run.test.was_run_successful(step_run).is_successful if step_run else False, + ) + ) + + if not steps: + return None + + steps.sort(key=lambda step: step.step) + best_step = max(steps, key=lambda step: step.reward) + best_step_dump = iteration_dir / str(best_step.step) / CommandGenStrategy.TEST_RUN_DUMP_FILE_NAME + if not best_step_dump.exists(): + logging.warning(f"No test run dump found for best DSE step at {best_step_dump}") + return None + + with best_step_dump.open() as f: + test_run_details = TestRunDetails.model_validate(toml.load(f)) + + best_config_toml = toml.dumps(test_run_details.test_definition.model_dump()) + + elapsed_times = [step.elapsed_time_sec for step in steps if step.elapsed_time_sec is not None] + avg_step_duration_sec = sum(elapsed_times) / len(elapsed_times) if elapsed_times else None + total_runtime_sec = sum(elapsed_times) if elapsed_times else None + total_space = 
len(test_case.all_combinations) + executed_steps = len(steps) + projected_runtime_sec = avg_step_duration_sec * total_space if avg_step_duration_sec is not None else None + saved_runtime_sec = ( + max(projected_runtime_sec - total_runtime_sec, 0.0) + if projected_runtime_sec is not None and total_runtime_sec is not None + else None + ) + + metadata = load_system_metadata(iteration_dir / str(best_step.step), results_root) + gpu_arch_label = metadata.system.gpu_arch_type if metadata else None + gpu_arch_family = _normalize_gpu_family(gpu_arch_label) + gpus_per_node = getattr(system, "gpus_per_node", None) + total_gpu_hours = ( + (total_runtime_sec / 3600.0) * test_run_details.nnodes * gpus_per_node + if total_runtime_sec is not None and gpus_per_node is not None + else None + ) + projected_gpu_hours = ( + (projected_runtime_sec / 3600.0) * test_run_details.nnodes * gpus_per_node + if projected_runtime_sec is not None and gpus_per_node is not None + else None + ) + saved_gpu_hours = ( + max(projected_gpu_hours - total_gpu_hours, 0.0) + if projected_gpu_hours is not None and total_gpu_hours is not None + else None + ) + estimated_saved_cost_usd = ( + saved_gpu_hours * GPU_HOURLY_COST_USD[gpu_arch_family] + if saved_gpu_hours is not None and gpu_arch_family in GPU_HOURLY_COST_USD + else None + ) + + reduction_factor = total_space / max(executed_steps, 1) + + return DSERunSummary( + name=f"{test_case.name}-{iteration}", + saved_time=format_duration(saved_runtime_sec), + saved_gpu_hours=format_float(saved_gpu_hours, 2), + estimated_savings=format_money(estimated_saved_cost_usd), + gpu_label=gpu_arch_label or "unknown", + avg_step_runtime=format_duration(avg_step_duration_sec), + observed_runtime=format_duration(total_runtime_sec), + efficiency_ratio=f"~{format_float(reduction_factor, 1)}x", + efficiency_steps=f"{executed_steps:,} / {total_space:,} steps", + best_config_toml=best_config_toml, + parameter_rows=_build_parameter_rows(test_case.param_space, best_step.action), 
+ reward_chart_data=_build_reward_chart_data(steps), + ) + + +def build_dse_summaries( + system: System, + results_root: Path, + loaded_test_runs: list[TestRun], + test_cases: list[TestRun], +) -> list[DSERunSummary]: + result: list[DSERunSummary] = [] + + for test_case in test_cases: + if not test_case.is_dse_job: + continue + + case_root = results_root / test_case.name + if not case_root.is_dir(): + continue + + for iteration in range(test_case.iterations): + dse_iteration_runs = [ + tr + for tr in loaded_test_runs + if tr.name == test_case.name and tr.current_iteration == iteration + ] + + iteration_dir = case_root / str(iteration) + if not iteration_dir.is_dir(): + continue + + summary = _build_iteration_summary( + system=system, + results_root=results_root, + test_case=test_case, + iteration=iteration, + iteration_dir=iteration_dir, + test_runs=dse_iteration_runs, + ) + if summary is not None: + result.append(summary) + + return result diff --git a/src/cloudai/report_generator/status_report.py b/src/cloudai/report_generator/status_report.py deleted file mode 100644 index 3ce0c4943..000000000 --- a/src/cloudai/report_generator/status_report.py +++ /dev/null @@ -1,459 +0,0 @@ -# SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES -# Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# SPDX-License-Identifier: Apache-2.0 -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from __future__ import annotations - -import ast -import contextlib -import logging -from dataclasses import dataclass, field -from pathlib import Path -from typing import Any - -import toml -from pydantic import BaseModel - -from cloudai.core import CommandGenStrategy, TestRun, case_name -from cloudai.models.scenario import TestRunDetails -from cloudai.util.lazy_imports import lazy - -GPU_HOURLY_COST_USD = { - "H100": 4.50, - "B200": 8.00, - "GB200": 10.00, - "GB300": 12.00, -} - - -def format_duration(seconds: float | None) -> str: - if seconds is None: - return "n/a" - - seconds = max(float(seconds), 0.0) - if seconds < 60: - return f"{seconds:.1f}s" - - minutes, sec = divmod(round(seconds), 60) - hours, minutes = divmod(minutes, 60) - parts = [] - if hours: - parts.append(f"{hours}h") - if minutes: - parts.append(f"{minutes}m") - if sec or not parts: - parts.append(f"{sec}s") - return " ".join(parts) - - -def format_float(value: float | None, precision: int = 2) -> str: - if value is None: - return "n/a" - return f"{value:.{precision}f}" - - -def format_percent(value: float | None) -> str: - if value is None: - return "n/a" - return f"{value:.2f}%" - - -def format_money(value: float | None) -> str: - if value is None: - return "n/a" - return f"${value:,.2f}" - - -def _safe_literal_eval(raw: Any, default: Any) -> Any: - if isinstance(raw, str): - with contextlib.suppress(SyntaxError, ValueError): - return ast.literal_eval(raw) - return default - - -def _format_scalar(value: Any) -> str: - if isinstance(value, float): - return f"{value:.4f}".rstrip("0").rstrip(".") - return str(value) - - -def _normalize_gpu_family(gpu_name: str | None) -> str | None: - if not gpu_name: - return None - - upper = gpu_name.upper() - for family in GPU_HOURLY_COST_USD: - if family in upper: - return family - return None - - -def _build_reward_chart_data(steps: list["DSEStepData"]) -> dict[str, list[Any]] | None: - if not steps: - return None - - best_index = max(range(len(steps)), 
key=lambda idx: steps[idx].reward) - return { - "labels": [step.step for step in steps], - "rewards": [step.reward for step in steps], - "observations": [step.observation_display for step in steps], - "best_index": best_index, - } - - -def _build_effort_chart_data(executed_steps: int, total_space: int) -> dict[str, Any] | None: - if total_space <= 0: - return None - - explored_ratio = min(max(executed_steps / total_space, 0.0), 1.0) - reduction_factor = total_space / max(executed_steps, 1) - return { - "explored_ratio": explored_ratio, - "explored_percent": explored_ratio * 100.0, - "avoided_percent": max((1.0 - explored_ratio) * 100.0, 0.0), - "reduction_factor": reduction_factor, - "executed_steps": executed_steps, - "total_space": total_space, - } - - -class _ReportMetadataSystem(BaseModel): - gpu_arch_type: str - - -class _ReportMetadataSlurm(BaseModel): - node_list: str - - -class _ReportSystemMetadata(BaseModel): - system: _ReportMetadataSystem - slurm: _ReportMetadataSlurm - - -class _ReportJobMetadata(BaseModel): - elapsed_time_sec: int - - -def load_system_metadata(run_dir: Path, results_root: Path) -> _ReportSystemMetadata | None: - """Load system metadata from run_dir. 
At the moment it supports only Slurm.""" - metadata_path = run_dir / "metadata" - if not metadata_path.exists(): - logging.debug(f"No metadata folder found in {run_dir=}") - if not (results_root / "metadata").exists(): - logging.debug(f"No metadata folder found in {results_root=}") - return None - metadata_path = results_root / "metadata" - - node_files = list(metadata_path.glob("node-*.toml")) - if not node_files: - logging.debug(f"No node files found in {metadata_path}") - return None - - node_file = node_files[0] - with node_file.open() as f: - try: - return _ReportSystemMetadata.model_validate(toml.load(f)) - except Exception as e: - logging.debug(f"Error validating metadata for {node_file}: {e}") - - return None - - -@dataclass -class ReportItem: - """Basic report item for general systems.""" - - group_name: str - name: str - description: str - logs_path: str | None - nodes: _ReportSystemMetadata | None - status_text: str - status_class: str - is_dse: bool - - @classmethod - def from_test_runs(cls, test_runs: list[TestRun], results_root: Path) -> list["ReportItem"]: - report_items: list[ReportItem] = [] - for tr in test_runs: - tr_status = tr.test.was_run_successful(tr) - status_text = "PASSED" if tr_status.is_successful else "FAILED" - report_items.append( - ReportItem( - group_name=tr.name, - name=case_name(tr), - description=tr.test.description, - logs_path=f"./{tr.output_path.relative_to(results_root)}" if tr.output_path.exists() else None, - nodes=load_system_metadata(tr.output_path, results_root), - status_text=status_text, - status_class=status_text.lower(), - is_dse=tr.is_dse_job, - ) - ) - return report_items - - -@dataclass -class DSEStepData: - """DSE step data.""" - - step: int - reward: float - observation: list[Any] - observation_display: str - action: dict[str, Any] - elapsed_time_sec: int | None = None - is_successful: bool = False - - -@dataclass -class DSEParameterRow: - """DSE parameter row.""" - - name: str - values: list[str] - best_value: 
str - - -@dataclass -class DSESummary: - """Summary of a DSE iteration.""" - - name: str - description: str - iteration: int - output_root: Path - output_root_rel_path: str - total_space: int - executed_steps: int - skipped_steps: int - best_step: int | None - best_reward: float | None - avg_step_duration_sec: float | None - total_runtime_sec: float | None - saved_runtime_sec: float | None - failure_count: int - gpu_arch_label: str | None - saved_gpu_hours: float | None - estimated_saved_cost_usd: float | None - best_config_rel_path: str | None - best_config_toml: str | None - analysis_rel_path: str | None - parameter_rows: list[DSEParameterRow] = field(default_factory=list) - reward_chart_data: dict[str, list[Any]] | None = None - effort_chart_data: dict[str, Any] | None = None - - @property - def status_text(self) -> str: - if self.failure_count == 0: - return "PASSED" - if self.failure_count == self.executed_steps: - return "FAILED" - return "PARTIAL" - - @property - def status_style(self) -> str: - return { - "PASSED": "[green]PASSED[/green]", - "FAILED": "[red]FAILED[/red]", - "PARTIAL": "[yellow]PARTIAL[/yellow]", - }[self.status_text] - - -class DSEReportBuilder: - """Build DSE summaries and best-config artifacts from generated results.""" - - def __init__(self, system: Any, results_root: Path, loaded_test_runs: list[TestRun]): - self.system = system - self.results_root = results_root - self.loaded_test_runs = loaded_test_runs - - @staticmethod - def best_config_file_name(tr: TestRun) -> str: - return f"{tr.name}.toml" - - def build(self, original_test_runs: list[TestRun]) -> list[DSESummary]: - summaries: list[DSESummary] = [] - for tr in original_test_runs: - if not tr.is_dse_job: - continue - summaries.extend(self._build_for_test_run(tr)) - return summaries - - def _build_for_test_run(self, original_tr: TestRun) -> list[DSESummary]: - summaries: list[DSESummary] = [] - tr_base_dir = self.results_root / original_tr.name - if not tr_base_dir.exists(): - 
return summaries - - grouped_trs: dict[int, list[TestRun]] = {} - for tr in self.loaded_test_runs: - if tr.name != original_tr.name: - continue - grouped_trs.setdefault(tr.current_iteration, []).append(tr) - - iteration_dirs = sorted((d for d in tr_base_dir.iterdir() if d.is_dir()), key=lambda p: int(p.name)) - for iter_dir in iteration_dirs: - iteration = int(iter_dir.name) - summary = self._build_iteration_summary(original_tr, iteration, iter_dir, grouped_trs.get(iteration, [])) - if summary is not None: - summaries.append(summary) - return summaries - - def _build_iteration_summary( - self, - original_tr: TestRun, - iteration: int, - iter_dir: Path, - step_trs: list[TestRun], - ) -> DSESummary | None: - trajectory_file = iter_dir / "trajectory.csv" - if not trajectory_file.exists(): - logging.warning(f"No trajectory file found for {original_tr.name} at {trajectory_file}") - return None - - df = lazy.pd.read_csv(trajectory_file) - if df.empty: - return None - - steps_by_number = {tr.step: tr for tr in step_trs} - steps: list[DSEStepData] = [] - for row in df.to_dict(orient="records"): - step_no = int(row["step"]) - action = _safe_literal_eval(row.get("action"), {}) - if not isinstance(action, dict): - action = {} - observation = _safe_literal_eval(row.get("observation"), []) - if not isinstance(observation, list): - observation = [observation] - tr = steps_by_number.get(step_no) - is_successful = tr.test.was_run_successful(tr).is_successful if tr is not None else False - steps.append( - DSEStepData( - step=step_no, - reward=float(row["reward"]), - observation=observation, - observation_display=", ".join(_format_scalar(v) for v in observation) if observation else "n/a", - action=action, - elapsed_time_sec=self._step_elapsed_time(iter_dir / str(step_no)), - is_successful=is_successful, - ) - ) - - if not steps: - return None - - steps.sort(key=lambda step: step.step) - best_step_data = max(steps, key=lambda step: step.reward) - best_step_dir = iter_dir / 
str(best_step_data.step) - best_step_details = best_step_dir / CommandGenStrategy.TEST_RUN_DUMP_FILE_NAME - if not best_step_details.exists(): - logging.warning(f"No test run dump found for best DSE step at {best_step_details}") - return None - - with best_step_details.open() as f: - trd = TestRunDetails.model_validate(toml.load(f)) - - best_config_path = iter_dir / self.best_config_file_name(original_tr) - with best_config_path.open("w") as f: - toml.dump(trd.test_definition.model_dump(), f) - best_config_toml = toml.dumps(trd.test_definition.model_dump()) - - elapsed_times = [step.elapsed_time_sec for step in steps if step.elapsed_time_sec is not None] - avg_step_duration_sec = sum(elapsed_times) / len(elapsed_times) if elapsed_times else None - total_runtime_sec = sum(elapsed_times) if elapsed_times else None - total_space = len(original_tr.all_combinations) - executed_steps = len(steps) - skipped_steps = max(total_space - executed_steps, 0) - projected_runtime_sec = avg_step_duration_sec * total_space if avg_step_duration_sec is not None else None - saved_runtime_sec = ( - max(projected_runtime_sec - total_runtime_sec, 0.0) - if projected_runtime_sec is not None and total_runtime_sec is not None - else None - ) - - metadata = load_system_metadata(iter_dir / str(best_step_data.step), self.results_root) - gpu_arch_label = metadata.system.gpu_arch_type if metadata else None - gpu_arch_family = _normalize_gpu_family(gpu_arch_label) - num_nodes = trd.nnodes - gpus_per_node = getattr(self.system, "gpus_per_node", None) - total_gpu_hours = ( - (total_runtime_sec / 3600.0) * num_nodes * gpus_per_node - if total_runtime_sec is not None and gpus_per_node is not None - else None - ) - projected_gpu_hours = ( - (projected_runtime_sec / 3600.0) * num_nodes * gpus_per_node - if projected_runtime_sec is not None and gpus_per_node is not None - else None - ) - saved_gpu_hours = ( - max(projected_gpu_hours - total_gpu_hours, 0.0) - if projected_gpu_hours is not None and 
total_gpu_hours is not None - else None - ) - estimated_saved_cost_usd = ( - saved_gpu_hours * GPU_HOURLY_COST_USD[gpu_arch_family] - if saved_gpu_hours is not None and gpu_arch_family in GPU_HOURLY_COST_USD - else None - ) - - failure_count = sum(1 for step in steps if not step.is_successful) - best_action = best_step_data.action - parameter_rows = [ - DSEParameterRow( - name=name, - values=[_format_scalar(value) for value in values], - best_value=_format_scalar(best_action.get(name, "n/a")), - ) - for name, values in original_tr.param_space.items() - ] - analysis_file = iter_dir / "analysis.csv" - - return DSESummary( - name=original_tr.name, - description=original_tr.test.description, - iteration=iteration, - output_root=iter_dir, - output_root_rel_path=f"./{iter_dir.relative_to(self.results_root)}", - total_space=total_space, - executed_steps=executed_steps, - skipped_steps=skipped_steps, - best_step=best_step_data.step, - best_reward=best_step_data.reward, - avg_step_duration_sec=avg_step_duration_sec, - total_runtime_sec=total_runtime_sec, - saved_runtime_sec=saved_runtime_sec, - failure_count=failure_count, - gpu_arch_label=gpu_arch_label, - saved_gpu_hours=saved_gpu_hours, - estimated_saved_cost_usd=estimated_saved_cost_usd, - best_config_rel_path=f"./{best_config_path.relative_to(self.results_root)}", - best_config_toml=best_config_toml, - analysis_rel_path=f"./{analysis_file.relative_to(self.results_root)}" if analysis_file.exists() else None, - parameter_rows=parameter_rows, - reward_chart_data=_build_reward_chart_data(steps), - effort_chart_data=_build_effort_chart_data(executed_steps, total_space), - ) - - @staticmethod - def _step_elapsed_time(step_dir: Path) -> int | None: - slurm_job_path = step_dir / "slurm-job.toml" - if not slurm_job_path.exists(): - return None - - with slurm_job_path.open() as f: - metadata = _ReportJobMetadata.model_validate(toml.load(f)) - return metadata.elapsed_time_sec diff --git a/src/cloudai/report_generator/util.py 
b/src/cloudai/report_generator/util.py index ccb1af7d6..53c2c7b43 100644 --- a/src/cloudai/report_generator/util.py +++ b/src/cloudai/report_generator/util.py @@ -15,9 +15,14 @@ # limitations under the License. from __future__ import annotations +import logging +from pathlib import Path from typing import TYPE_CHECKING, List, Tuple +import toml + from cloudai.core import TestRun +from cloudai.systems.slurm import SlurmSystemMetadata from cloudai.util.lazy_imports import lazy if TYPE_CHECKING: @@ -178,3 +183,27 @@ def diff_test_runs(trs: list[TestRun]) -> dict[str, list[str]]: diff[key] = all_values return diff + + +def load_system_metadata(run_dir: Path, results_root: Path) -> SlurmSystemMetadata | None: + metadata_path = run_dir / "metadata" + if not metadata_path.exists(): + logging.debug(f"No metadata folder found in {run_dir=}") + fallback_metadata_path = results_root / "metadata" + if not fallback_metadata_path.exists(): + logging.debug(f"No metadata folder found in {results_root=}") + return None + metadata_path = fallback_metadata_path + + node_files = list(metadata_path.glob("node-*.toml")) + if not node_files: + logging.debug(f"No node files found in {metadata_path}") + return None + + with node_files[0].open() as f: + try: + return SlurmSystemMetadata.model_validate(toml.load(f)) + except Exception as exc: + logging.debug(f"Error validating metadata for {node_files[0]}: {exc}") + return None + diff --git a/src/cloudai/reporter.py b/src/cloudai/reporter.py index a754a822a..98a8713d1 100644 --- a/src/cloudai/reporter.py +++ b/src/cloudai/reporter.py @@ -17,23 +17,45 @@ import contextlib import logging import tarfile -from collections import defaultdict +from dataclasses import dataclass from pathlib import Path +from typing import Optional import jinja2 +import toml from rich import box from rich.console import Console from rich.table import Table -from cloudai.core import Reporter, TestRun -from cloudai.report_generator.status_report import ( - 
DSEReportBuilder, - DSESummary, - ReportItem, - format_duration, - format_float, - format_money, -) +from cloudai.util.lazy_imports import lazy + +from .core import CommandGenStrategy, Reporter, TestRun, case_name +from .models.scenario import TestRunDetails +from cloudai.report_generator.dse_report import build_dse_summaries +from cloudai.report_generator.util import load_system_metadata + + +@dataclass +class ReportItem: + """Enhanced report item for Slurm systems with node information.""" + + name: str + description: str + logs_path: Optional[str] = None + nodes: Optional[str] = None + + @classmethod + def from_test_runs(cls, test_runs: list[TestRun], results_root: Path) -> list["ReportItem"]: + report_items: list[ReportItem] = [] + for tr in test_runs: + ri = ReportItem(case_name(tr), tr.test.description) + if tr.output_path.exists(): + ri.logs_path = f"./{tr.output_path.relative_to(results_root)}" + if metadata := load_system_metadata(tr.output_path, results_root): + ri.nodes = metadata.slurm.node_list + report_items.append(ri) + + return report_items class PerTestReporter(Reporter): @@ -62,68 +84,55 @@ class StatusReporter(Reporter): """Generates HTML status reports with system-specific templates.""" @property - def templates_dir(self) -> Path: + def template_file_path(self) -> Path: return Path(__file__).parent / "util" + @property + def template_file(self) -> str: + return "general-report.jinja2" + def generate(self) -> None: self.load_test_runs() + self.generate_scenario_report() + self.report_best_dse_config() + self.print_summary() - dse_builder = DSEReportBuilder(self.system, self.results_root, self.trs) - dse_summaries = dse_builder.build(self.test_scenario.test_runs) - - self.to_html(dse_summaries) - self.to_console(dse_summaries) - - def to_html(self, dse_summaries: list[DSESummary]) -> None: - jinja_env = jinja2.Environment(loader=jinja2.FileSystemLoader(self.templates_dir)) - template = jinja_env.get_template("general-report.jinja2") + def 
generate_scenario_report(self) -> None: + template = jinja2.Environment(loader=jinja2.FileSystemLoader(self.template_file_path)).get_template( + self.template_file + ) report_items = ReportItem.from_test_runs(self.trs, self.results_root) - dse_cases = self._build_dse_cases(dse_summaries, report_items) - dse_case_names = {case["name"] for case in dse_cases} - dse_report_items = [item for item in report_items if item.group_name in dse_case_names] - standard_report_items = [item for item in report_items if item.group_name not in dse_case_names] - report = template.render( - name=self.test_scenario.name, - report_items=standard_report_items, - dse_cases=dse_cases, - dse_report_items=dse_report_items, - format_duration=format_duration, - format_float=format_float, - format_money=format_money, - ) + report = template.render(name=self.test_scenario.name, report_items=report_items) report_path = self.results_root / f"{self.test_scenario.name}.html" with report_path.open("w") as f: f.write(report) logging.info(f"Generated scenario report at {report_path}") - def _build_dse_cases(self, dse_summaries: list[DSESummary], report_items: list[ReportItem]) -> list[dict[str, object]]: - summaries_by_name: dict[str, list[DSESummary]] = defaultdict(list) - for summary in dse_summaries: - summaries_by_name[summary.name].append(summary) - - items_by_name: dict[str, list[ReportItem]] = defaultdict(list) - for item in report_items: - if item.is_dse: - items_by_name[item.group_name].append(item) - - dse_case_names = [] + def report_best_dse_config(self): for tr in self.test_scenario.test_runs: - if tr.is_dse_job and tr.name not in dse_case_names: - dse_case_names.append(tr.name) - - return [ - { - "name": case_name, - "summaries": summaries_by_name.get(case_name, []), - "report_items": items_by_name.get(case_name, []), - } - for case_name in dse_case_names - if summaries_by_name.get(case_name) - ] - - def to_console(self, dse_summaries: list[DSESummary]): + if not tr.is_dse_job: + 
continue + + tr_root = self.results_root / tr.name / f"{tr.current_iteration}" + trajectory_file = tr_root / "trajectory.csv" + if not trajectory_file.exists(): + logging.warning(f"No trajectory file found for {tr.name} at {trajectory_file}") + continue + + df = lazy.pd.read_csv(trajectory_file) + best_step = df.loc[df["reward"].idxmax()]["step"] + best_step_details = tr_root / f"{best_step}" / CommandGenStrategy.TEST_RUN_DUMP_FILE_NAME + with best_step_details.open() as f: + trd = TestRunDetails.model_validate(toml.load(f)) + + best_config_path = tr_root / f"{tr.name}.toml" + logging.info(f"Writing best config for {tr.name} to {best_config_path}") + with best_config_path.open("w") as f: + toml.dump(trd.test_definition.model_dump(), f) + + def print_summary(self) -> None: if not self.trs: logging.debug("No test runs found, skipping summary.") return @@ -132,31 +141,6 @@ def to_console(self, dse_summaries: list[DSESummary]): for col in ["Case", "Status", "Details"]: table.add_column(col, overflow="fold") - if dse_summaries: - self._add_dse_rows(dse_summaries, table) - else: - self._add_standard_rows(table) - - console = Console() - with console.capture() as capture: - console.print(table) - - logging.info(capture.get()) - - @staticmethod - def _add_dse_rows(dse_summaries: list[DSESummary], table: Table): - for summary in dse_summaries: - details = [ - f"steps={summary.executed_steps}/{summary.total_space}", - f"best_step={summary.best_step}", - f"best_reward={format_float(summary.best_reward, 4)}", - f"failures={summary.failure_count}", - ] - if summary.best_config_rel_path: - details.append(summary.best_config_rel_path) - table.add_row(summary.description, f"[bold]{summary.status_style}[/bold]", "\n".join(details)) - - def _add_standard_rows(self, table: Table): for tr in self.trs: tr_status = tr.test.was_run_successful(tr) sts_text = f"[bold]{'[green]PASSED[/green]' if tr_status.is_successful else '[red]FAILED[/red]'}[/bold]" @@ -164,7 +148,40 @@ def 
_add_standard_rows(self, table: Table): with contextlib.suppress(ValueError): display_path = str(tr.output_path.absolute().relative_to(Path.cwd())) details_text = f"\n{tr_status.error_message}" if tr_status.error_message else "" - table.add_row(tr.name, sts_text, f"{display_path}{details_text}") + columns = [tr.name, sts_text, f"{display_path}{details_text}"] + table.add_row(*columns) + + console = Console() + with console.capture() as capture: + console.print(table) # doesn't print to stdout, captures only + + logging.info(capture.get()) + + +class DSEReporter(Reporter): + @property + def templates_dir(self) -> Path: + return Path(__file__).parent / "util" + + def generate(self) -> None: + self.load_test_runs() + + dse_cases = build_dse_summaries( + system=self.system, + results_root=self.results_root, + loaded_test_runs=self.trs, + test_cases=self.test_scenario.test_runs, + ) + + jinja_env = jinja2.Environment(loader=jinja2.FileSystemLoader(self.templates_dir)) + template = jinja_env.get_template("dse-report.jinja2") + + report = template.render(scenario_name=self.test_scenario.name, dse_cases=dse_cases) + report_path = self.results_root / f"{self.test_scenario.name}-dse-report.html" + with report_path.open("w") as f: + f.write(report) + + logging.info(f"Generated scenario report at {report_path}") class TarballReporter(Reporter): diff --git a/src/cloudai/util/dse-report.jinja2 b/src/cloudai/util/dse-report.jinja2 new file mode 100644 index 000000000..ea7a57792 --- /dev/null +++ b/src/cloudai/util/dse-report.jinja2 @@ -0,0 +1,415 @@ +{% extends "base-report.jinja2" %} + +{% block extra_head %} +{% if dse_cases %} + +{% endif %} + +{% if dse_cases %} + +{% endif %} +{% endblock %} + +{% block content %} +{% if dse_cases %} + {% for summary in dse_cases %} +
+
+
+

{{ summary.name }}

+
+ +
+
+
Saved Time
+
{{ summary.saved_time }}
+
+
+
Saved GPU-Hours
+
{{ summary.saved_gpu_hours }}
+
+
+
Estimated Savings
+
{{ summary.estimated_savings }}
+
+
+
GPU Label
+
{{ summary.gpu_label }}
+
+
+ +
+
+
Avg Step Runtime
+
{{ summary.avg_step_runtime }}
+
+
+
Observed Runtime
+
{{ summary.observed_runtime }}
+
+
+
Exploration Efficiency
+
{{ summary.efficiency_ratio }}
+
+
+
Explored Steps
+
{{ summary.efficiency_steps }}
+
+
+ + {% if summary.best_config_toml %} +
+ + Best Config TOML + + + + +
{{ summary.best_config_toml }}
+
+ {% endif %} +
+ +
+

DSE: Exploration Space

+
Test
+ + + + + + + + {% for row in summary.parameter_rows %} + + + + + {% endfor %} + +
ParameterAllowed Values
{{ row.name }} +
+ {% for value in row.values %} + {{ value.text }} + {% endfor %} +
+
+
+ +
+

DSE: Reward Over Steps

+ {% if summary.reward_chart_data %} +
+ +
+ +

Interactive chart unavailable. Numeric report details remain available above.

+ + {% else %} +

No reward data available.

+ {% endif %} +
+
+ {% endfor %} +{% else %} +

No DSE results found.

+{% endif %} +{% endblock %} diff --git a/src/cloudai/util/general-report.jinja2 b/src/cloudai/util/general-report.jinja2 index 6393988c9..b19189ca8 100644 --- a/src/cloudai/util/general-report.jinja2 +++ b/src/cloudai/util/general-report.jinja2 @@ -1,766 +1,28 @@ {% extends "base-report.jinja2" %} -{% block extra_head %} -{% if dse_cases %} - -{% endif %} - -{% if dse_cases %} - -{% endif %} -{% endblock %} - {% block content %} -{% if dse_cases %} -
-
-

DSE Cases

-

Switch between DSE test cases in this scenario.

-
-
-
- {% for case in dse_cases %} - - {% endfor %} -
- - {% for case in dse_cases %} -
- {% set case_idx = loop.index0 %} - {% for summary in case.summaries %} -
-
-
-

DSE: overview

- {{ summary.status_text }} -
- -
-
-
-
-
-
Saved Time
-
{{ format_duration(summary.saved_runtime_sec) }}
-
-
-
Saved GPU-Hours
-
{{ format_float(summary.saved_gpu_hours, 2) }}
-
-
- -
-
-
Estimated Savings
-
{{ format_money(summary.estimated_saved_cost_usd) }}
-
-
GPU Label{{ summary.gpu_arch_label or "unknown" }}
-
- -
-
Avg Step Runtime{{ format_duration(summary.avg_step_duration_sec) }}
-
Observed Runtime{{ format_duration(summary.total_runtime_sec) }}
-
-
-
- {% if summary.effort_chart_data %} -
-
-

Exploration Efficiency

-
-
-
-
~{{ format_float(summary.effort_chart_data["reduction_factor"], 1) }}x
-
reduction in search space
-
-
{{ "{:,}".format(summary.effort_chart_data["executed_steps"]) }} / {{ "{:,}".format(summary.effort_chart_data["total_space"]) }} steps
-
-
- {% endif %} -
- - {% if summary.best_config_toml %} -
- - Best Config TOML - - {% if summary.analysis_rel_path %}BO Analysis{% endif %} - - - -
{{ summary.best_config_toml }}
-
- {% endif %} -
- -
-

DSE: Exploration Space

- - - - - - - - - {% for row in summary.parameter_rows %} - - - - - {% endfor %} - -
ParameterAllowed Values
{{ row.name }} -
- {% for value in row.values %} - {{ value }} - {% endfor %} -
-
-
- -
-

DSE: Reward Over Steps

- {% if summary.reward_chart_data %} -
- -
- -

Interactive chart unavailable. Step count and summary metrics remain available above.

- - {% else %} -

No reward data available.

- {% endif %} -
-
- {% endfor %} - -
- {% endfor %} -
-
-{% endif %} - -{% if dse_report_items %} -
-

All Steps

- - - - - - - - - - - {% for item in dse_report_items %} - - - - - - - {% endfor %} - -
CaseStatusResultsNodes
{{ item.name }} - {% if item.status_text %} - {{ item.status_text }} - {% else %} - unknown - {% endif %} - - {% if item.logs_path %} - logs - {% else %} - no logs - {% endif %} - - {% if item.nodes is not none %} - {{ item.nodes.slurm.node_list }} - {% else %} - no nodes information - {% endif %} -
-
-{% endif %} - -{% if report_items %} - - - - - - {% if report_items | selectattr('nodes') | first is not none %} - - {% endif %} - - {% for item in report_items %} - - - - {% if item.logs_path %} - - {% else %} - - {% endif %} - {% if item.nodes is not none %} - - {% else %} - - {% endif %} - - {% endfor %} -
TestDescriptionResultsNodes
{{ item.name }}{{ item.description }}logsno logs{{ item.nodes }}no nodes information
-{% endif %} -{% endblock %} + + + + + + + + {% for item in report_items %} + + + + {% if item.logs_path %} + + {% else %} + + {% endif %} + {% if item.nodes %} + + {% else %} + + {% endif %} + + {% endfor %} +
TestDescriptionResultsNodes
{{ item.name }}{{ item.description }}logsno logs{{ item.nodes }}No nodes info
+{% endblock %} \ No newline at end of file diff --git a/tests/test_reporter.py b/tests/test_reporter.py index a38718a72..bb1d3ff3b 100644 --- a/tests/test_reporter.py +++ b/tests/test_reporter.py @@ -229,7 +229,7 @@ def test_template_file_path(system: System) -> None: reporter = StatusReporter( system, TestScenario(name="test_scenario", test_runs=[]), system.output_path, ReportConfig() ) - assert (reporter.templates_dir / "general-report.jinja2").exists() + assert (reporter.templates_dir / "dse-report.jinja2").exists() MY_REPORT_CALLED = 0 @@ -453,7 +453,6 @@ def test_dse_summary_and_best_config_artifacts(slurm_system: SlurmSystem, slurm_ assert summary.saved_runtime_sec == pytest.approx(100.0) assert summary.saved_gpu_hours == pytest.approx((100.0 / 3600.0) * 16) assert summary.estimated_saved_cost_usd == pytest.approx((summary.saved_gpu_hours or 0) * 4.5) - assert summary.analysis_rel_path is not None assert summary.best_config_rel_path == f"./{dse_tr.name}/0/{dse_tr.name}.toml" assert summary.reward_chart_data is not None assert summary.reward_chart_data["labels"] == [1, 2, 3] From 5c887a5356a899d27986ec9964a5a22f9d69c945 Mon Sep 17 00:00:00 2001 From: Ivan Podkidyshev Date: Tue, 24 Mar 2026 22:16:42 +0100 Subject: [PATCH 17/30] we're back to working state --- src/cloudai/report_generator/dse_report.py | 12 +- src/cloudai/util/dse-report.jinja2 | 292 ++++++++++++++------- 2 files changed, 202 insertions(+), 102 deletions(-) diff --git a/src/cloudai/report_generator/dse_report.py b/src/cloudai/report_generator/dse_report.py index 22f981a14..eb51d82b5 100644 --- a/src/cloudai/report_generator/dse_report.py +++ b/src/cloudai/report_generator/dse_report.py @@ -61,7 +61,7 @@ class DSEStepRow: @dataclass(frozen=True) -class DSERunSummary: +class DSECaseIterationSummary: name: str saved_time: str saved_gpu_hours: str @@ -184,7 +184,7 @@ def _build_iteration_summary( iteration: int, iteration_dir: Path, test_runs: list[TestRun], -) -> DSERunSummary | None: +) -> 
DSECaseIterationSummary | None: trajectory_file = iteration_dir / "trajectory.csv" if not trajectory_file.is_file(): logging.warning(f"No trajectory file found for {test_case.name} at {trajectory_file}") @@ -271,7 +271,7 @@ def _build_iteration_summary( reduction_factor = total_space / max(executed_steps, 1) - return DSERunSummary( + return DSECaseIterationSummary( name=f"{test_case.name}-{iteration}", saved_time=format_duration(saved_runtime_sec), saved_gpu_hours=format_float(saved_gpu_hours, 2), @@ -292,8 +292,8 @@ def build_dse_summaries( results_root: Path, loaded_test_runs: list[TestRun], test_cases: list[TestRun], -) -> list[DSERunSummary]: - result: list[DSERunSummary] = [] +) -> list[DSECaseIterationSummary]: + result: list[DSECaseIterationSummary] = [] for test_case in test_cases: if not test_case.is_dse_job: @@ -307,7 +307,7 @@ def build_dse_summaries( dse_iteration_runs = [ tr for tr in loaded_test_runs - if tr.name == test_case.name and tr.current_iteration != iteration + if tr.name == test_case.name and tr.current_iteration == iteration ] iteration_dir = case_root / str(iteration) diff --git a/src/cloudai/util/dse-report.jinja2 b/src/cloudai/util/dse-report.jinja2 index ea7a57792..f0157d08f 100644 --- a/src/cloudai/util/dse-report.jinja2 +++ b/src/cloudai/util/dse-report.jinja2 @@ -6,7 +6,68 @@ {% endif %}