66from typing import Any , Optional
77
88import numpy as np
9+ import plotly .colors as pc # Added to fetch consistent colors
910import plotly .graph_objects as go
11+ from beartype import beartype
1012
1113# Import Loguru and your custom logger config
1214from loguru import logger
@@ -72,6 +74,7 @@ def __init__(self, model_name: str):
7274 self .expected_num_batches : Optional [int ] = None
7375 self .pending_var_loss_epoch : Optional [int ] = None
7476
77+ @beartype
7578 def parse_file (self , log_file : str ) -> TrainingMetrics :
7679 with open (log_file , "r" ) as f :
7780 for line_num , line in enumerate (f , 1 ):
@@ -83,6 +86,7 @@ def parse_file(self, log_file: str) -> TrainingMetrics:
8386 self ._validate_final_metrics ()
8487 return self .metrics
8588
89+ @beartype
8690 def _process_line (self , line : str ) -> None :
8791 """Routes the line to the appropriate sub-parser based on strict string matching."""
8892 if "[INFO] Validation | Epoch:" in line :
@@ -94,6 +98,7 @@ def _process_line(self, line: str) -> None:
9498 elif "[INFO] Epoch" in line or "[INFO] Validation" in line :
9599 self .pending_var_loss_epoch = None
96100
101+ @beartype
97102 def _process_validation (self , line : str ) -> None :
98103 match = VAL_PATTERN .search (line )
99104 if not match :
@@ -115,6 +120,7 @@ def _process_validation(self, line: str) -> None:
115120 self .metrics .baseline_losses [epoch ] = baseline
116121 self .pending_var_loss_epoch = epoch
117122
123+ @beartype
118124 def _process_var_loss (self , line : str ) -> None :
119125 match = VAR_PATTERN .search (line )
120126 if not match :
@@ -138,6 +144,7 @@ def _process_var_loss(self, line: str) -> None:
138144
139145 self .pending_var_loss_epoch = None
140146
147+ @beartype
141148 def _process_training (self , line : str ) -> None :
142149 match = TRAIN_PATTERN .search (line )
143150 if not match :
@@ -165,6 +172,7 @@ def _process_training(self, line: str) -> None:
165172
166173 self .metrics .train_losses [epoch ][batch ] = (num_batches , loss )
167174
175+ @beartype
168176 def _validate_chronology (self , epoch : int , batch : int , num_batches : int ) -> None :
169177 if self .current_epoch is not None and self .current_batch is not None :
170178 if epoch == self .current_epoch and batch <= self .current_batch :
@@ -219,12 +227,14 @@ def _validate_final_metrics(self) -> None:
219227# -------------------------------------------------------------------------
220228# Utility Functions
221229# -------------------------------------------------------------------------
230+ @beartype
def parse_number(val: str) -> float:
    """Convert a numeric string to float, mapping the literal "NaN" to np.nan.

    The input is stripped of surrounding whitespace first; any other
    non-numeric string raises ValueError via float(), keeping parsing strict.
    """
    cleaned = val.strip()
    if cleaned == "NaN":
        return np.nan
    return float(cleaned)
226235
227236
237+ @beartype
228238def parse_args_to_models (args : argparse .Namespace ) -> list [str ]:
229239 """Extracts the list of models from a file or comma-separated string."""
230240 if os .path .isfile (args .models ) and args .models .endswith (".txt" ):
@@ -235,6 +245,7 @@ def parse_args_to_models(args: argparse.Namespace) -> list[str]:
235245 return [m .strip () for m in args .models .split ("," ) if m .strip ()]
236246
237247
248+ @beartype
238249def get_log_filepath (args : argparse .Namespace , model : str ) -> str :
239250 """Finds the appropriate log file for a given model."""
240251 log_pattern = os .path .join (
@@ -256,6 +267,7 @@ def get_log_filepath(args: argparse.Namespace, model: str) -> str:
256267 return log_files [0 ]
257268
258269
270+ @beartype
259271def format_plot_data (
260272 metrics : TrainingMetrics , bucket_batches : Optional [int ], model : str
261273) -> dict [str , Any ]:
@@ -319,6 +331,7 @@ def format_plot_data(
319331# -------------------------------------------------------------------------
320332# Plotting & Reporting
321333# -------------------------------------------------------------------------
334+ @beartype
322335def _generate_single_model_plot (
323336 model : str , data : dict [str , Any ], yaxis_type : str , out_path : str
324337) -> None :
@@ -334,14 +347,22 @@ def _generate_single_model_plot(
334347
335348 fig .add_trace (
336349 go .Scatter (
337- x = data ["val_x" ], y = data ["val_y" ], mode = "lines" , name = "Validation Loss"
350+ x = data ["val_x" ],
351+ y = data ["val_y" ],
352+ mode = "lines" ,
353+ name = "Validation Loss" ,
354+ hovertemplate = f"<b>{ model } </b><br>Val Loss: %{{y}}<br>Epoch: %{{x}}<extra></extra>" ,
338355 ),
339356 row = 1 ,
340357 col = 1 ,
341358 )
342359 fig .add_trace (
343360 go .Scatter (
344- x = data ["train_x" ], y = data ["train_y" ], mode = "lines" , name = "Training Loss"
361+ x = data ["train_x" ],
362+ y = data ["train_y" ],
363+ mode = "lines" ,
364+ name = "Training Loss" ,
365+ hovertemplate = f"<b>{ model } </b><br>Train Loss: %{{y}}<br>Epoch: %{{x}}<extra></extra>" ,
345366 ),
346367 row = 1 ,
347368 col = 1 ,
@@ -355,6 +376,7 @@ def _generate_single_model_plot(
355376 mode = "lines" ,
356377 name = "Baseline Loss" ,
357378 line = dict (dash = "dash" ),
379+ hovertemplate = f"<b>{ model } </b><br>Baseline Loss: %{{y}}<br>Epoch: %{{x}}<extra></extra>" ,
358380 ),
359381 row = 1 ,
360382 col = 1 ,
@@ -376,7 +398,15 @@ def _generate_single_model_plot(
376398 for e in epochs
377399 ]
378400 fig .add_trace (
379- go .Scatter (x = epochs , y = y_norm , mode = "lines" , name = var ), row = 1 , col = 2
401+ go .Scatter (
402+ x = epochs ,
403+ y = y_norm ,
404+ mode = "lines" ,
405+ name = var ,
406+ hovertemplate = f"<b>{ var } </b>: %{{y}}<br>Epoch: %{{x}}<extra></extra>" ,
407+ ),
408+ row = 1 ,
409+ col = 2 ,
380410 )
381411
382412 fig .update_xaxes (title_text = "Epoch" , dtick = 1 , row = 1 , col = 2 )
@@ -393,6 +423,7 @@ def _generate_single_model_plot(
393423 logger .info (f"Visualization HTML generated and saved successfully to { out_path } " )
394424
395425
426+ @beartype
396427def _generate_multi_model_plot (
397428 models : list [str ], all_data : dict [str , Any ], yaxis_type : str , out_path : str
398429) -> None :
@@ -401,22 +432,39 @@ def _generate_multi_model_plot(
401432 rows = 1 , cols = 2 , subplot_titles = ("Validation Losses" , "Training Losses" )
402433 )
403434 baseline_val = None
435+ colors = pc .qualitative .Plotly # Load Plotly's default distinct color array
404436
405- for model in models :
437+ for i , model in enumerate ( models ) :
406438 data = all_data [model ]
439+ color = colors [i % len (colors )] # Cycle colors if models exceed palette limit
440+
441+ # Validation trace
407442 fig .add_trace (
408443 go .Scatter (
409- x = data ["val_x" ], y = data ["val_y" ], mode = "lines" , name = f"{ model } Val Loss"
444+ x = data ["val_x" ],
445+ y = data ["val_y" ],
446+ mode = "lines" ,
447+ name = model ,
448+ legendgroup = model ,
449+ line = dict (color = color ),
450+ showlegend = True , # Only show validation trace in legend to prevent duplicates
451+ hovertemplate = f"<b>{ model } </b><br>Val Loss: %{{y}}<br>Epoch: %{{x}}<extra></extra>" ,
410452 ),
411453 row = 1 ,
412454 col = 1 ,
413455 )
456+
457+ # Training trace
414458 fig .add_trace (
415459 go .Scatter (
416460 x = data ["train_x" ],
417461 y = data ["train_y" ],
418462 mode = "lines" ,
419- name = f"{ model } Train Loss" ,
463+ name = model ,
464+ legendgroup = model ,
465+ line = dict (color = color ),
466+ showlegend = False , # Hidden from legend, but linked via legendgroup
467+ hovertemplate = f"<b>{ model } </b><br>Train Loss: %{{y}}<br>Epoch: %{{x}}<extra></extra>" ,
420468 ),
421469 row = 1 ,
422470 col = 2 ,
@@ -433,20 +481,20 @@ def _generate_multi_model_plot(
433481 )
434482
435483 if baseline_val is not None :
436- max_x = max (
437- [ max (all_data [ m ][ "train_x" ]) for m in models if all_data [ m ][ "train_x" ]]
438- + [0 ]
484+ # Plot baseline on the Validation subplot (col=1)
485+ max_val_x = max (
486+ [ max ( all_data [ m ][ "val_x" ]) for m in models if all_data [ m ][ "val_x" ]] + [0 ]
439487 )
440488 fig .add_trace (
441489 go .Scatter (
442- x = [0 , max_x ],
490+ x = [0 , max_val_x ],
443491 y = [baseline_val , baseline_val ],
444492 mode = "lines" ,
445493 name = "Baseline Loss" ,
446- line = dict (dash = "dash" ),
494+ line = dict (dash = "dash" , color = "black" ),
447495 ),
448496 row = 1 ,
449- col = 2 ,
497+ col = 1 ,
450498 )
451499
452500 fig .update_xaxes (title_text = "Epoch" , dtick = 1 , row = 1 , col = 1 )
@@ -459,6 +507,7 @@ def _generate_multi_model_plot(
459507 logger .info (f"Visualization HTML generated and saved successfully to { out_path } " )
460508
461509
510+ @beartype
462511def generate_html_report (
463512 all_data : dict [str , Any ], models : list [str ], args : argparse .Namespace
464513) -> None :
@@ -480,6 +529,7 @@ def generate_html_report(
480529# -------------------------------------------------------------------------
481530# Orchestrator
482531# -------------------------------------------------------------------------
532+ @beartype
483533def visualize_training (args : argparse .Namespace ) -> None :
484534 """Main orchestrator function."""
485535 models = parse_args_to_models (args )
0 commit comments