From a60f40a065cf671567e930d18e16a8eb01709094 Mon Sep 17 00:00:00 2001 From: Eric Neilsen Date: Wed, 11 Mar 2026 10:44:32 -0700 Subject: [PATCH 01/19] Utilities for comparing observed to simulated visits --- schedview/compute/__init__.py | 10 ++ schedview/compute/comparesim.py | 233 ++++++++++++++++++++++++++++++++ 2 files changed, 243 insertions(+) create mode 100644 schedview/compute/comparesim.py diff --git a/schedview/compute/__init__.py b/schedview/compute/__init__.py index 185ebcba..bc78c144 100644 --- a/schedview/compute/__init__.py +++ b/schedview/compute/__init__.py @@ -16,7 +16,11 @@ "compute_scalar_metric_at_one_mjd", "compute_scalar_metric_at_mjds", "compute_mixed_scalar_metric", + "compute_obs_sim_offsets", + "compute_offset_stats", + "offsets_of_coord_band", "often_repeated_fields", + "assign_field_hpids", "count_visits_by_sim", "match_visits_across_sims", "compute_matched_visit_delta_statistics", @@ -25,6 +29,12 @@ from .astro import compute_sun_moon_positions, convert_evening_date_to_night_of_survey, night_events from .camera import LsstCameraFootprintPerimeter +from .comparesim import ( + assign_field_hpids, + compute_obs_sim_offsets, + compute_offset_stats, + offsets_of_coord_band, +) from .multisim import ( compute_matched_visit_delta_statistics, count_visits_by_sim, diff --git a/schedview/compute/comparesim.py b/schedview/compute/comparesim.py new file mode 100644 index 00000000..6ad5b6c2 --- /dev/null +++ b/schedview/compute/comparesim.py @@ -0,0 +1,233 @@ +from functools import partial +from typing import Optional, Tuple + +import astropy.units as u +import healpy as hp +import numpy as np +import pandas as pd +from astropy.coordinates import Angle, SkyCoord + +from .multisim import match_visits_across_sims + + +def assign_field_hpids( + simulated_visits: pd.DataFrame, + completed_visits: pd.DataFrame, + nside: int, + coord_match_tolerance_deg: float, + *, + ra_col: str = "fieldRA", + decl_col: str = "fieldDec", + hpid_col: str = "fieldHpid", + inplace: bool = False, +) -> Tuple[pd.DataFrame, pd.DataFrame]: + """ + Populate an id column for simulated and completed visits and + propagate the id from the nearest simulated healpix to completed + visits that lie within a specified angular tolerance. + + Parameters + ---------- + simulated_visits : `pd.DataFrame` + Simulated visit table; must contain columns identified by ``ra_col`` + and ``dec_col`` (both in degrees). An integer column named + ``hpid_col`` will be added or overwritten. + + completed_visits : `pd.DataFrame` + Completed visit table; same column requirements as + ``simulated_visits``. + + field_hpix_nside : `int` + HEALPix nside that defines the pixel resolution. + + field_coord_tolerance_deg : `float` + Maximum angular separation (deg) for a completed visit to be considered + a match to a simulated healpix center. + + ra_col : str, optional, default ``"fieldRA"`` + Column name for right‑ascension values (degrees). + + dec_col : str, optional, default ``"fieldDec"`` + Column name for declination values (degrees). + + hpid_col : str, optional, default ``"fieldHpid"`` + Column name that will store the healpix index. + + inplace : bool, optional, default ``False`` + If ``True`` the input DataFrames are modified in place. If ``False`` + (default) shallow copies are made so the originals stay untouched. + + Returns + ------- + result : Tuple[pd.DataFrame, pd.DataFrame] + ``(simulated_visits_out, completed_visits_out)`` the (possibly copied) + DataFrames with the ``hpid_col`` column populated. + """ + + if not inplace: + simulated_visits = simulated_visits.copy() + completed_visits = completed_visits.copy() + + simulated_visits[hpid_col] = hp.ang2pix( + nside=nside, + theta=simulated_visits[ra_col], + phi=simulated_visits[decl_col], + lonlat=True, + ) + + # Values assigned here will be replaced by closest matches + # in simulated_visit[hpid_col] if there are any within + # field_coord_tolerance_deg + completed_visits[hpid_col] = hp.ang2pix( + nside=nside, + theta=completed_visits[ra_col], + phi=completed_visits[decl_col], + lonlat=True, + ) + + # Find all hpids actually used in the simulation. + sim_hpids = simulated_visits[hpid_col].unique() + sim_hp_ra, sim_hp_dec = hp.pix2ang(nside=nside, ipix=sim_hpids, lonlat=True) + sim_hp_coords = SkyCoord(ra=sim_hp_ra * u.deg, dec=sim_hp_dec * u.deg, frame="icrs") + + # Match completed visits to the nearest hpid actually used in simulations. + completed_coords = SkyCoord( + ra=completed_visits[ra_col].values * u.deg, + dec=completed_visits[decl_col].values * u.deg, + frame="icrs", + ) + sim_hp_match_idx, sim_hp_sep, _ = completed_coords.match_to_catalog_sky(sim_hp_coords) + + # Apply tolerance mask and replace the healpix id where appropriate. + within_tol = sim_hp_sep.deg < coord_match_tolerance_deg + completed_visits.loc[within_tol, hpid_col] = sim_hpids[sim_hp_match_idx[within_tol]] + + return simulated_visits, completed_visits + + +def offsets_of_coord_band(sim_index: int, visits: pd.DataFrame, obs_index: int = 0) -> pd.DataFrame: + """ + Compute the time offset between a set of observations and a + single simulated visit ``sim_index`` for a given (fieldHpid, band) + coordinate pair. + + Parameters + ---------- + sim_index : int + The simulation index to compare against the observation (index 0). + visits : pd.DataFrame + Table of visits that contains at least the columns *sim_index* and + *start_timestamp*. + + Returns + ------- + pd.DataFrame + A DataFrame with columns ``obs_time``, ``sim_time`` and ``delta`` and + an index level ``sim_index``. + """ + + offsets = match_visits_across_sims(visits.set_index("sim_index").start_timestamp, (obs_index, sim_index)) + + # Normalize the order and sense of the offset + if offsets.columns[0] == obs_index: + offsets["delta"] = -1 * offsets["delta"] + else: + offsets = offsets[[obs_index, sim_index, "delta"]] + + assert offsets.columns[0] == obs_index + assert offsets.columns[1] == sim_index + assert offsets.columns[2] == "delta" + assert len(offsets.columns) == 3 + offsets.columns = ["obs_time", "sim_time", "delta"] + offsets["sim_index"] = sim_index + offsets = offsets.set_index("sim_index") + + return offsets + + +def compute_obs_sim_offsets( + visits: pd.DataFrame, + obs_index: int = 0, +) -> pd.DataFrame: + """ + Build a table of offsets for all simulated/completed pairs of visits. + + Parameters + ---------- + visits : `pd.DataFrame` + Table of visits with at least the columns ``sim_index``, + ``fieldHpid`` and ``band``. + obs_index : `int`, optional + ``sim_index`` value for completed (observed) visits (default = 0). + + Returns + ------- + sim_offsets : `pd.DataFrame` + Offsets for every simulated visit, indexed by ``sim_index``, + ``fieldHpid`` and ``band``. + """ + sim_indexes = visits.sim_index.unique() + sim_indexes = sim_indexes[sim_indexes != obs_index] + + offsets = pd.concat( + visits.groupby(["fieldHpid", "band"]).apply(partial(offsets_of_coord_band, i)) for i in sim_indexes + ) + offsets.index = offsets.index.reorder_levels(["sim_index", "fieldHpid", "band"]) + return offsets + + +def compute_offset_stats( + offsets: pd.DataFrame, + visits: Optional[pd.DataFrame] = None, + hhmmss: bool = False, +) -> pd.DataFrame: + """ + Produce a summary table of time offsets between completed and simulated + visits. + + Parameters + ---------- + offsets : `pd.DataFrame` + Result of :func:`compute_obs_sim_offsets`. Must contain a ``delta`` + column and a ``sim_index`` level in the index. + visits : `pd.DataFrame`, optional + The original visits table, required only if observation counts or + labels should be included in the returned ``DataFrame``. + hhmmss : `bool`, optional + If ``True``, convert the numeric statistics (mean, std, etc.) from + seconds to an ``HH:MM:SS`` string representation. + + Returns + ------- + offset_stats : `pd.DataFrame` + A table where each row corresponds to a ``sim_index`` and columns + include match counts, MAD, and the usual descriptive statistics. + """ + abs_delta = np.abs(offsets["delta"]) + abs_delta.name = "abs_delta" + + offset_stats = offsets.groupby("sim_index")["delta"].describe() + offset_stats.insert(0, "match count", offset_stats["count"].astype(int)) + offset_stats.insert(1, "MAD", abs_delta.to_frame().groupby("sim_index")["abs_delta"].median()) + + if visits is not None: + visit_counts = visits.groupby("sim_index").agg({"label": "count"}) + offset_stats.insert( + 0, "obs count", np.full_like(offset_stats["count"], visit_counts.loc[0]).astype(int) + ) + offset_stats.insert(1, "sim count", visit_counts) + offset_stats.insert(3, "#match/#obs", (offset_stats["count"] / offset_stats["obs count"]).round(2)) + offset_stats.insert(4, "#match/#sim", (offset_stats["count"] / offset_stats["sim count"]).round(2)) + + offset_stats.drop(columns="count", inplace=True) + + if hhmmss: + for column in ["MAD", "mean", "std", "min", "25%", "50%", "75%", "max"]: + offset_stats.loc[:, column] = Angle( + (offset_stats.loc[:, column].astype(int).values / 3600) * u.hour + ).to_string(unit=u.hour, sep=":") + + if visits is not None and "label" in visits.columns: + offset_stats.insert(0, "label", visits.groupby("sim_index")["label"].first().to_frame()) + + return offset_stats From bb94e962ef459f629aac184856159dad2a49cc85 Mon Sep 17 00:00:00 2001 From: Eric Neilsen Date: Wed, 11 Mar 2026 11:23:23 -0700 Subject: [PATCH 02/19] Add compute_common_fraction --- schedview/compute/__init__.py | 2 ++ schedview/compute/comparesim.py | 17 ++++++++++++++++- 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/schedview/compute/__init__.py b/schedview/compute/__init__.py index bc78c144..a8e7281c 100644 --- a/schedview/compute/__init__.py +++ b/schedview/compute/__init__.py @@ -10,6 +10,7 @@ "make_unique_survey_name", "make_scheduler_summary_df", "make_survey_reward_df", + "compute_common_fractions", "compute_maps", "compute_metric_by_visit", "compute_hpix_metric_in_bands", @@ -31,6 +32,7 @@ from .camera import LsstCameraFootprintPerimeter from .comparesim import ( assign_field_hpids, + compute_common_fractions, compute_obs_sim_offsets, compute_offset_stats, offsets_of_coord_band, diff --git a/schedview/compute/comparesim.py b/schedview/compute/comparesim.py index 6ad5b6c2..0cfd7a9a 100644 --- a/schedview/compute/comparesim.py +++ b/schedview/compute/comparesim.py @@ -7,7 +7,7 @@ import pandas as pd from astropy.coordinates import Angle, SkyCoord -from .multisim import match_visits_across_sims +from .multisim import fraction_common, match_visits_across_sims def assign_field_hpids( @@ -231,3 +231,18 @@ def compute_offset_stats( offset_stats.insert(0, "label", visits.groupby("sim_index")["label"].first().to_frame()) return offset_stats + + +def compute_common_fractions(visit_counts: pd.DataFrame, sim_labels: pd.Series, obs_index: int = 0): + fraction_obs_simulated = partial(fraction_common, visit_counts, sim2=obs_index, match_count=False) + fraction_sim_observed = partial(fraction_common, visit_counts, obs_index, match_count=False) + fraction_obs_simulated_num = partial(fraction_common, visit_counts, sim2=obs_index, match_count=True) + fraction_sim_observed_num = partial(fraction_common, visit_counts, obs_index, match_count=True) + + sim_indexes = pd.Series(sim_labels.index.values[sim_labels.index.values != obs_index]) + fraction_completed = sim_labels.loc[sim_indexes, :].copy() + fraction_completed["frac_obs_sim_num"] = sim_indexes.apply(fraction_obs_simulated_num).round(2).values + fraction_completed["frac_sim_obs_num"] = sim_indexes.apply(fraction_sim_observed_num).round(2).values + fraction_completed["frac_obs_sim"] = sim_indexes.apply(fraction_obs_simulated).round(2).values + fraction_completed["frac_sim_obs"] = sim_indexes.apply(fraction_sim_observed).round(2).values + return fraction_completed From 0934cb463293ac7b55be7cf297c8e054933df262 Mon Sep 17 00:00:00 2001 From: Eric Neilsen Date: Wed, 11 Mar 2026 15:07:46 -0700 Subject: [PATCH 03/19] fade sims in visit_vs_time plot --- schedview/plot/visits.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/schedview/plot/visits.py b/schedview/plot/visits.py index 19eb0b7f..d4cc9c42 100644 --- a/schedview/plot/visits.py +++ b/schedview/plot/visits.py @@ -251,7 +251,8 @@ def plot_visit_param_vs_time( source.data["label"] = np.full_like(source.data["start_timestamp"], "") if "sim_alpha" not in source.data: - source.data["sim_alpha"] = np.full_like(source.data["start_timestamp"], 1.0, dtype=np.float64) + source.data["sim_alpha"] = np.full_like(source.data["start_timestamp"], 0.2, dtype=np.float64) + source.data["sim_alpha"][source.data["label"] == "Completed"] = 1.0 scatter_kwargs = {"fill_alpha": 0.0, "line_alpha": "sim_alpha", "name": "timeline"} @@ -413,7 +414,7 @@ def plot_visit_param_vs_time( if (source.data['label'][i] === 'Completed') { source.data['sim_alpha'][i] = 1.0; } else if (['All', source.data['label'][i]].includes(this.value)) { - source.data['sim_alpha'][i] = 0.8; + source.data['sim_alpha'][i] = 0.2; } else { source.data['sim_alpha'][i] = 0.0; } From fba1f38532ff435cd1b06960c0b6b93bfc0f24c4 Mon Sep 17 00:00:00 2001 From: Eric Neilsen Date: Wed, 11 Mar 2026 15:08:57 -0700 Subject: [PATCH 04/19] add plot_obs_vs_sim_time --- schedview/plot/__init__.py | 2 + schedview/plot/comparesim.py | 87 ++++++++++++++++++++++++++++++++++++ 2 files changed, 89 insertions(+) create mode 100644 schedview/plot/comparesim.py diff --git a/schedview/plot/__init__.py b/schedview/plot/__init__.py index 60025125..fc37d39f 100644 --- a/schedview/plot/__init__.py +++ b/schedview/plot/__init__.py @@ -3,6 +3,7 @@ "plot_infeasible", "plot_airmass_vs_time", "plot_alt_vs_time", + "plot_obs_vs_sim_time", "plot_polar_alt_az", "plot_survey_rewards", "create_survey_reward_plot", @@ -42,6 +43,7 @@ from .cadence import create_cadence_plot from .colors import LIGHT_EXTRA_COLORS, LIGHT_PLOT_BAND_COLORS, PLOT_BAND_COLORS, make_band_cmap +from .comparesim import plot_obs_vs_sim_time from .multisim import generate_sim_indicators, overplot_kernel_density_estimates from .nightbf import plot_infeasible, plot_rewards from .nightly import plot_airmass_vs_time, plot_alt_vs_time, plot_polar_alt_az diff --git a/schedview/plot/comparesim.py b/schedview/plot/comparesim.py new file mode 100644 index 00000000..c4115df7 --- /dev/null +++ b/schedview/plot/comparesim.py @@ -0,0 +1,87 @@ +from typing import List, Optional, Tuple + +import bokeh +import bokeh.layouts +import bokeh.models +import bokeh.plotting +import numpy as np +import pandas as pd + +from schedview.plot.colors import make_band_cmap + + +def plot_obs_vs_sim_time( + offsets: pd.DataFrame, tooltips: List[Tuple[str, str]], plot: Optional[bokeh.plotting.figure] = None +) -> bokeh.models.UIElement: + """ + Plot observation times vs simulation times for visits. + + Parameters + ---------- + offsets : `pd.DataFrame` + DataFrame containing visit timing information with columns including + 'obs_time', 'sim_time', 'sim_index', and 'label' as well as any + columns needed for the tooltips. + tooltips : `List[Tuple[str, str]]` + List of tuples defining the tooltip content for hover interactions. + plot : `bokeh.plotting.figure`, optional + Existing bokeh figure to use. If ``None``, a new figure is created. + + Returns + ------- + obs_vs_sim_time_plot : `bokeh.models.UIElement` + A bokeh UI element containing the simulation selector and the plot. + """ + if plot is None: + plot = bokeh.plotting.figure(frame_width=1024, frame_height=512) + + offsets_plot_df = offsets.reset_index().set_index("sim_index") + offsets_plot_df.reset_index(inplace=True) + default_sim_id = offsets_plot_df.sim_index.min() + offsets_plot_df["sim_alpha"] = np.where(offsets_plot_df.sim_index == default_sim_id, 1, 0) + + source = bokeh.models.ColumnDataSource(offsets_plot_df) + + matching_line = bokeh.models.Slope(gradient=1, y_intercept=0, line_color="gray", line_width=1) + plot.add_layout(matching_line) + + scatter_renderer = plot.scatter( + "sim_time", "obs_time", color=make_band_cmap("band"), alpha="sim_alpha", source=source + ) + plot.xaxis[0].formatter = bokeh.models.DatetimeTickFormatter(hours="%H:%M") + plot.xaxis[0].axis_label = "visit start time in simulation" + plot.yaxis[0].formatter = bokeh.models.DatetimeTickFormatter(hours="%H:%M") + plot.yaxis[0].axis_label = "visit start time as completed" + + hover_tool = bokeh.models.HoverTool( + renderers=[scatter_renderer], + tooltips=tooltips, + formatters={"@obs_time": "datetime", "@sim_time": "datetime"}, + ) + plot.add_tools(hover_tool) + + if "label" not in source.column_names: + raise ValueError("A sim selector needs the label column") + sim_labels = offsets.groupby("sim_index")["label"].first().to_frame() + default_sim = sim_labels.loc[default_sim_id, "label"] + sim_selector = bokeh.models.Select( + value=default_sim, options=sim_labels.label.to_list(), name="simselect" + ) + + sim_selector_callback = bokeh.models.CustomJS( + args={"source": source}, + code=""" + for (let i = 0; i < source.data['label'].length; i++) { + if (['All', source.data['label'][i]].includes(this.value)) { + source.data['sim_alpha'][i] = 1.0; + } else { + source.data['sim_alpha'][i] = 0.0; + } + } + source.change.emit() + """, + ) + sim_selector.js_on_change("value", sim_selector_callback) + + ui_element = bokeh.layouts.column([sim_selector, plot]) + return ui_element From 9caadc9301a42f5869188938f5975d092ed1b685 Mon Sep 17 00:00:00 2001 From: "Eric H. Neilsen, Jr." Date: Thu, 12 Mar 2026 09:08:54 -0500 Subject: [PATCH 05/19] tests for assign_field_hpids --- schedview/compute/comparesim.py | 52 ++++++++------- tests/test_compute_comparesim.py | 106 +++++++++++++++++++++++++++++++ 2 files changed, 135 insertions(+), 23 deletions(-) create mode 100644 tests/test_compute_comparesim.py diff --git a/schedview/compute/comparesim.py b/schedview/compute/comparesim.py index 0cfd7a9a..127bb572 100644 --- a/schedview/compute/comparesim.py +++ b/schedview/compute/comparesim.py @@ -75,32 +75,38 @@ def assign_field_hpids( lonlat=True, ) - # Values assigned here will be replaced by closest matches - # in simulated_visit[hpid_col] if there are any within - # field_coord_tolerance_deg - completed_visits[hpid_col] = hp.ang2pix( - nside=nside, - theta=completed_visits[ra_col], - phi=completed_visits[decl_col], - lonlat=True, - ) + if completed_visits.empty: + # If completed_visits is empty, all we have to do is create the column + # in the empty DataFrame and we are done. + completed_visits[hpid_col] = [] + completed_visits[hpid_col] = completed_visits[hpid_col].astype(int) + else: + # Values assigned here will be replaced by closest matches + # in simulated_visit[hpid_col] if there are any within + # field_coord_tolerance_deg + completed_visits[hpid_col] = hp.ang2pix( + nside=nside, + theta=completed_visits[ra_col], + phi=completed_visits[decl_col], + lonlat=True, + ) - # Find all hpids actually used in the simulation. - sim_hpids = simulated_visits[hpid_col].unique() - sim_hp_ra, sim_hp_dec = hp.pix2ang(nside=nside, ipix=sim_hpids, lonlat=True) - sim_hp_coords = SkyCoord(ra=sim_hp_ra * u.deg, dec=sim_hp_dec * u.deg, frame="icrs") + # Find all hpids actually used in the simulation. + sim_hpids = simulated_visits[hpid_col].unique() + sim_hp_ra, sim_hp_dec = hp.pix2ang(nside=nside, ipix=sim_hpids, lonlat=True) + sim_hp_coords = SkyCoord(ra=sim_hp_ra * u.deg, dec=sim_hp_dec * u.deg, frame="icrs") - # Match completed visits to the nearest hpid actually used in simulations. - completed_coords = SkyCoord( - ra=completed_visits[ra_col].values * u.deg, - dec=completed_visits[decl_col].values * u.deg, - frame="icrs", - ) - sim_hp_match_idx, sim_hp_sep, _ = completed_coords.match_to_catalog_sky(sim_hp_coords) + # Match completed visits to the nearest hpid used in simulations. + completed_coords = SkyCoord( + ra=completed_visits[ra_col].values * u.deg, + dec=completed_visits[decl_col].values * u.deg, + frame="icrs", + ) + sim_hp_match_idx, sim_hp_sep, _ = completed_coords.match_to_catalog_sky(sim_hp_coords) - # Apply tolerance mask and replace the healpix id where appropriate. - within_tol = sim_hp_sep.deg < coord_match_tolerance_deg - completed_visits.loc[within_tol, hpid_col] = sim_hpids[sim_hp_match_idx[within_tol]] + # Apply tolerance mask and replace the healpix id where appropriate. + within_tol = sim_hp_sep.deg < coord_match_tolerance_deg + completed_visits.loc[within_tol, hpid_col] = sim_hpids[sim_hp_match_idx[within_tol]] return simulated_visits, completed_visits diff --git a/tests/test_compute_comparesim.py b/tests/test_compute_comparesim.py new file mode 100644 index 00000000..75cdb1eb --- /dev/null +++ b/tests/test_compute_comparesim.py @@ -0,0 +1,106 @@ +import unittest + +import healpy as hp +import numpy as np +import pandas as pd + +from schedview.compute.comparesim import assign_field_hpids + + +class TestAssignFieldHpids(unittest.TestCase): + + def setUp(self): + self.nside = 2**10 + + # Pick a few random but reproducible healpix indices. + rng = np.random.default_rng(42) + self.sim_hpids = rng.integers(0, hp.nside2npix(self.nside), size=5) + ra, dec = hp.pix2ang(self.nside, self.sim_hpids, lonlat=True) + self.simulated_visits = pd.DataFrame({"fieldRA": ra, "fieldDec": dec}) + + # Build completed visits based on the simulated ones. + completed = self.simulated_visits.copy() + # First two rows: small offset (< tolerance), so should be reassigned. + completed.loc[0:1, "fieldRA"] += 0.001 + completed.loc[0:1, "fieldDec"] += 0.001 + # Next two rows: large offset (> tolerance), so should keep own hpid. + completed.loc[2:3, "fieldRA"] += 1.0 + completed.loc[2:3, "fieldDec"] += 1.0 + # Add a few completely unrelated visits. + extra_ra = np.array([10.0, 150.0, 250.0]) + extra_dec = np.array([-30.0, 0.0, 45.0]) + extra = pd.DataFrame({"fieldRA": extra_ra, "fieldDec": extra_dec}) + self.completed_visits = pd.concat([completed, extra], ignore_index=True) + + self.tolerance_deg = 0.01 + + def test_assign_field_hpids_basic(self): + sim_out, comp_out = assign_field_hpids( + self.simulated_visits, + self.completed_visits, + nside=self.nside, + coord_match_tolerance_deg=self.tolerance_deg, + inplace=False, + ) + + # Simulated IDs should equal the original healpix indices. + np.testing.assert_array_equal(sim_out["fieldHpid"].values, self.sim_hpids) + + # Within‑tolerance rows (0 and 1) should have the same hpid as the + # corresponding simulated rows. + self.assertEqual(comp_out.loc[0, "fieldHpid"], self.sim_hpids[0]) + self.assertEqual(comp_out.loc[1, "fieldHpid"], self.sim_hpids[1]) + + # Rows with a large offset should retain the hpid computed from their + # own coordinates + self.assertNotEqual(comp_out.loc[2, "fieldHpid"], self.sim_hpids[2]) + self.assertNotEqual(comp_out.loc[3, "fieldHpid"], self.sim_hpids[3]) + + # Ensure the original DataFrames are unchanged. + self.assertNotIn("fieldHpid", self.simulated_visits.columns) + self.assertNotIn("fieldHpid", self.completed_visits.columns) + + def test_assign_field_hpids_inplace(self): + + sim_original = self.simulated_visits.copy() + comp_original = self.completed_visits.copy() + + sim_out, comp_out = assign_field_hpids( + sim_original, + comp_original, + nside=self.nside, + coord_match_tolerance_deg=self.tolerance_deg, + inplace=True, + ) + + # Identity checks – the returned objects are the same as the inputs. + self.assertIs(sim_out, sim_original) + self.assertIs(comp_out, comp_original) + + # Verify that the hpid column now exists on the original objects. + self.assertIn("fieldHpid", sim_original.columns) + self.assertIn("fieldHpid", comp_original.columns) + # Re‑use the basic assertions for correctness. + np.testing.assert_array_equal(sim_original["fieldHpid"].values, self.sim_hpids) + self.assertEqual(comp_original.loc[0, "fieldHpid"], self.sim_hpids[0]) + self.assertEqual(comp_original.loc[1, "fieldHpid"], self.sim_hpids[1]) + + def test_assign_field_hpids_empty_completed(self): + + empty_completed = pd.DataFrame(columns=["fieldRA", "fieldDec"]) + sim_out, comp_out = assign_field_hpids( + self.simulated_visits, + empty_completed, + nside=self.nside, + coord_match_tolerance_deg=self.tolerance_deg, + inplace=False, + ) + # Simulated IDs must be correct. + np.testing.assert_array_equal(sim_out["fieldHpid"].values, self.sim_hpids) + # Completed DataFrame should remain empty but still have the column. + self.assertTrue(comp_out.empty) + self.assertIn("fieldHpid", comp_out.columns) + + +if __name__ == "__main__": + unittest.main() From 1e188b69825d1f0c213d079577b2be65718ba990 Mon Sep 17 00:00:00 2001 From: "Eric H. Neilsen, Jr." Date: Thu, 12 Mar 2026 10:41:02 -0500 Subject: [PATCH 06/19] tests for offsets_of_coord_band --- schedview/compute/comparesim.py | 10 ++--- tests/test_compute_comparesim.py | 69 +++++++++++++++++++++++++++++++- 2 files changed, 73 insertions(+), 6 deletions(-) diff --git a/schedview/compute/comparesim.py b/schedview/compute/comparesim.py index 127bb572..f0c0155f 100644 --- a/schedview/compute/comparesim.py +++ b/schedview/compute/comparesim.py @@ -119,15 +119,15 @@ def offsets_of_coord_band(sim_index: int, visits: pd.DataFrame, obs_index: int = Parameters ---------- - sim_index : int + sim_index : `int` The simulation index to compare against the observation (index 0). - visits : pd.DataFrame - Table of visits that contains at least the columns *sim_index* and - *start_timestamp*. + visits : `pd.DataFrame` + Table of visits that contains at least the columns ``sim_index`` and + ``start_timestamp``. Returns ------- - pd.DataFrame + offsets: `pd.DataFrame` A DataFrame with columns ``obs_time``, ``sim_time`` and ``delta`` and an index level ``sim_index``. """ diff --git a/tests/test_compute_comparesim.py b/tests/test_compute_comparesim.py index 75cdb1eb..3ba12466 100644 --- a/tests/test_compute_comparesim.py +++ b/tests/test_compute_comparesim.py @@ -4,7 +4,7 @@ import numpy as np import pandas as pd -from schedview.compute.comparesim import assign_field_hpids +from schedview.compute.comparesim import assign_field_hpids, offsets_of_coord_band class TestAssignFieldHpids(unittest.TestCase): @@ -102,5 +102,72 @@ def test_assign_field_hpids_empty_completed(self): self.assertIn("fieldHpid", comp_out.columns) +class TestOffsetOfCoordBand(unittest.TestCase): + + def setUp(self): + self.num_test_visits = 10 + + # generate a set of self.num_test_visits times separated by 500 to 600 + # seconds, and generate a dataframe self.sim_visits with those times. + start_time = pd.Timestamp("2020-01-01 00:00:00", tz="UTC") + time_diffs = np.random.uniform(500, 600, self.num_test_visits) + times = [ + start_time + pd.Timedelta(seconds=np.sum(time_diffs[:i])) for i in range(self.num_test_visits) + ] + self.sim_visits = pd.DataFrame( + { + "sim_index": [1] * self.num_test_visits, + "start_timestamp": times, + "fieldRA": [10.0] * self.num_test_visits, + "fieldDec": [0.0] * self.num_test_visits, + "band": ["r"] * self.num_test_visits, + } + ) + + # randomly but repeatably generate a set of self.num_test_visits + # offsets between -60 and 60 seconds, self.offsets, and apply these to + # self.sim_visits to get self.obs_visits + rng = np.random.default_rng(6563) + self.offsets = rng.uniform(-60, 60, self.num_test_visits) + self.obs_visits = self.sim_visits.copy() + self.obs_visits["start_timestamp"] = self.obs_visits["start_timestamp"] + pd.to_timedelta( + self.offsets, unit="s" + ) + self.obs_visits["sim_index"] = [0] * self.num_test_visits + + def test_equal_numbers(self): + visits = pd.concat( + [self.sim_visits.assign(sim_index=0), self.obs_visits.assign(sim_index=1)], ignore_index=True + ) + + # Verify that we can recover the time offsets + result = offsets_of_coord_band(0, visits, 1) + np.testing.assert_array_almost_equal(result["delta"].values, self.offsets, decimal=4) + + # Verify that we can reverse the sign + result = offsets_of_coord_band(1, visits, 0) + np.testing.assert_array_almost_equal(result["delta"].values, -1 * self.offsets, decimal=4) + + def test_missing_some(self): + rng = np.random.default_rng(6564) + dropped_indexes = rng.choice(len(self.sim_visits), 3, replace=False) + kept_indexes = np.setdiff1d(np.arange(len(self.sim_visits)), dropped_indexes) + remaining_offsets = self.offsets[kept_indexes] + + reduced_sim_visits = self.sim_visits.drop(dropped_indexes).reset_index(drop=True) + + visits = pd.concat( + [reduced_sim_visits.assign(sim_index=0), self.obs_visits.assign(sim_index=1)], ignore_index=True + ) + + # Visits that weren't dropped should be recoverable + result = offsets_of_coord_band(0, visits, 1) + np.testing.assert_array_almost_equal(result["delta"].values, remaining_offsets, decimal=4) + + # Reverse sim and obs, and see if it still works + result = offsets_of_coord_band(1, visits, 0) + np.testing.assert_array_almost_equal(result["delta"].values, -1 * remaining_offsets, decimal=4) + + if __name__ == "__main__": unittest.main() From b50874103e8d3a078417b5e0035a429cf208d2ec Mon Sep 17 00:00:00 2001 From: "Eric H. Neilsen, Jr." Date: Thu, 12 Mar 2026 10:53:27 -0500 Subject: [PATCH 07/19] Improve rng handling --- tests/test_compute_comparesim.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/tests/test_compute_comparesim.py b/tests/test_compute_comparesim.py index 3ba12466..95ec3919 100644 --- a/tests/test_compute_comparesim.py +++ b/tests/test_compute_comparesim.py @@ -6,6 +6,8 @@ from schedview.compute.comparesim import assign_field_hpids, offsets_of_coord_band +RANDOM_NUMBER_GENERATOR = np.random.default_rng(6563) + class TestAssignFieldHpids(unittest.TestCase): @@ -13,8 +15,7 @@ def setUp(self): self.nside = 2**10 # Pick a few random but reproducible healpix indices. - rng = np.random.default_rng(42) - self.sim_hpids = rng.integers(0, hp.nside2npix(self.nside), size=5) + self.sim_hpids = RANDOM_NUMBER_GENERATOR.integers(0, hp.nside2npix(self.nside), size=5) ra, dec = hp.pix2ang(self.nside, self.sim_hpids, lonlat=True) self.simulated_visits = pd.DataFrame({"fieldRA": ra, "fieldDec": dec}) @@ -127,8 +128,7 @@ def setUp(self): # randomly but repeatably generate a set of self.num_test_visits # offsets between -60 and 60 seconds, self.offsets, and apply these to # self.sim_visits to get self.obs_visits - rng = np.random.default_rng(6563) - self.offsets = rng.uniform(-60, 60, self.num_test_visits) + self.offsets = RANDOM_NUMBER_GENERATOR.uniform(-60, 60, self.num_test_visits) self.obs_visits = self.sim_visits.copy() self.obs_visits["start_timestamp"] = self.obs_visits["start_timestamp"] + pd.to_timedelta( self.offsets, unit="s" @@ -149,8 +149,7 @@ def test_equal_numbers(self): np.testing.assert_array_almost_equal(result["delta"].values, -1 * self.offsets, decimal=4) def test_missing_some(self): - rng = np.random.default_rng(6564) - dropped_indexes = rng.choice(len(self.sim_visits), 3, replace=False) + dropped_indexes = RANDOM_NUMBER_GENERATOR.choice(len(self.sim_visits), 3, replace=False) kept_indexes = np.setdiff1d(np.arange(len(self.sim_visits)), dropped_indexes) remaining_offsets = self.offsets[kept_indexes] From 9eea967b2b24352a8c44614f654e43bd296220b2 Mon Sep 17 00:00:00 2001 From: "Eric H. Neilsen, Jr." Date: Thu, 12 Mar 2026 11:54:06 -0500 Subject: [PATCH 08/19] add tests for compute_obs_sim_offsets --- tests/test_compute_comparesim.py | 88 ++++++++++++++++++++++++++++++-- 1 file changed, 85 insertions(+), 3 deletions(-) diff --git a/tests/test_compute_comparesim.py b/tests/test_compute_comparesim.py index 95ec3919..cc5fcb2e 100644 --- a/tests/test_compute_comparesim.py +++ b/tests/test_compute_comparesim.py @@ -4,7 +4,7 @@ import numpy as np import pandas as pd -from schedview.compute.comparesim import assign_field_hpids, offsets_of_coord_band +from schedview.compute.comparesim import assign_field_hpids, compute_obs_sim_offsets, offsets_of_coord_band RANDOM_NUMBER_GENERATOR = np.random.default_rng(6563) @@ -110,14 +110,13 @@ def setUp(self): # generate a set of self.num_test_visits times separated by 500 to 600 # seconds, and generate a dataframe self.sim_visits with those times. - start_time = pd.Timestamp("2020-01-01 00:00:00", tz="UTC") + start_time = pd.Timestamp("2027-01-01 00:00:00", tz="UTC") time_diffs = np.random.uniform(500, 600, self.num_test_visits) times = [ start_time + pd.Timedelta(seconds=np.sum(time_diffs[:i])) for i in range(self.num_test_visits) ] self.sim_visits = pd.DataFrame( { - "sim_index": [1] * self.num_test_visits, "start_timestamp": times, "fieldRA": [10.0] * self.num_test_visits, "fieldDec": [0.0] * self.num_test_visits, @@ -168,5 +167,88 @@ def test_missing_some(self): np.testing.assert_array_almost_equal(result["delta"].values, -1 * remaining_offsets, decimal=4) +class TestComputeObsSimOffsets(unittest.TestCase): + + def setUp(self): + self.num_visits_per_hpid_band = 5 + self.num_hpid_band = 3 + self.num_sims = 3 + + # Generate a pandas.DataFrame, hpid_band_df, of length + # self.num_hpid_band, + # with fieldRA, fieldDec, band, and fieldHpid corresponding to + # fieldRA and fieldDec with healpy nside 32 + # fieldRA and fieldDec should be randomly but repeatably generate. + # band should be randomly but repeatably selected from u, g, r, i, z, y + nside = 32 + hpid_band_df = pd.DataFrame() + hpid_band_df["fieldHpid"] = RANDOM_NUMBER_GENERATOR.integers( + 0, hp.nside2npix(nside), size=self.num_hpid_band + ) + hpid_band_df["fieldRA"], hpid_band_df["fieldDec"] = hp.pix2ang( + nside, hpid_band_df["fieldHpid"], lonlat=True + ) + + # Generate random bands from u, g, r, i, z, y + bands = ["u", "g", "r", "i", "z", "y"] + hpid_band_df["band"] = RANDOM_NUMBER_GENERATOR.choice(bands, size=self.num_hpid_band) + + self.hpid_band_df = hpid_band_df + + # Generate a set of simulations + sim_dfs = [] + start_time = pd.Timestamp("2027-01-01 00:00:00", tz="UTC") + for sim_index in range(1, self.num_sims + 1): + sim_df = pd.DataFrame() + visit_data = [] + start_timestamp = start_time + + # in this simulation, observe each hpid_band + for _, hpid_band in self.hpid_band_df.iterrows(): + for time_diff in RANDOM_NUMBER_GENERATOR.uniform(500, 600, self.num_visits_per_hpid_band): + start_timestamp = start_timestamp + pd.Timedelta(seconds=time_diff) + visit_data.append( + { + "sim_index": sim_index, + "start_timestamp": start_timestamp, + "fieldRA": hpid_band["fieldRA"], + "fieldDec": hpid_band["fieldDec"], + "band": hpid_band["band"], + "fieldHpid": hpid_band["fieldHpid"], + } + ) + + sim_df = pd.DataFrame(visit_data) + sim_dfs.append(sim_df) + + # Generate an obs_df with is similar to the first sim, but with offsets + obs_df = sim_dfs[0].copy() + self.offsets = RANDOM_NUMBER_GENERATOR.uniform(-6, 6, len(obs_df)) + obs_df["start_timestamp"] = obs_df["start_timestamp"] + pd.to_timedelta(self.offsets, unit="s") + obs_df["sim_index"] = 0 + + self.visits = pd.concat([obs_df] + sim_dfs, ignore_index=True) + + def test_compute_obs_sim_offsets_basic(self): + # Test basic functionality of compute_obs_sim_offsets + result = compute_obs_sim_offsets(self.visits, obs_index=0) + + self.assertIsNotNone(result) + self.assertIn("delta", result.columns) + + # Test that we recover offsets + np.testing.assert_array_almost_equal( + result.sort_values("sim_time").loc[1, "delta"].values, self.offsets, decimal=4 + ) + + # Should have a multiindex with sim_index, fieldHpid, and band + self.assertTrue(isinstance(result.index, pd.MultiIndex)) + self.assertEqual(result.index.names, ["sim_index", "fieldHpid", "band"]) + + # Should have at least one entry for each simulation + sim_indexes = result.index.get_level_values("sim_index").unique() + assert set(sim_indexes) == set(range(self.num_sims + 1)) - set([0]) + + if __name__ == "__main__": unittest.main() From 47e9a4718cea69d9a5f79e6bcd8a1da3ddb31450 Mon Sep 17 00:00:00 2001 From: Eric Neilsen Date: Thu, 12 Mar 2026 07:57:03 -0700 Subject: [PATCH 09/19] add asserts to offsets_of_coord_band --- schedview/compute/comparesim.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/schedview/compute/comparesim.py b/schedview/compute/comparesim.py index f0c0155f..78363f70 100644 --- a/schedview/compute/comparesim.py +++ b/schedview/compute/comparesim.py @@ -131,6 +131,11 @@ def offsets_of_coord_band(sim_index: int, visits: pd.DataFrame, obs_index: int = A DataFrame with columns ``obs_time``, ``sim_time`` and ``delta`` and an index level ``sim_index``. """ + # This function is intended to be run on a DataFrame on a single + # field/band combination, not on the whole set of visits, e.g. for a night. + for col in ("band", "fieldHpid"): + if col in visits.columns: + assert len(visits[col].unique()) == 1 offsets = match_visits_across_sims(visits.set_index("sim_index").start_timestamp, (obs_index, sim_index)) From 680f212704c95b19814c90f8b7b69250d2a48c45 Mon Sep 17 00:00:00 2001 From: "Eric H. Neilsen, Jr." Date: Thu, 12 Mar 2026 12:29:24 -0500 Subject: [PATCH 10/19] Add tests for compute_offset_stats --- tests/test_compute_comparesim.py | 141 +++++++++++++++++++------------ 1 file changed, 86 insertions(+), 55 deletions(-) diff --git a/tests/test_compute_comparesim.py b/tests/test_compute_comparesim.py index cc5fcb2e..dc55042e 100644 --- a/tests/test_compute_comparesim.py +++ b/tests/test_compute_comparesim.py @@ -4,7 +4,12 @@ import numpy as np import pandas as pd -from schedview.compute.comparesim import assign_field_hpids, compute_obs_sim_offsets, offsets_of_coord_band +from schedview.compute.comparesim import ( + assign_field_hpids, + compute_obs_sim_offsets, + compute_offset_stats, + offsets_of_coord_band, +) RANDOM_NUMBER_GENERATOR = np.random.default_rng(6563) @@ -167,67 +172,73 @@ def test_missing_some(self): np.testing.assert_array_almost_equal(result["delta"].values, -1 * remaining_offsets, decimal=4) +def _generate_test_visits(num_visits_per_hpid_band, num_hpid_band, num_sims): + """Generate visits DataFrame for testing.""" + + # Generate a pandas.DataFrame, hpid_band_df, of length + # num_hpid_band, + # with fieldRA, fieldDec, band, and fieldHpid corresponding to + # fieldRA and fieldDec with healpy nside 32 + # fieldRA and fieldDec should be randomly but repeatably generate. + # band should be randomly but repeatably selected from u, g, r, i, z, y + nside = 32 + hpid_band_df = pd.DataFrame() + hpid_band_df["fieldHpid"] = RANDOM_NUMBER_GENERATOR.integers(0, hp.nside2npix(nside), size=num_hpid_band) + hpid_band_df["fieldRA"], hpid_band_df["fieldDec"] = hp.pix2ang( + nside, hpid_band_df["fieldHpid"], lonlat=True + ) + + # Generate random bands from u, g, r, i, z, y + bands = ["u", "g", "r", "i", "z", "y"] + hpid_band_df["band"] = RANDOM_NUMBER_GENERATOR.choice(bands, size=num_hpid_band) + + # Generate a set of simulations + sim_dfs = [] + start_time = pd.Timestamp("2027-01-01 00:00:00", tz="UTC") + for sim_index in range(1, num_sims + 1): + sim_df = pd.DataFrame() + visit_data = [] + start_timestamp = start_time + + # in this simulation, observe each hpid_band + for _, hpid_band in hpid_band_df.iterrows(): + for time_diff in RANDOM_NUMBER_GENERATOR.uniform(500, 600, num_visits_per_hpid_band): + start_timestamp = start_timestamp + pd.Timedelta(seconds=time_diff) + visit_data.append( + { + "sim_index": sim_index, + "start_timestamp": start_timestamp, + "fieldRA": hpid_band["fieldRA"], + "fieldDec": hpid_band["fieldDec"], + "band": hpid_band["band"], + "fieldHpid": hpid_band["fieldHpid"], + "label": f"Sim {sim_index}", + } + ) + + sim_df = pd.DataFrame(visit_data) + sim_dfs.append(sim_df) + + # Generate an obs_df with is similar to the first sim, but with offsets + obs_df = sim_dfs[0].copy() + offsets = RANDOM_NUMBER_GENERATOR.uniform(-6, 6, len(obs_df)) + obs_df["start_timestamp"] = obs_df["start_timestamp"] + pd.to_timedelta(offsets, unit="s") + obs_df["sim_index"] = 0 + obs_df["lable"] = "Completed" + + visits = pd.concat([obs_df] + sim_dfs, ignore_index=True) + return visits, offsets, hpid_band_df + + class TestComputeObsSimOffsets(unittest.TestCase): def setUp(self): self.num_visits_per_hpid_band = 5 self.num_hpid_band = 3 self.num_sims = 3 - - # Generate a pandas.DataFrame, hpid_band_df, of length - # self.num_hpid_band, - # with fieldRA, fieldDec, band, and fieldHpid corresponding to - # fieldRA and fieldDec with healpy nside 32 - # fieldRA and fieldDec should be randomly but repeatably generate. - # band should be randomly but repeatably selected from u, g, r, i, z, y - nside = 32 - hpid_band_df = pd.DataFrame() - hpid_band_df["fieldHpid"] = RANDOM_NUMBER_GENERATOR.integers( - 0, hp.nside2npix(nside), size=self.num_hpid_band + self.visits, self.offsets, self.hpid_band_df = _generate_test_visits( + self.num_visits_per_hpid_band, self.num_hpid_band, self.num_sims ) - hpid_band_df["fieldRA"], hpid_band_df["fieldDec"] = hp.pix2ang( - nside, hpid_band_df["fieldHpid"], lonlat=True - ) - - # Generate random bands from u, g, r, i, z, y - bands = ["u", "g", "r", "i", "z", "y"] - hpid_band_df["band"] = RANDOM_NUMBER_GENERATOR.choice(bands, size=self.num_hpid_band) - - self.hpid_band_df = hpid_band_df - - # Generate a set of simulations - sim_dfs = [] - start_time = pd.Timestamp("2027-01-01 00:00:00", tz="UTC") - for sim_index in range(1, self.num_sims + 1): - sim_df = pd.DataFrame() - visit_data = [] - start_timestamp = start_time - - # in this simulation, observe each hpid_band - for _, hpid_band in self.hpid_band_df.iterrows(): - for time_diff in RANDOM_NUMBER_GENERATOR.uniform(500, 600, self.num_visits_per_hpid_band): - start_timestamp = start_timestamp + pd.Timedelta(seconds=time_diff) - visit_data.append( - { - "sim_index": sim_index, - "start_timestamp": start_timestamp, - "fieldRA": hpid_band["fieldRA"], - "fieldDec": hpid_band["fieldDec"], - "band": hpid_band["band"], - "fieldHpid": hpid_band["fieldHpid"], - } - ) - - sim_df = pd.DataFrame(visit_data) - sim_dfs.append(sim_df) - - # Generate an obs_df with is similar to the first sim, but with offsets - obs_df = sim_dfs[0].copy() - self.offsets = RANDOM_NUMBER_GENERATOR.uniform(-6, 6, len(obs_df)) - obs_df["start_timestamp"] = obs_df["start_timestamp"] + pd.to_timedelta(self.offsets, unit="s") - obs_df["sim_index"] = 0 - - self.visits = pd.concat([obs_df] + sim_dfs, ignore_index=True) def test_compute_obs_sim_offsets_basic(self): # Test basic functionality of compute_obs_sim_offsets @@ -250,5 +261,25 @@ def test_compute_obs_sim_offsets_basic(self): assert set(sim_indexes) == set(range(self.num_sims + 1)) - set([0]) +class TestComputeOffsetStats(unittest.TestCase): + + def setUp(self): + self.num_visits_per_hpid_band = 5 + self.num_hpid_band = 3 + self.num_sims = 3 + self.visits, self.offset_dt, self.hpid_band_df = _generate_test_visits( + self.num_visits_per_hpid_band, self.num_hpid_band, self.num_sims + ) + self.offsets = compute_obs_sim_offsets(self.visits, obs_index=0) + + def test_compute_obs_sim_offsets_basic(self): + offset_stats = compute_offset_stats(self.offsets, self.visits) + assert len(offset_stats) == self.num_sims + assert offset_stats.loc[1, "sim count"] == self.num_visits_per_hpid_band * self.num_hpid_band + offset_dt_stats = pd.Series(self.offset_dt).describe() + for col in offset_dt_stats.index[1:]: + self.assertAlmostEqual(offset_dt_stats[col], offset_stats.loc[1, col], 5) + + if __name__ == "__main__": unittest.main() From d3d0b2918eea6ba067801fa33c97fab345cfce3a Mon Sep 17 00:00:00 2001 From: "Eric H. Neilsen, Jr." Date: Thu, 12 Mar 2026 12:36:07 -0500 Subject: [PATCH 11/19] Fix future warning --- schedview/compute/comparesim.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/schedview/compute/comparesim.py b/schedview/compute/comparesim.py index 78363f70..30e50874 100644 --- a/schedview/compute/comparesim.py +++ b/schedview/compute/comparesim.py @@ -181,7 +181,8 @@ def compute_obs_sim_offsets( sim_indexes = sim_indexes[sim_indexes != obs_index] offsets = pd.concat( - visits.groupby(["fieldHpid", "band"]).apply(partial(offsets_of_coord_band, i)) for i in sim_indexes + visits.groupby(["fieldHpid", "band"]).apply(partial(offsets_of_coord_band, i), include_groups=False) + for i in sim_indexes ) offsets.index = offsets.index.reorder_levels(["sim_index", "fieldHpid", "band"]) return offsets From 37a1937dbdc37f2f9bdba80bf79fbb333111e95d Mon Sep 17 00:00:00 2001 From: "Eric H. Neilsen, Jr." Date: Thu, 12 Mar 2026 12:45:52 -0500 Subject: [PATCH 12/19] quiet performance warning --- tests/test_compute_comparesim.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/test_compute_comparesim.py b/tests/test_compute_comparesim.py index dc55042e..5247735f 100644 --- a/tests/test_compute_comparesim.py +++ b/tests/test_compute_comparesim.py @@ -249,7 +249,9 @@ def test_compute_obs_sim_offsets_basic(self): # Test that we recover offsets np.testing.assert_array_almost_equal( - result.sort_values("sim_time").loc[1, "delta"].values, self.offsets, decimal=4 + result.loc[1, :].sort_values("sim_time").loc[:, "delta"].values, + self.offsets, + decimal=4, ) # Should have a multiindex with sim_index, fieldHpid, and band From bba2d34760552aa4b80175a46bd0b3b30573d83c Mon Sep 17 00:00:00 2001 From: "Eric H. Neilsen, Jr." Date: Thu, 12 Mar 2026 12:54:46 -0500 Subject: [PATCH 13/19] Add docstring to compute_common_fractions --- schedview/compute/comparesim.py | 45 +++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) diff --git a/schedview/compute/comparesim.py b/schedview/compute/comparesim.py index 30e50874..29c07770 100644 --- a/schedview/compute/comparesim.py +++ b/schedview/compute/comparesim.py @@ -246,6 +246,51 @@ def compute_offset_stats( def compute_common_fractions(visit_counts: pd.DataFrame, sim_labels: pd.Series, obs_index: int = 0): + """ + Compute fractions of visits in common between observed & simulated visits. + + Parameters + ---------- + visit_counts : `pd.DataFrame` + A table of visit counts by field and simulation, where each row + corresponds to a field and each column corresponds to a simulation. + The values are integers representing the number of visits to each field + in each simulation. (Such a ``pd.DataFrame`` is returned by + ``schedview.compute.multisim.count_visits_by_sim``.) + sim_labels : `pd.Series` + A Series with simulation indices as the index and labels as values. + This is used to label the returned DataFrame. + obs_index : `int`, optional + The index of the observation (completed visits) "simulation." This + is used as the reference simulation for computing fractions. + Default is 0. + + Returns + ------- + fraction_completed : `pd.DataFrame` + A DataFrame with the following columns: + + ``frac_obs_sim_num``: The fraction of occurrences of coordinate/band + combinations observed that were also observed in the simulation + (count-based). + ``frac_sim_obs_num``: The fraction of occurrences of coordinate/band + combinations simulated that were observed (count-based). + ``frac_obs_sim``: The fraction of coordinate/band combinations + observed at least once that were also observed at least once in + the simulation (occurrence-based). + ``frac_sim_obs``: The fraction of coordinate/band combinations + simulated at least once that were observed at least once + (occurrence-based). + + The index is the simulation indices (excluding ``obs_index``). + + Notes + ----- + This function calculates several fractions comparing observations (from + the simulation identified by ``obs_index``) with simulations (identified + by other simulation indices). The fractions are computed using the + ``fraction_common`` function from ``schedview.compute.multisim``. + """ fraction_obs_simulated = partial(fraction_common, visit_counts, sim2=obs_index, match_count=False) fraction_sim_observed = partial(fraction_common, visit_counts, obs_index, match_count=False) fraction_obs_simulated_num = partial(fraction_common, visit_counts, sim2=obs_index, match_count=True) From aed3780f85b2529868d095a0af21b7045e19e28f Mon Sep 17 00:00:00 2001 From: "Eric H. Neilsen, Jr." Date: Thu, 12 Mar 2026 13:36:44 -0500 Subject: [PATCH 14/19] Add tests for compute_common_fractions --- tests/test_compute_comparesim.py | 49 ++++++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) diff --git a/tests/test_compute_comparesim.py b/tests/test_compute_comparesim.py index 5247735f..b4f2c58a 100644 --- a/tests/test_compute_comparesim.py +++ b/tests/test_compute_comparesim.py @@ -8,6 +8,7 @@ assign_field_hpids, compute_obs_sim_offsets, compute_offset_stats, + compute_common_fractions, offsets_of_coord_band, ) @@ -283,5 +284,53 @@ def test_compute_obs_sim_offsets_basic(self): self.assertAlmostEqual(offset_dt_stats[col], offset_stats.loc[1, col], 5) +class TestComputeCommonFractions(unittest.TestCase): + + def setUp(self): + # Create a simple test visit counts DataFrame + # This simulates what would come from count_visits_by_sim + + self.visit_counts = pd.DataFrame( + { + "fieldHpid": [100, 101, 102, 103, 100, 101, 102, 102], + "band": ["u", "u", "u", "u", "g", "g", "g", "g"], + 0: [1, 1, 1, 1, 2, 2, 0, 0], + 1: [1, 1, 1, 1, 2, 2, 0, 0], + 2: [2, 2, 1, 1, 2, 2, 0, 0], + 3: [1, 1, 1, 1, 1, 1, 1, 1], + } + ).set_index(["fieldHpid", "band"]) + + self.num_sims = len(self.visit_counts.columns) - 1 + + # Create sim_labels Series + self.sim_labels = pd.Series( + ["Completed", "Sim 1", "Sim 2", "Sim 3"], index=[0, 1, 2, 3], name="label" + ) + + def test_compute_common_fractions(self): + """Test basic functionality of compute_common_fractions.""" + result = compute_common_fractions(self.visit_counts, self.sim_labels, obs_index=0) + + # Should return a DataFrame + self.assertIsInstance(result, pd.DataFrame) + + # Should have the expected columns + expected_columns = ["label", "frac_obs_sim_num", "frac_sim_obs_num", "frac_obs_sim", "frac_sim_obs"] + self.assertListEqual(list(result.columns), expected_columns) + + # Make sure we have one row per sim + self.assertListEqual(list(result.index), (1 + np.arange(self.num_sims)).tolist()) + + # Check that all values are between 0 and 1 + fraction_columns = ["frac_obs_sim_num", "frac_sim_obs_num", "frac_obs_sim", "frac_sim_obs"] + for col in fraction_columns: + assert (np.all(result[col]) <= 1) and (np.all(result[col] >= 0)) + + np.testing.assert_array_equal(result.loc[1, fraction_columns], [1.0, 1.0, 1.0, 1.0]) + np.testing.assert_array_equal(result.loc[2, fraction_columns], [1.0, 0.8, 1.0, 1.0]) + np.testing.assert_array_equal(result.loc[3, fraction_columns], [0.75, 0.75, 1.0, 0.75]) + + if __name__ == "__main__": unittest.main() From 8c6de5e829ba9a2efe16accca5bcf6a27fb80ad0 Mon Sep 17 00:00:00 2001 From: "Eric H. Neilsen, Jr." Date: Thu, 12 Mar 2026 14:00:09 -0500 Subject: [PATCH 15/19] Add plot_obs_vs_time test --- schedview/compute/comparesim.py | 5 ++- tests/test_plot_comparesim.py | 55 +++++++++++++++++++++++++++++++++ 2 files changed, 59 insertions(+), 1 deletion(-) create mode 100644 tests/test_plot_comparesim.py diff --git a/schedview/compute/comparesim.py b/schedview/compute/comparesim.py index 29c07770..2e65bb1e 100644 --- a/schedview/compute/comparesim.py +++ b/schedview/compute/comparesim.py @@ -257,7 +257,7 @@ def compute_common_fractions(visit_counts: pd.DataFrame, sim_labels: pd.Series, The values are integers representing the number of visits to each field in each simulation. (Such a ``pd.DataFrame`` is returned by ``schedview.compute.multisim.count_visits_by_sim``.) - sim_labels : `pd.Series` + sim_labels : `pd.Series` or `pd.DataFrame` A Series with simulation indices as the index and labels as values. This is used to label the returned DataFrame. obs_index : `int`, optional @@ -291,6 +291,9 @@ def compute_common_fractions(visit_counts: pd.DataFrame, sim_labels: pd.Series, by other simulation indices). The fractions are computed using the ``fraction_common`` function from ``schedview.compute.multisim``. """ + if isinstance(sim_labels, pd.Series): + sim_labels = sim_labels.to_frame() + fraction_obs_simulated = partial(fraction_common, visit_counts, sim2=obs_index, match_count=False) fraction_sim_observed = partial(fraction_common, visit_counts, obs_index, match_count=False) fraction_obs_simulated_num = partial(fraction_common, visit_counts, sim2=obs_index, match_count=True) diff --git a/tests/test_plot_comparesim.py b/tests/test_plot_comparesim.py new file mode 100644 index 00000000..99732f23 --- /dev/null +++ b/tests/test_plot_comparesim.py @@ -0,0 +1,55 @@ +from datetime import datetime +from unittest import TestCase + +import bokeh.models +import bokeh.plotting +import pandas as pd + +import schedview.plot.comparesim + + +class TestPlotCompareSim(TestCase): + + def setUp(self): + # Create test data that mimics what would come from + # compute_obs_sim_offsets + # Do not worry about making delta correspond to obs and sim times, + # because it does not matter here. + obs_times = [datetime(2030, 1, 1, hr, 0, 0) for hr in range(6)] + sim_times = [datetime(2030, 1, 1, hr, 15, 0) for hr in range(6)] + self.test_offsets = pd.DataFrame( + { + "obs_time": obs_times, + "sim_time": sim_times, + "sim_index": [0, 0, 0, 1, 1, 1], + "label": ["Completed", "Completed", "Completed", "Sim 1", "Sim 1", "Sim 1"], + "fieldRA": [10.0, 20.0, 30.0, 10.0, 20.0, 30.0], + "fieldDec": [0.0, 10.0, 20.0, 0.0, 10.0, 20.0], + "band": ["u", "g", "r", "u", "g", "r"], + "delta": [0, 0, 0, 0, 0, 0], + } + ).set_index(["sim_index", "fieldRA", "fieldDec"]) + + def test_plot_obs_vs_sim_time_basic(self): + """Test basic functionality of plot_obs_vs_sim_time.""" + tooltips = [ + ("Observation Time", "@obs_time{%F %H:%M}"), + ("Simulation Time", "@sim_time{%F %H:%M}"), + ("Band", "@band"), + ] + + result = schedview.plot.comparesim.plot_obs_vs_sim_time(self.test_offsets, tooltips) + + # Check that first child is a Select widget (sim selector) + self.assertIsInstance(result.children[0], bokeh.models.Select) + + # Check that second child is a bokeh figure + plot = result.children[1] + self.assertIsInstance(plot, bokeh.plotting.figure) + + # Check that tooltips were properly set up + self.assertGreater(len(plot.tools), 0) + + # Check that there are data points + source = plot.renderers[0].data_source + self.assertGreater(len(source.data["obs_time"]), 0) From 85c963f2d92f95ee2c027f2578a4df3e0bd3c608 Mon Sep 17 00:00:00 2001 From: "Eric H. Neilsen, Jr." Date: Thu, 12 Mar 2026 15:14:18 -0500 Subject: [PATCH 16/19] isort --- tests/test_compute_comparesim.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_compute_comparesim.py b/tests/test_compute_comparesim.py index b4f2c58a..a607d202 100644 --- a/tests/test_compute_comparesim.py +++ b/tests/test_compute_comparesim.py @@ -6,9 +6,9 @@ from schedview.compute.comparesim import ( assign_field_hpids, + compute_common_fractions, compute_obs_sim_offsets, compute_offset_stats, - compute_common_fractions, offsets_of_coord_band, ) From 24b021f32524d97a8241fe849b4b0dc859e78205 Mon Sep 17 00:00:00 2001 From: Eric Neilsen Date: Thu, 19 Mar 2026 14:20:15 -0700 Subject: [PATCH 17/19] remove compute_common_fraction --- schedview/compute/__init__.py | 2 - schedview/compute/comparesim.py | 65 +------------------------------- tests/test_compute_comparesim.py | 24 ------------ 3 files changed, 1 insertion(+), 90 deletions(-) diff --git a/schedview/compute/__init__.py b/schedview/compute/__init__.py index a8e7281c..bc78c144 100644 --- a/schedview/compute/__init__.py +++ b/schedview/compute/__init__.py @@ -10,7 +10,6 @@ "make_unique_survey_name", "make_scheduler_summary_df", "make_survey_reward_df", - "compute_common_fractions", "compute_maps", "compute_metric_by_visit", "compute_hpix_metric_in_bands", @@ -32,7 +31,6 @@ from .camera import LsstCameraFootprintPerimeter from .comparesim import ( assign_field_hpids, - compute_common_fractions, compute_obs_sim_offsets, compute_offset_stats, offsets_of_coord_band, diff --git a/schedview/compute/comparesim.py b/schedview/compute/comparesim.py index 2e65bb1e..aa0a4e32 100644 --- a/schedview/compute/comparesim.py +++ b/schedview/compute/comparesim.py @@ -7,7 +7,7 @@ import pandas as pd from astropy.coordinates import Angle, SkyCoord -from .multisim import fraction_common, match_visits_across_sims +from .multisim import match_visits_across_sims def assign_field_hpids( @@ -243,66 +243,3 @@ def compute_offset_stats( offset_stats.insert(0, "label", visits.groupby("sim_index")["label"].first().to_frame()) return offset_stats - - -def compute_common_fractions(visit_counts: pd.DataFrame, sim_labels: pd.Series, obs_index: int = 0): - """ - Compute fractions of visits in common between observed & simulated visits. - - Parameters - ---------- - visit_counts : `pd.DataFrame` - A table of visit counts by field and simulation, where each row - corresponds to a field and each column corresponds to a simulation. - The values are integers representing the number of visits to each field - in each simulation. (Such a ``pd.DataFrame`` is returned by - ``schedview.compute.multisim.count_visits_by_sim``.) - sim_labels : `pd.Series` or `pd.DataFrame` - A Series with simulation indices as the index and labels as values. - This is used to label the returned DataFrame. - obs_index : `int`, optional - The index of the observation (completed visits) "simulation." This - is used as the reference simulation for computing fractions. - Default is 0. - - Returns - ------- - fraction_completed : `pd.DataFrame` - A DataFrame with the following columns: - - ``frac_obs_sim_num``: The fraction of occurrences of coordinate/band - combinations observed that were also observed in the simulation - (count-based). - ``frac_sim_obs_num``: The fraction of occurrences of coordinate/band - combinations simulated that were observed (count-based). - ``frac_obs_sim``: The fraction of coordinate/band combinations - observed at least once that were also observed at least once in - the simulation (occurrence-based). - ``frac_sim_obs``: The fraction of coordinate/band combinations - simulated at least once that were observed at least once - (occurrence-based). - - The index is the simulation indices (excluding ``obs_index``). - - Notes - ----- - This function calculates several fractions comparing observations (from - the simulation identified by ``obs_index``) with simulations (identified - by other simulation indices). The fractions are computed using the - ``fraction_common`` function from ``schedview.compute.multisim``. - """ - if isinstance(sim_labels, pd.Series): - sim_labels = sim_labels.to_frame() - - fraction_obs_simulated = partial(fraction_common, visit_counts, sim2=obs_index, match_count=False) - fraction_sim_observed = partial(fraction_common, visit_counts, obs_index, match_count=False) - fraction_obs_simulated_num = partial(fraction_common, visit_counts, sim2=obs_index, match_count=True) - fraction_sim_observed_num = partial(fraction_common, visit_counts, obs_index, match_count=True) - - sim_indexes = pd.Series(sim_labels.index.values[sim_labels.index.values != obs_index]) - fraction_completed = sim_labels.loc[sim_indexes, :].copy() - fraction_completed["frac_obs_sim_num"] = sim_indexes.apply(fraction_obs_simulated_num).round(2).values - fraction_completed["frac_sim_obs_num"] = sim_indexes.apply(fraction_sim_observed_num).round(2).values - fraction_completed["frac_obs_sim"] = sim_indexes.apply(fraction_obs_simulated).round(2).values - fraction_completed["frac_sim_obs"] = sim_indexes.apply(fraction_sim_observed).round(2).values - return fraction_completed diff --git a/tests/test_compute_comparesim.py b/tests/test_compute_comparesim.py index a607d202..a83b62b4 100644 --- a/tests/test_compute_comparesim.py +++ b/tests/test_compute_comparesim.py @@ -6,7 +6,6 @@ from schedview.compute.comparesim import ( assign_field_hpids, - compute_common_fractions, compute_obs_sim_offsets, compute_offset_stats, offsets_of_coord_band, @@ -308,29 +307,6 @@ def setUp(self): ["Completed", "Sim 1", "Sim 2", "Sim 3"], index=[0, 1, 2, 3], name="label" ) - def test_compute_common_fractions(self): - """Test basic functionality of compute_common_fractions.""" - result = compute_common_fractions(self.visit_counts, self.sim_labels, obs_index=0) - - # Should return a DataFrame - self.assertIsInstance(result, pd.DataFrame) - - # Should have the expected columns - expected_columns = ["label", "frac_obs_sim_num", "frac_sim_obs_num", "frac_obs_sim", "frac_sim_obs"] - self.assertListEqual(list(result.columns), expected_columns) - - # Make sure we have one row per sim - self.assertListEqual(list(result.index), (1 + np.arange(self.num_sims)).tolist()) - - # Check that all values are between 0 and 1 - fraction_columns = ["frac_obs_sim_num", "frac_sim_obs_num", "frac_obs_sim", "frac_sim_obs"] - for col in fraction_columns: - assert (np.all(result[col]) <= 1) and (np.all(result[col] >= 0)) - - np.testing.assert_array_equal(result.loc[1, fraction_columns], [1.0, 1.0, 1.0, 1.0]) - np.testing.assert_array_equal(result.loc[2, fraction_columns], [1.0, 0.8, 1.0, 1.0]) - np.testing.assert_array_equal(result.loc[3, fraction_columns], [0.75, 0.75, 1.0, 0.75]) - if __name__ == "__main__": unittest.main() From 486583f26c173e277a82600a9c46d683f1a55bda Mon Sep 17 00:00:00 2001 From: Eric Neilsen Date: Thu, 19 Mar 2026 14:36:20 -0700 Subject: [PATCH 18/19] Add default_sim option to plot_visit_param_vs_time --- schedview/plot/visits.py | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/schedview/plot/visits.py b/schedview/plot/visits.py index d4cc9c42..671aa052 100644 --- a/schedview/plot/visits.py +++ b/schedview/plot/visits.py @@ -176,6 +176,7 @@ def plot_visit_param_vs_time( offered_columns: Iterable[str] = tuple(), num_plots: int = 1, user_figure_kwargs: dict | None = None, + default_sim: str = "All", **kwargs, ) -> bokeh.models.ui.ui_element.UIElement: """Plot a column in the visit table vs. time. @@ -200,8 +201,10 @@ def plot_visit_param_vs_time( A list of columns to be offered in the dropdown selector. num_plots: int Number of parallel timelines.. - user_figure_kwargs: dict on None + user_figure_kwargs: dict or None Arguments passed along to bokeh.plotting.figure + default_sim: str + Label of default sim to show (defaults to ``All``) **kwargs Additional keyword arguments to be passed to `bokeh.plotting.figure.scatter`. @@ -251,7 +254,12 @@ def plot_visit_param_vs_time( source.data["label"] = np.full_like(source.data["start_timestamp"], "") if "sim_alpha" not in source.data: - source.data["sim_alpha"] = np.full_like(source.data["start_timestamp"], 0.2, dtype=np.float64) + if default_sim == "All": + source.data["sim_alpha"] = np.full_like(source.data["start_timestamp"], 0.2, dtype=np.float64) + else: + source.data["sim_alpha"] = np.full_like(source.data["start_timestamp"], 0.0, dtype=np.float64) + source.data["sim_alpha"][source.data["label"] == default_sim] = 0.2 + source.data["sim_alpha"][source.data["label"] == "Completed"] = 1.0 scatter_kwargs = {"fill_alpha": 0.0, "line_alpha": "sim_alpha", "name": "timeline"} @@ -404,7 +412,8 @@ def plot_visit_param_vs_time( if "label" not in source.column_names: raise ValueError("A sim selector needs the label column") options = ["All"] + list(o for o in np.unique(source.data["label"]) if o != "Completed") - default_sim = "All" + if default_sim not in options: + raise ValueError(f"Default sim must be one of {options}") sim_selector = bokeh.models.Select(value=default_sim, options=options, name="simselect") sim_selector_callback = bokeh.models.CustomJS( From ba6c1eb9f81a6089af635db9138bbdec0497c334 Mon Sep 17 00:00:00 2001 From: Eric Neilsen Date: Wed, 25 Mar 2026 10:52:49 -0700 Subject: [PATCH 19/19] Minor updates for PR comments --- schedview/compute/comparesim.py | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/schedview/compute/comparesim.py b/schedview/compute/comparesim.py index aa0a4e32..319af53c 100644 --- a/schedview/compute/comparesim.py +++ b/schedview/compute/comparesim.py @@ -13,8 +13,8 @@ def assign_field_hpids( simulated_visits: pd.DataFrame, completed_visits: pd.DataFrame, - nside: int, - coord_match_tolerance_deg: float, + nside: int = 262144, + coord_match_tolerance_deg: float = 0.002277777778, *, ra_col: str = "fieldRA", decl_col: str = "fieldDec", @@ -29,20 +29,29 @@ def assign_field_hpids( Parameters ---------- simulated_visits : `pd.DataFrame` - Simulated visit table; must contain columns identified by ``ra_col`` + Simulated visit table containing visits from all simulations to be + compared. It must contain columns identified by ``ra_col`` and ``dec_col`` (both in degrees). An integer column named - ``hpid_col`` will be added or overwritten. + ``hpid_col`` will be added or overwritten. Pointings in this + ``DataFrame`` are the reference pointings to which pointings from + completed visits will be matched. completed_visits : `pd.DataFrame` Completed visit table; same column requirements as - ``simulated_visits``. + ``simulated_visits``. If ``inplace`` is ``True`` and the column + designated by ``hpid_col`` already exists, values in that column + will be replaced. field_hpix_nside : `int` - HEALPix nside that defines the pixel resolution. + HEALPix nside that defines the pixel resolution. Defaults to + nside=2**18 (about 0.8 arcseconds), so pointings with significantly + different ditherings are considered distinct. field_coord_tolerance_deg : `float` Maximum angular separation (deg) for a completed visit to be considered - a match to a simulated healpix center. + a match to a simulated healpix center. Defaults to 0.002277777778, + or about 10 arcsecords, which is large enough to match across observed + pointing offsets, but not usually across different dithers. ra_col : str, optional, default ``"fieldRA"`` Column name for right‑ascension values (degrees).