diff --git a/schedview/compute/__init__.py b/schedview/compute/__init__.py index 185ebcba..bc78c144 100644 --- a/schedview/compute/__init__.py +++ b/schedview/compute/__init__.py @@ -16,7 +16,11 @@ "compute_scalar_metric_at_one_mjd", "compute_scalar_metric_at_mjds", "compute_mixed_scalar_metric", + "compute_obs_sim_offsets", + "compute_offset_stats", + "offsets_of_coord_band", "often_repeated_fields", + "assign_field_hpids", "count_visits_by_sim", "match_visits_across_sims", "compute_matched_visit_delta_statistics", @@ -25,6 +29,12 @@ from .astro import compute_sun_moon_positions, convert_evening_date_to_night_of_survey, night_events from .camera import LsstCameraFootprintPerimeter +from .comparesim import ( + assign_field_hpids, + compute_obs_sim_offsets, + compute_offset_stats, + offsets_of_coord_band, +) from .multisim import ( compute_matched_visit_delta_statistics, count_visits_by_sim, diff --git a/schedview/compute/comparesim.py b/schedview/compute/comparesim.py new file mode 100644 index 00000000..319af53c --- /dev/null +++ b/schedview/compute/comparesim.py @@ -0,0 +1,254 @@ +from functools import partial +from typing import Optional, Tuple + +import astropy.units as u +import healpy as hp +import numpy as np +import pandas as pd +from astropy.coordinates import Angle, SkyCoord + +from .multisim import match_visits_across_sims + + +def assign_field_hpids( + simulated_visits: pd.DataFrame, + completed_visits: pd.DataFrame, + nside: int = 262144, + coord_match_tolerance_deg: float = 0.002277777778, + *, + ra_col: str = "fieldRA", + decl_col: str = "fieldDec", + hpid_col: str = "fieldHpid", + inplace: bool = False, +) -> Tuple[pd.DataFrame, pd.DataFrame]: + """ + Populate an id column for simulated and completed visits and + propagate the id from the nearest simulated healpix to completed + visits that lie within a specified angular tolerance. + + Parameters + ---------- + simulated_visits : `pd.DataFrame` + Simulated visit table containing visits from all simulations to be + compared. It must contain columns identified by ``ra_col`` + and ``dec_col`` (both in degrees). An integer column named + ``hpid_col`` will be added or overwritten. Pointings in this + ``DataFrame`` are the reference pointings to which pointings from + completed visits will be matched. + + completed_visits : `pd.DataFrame` + Completed visit table; same column requirements as + ``simulated_visits``. If ``inplace`` is ``True`` and the column + designated by ``hpid_col`` already exists, values in that column + will be replaced. + + field_hpix_nside : `int` + HEALPix nside that defines the pixel resolution. Defaults to + nside=2**18 (about 0.8 arcseconds), so pointings with significantly + different ditherings are considered distinct. + + field_coord_tolerance_deg : `float` + Maximum angular separation (deg) for a completed visit to be considered + a match to a simulated healpix center. Defaults to 0.002277777778, + or about 10 arcsecords, which is large enough to match across observed + pointing offsets, but not usually across different dithers. + + ra_col : str, optional, default ``"fieldRA"`` + Column name for right‑ascension values (degrees). + + dec_col : str, optional, default ``"fieldDec"`` + Column name for declination values (degrees). + + hpid_col : str, optional, default ``"fieldHpid"`` + Column name that will store the healpix index. + + inplace : bool, optional, default ``False`` + If ``True`` the input DataFrames are modified in place. If ``False`` + (default) shallow copies are made so the originals stay untouched. + + Returns + ------- + result : Tuple[pd.DataFrame, pd.DataFrame] + ``(simulated_visits_out, completed_visits_out)`` the (possibly copied) + DataFrames with the ``hpid_col`` column populated. + """ + + if not inplace: + simulated_visits = simulated_visits.copy() + completed_visits = completed_visits.copy() + + simulated_visits[hpid_col] = hp.ang2pix( + nside=nside, + theta=simulated_visits[ra_col], + phi=simulated_visits[decl_col], + lonlat=True, + ) + + if completed_visits.empty: + # If completed_visits is empty, all we have to do is create the column + # in the empty DataFrame and we are done. + completed_visits[hpid_col] = [] + completed_visits[hpid_col] = completed_visits[hpid_col].astype(int) + else: + # Values assigned here will be replaced by closest matches + # in simulated_visit[hpid_col] if there are any within + # field_coord_tolerance_deg + completed_visits[hpid_col] = hp.ang2pix( + nside=nside, + theta=completed_visits[ra_col], + phi=completed_visits[decl_col], + lonlat=True, + ) + + # Find all hpids actually used in the simulation. + sim_hpids = simulated_visits[hpid_col].unique() + sim_hp_ra, sim_hp_dec = hp.pix2ang(nside=nside, ipix=sim_hpids, lonlat=True) + sim_hp_coords = SkyCoord(ra=sim_hp_ra * u.deg, dec=sim_hp_dec * u.deg, frame="icrs") + + # Match completed visits to the nearest hpid used in simulations. + completed_coords = SkyCoord( + ra=completed_visits[ra_col].values * u.deg, + dec=completed_visits[decl_col].values * u.deg, + frame="icrs", + ) + sim_hp_match_idx, sim_hp_sep, _ = completed_coords.match_to_catalog_sky(sim_hp_coords) + + # Apply tolerance mask and replace the healpix id where appropriate. + within_tol = sim_hp_sep.deg < coord_match_tolerance_deg + completed_visits.loc[within_tol, hpid_col] = sim_hpids[sim_hp_match_idx[within_tol]] + + return simulated_visits, completed_visits + + +def offsets_of_coord_band(sim_index: int, visits: pd.DataFrame, obs_index: int = 0) -> pd.DataFrame: + """ + Compute the time offset between a set of observations and a + single simulated visit ``sim_index`` for a given (fieldHpid, band) + coordinate pair. + + Parameters + ---------- + sim_index : `int` + The simulation index to compare against the observation (index 0). + visits : `pd.DataFrame` + Table of visits that contains at least the columns ``sim_index`` and + ``start_timestamp``. + + Returns + ------- + offsets: `pd.DataFrame` + A DataFrame with columns ``obs_time``, ``sim_time`` and ``delta`` and + an index level ``sim_index``. + """ + # This function is intended to be run on a DataFrame on a single + # field/band combination, not on the whole set of visits, e.g. for a night. + for col in ("band", "fieldHpid"): + if col in visits.columns: + assert len(visits[col].unique()) == 1 + + offsets = match_visits_across_sims(visits.set_index("sim_index").start_timestamp, (obs_index, sim_index)) + + # Normalize the order and sense of the offset + if offsets.columns[0] == obs_index: + offsets["delta"] = -1 * offsets["delta"] + else: + offsets = offsets[[obs_index, sim_index, "delta"]] + + assert offsets.columns[0] == obs_index + assert offsets.columns[1] == sim_index + assert offsets.columns[2] == "delta" + assert len(offsets.columns) == 3 + offsets.columns = ["obs_time", "sim_time", "delta"] + offsets["sim_index"] = sim_index + offsets = offsets.set_index("sim_index") + + return offsets + + +def compute_obs_sim_offsets( + visits: pd.DataFrame, + obs_index: int = 0, +) -> pd.DataFrame: + """ + Build a table of offsets for all simulated/completed pairs of visits. + + Parameters + ---------- + visits : `pd.DataFrame` + Table of visits with at least the columns ``sim_index``, + ``fieldHpid`` and ``band``. + obs_index : `int`, optional + ``sim_index`` value for completed (observed) visits (default = 0). + + Returns + ------- + sim_offsets : `pd.DataFrame` + Offsets for every simulated visit, indexed by ``sim_index``, + ``fieldHpid`` and ``band``. + """ + sim_indexes = visits.sim_index.unique() + sim_indexes = sim_indexes[sim_indexes != obs_index] + + offsets = pd.concat( + visits.groupby(["fieldHpid", "band"]).apply(partial(offsets_of_coord_band, i), include_groups=False) + for i in sim_indexes + ) + offsets.index = offsets.index.reorder_levels(["sim_index", "fieldHpid", "band"]) + return offsets + + +def compute_offset_stats( + offsets: pd.DataFrame, + visits: Optional[pd.DataFrame] = None, + hhmmss: bool = False, +) -> pd.DataFrame: + """ + Produce a summary table of time offsets between completed and simulated + visits. + + Parameters + ---------- + offsets : `pd.DataFrame` + Result of :func:`compute_obs_sim_offsets`. Must contain a ``delta`` + column and a ``sim_index`` level in the index. + visits : `pd.DataFrame`, optional + The original visits table, required only if observation counts or + labels should be included in the returned ``DataFrame``. + hhmmss : `bool`, optional + If ``True``, convert the numeric statistics (mean, std, etc.) from + seconds to an ``HH:MM:SS`` string representation. + + Returns + ------- + offset_stats : `pd.DataFrame` + A table where each row corresponds to a ``sim_index`` and columns + include match counts, MAD, and the usual descriptive statistics. + """ + abs_delta = np.abs(offsets["delta"]) + abs_delta.name = "abs_delta" + + offset_stats = offsets.groupby("sim_index")["delta"].describe() + offset_stats.insert(0, "match count", offset_stats["count"].astype(int)) + offset_stats.insert(1, "MAD", abs_delta.to_frame().groupby("sim_index")["abs_delta"].median()) + + if visits is not None: + visit_counts = visits.groupby("sim_index").agg({"label": "count"}) + offset_stats.insert( + 0, "obs count", np.full_like(offset_stats["count"], visit_counts.loc[0]).astype(int) + ) + offset_stats.insert(1, "sim count", visit_counts) + offset_stats.insert(3, "#match/#obs", (offset_stats["count"] / offset_stats["obs count"]).round(2)) + offset_stats.insert(4, "#match/#sim", (offset_stats["count"] / offset_stats["sim count"]).round(2)) + + offset_stats.drop(columns="count", inplace=True) + + if hhmmss: + for column in ["MAD", "mean", "std", "min", "25%", "50%", "75%", "max"]: + offset_stats.loc[:, column] = Angle( + (offset_stats.loc[:, column].astype(int).values / 3600) * u.hour + ).to_string(unit=u.hour, sep=":") + + if visits is not None and "label" in visits.columns: + offset_stats.insert(0, "label", visits.groupby("sim_index")["label"].first().to_frame()) + + return offset_stats diff --git a/schedview/plot/__init__.py b/schedview/plot/__init__.py index 60025125..fc37d39f 100644 --- a/schedview/plot/__init__.py +++ b/schedview/plot/__init__.py @@ -3,6 +3,7 @@ "plot_infeasible", "plot_airmass_vs_time", "plot_alt_vs_time", + "plot_obs_vs_sim_time", "plot_polar_alt_az", "plot_survey_rewards", "create_survey_reward_plot", @@ -42,6 +43,7 @@ from .cadence import create_cadence_plot from .colors import LIGHT_EXTRA_COLORS, LIGHT_PLOT_BAND_COLORS, PLOT_BAND_COLORS, make_band_cmap +from .comparesim import plot_obs_vs_sim_time from .multisim import generate_sim_indicators, overplot_kernel_density_estimates from .nightbf import plot_infeasible, plot_rewards from .nightly import plot_airmass_vs_time, plot_alt_vs_time, plot_polar_alt_az diff --git a/schedview/plot/comparesim.py b/schedview/plot/comparesim.py new file mode 100644 index 00000000..c4115df7 --- /dev/null +++ b/schedview/plot/comparesim.py @@ -0,0 +1,87 @@ +from typing import List, Optional, Tuple + +import bokeh +import bokeh.layouts +import bokeh.models +import bokeh.plotting +import numpy as np +import pandas as pd + +from schedview.plot.colors import make_band_cmap + + +def plot_obs_vs_sim_time( + offsets: pd.DataFrame, tooltips: List[Tuple[str, str]], plot: Optional[bokeh.plotting.figure] = None +) -> bokeh.models.UIElement: + """ + Plot observation times vs simulation times for visits. + + Parameters + ---------- + offsets : `pd.DataFrame` + DataFrame containing visit timing information with columns including + 'obs_time', 'sim_time', 'sim_index', and 'label' as well as any + columns needed for the tooltips. + tooltips : `List[Tuple[str, str]]` + List of tuples defining the tooltip content for hover interactions. + plot : `bokeh.plotting.figure`, optional + Existing bokeh figure to use. If ``None``, a new figure is created. + + Returns + ------- + obs_vs_sim_time_plot : `bokeh.models.UIElement` + A bokeh UI element containing the simulation selector and the plot. + """ + if plot is None: + plot = bokeh.plotting.figure(frame_width=1024, frame_height=512) + + offsets_plot_df = offsets.reset_index().set_index("sim_index") + offsets_plot_df.reset_index(inplace=True) + default_sim_id = offsets_plot_df.sim_index.min() + offsets_plot_df["sim_alpha"] = np.where(offsets_plot_df.sim_index == default_sim_id, 1, 0) + + source = bokeh.models.ColumnDataSource(offsets_plot_df) + + matching_line = bokeh.models.Slope(gradient=1, y_intercept=0, line_color="gray", line_width=1) + plot.add_layout(matching_line) + + scatter_renderer = plot.scatter( + "sim_time", "obs_time", color=make_band_cmap("band"), alpha="sim_alpha", source=source + ) + plot.xaxis[0].formatter = bokeh.models.DatetimeTickFormatter(hours="%H:%M") + plot.xaxis[0].axis_label = "visit start time in simulation" + plot.yaxis[0].formatter = bokeh.models.DatetimeTickFormatter(hours="%H:%M") + plot.yaxis[0].axis_label = "visit start time as completed" + + hover_tool = bokeh.models.HoverTool( + renderers=[scatter_renderer], + tooltips=tooltips, + formatters={"@obs_time": "datetime", "@sim_time": "datetime"}, + ) + plot.add_tools(hover_tool) + + if "label" not in source.column_names: + raise ValueError("A sim selector needs the label column") + sim_labels = offsets.groupby("sim_index")["label"].first().to_frame() + default_sim = sim_labels.loc[default_sim_id, "label"] + sim_selector = bokeh.models.Select( + value=default_sim, options=sim_labels.label.to_list(), name="simselect" + ) + + sim_selector_callback = bokeh.models.CustomJS( + args={"source": source}, + code=""" + for (let i = 0; i < source.data['label'].length; i++) { + if (['All', source.data['label'][i]].includes(this.value)) { + source.data['sim_alpha'][i] = 1.0; + } else { + source.data['sim_alpha'][i] = 0.0; + } + } + source.change.emit() + """, + ) + sim_selector.js_on_change("value", sim_selector_callback) + + ui_element = bokeh.layouts.column([sim_selector, plot]) + return ui_element diff --git a/schedview/plot/visits.py b/schedview/plot/visits.py index 19eb0b7f..671aa052 100644 --- a/schedview/plot/visits.py +++ b/schedview/plot/visits.py @@ -176,6 +176,7 @@ def plot_visit_param_vs_time( offered_columns: Iterable[str] = tuple(), num_plots: int = 1, user_figure_kwargs: dict | None = None, + default_sim: str = "All", **kwargs, ) -> bokeh.models.ui.ui_element.UIElement: """Plot a column in the visit table vs. time. @@ -200,8 +201,10 @@ def plot_visit_param_vs_time( A list of columns to be offered in the dropdown selector. num_plots: int Number of parallel timelines.. - user_figure_kwargs: dict on None + user_figure_kwargs: dict or None Arguments passed along to bokeh.plotting.figure + default_sim: str + Label of default sim to show (defaults to ``All``) **kwargs Additional keyword arguments to be passed to `bokeh.plotting.figure.scatter`. @@ -251,7 +254,13 @@ def plot_visit_param_vs_time( source.data["label"] = np.full_like(source.data["start_timestamp"], "") if "sim_alpha" not in source.data: - source.data["sim_alpha"] = np.full_like(source.data["start_timestamp"], 1.0, dtype=np.float64) + if default_sim == "All": + source.data["sim_alpha"] = np.full_like(source.data["start_timestamp"], 0.2, dtype=np.float64) + else: + source.data["sim_alpha"] = np.full_like(source.data["start_timestamp"], 0.0, dtype=np.float64) + source.data["sim_alpha"][source.data["label"] == default_sim] = 0.2 + + source.data["sim_alpha"][source.data["label"] == "Completed"] = 1.0 scatter_kwargs = {"fill_alpha": 0.0, "line_alpha": "sim_alpha", "name": "timeline"} @@ -403,7 +412,8 @@ def plot_visit_param_vs_time( if "label" not in source.column_names: raise ValueError("A sim selector needs the label column") options = ["All"] + list(o for o in np.unique(source.data["label"]) if o != "Completed") - default_sim = "All" + if default_sim not in options: + raise ValueError(f"Default sim must be one of {options}") sim_selector = bokeh.models.Select(value=default_sim, options=options, name="simselect") sim_selector_callback = bokeh.models.CustomJS( @@ -413,7 +423,7 @@ def plot_visit_param_vs_time( if (source.data['label'][i] === 'Completed') { source.data['sim_alpha'][i] = 1.0; } else if (['All', source.data['label'][i]].includes(this.value)) { - source.data['sim_alpha'][i] = 0.8; + source.data['sim_alpha'][i] = 0.2; } else { source.data['sim_alpha'][i] = 0.0; } diff --git a/tests/test_compute_comparesim.py b/tests/test_compute_comparesim.py new file mode 100644 index 00000000..a83b62b4 --- /dev/null +++ b/tests/test_compute_comparesim.py @@ -0,0 +1,312 @@ +import unittest + +import healpy as hp +import numpy as np +import pandas as pd + +from schedview.compute.comparesim import ( + assign_field_hpids, + compute_obs_sim_offsets, + compute_offset_stats, + offsets_of_coord_band, +) + +RANDOM_NUMBER_GENERATOR = np.random.default_rng(6563) + + +class TestAssignFieldHpids(unittest.TestCase): + + def setUp(self): + self.nside = 2**10 + + # Pick a few random but reproducible healpix indices. + self.sim_hpids = RANDOM_NUMBER_GENERATOR.integers(0, hp.nside2npix(self.nside), size=5) + ra, dec = hp.pix2ang(self.nside, self.sim_hpids, lonlat=True) + self.simulated_visits = pd.DataFrame({"fieldRA": ra, "fieldDec": dec}) + + # Build completed visits based on the simulated ones. + completed = self.simulated_visits.copy() + # First two rows: small offset (< tolerance), so should be reassigned. + completed.loc[0:1, "fieldRA"] += 0.001 + completed.loc[0:1, "fieldDec"] += 0.001 + # Next two rows: large offset (> tolerance), so should keep own hpid. + completed.loc[2:3, "fieldRA"] += 1.0 + completed.loc[2:3, "fieldDec"] += 1.0 + # Add a few completely unrelated visits. + extra_ra = np.array([10.0, 150.0, 250.0]) + extra_dec = np.array([-30.0, 0.0, 45.0]) + extra = pd.DataFrame({"fieldRA": extra_ra, "fieldDec": extra_dec}) + self.completed_visits = pd.concat([completed, extra], ignore_index=True) + + self.tolerance_deg = 0.01 + + def test_assign_field_hpids_basic(self): + sim_out, comp_out = assign_field_hpids( + self.simulated_visits, + self.completed_visits, + nside=self.nside, + coord_match_tolerance_deg=self.tolerance_deg, + inplace=False, + ) + + # Simulated IDs should equal the original healpix indices. + np.testing.assert_array_equal(sim_out["fieldHpid"].values, self.sim_hpids) + + # Within‑tolerance rows (0 and 1) should have the same hpid as the + # corresponding simulated rows. + self.assertEqual(comp_out.loc[0, "fieldHpid"], self.sim_hpids[0]) + self.assertEqual(comp_out.loc[1, "fieldHpid"], self.sim_hpids[1]) + + # Rows with a large offset should retain the hpid computed from their + # own coordinates + self.assertNotEqual(comp_out.loc[2, "fieldHpid"], self.sim_hpids[2]) + self.assertNotEqual(comp_out.loc[3, "fieldHpid"], self.sim_hpids[3]) + + # Ensure the original DataFrames are unchanged. + self.assertNotIn("fieldHpid", self.simulated_visits.columns) + self.assertNotIn("fieldHpid", self.completed_visits.columns) + + def test_assign_field_hpids_inplace(self): + + sim_original = self.simulated_visits.copy() + comp_original = self.completed_visits.copy() + + sim_out, comp_out = assign_field_hpids( + sim_original, + comp_original, + nside=self.nside, + coord_match_tolerance_deg=self.tolerance_deg, + inplace=True, + ) + + # Identity checks – the returned objects are the same as the inputs. + self.assertIs(sim_out, sim_original) + self.assertIs(comp_out, comp_original) + + # Verify that the hpid column now exists on the original objects. + self.assertIn("fieldHpid", sim_original.columns) + self.assertIn("fieldHpid", comp_original.columns) + # Re‑use the basic assertions for correctness. + np.testing.assert_array_equal(sim_original["fieldHpid"].values, self.sim_hpids) + self.assertEqual(comp_original.loc[0, "fieldHpid"], self.sim_hpids[0]) + self.assertEqual(comp_original.loc[1, "fieldHpid"], self.sim_hpids[1]) + + def test_assign_field_hpids_empty_completed(self): + + empty_completed = pd.DataFrame(columns=["fieldRA", "fieldDec"]) + sim_out, comp_out = assign_field_hpids( + self.simulated_visits, + empty_completed, + nside=self.nside, + coord_match_tolerance_deg=self.tolerance_deg, + inplace=False, + ) + # Simulated IDs must be correct. + np.testing.assert_array_equal(sim_out["fieldHpid"].values, self.sim_hpids) + # Completed DataFrame should remain empty but still have the column. + self.assertTrue(comp_out.empty) + self.assertIn("fieldHpid", comp_out.columns) + + +class TestOffsetOfCoordBand(unittest.TestCase): + + def setUp(self): + self.num_test_visits = 10 + + # generate a set of self.num_test_visits times separated by 500 to 600 + # seconds, and generate a dataframe self.sim_visits with those times. + start_time = pd.Timestamp("2027-01-01 00:00:00", tz="UTC") + time_diffs = np.random.uniform(500, 600, self.num_test_visits) + times = [ + start_time + pd.Timedelta(seconds=np.sum(time_diffs[:i])) for i in range(self.num_test_visits) + ] + self.sim_visits = pd.DataFrame( + { + "start_timestamp": times, + "fieldRA": [10.0] * self.num_test_visits, + "fieldDec": [0.0] * self.num_test_visits, + "band": ["r"] * self.num_test_visits, + } + ) + + # randomly but repeatably generate a set of self.num_test_visits + # offsets between -60 and 60 seconds, self.offsets, and apply these to + # self.sim_visits to get self.obs_visits + self.offsets = RANDOM_NUMBER_GENERATOR.uniform(-60, 60, self.num_test_visits) + self.obs_visits = self.sim_visits.copy() + self.obs_visits["start_timestamp"] = self.obs_visits["start_timestamp"] + pd.to_timedelta( + self.offsets, unit="s" + ) + self.obs_visits["sim_index"] = [0] * self.num_test_visits + + def test_equal_numbers(self): + visits = pd.concat( + [self.sim_visits.assign(sim_index=0), self.obs_visits.assign(sim_index=1)], ignore_index=True + ) + + # Verify that we can recover the time offsets + result = offsets_of_coord_band(0, visits, 1) + np.testing.assert_array_almost_equal(result["delta"].values, self.offsets, decimal=4) + + # Verify that we can reverse the sign + result = offsets_of_coord_band(1, visits, 0) + np.testing.assert_array_almost_equal(result["delta"].values, -1 * self.offsets, decimal=4) + + def test_missing_some(self): + dropped_indexes = RANDOM_NUMBER_GENERATOR.choice(len(self.sim_visits), 3, replace=False) + kept_indexes = np.setdiff1d(np.arange(len(self.sim_visits)), dropped_indexes) + remaining_offsets = self.offsets[kept_indexes] + + reduced_sim_visits = self.sim_visits.drop(dropped_indexes).reset_index(drop=True) + + visits = pd.concat( + [reduced_sim_visits.assign(sim_index=0), self.obs_visits.assign(sim_index=1)], ignore_index=True + ) + + # Visits that weren't dropped should be recoverable + result = offsets_of_coord_band(0, visits, 1) + np.testing.assert_array_almost_equal(result["delta"].values, remaining_offsets, decimal=4) + + # Reverse sim and obs, and see if it still works + result = offsets_of_coord_band(1, visits, 0) + np.testing.assert_array_almost_equal(result["delta"].values, -1 * remaining_offsets, decimal=4) + + +def _generate_test_visits(num_visits_per_hpid_band, num_hpid_band, num_sims): + """Generate visits DataFrame for testing.""" + + # Generate a pandas.DataFrame, hpid_band_df, of length + # num_hpid_band, + # with fieldRA, fieldDec, band, and fieldHpid corresponding to + # fieldRA and fieldDec with healpy nside 32 + # fieldRA and fieldDec should be randomly but repeatably generate. + # band should be randomly but repeatably selected from u, g, r, i, z, y + nside = 32 + hpid_band_df = pd.DataFrame() + hpid_band_df["fieldHpid"] = RANDOM_NUMBER_GENERATOR.integers(0, hp.nside2npix(nside), size=num_hpid_band) + hpid_band_df["fieldRA"], hpid_band_df["fieldDec"] = hp.pix2ang( + nside, hpid_band_df["fieldHpid"], lonlat=True + ) + + # Generate random bands from u, g, r, i, z, y + bands = ["u", "g", "r", "i", "z", "y"] + hpid_band_df["band"] = RANDOM_NUMBER_GENERATOR.choice(bands, size=num_hpid_band) + + # Generate a set of simulations + sim_dfs = [] + start_time = pd.Timestamp("2027-01-01 00:00:00", tz="UTC") + for sim_index in range(1, num_sims + 1): + sim_df = pd.DataFrame() + visit_data = [] + start_timestamp = start_time + + # in this simulation, observe each hpid_band + for _, hpid_band in hpid_band_df.iterrows(): + for time_diff in RANDOM_NUMBER_GENERATOR.uniform(500, 600, num_visits_per_hpid_band): + start_timestamp = start_timestamp + pd.Timedelta(seconds=time_diff) + visit_data.append( + { + "sim_index": sim_index, + "start_timestamp": start_timestamp, + "fieldRA": hpid_band["fieldRA"], + "fieldDec": hpid_band["fieldDec"], + "band": hpid_band["band"], + "fieldHpid": hpid_band["fieldHpid"], + "label": f"Sim {sim_index}", + } + ) + + sim_df = pd.DataFrame(visit_data) + sim_dfs.append(sim_df) + + # Generate an obs_df with is similar to the first sim, but with offsets + obs_df = sim_dfs[0].copy() + offsets = RANDOM_NUMBER_GENERATOR.uniform(-6, 6, len(obs_df)) + obs_df["start_timestamp"] = obs_df["start_timestamp"] + pd.to_timedelta(offsets, unit="s") + obs_df["sim_index"] = 0 + obs_df["lable"] = "Completed" + + visits = pd.concat([obs_df] + sim_dfs, ignore_index=True) + return visits, offsets, hpid_band_df + + +class TestComputeObsSimOffsets(unittest.TestCase): + + def setUp(self): + self.num_visits_per_hpid_band = 5 + self.num_hpid_band = 3 + self.num_sims = 3 + self.visits, self.offsets, self.hpid_band_df = _generate_test_visits( + self.num_visits_per_hpid_band, self.num_hpid_band, self.num_sims + ) + + def test_compute_obs_sim_offsets_basic(self): + # Test basic functionality of compute_obs_sim_offsets + result = compute_obs_sim_offsets(self.visits, obs_index=0) + + self.assertIsNotNone(result) + self.assertIn("delta", result.columns) + + # Test that we recover offsets + np.testing.assert_array_almost_equal( + result.loc[1, :].sort_values("sim_time").loc[:, "delta"].values, + self.offsets, + decimal=4, + ) + + # Should have a multiindex with sim_index, fieldHpid, and band + self.assertTrue(isinstance(result.index, pd.MultiIndex)) + self.assertEqual(result.index.names, ["sim_index", "fieldHpid", "band"]) + + # Should have at least one entry for each simulation + sim_indexes = result.index.get_level_values("sim_index").unique() + assert set(sim_indexes) == set(range(self.num_sims + 1)) - set([0]) + + +class TestComputeOffsetStats(unittest.TestCase): + + def setUp(self): + self.num_visits_per_hpid_band = 5 + self.num_hpid_band = 3 + self.num_sims = 3 + self.visits, self.offset_dt, self.hpid_band_df = _generate_test_visits( + self.num_visits_per_hpid_band, self.num_hpid_band, self.num_sims + ) + self.offsets = compute_obs_sim_offsets(self.visits, obs_index=0) + + def test_compute_obs_sim_offsets_basic(self): + offset_stats = compute_offset_stats(self.offsets, self.visits) + assert len(offset_stats) == self.num_sims + assert offset_stats.loc[1, "sim count"] == self.num_visits_per_hpid_band * self.num_hpid_band + offset_dt_stats = pd.Series(self.offset_dt).describe() + for col in offset_dt_stats.index[1:]: + self.assertAlmostEqual(offset_dt_stats[col], offset_stats.loc[1, col], 5) + + +class TestComputeCommonFractions(unittest.TestCase): + + def setUp(self): + # Create a simple test visit counts DataFrame + # This simulates what would come from count_visits_by_sim + + self.visit_counts = pd.DataFrame( + { + "fieldHpid": [100, 101, 102, 103, 100, 101, 102, 102], + "band": ["u", "u", "u", "u", "g", "g", "g", "g"], + 0: [1, 1, 1, 1, 2, 2, 0, 0], + 1: [1, 1, 1, 1, 2, 2, 0, 0], + 2: [2, 2, 1, 1, 2, 2, 0, 0], + 3: [1, 1, 1, 1, 1, 1, 1, 1], + } + ).set_index(["fieldHpid", "band"]) + + self.num_sims = len(self.visit_counts.columns) - 1 + + # Create sim_labels Series + self.sim_labels = pd.Series( + ["Completed", "Sim 1", "Sim 2", "Sim 3"], index=[0, 1, 2, 3], name="label" + ) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/test_plot_comparesim.py b/tests/test_plot_comparesim.py new file mode 100644 index 00000000..99732f23 --- /dev/null +++ b/tests/test_plot_comparesim.py @@ -0,0 +1,55 @@ +from datetime import datetime +from unittest import TestCase + +import bokeh.models +import bokeh.plotting +import pandas as pd + +import schedview.plot.comparesim + + +class TestPlotCompareSim(TestCase): + + def setUp(self): + # Create test data that mimics what would come from + # compute_obs_sim_offsets + # Do not worry about making delta correspond to obs and sim times, + # because it does not matter here. + obs_times = [datetime(2030, 1, 1, hr, 0, 0) for hr in range(6)] + sim_times = [datetime(2030, 1, 1, hr, 15, 0) for hr in range(6)] + self.test_offsets = pd.DataFrame( + { + "obs_time": obs_times, + "sim_time": sim_times, + "sim_index": [0, 0, 0, 1, 1, 1], + "label": ["Completed", "Completed", "Completed", "Sim 1", "Sim 1", "Sim 1"], + "fieldRA": [10.0, 20.0, 30.0, 10.0, 20.0, 30.0], + "fieldDec": [0.0, 10.0, 20.0, 0.0, 10.0, 20.0], + "band": ["u", "g", "r", "u", "g", "r"], + "delta": [0, 0, 0, 0, 0, 0], + } + ).set_index(["sim_index", "fieldRA", "fieldDec"]) + + def test_plot_obs_vs_sim_time_basic(self): + """Test basic functionality of plot_obs_vs_sim_time.""" + tooltips = [ + ("Observation Time", "@obs_time{%F %H:%M}"), + ("Simulation Time", "@sim_time{%F %H:%M}"), + ("Band", "@band"), + ] + + result = schedview.plot.comparesim.plot_obs_vs_sim_time(self.test_offsets, tooltips) + + # Check that first child is a Select widget (sim selector) + self.assertIsInstance(result.children[0], bokeh.models.Select) + + # Check that second child is a bokeh figure + plot = result.children[1] + self.assertIsInstance(plot, bokeh.plotting.figure) + + # Check that tooltips were properly set up + self.assertGreater(len(plot.tools), 0) + + # Check that there are data points + source = plot.renderers[0].data_source + self.assertGreater(len(source.data["obs_time"]), 0)