lsst · ehneilsen · Mar 11, 2026 · Mar 11, 2026 · Mar 11, 2026 · Mar 11, 2026
diff --git a/schedview/compute/__init__.py b/schedview/compute/__init__.py
@@ -16,7 +16,11 @@
     "compute_scalar_metric_at_one_mjd",
     "compute_scalar_metric_at_mjds",
     "compute_mixed_scalar_metric",
+    "compute_obs_sim_offsets",
+    "compute_offset_stats",
+    "offsets_of_coord_band",
     "often_repeated_fields",
+    "assign_field_hpids",
     "count_visits_by_sim",
     "match_visits_across_sims",
     "compute_matched_visit_delta_statistics",
@@ -25,6 +29,12 @@
 
 from .astro import compute_sun_moon_positions, convert_evening_date_to_night_of_survey, night_events
 from .camera import LsstCameraFootprintPerimeter
+from .comparesim import (
+    assign_field_hpids,
+    compute_obs_sim_offsets,
+    compute_offset_stats,
+    offsets_of_coord_band,
+)
 from .multisim import (
     compute_matched_visit_delta_statistics,
     count_visits_by_sim,

diff --git a/schedview/compute/comparesim.py b/schedview/compute/comparesim.py
@@ -0,0 +1,254 @@
+from functools import partial
+from typing import Optional, Tuple
+
+import astropy.units as u
+import healpy as hp
+import numpy as np
+import pandas as pd
+from astropy.coordinates import Angle, SkyCoord
+
+from .multisim import match_visits_across_sims
+
+
+def assign_field_hpids(
+    simulated_visits: pd.DataFrame,
+    completed_visits: pd.DataFrame,
+    nside: int = 262144,
+    coord_match_tolerance_deg: float = 0.002277777778,
+    *,
+    ra_col: str = "fieldRA",
+    decl_col: str = "fieldDec",
+    hpid_col: str = "fieldHpid",
+    inplace: bool = False,
+) -> Tuple[pd.DataFrame, pd.DataFrame]:
+    """
+    Populate an id column for simulated and completed visits and
+    propagate the id from the nearest simulated healpix to completed
+    visits that lie within a specified angular tolerance.
+
+    Parameters
+    ----------
+    simulated_visits : `pd.DataFrame`
+        Simulated visit table containing visits from all simulations to be
+        compared. It must contain columns identified by ``ra_col``
+        and ``dec_col`` (both in degrees).  An integer column named
+        ``hpid_col`` will be added or overwritten. Pointings in this
+        ``DataFrame`` are the reference pointings to which pointings from
+        completed visits will be matched.
+
+    completed_visits : `pd.DataFrame`
+        Completed visit table; same column requirements as
+        ``simulated_visits``. If ``inplace`` is ``True`` and the column
+        designated by ``hpid_col`` already exists, values in that column
+        will be replaced.
+
+    field_hpix_nside : `int`
+        HEALPix nside that defines the pixel resolution. Defaults to
+        nside=2**18 (about 0.8 arcseconds), so pointings with significantly
+        different ditherings are considered distinct.
+
+    field_coord_tolerance_deg : `float`
+        Maximum angular separation (deg) for a completed visit to be considered
+        a match to a simulated healpix center. Defaults to 0.002277777778,
+        or about 10 arcsecords, which is large enough to match across observed
+        pointing offsets, but not usually across different dithers.
+
+    ra_col : str, optional, default ``"fieldRA"``
+        Column name for right‑ascension values (degrees).
+
+    dec_col : str, optional, default ``"fieldDec"``
+        Column name for declination values (degrees).
+
+    hpid_col : str, optional, default ``"fieldHpid"``
+        Column name that will store the healpix index.
+
+    inplace : bool, optional, default ``False``
+        If ``True`` the input DataFrames are modified in place.  If ``False``
+        (default) shallow copies are made so the originals stay untouched.
+
+    Returns
+    -------
+    result : Tuple[pd.DataFrame, pd.DataFrame]
+        ``(simulated_visits_out, completed_visits_out)`` the (possibly copied)
+        DataFrames with the ``hpid_col`` column populated.
+    """
+
+    if not inplace:
+        simulated_visits = simulated_visits.copy()
+        completed_visits = completed_visits.copy()
+
+    simulated_visits[hpid_col] = hp.ang2pix(
+        nside=nside,
+        theta=simulated_visits[ra_col],
+        phi=simulated_visits[decl_col],
+        lonlat=True,
+    )
+
+    if completed_visits.empty:
+        # If completed_visits is empty, all we have to do is create the column
+        # in the empty DataFrame and we are done.
+        completed_visits[hpid_col] = []
+        completed_visits[hpid_col] = completed_visits[hpid_col].astype(int)
+    else:
+        # Values assigned here will be replaced by closest matches
+        # in simulated_visit[hpid_col] if there are any within
+        # field_coord_tolerance_deg
+        completed_visits[hpid_col] = hp.ang2pix(
+            nside=nside,
+            theta=completed_visits[ra_col],
+            phi=completed_visits[decl_col],
+            lonlat=True,
+        )
+
+        # Find all hpids actually used in the simulation.
+        sim_hpids = simulated_visits[hpid_col].unique()
+        sim_hp_ra, sim_hp_dec = hp.pix2ang(nside=nside, ipix=sim_hpids, lonlat=True)
+        sim_hp_coords = SkyCoord(ra=sim_hp_ra * u.deg, dec=sim_hp_dec * u.deg, frame="icrs")
+
+        # Match completed visits to the nearest hpid used in simulations.
+        completed_coords = SkyCoord(
+            ra=completed_visits[ra_col].values * u.deg,
+            dec=completed_visits[decl_col].values * u.deg,
+            frame="icrs",
+        )
+        sim_hp_match_idx, sim_hp_sep, _ = completed_coords.match_to_catalog_sky(sim_hp_coords)
+
+        # Apply tolerance mask and replace the healpix id where appropriate.
+        within_tol = sim_hp_sep.deg < coord_match_tolerance_deg
+        completed_visits.loc[within_tol, hpid_col] = sim_hpids[sim_hp_match_idx[within_tol]]
+
+    return simulated_visits, completed_visits
+
+
+def offsets_of_coord_band(sim_index: int, visits: pd.DataFrame, obs_index: int = 0) -> pd.DataFrame:
+    """
+    Compute the time offset between a set of observations and a
+    single simulated visit ``sim_index`` for a given (fieldHpid, band)
+    coordinate pair.
+
+    Parameters
+    ----------
+    sim_index : `int`
+        The simulation index to compare against the observation (index 0).
+    visits : `pd.DataFrame`
+        Table of visits that contains at least the columns ``sim_index`` and
+        ``start_timestamp``.
+
+    Returns
+    -------
+    offsets: `pd.DataFrame`
+        A DataFrame with columns ``obs_time``, ``sim_time`` and ``delta`` and
+        an index level ``sim_index``.
+    """
+    # This function is intended to be run on a DataFrame on a single
+    # field/band combination, not on the whole set of visits, e.g. for a night.
+    for col in ("band", "fieldHpid"):
+        if col in visits.columns:
+            assert len(visits[col].unique()) == 1
+
+    offsets = match_visits_across_sims(visits.set_index("sim_index").start_timestamp, (obs_index, sim_index))
+
+    # Normalize the order and sense of the offset
+    if offsets.columns[0] == obs_index:
+        offsets["delta"] = -1 * offsets["delta"]
+    else:
+        offsets = offsets[[obs_index, sim_index, "delta"]]
+
+    assert offsets.columns[0] == obs_index
+    assert offsets.columns[1] == sim_index
+    assert offsets.columns[2] == "delta"
+    assert len(offsets.columns) == 3
+    offsets.columns = ["obs_time", "sim_time", "delta"]
+    offsets["sim_index"] = sim_index
+    offsets = offsets.set_index("sim_index")
+
+    return offsets
+
+
+def compute_obs_sim_offsets(
+    visits: pd.DataFrame,
+    obs_index: int = 0,
+) -> pd.DataFrame:
+    """
+    Build a table of offsets for all simulated/completed pairs of visits.
+
+    Parameters
+    ----------
+    visits : `pd.DataFrame`
+        Table of visits with at least the columns ``sim_index``,
+        ``fieldHpid`` and ``band``.
+    obs_index : `int`, optional
+        ``sim_index`` value for completed (observed) visits (default = 0).
+
+    Returns
+    -------
+    sim_offsets : `pd.DataFrame`
+        Offsets for every simulated visit, indexed by ``sim_index``,
+        ``fieldHpid`` and ``band``.
+    """
+    sim_indexes = visits.sim_index.unique()
+    sim_indexes = sim_indexes[sim_indexes != obs_index]
+
+    offsets = pd.concat(
+        visits.groupby(["fieldHpid", "band"]).apply(partial(offsets_of_coord_band, i), include_groups=False)
+        for i in sim_indexes
+    )
+    offsets.index = offsets.index.reorder_levels(["sim_index", "fieldHpid", "band"])
+    return offsets
+
+
+def compute_offset_stats(
+    offsets: pd.DataFrame,
+    visits: Optional[pd.DataFrame] = None,
+    hhmmss: bool = False,
+) -> pd.DataFrame:
+    """
+    Produce a summary table of time offsets between completed and simulated
+    visits.
+
+    Parameters
+    ----------
+    offsets :  `pd.DataFrame`
+        Result of :func:`compute_obs_sim_offsets`.  Must contain a ``delta``
+        column and a ``sim_index`` level in the index.
+    visits : `pd.DataFrame`, optional
+        The original visits table, required only if observation counts or
+        labels should be included in the returned ``DataFrame``.
+    hhmmss : `bool`, optional
+        If ``True``, convert the numeric statistics (mean, std, etc.) from
+        seconds to an ``HH:MM:SS`` string representation.
+
+    Returns
+    -------
+    offset_stats : `pd.DataFrame`
+        A table where each row corresponds to a ``sim_index`` and columns
+        include match counts, MAD, and the usual descriptive statistics.
+    """
+    abs_delta = np.abs(offsets["delta"])
+    abs_delta.name = "abs_delta"
+
+    offset_stats = offsets.groupby("sim_index")["delta"].describe()
+    offset_stats.insert(0, "match count", offset_stats["count"].astype(int))
+    offset_stats.insert(1, "MAD", abs_delta.to_frame().groupby("sim_index")["abs_delta"].median())
+
+    if visits is not None:
+        visit_counts = visits.groupby("sim_index").agg({"label": "count"})
+        offset_stats.insert(
+            0, "obs count", np.full_like(offset_stats["count"], visit_counts.loc[0]).astype(int)
+        )
+        offset_stats.insert(1, "sim count", visit_counts)
+        offset_stats.insert(3, "#match/#obs", (offset_stats["count"] / offset_stats["obs count"]).round(2))
+        offset_stats.insert(4, "#match/#sim", (offset_stats["count"] / offset_stats["sim count"]).round(2))
+
+    offset_stats.drop(columns="count", inplace=True)
+
+    if hhmmss:
+        for column in ["MAD", "mean", "std", "min", "25%", "50%", "75%", "max"]:
+            offset_stats.loc[:, column] = Angle(
+                (offset_stats.loc[:, column].astype(int).values / 3600) * u.hour
+            ).to_string(unit=u.hour, sep=":")
+
+    if visits is not None and "label" in visits.columns:
+        offset_stats.insert(0, "label", visits.groupby("sim_index")["label"].first().to_frame())
+
+    return offset_stats
diff --git a/schedview/plot/__init__.py b/schedview/plot/__init__.py
@@ -3,6 +3,7 @@
     "plot_infeasible",
     "plot_airmass_vs_time",
     "plot_alt_vs_time",
+    "plot_obs_vs_sim_time",
     "plot_polar_alt_az",
     "plot_survey_rewards",
     "create_survey_reward_plot",
@@ -42,6 +43,7 @@
 
 from .cadence import create_cadence_plot
 from .colors import LIGHT_EXTRA_COLORS, LIGHT_PLOT_BAND_COLORS, PLOT_BAND_COLORS, make_band_cmap
+from .comparesim import plot_obs_vs_sim_time
 from .multisim import generate_sim_indicators, overplot_kernel_density_estimates
 from .nightbf import plot_infeasible, plot_rewards
 from .nightly import plot_airmass_vs_time, plot_alt_vs_time, plot_polar_alt_az

diff --git a/schedview/plot/comparesim.py b/schedview/plot/comparesim.py
@@ -0,0 +1,87 @@
+from typing import List, Optional, Tuple
+
+import bokeh
+import bokeh.layouts
+import bokeh.models
+import bokeh.plotting
+import numpy as np
+import pandas as pd
+
+from schedview.plot.colors import make_band_cmap
+
+
+def plot_obs_vs_sim_time(
+    offsets: pd.DataFrame, tooltips: List[Tuple[str, str]], plot: Optional[bokeh.plotting.figure] = None
+) -> bokeh.models.UIElement:
+    """
+    Plot observation times vs simulation times for visits.
+
+    Parameters
+    ----------
+    offsets : `pd.DataFrame`
+        DataFrame containing visit timing information with columns including
+        'obs_time', 'sim_time', 'sim_index', and 'label' as well as any
+        columns needed for the tooltips.
+    tooltips : `List[Tuple[str, str]]`
+        List of tuples defining the tooltip content for hover interactions.
+    plot : `bokeh.plotting.figure`, optional
+        Existing bokeh figure to use. If ``None``, a new figure is created.
+
+    Returns
+    -------
+    obs_vs_sim_time_plot : `bokeh.models.UIElement`
+        A bokeh UI element containing the simulation selector and the plot.
+    """
+    if plot is None:
+        plot = bokeh.plotting.figure(frame_width=1024, frame_height=512)
+
+    offsets_plot_df = offsets.reset_index().set_index("sim_index")
+    offsets_plot_df.reset_index(inplace=True)
+    default_sim_id = offsets_plot_df.sim_index.min()
+    offsets_plot_df["sim_alpha"] = np.where(offsets_plot_df.sim_index == default_sim_id, 1, 0)
+
+    source = bokeh.models.ColumnDataSource(offsets_plot_df)
+
+    matching_line = bokeh.models.Slope(gradient=1, y_intercept=0, line_color="gray", line_width=1)
+    plot.add_layout(matching_line)
+
+    scatter_renderer = plot.scatter(
+        "sim_time", "obs_time", color=make_band_cmap("band"), alpha="sim_alpha", source=source
+    )
+    plot.xaxis[0].formatter = bokeh.models.DatetimeTickFormatter(hours="%H:%M")
+    plot.xaxis[0].axis_label = "visit start time in simulation"
+    plot.yaxis[0].formatter = bokeh.models.DatetimeTickFormatter(hours="%H:%M")
+    plot.yaxis[0].axis_label = "visit start time as completed"
+
+    hover_tool = bokeh.models.HoverTool(
+        renderers=[scatter_renderer],
+        tooltips=tooltips,
+        formatters={"@obs_time": "datetime", "@sim_time": "datetime"},
+    )
+    plot.add_tools(hover_tool)
+
+    if "label" not in source.column_names:
+        raise ValueError("A sim selector needs the label column")
+    sim_labels = offsets.groupby("sim_index")["label"].first().to_frame()
+    default_sim = sim_labels.loc[default_sim_id, "label"]
+    sim_selector = bokeh.models.Select(
+        value=default_sim, options=sim_labels.label.to_list(), name="simselect"
+    )
+
+    sim_selector_callback = bokeh.models.CustomJS(
+        args={"source": source},
+        code="""
+            for (let i = 0; i < source.data['label'].length; i++) {
+                if (['All', source.data['label'][i]].includes(this.value)) {
+                    source.data['sim_alpha'][i] = 1.0;
+                } else {
+                    source.data['sim_alpha'][i] = 0.0;
+                }
+            }
+            source.change.emit()
+        """,
+    )
+    sim_selector.js_on_change("value", sim_selector_callback)
+
+    ui_element = bokeh.layouts.column([sim_selector, plot])
+    return ui_element