diff --git a/bin/testPfsTargetSpectraPartialIO.py b/bin/testPfsTargetSpectraPartialIO.py new file mode 100755 index 00000000..4f7777cf --- /dev/null +++ b/bin/testPfsTargetSpectraPartialIO.py @@ -0,0 +1,76 @@ +#!/usr/bin/env python + +import os +from time import time +import resource +from argparse import ArgumentParser +from pfs.datamodel import PfsCalibrated + + +def get_bytes_read(pid): + """Return the number of bytes read so far for the given process ID.""" + + with open(f"/proc/{pid}/io", "r") as f: + for line in f: + if line.startswith("rchar:"): + return int(line.split()[1]) + + return 0 + + +def main(filename, **kwargs): + # Remove None values from kwargs + kwargs = {k: v for k, v in kwargs.items() if v is not None} + + # Get the PID of the current process + pid = os.getpid() + + usage = resource.getrusage(resource.RUSAGE_SELF) + bytes_start = get_bytes_read(pid) + time_start = time() + print('Bytes read so far:', bytes_start) + print('Max RSS:', usage.ru_maxrss, 'kB') + print('Minor page faults so far:', usage.ru_minflt) + print('Major page faults so far:', usage.ru_majflt) + + # Load a subset of the spectra + spectra = PfsCalibrated.readFits(filename, **kwargs) + print(f"Loaded {len(spectra)} spectra with kwargs: {kwargs}") + + usage = resource.getrusage(resource.RUSAGE_SELF) + bytes_partial = get_bytes_read(pid) + time_partial = time() + print('Bytes read during loading:', bytes_partial - bytes_start) + print('Time taken to load a subset of spectra:', time_partial - time_start) + print('Max RSS:', usage.ru_maxrss, 'kB') + print('Minor page faults so far:', usage.ru_minflt) + print('Major page faults so far:', usage.ru_majflt) + + # Load the entire file + spectra = PfsCalibrated.readFits(filename) + print(f"Loaded {len(spectra)} spectra.") + + usage = resource.getrusage(resource.RUSAGE_SELF) + bytes_all = get_bytes_read(pid) + time_all = time() + print('Bytes read during loading:', bytes_all - bytes_partial) + print('Time taken to load all spectra:', 
<![CDATA[+    main(args.filename,
+         targetId=args.targetId, targetType=args.targetType, catId=args.catId)]]>
<![CDATA[+HDU #i STELLARPARAM   Fundamental stellar parameter measurements     [BINARY FITS TABLE]
+HDU #l STELLARCOVAR   Covariance matrix of stellar parameters        [BINARY FITS TABLE] n*n
+HDU #k ABUND          Single element abundances                      [BINARY FITS TABLE]
+HDU #m ABUNDCOVAR     Covariance matrix of single element abundances [32-bit FLOAT] n*n
+
+In the data tables outlined below, we allow for the possibility of multiple measurements of the same
+physical parameters indicated by the `method` field but we provide the full covariance matrix for
+the primary method only. The column `covarId` indicates the position of the parameter within the
+corresponding covariance matrix.
+
+The VELCORR table lists the velocity corrections used at each visit:
+
+    visit           visit identifier                                  32-bit int
+    JD              Julian Date of the visit                          32-bit float
+    helio           Heliocentric correction at the visit [km s-1]     32-bit float
+    bary            Barycentric correction at the visit [km s-1]      32-bit float]]>
FLUXTABLE of pfsStar files is extended and includes the following additional columns: + + model Best fit fluxed template model 32-bit float + cont Continuum model 32-bit float + norm_flux Continuum-normalized flux 32-bit float + norm_error Error of continuum-normalized flux 32-bit float + norm_model Best fit continuum-normalized model 32-bit float + +-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=- + 2D spectral line measurements The 2D pipeline measures the position and flux of sky and arc lines as part of the reduction process. diff --git a/python/pfs/datamodel/__init__.py b/python/pfs/datamodel/__init__.py index 04c97a92..da81f310 100644 --- a/python/pfs/datamodel/__init__.py +++ b/python/pfs/datamodel/__init__.py @@ -22,3 +22,4 @@ from .pfsFiberNorms import * from .pfsZCandidates import * from .pfsCoZCandidates import * +from .ga import * diff --git a/python/pfs/datamodel/fluxTable.py b/python/pfs/datamodel/fluxTable.py index c60c1cd4..192827f7 100644 --- a/python/pfs/datamodel/fluxTable.py +++ b/python/pfs/datamodel/fluxTable.py @@ -38,11 +38,11 @@ class FluxTable: _hduName = "FLUX_TABLE" # HDU name to use def __init__(self, wavelength, flux, error, mask, flags): - dims = np.array([len(wavelength.shape), len(flux.shape), len(error.shape), len(mask.shape)]) - lengths = set([wavelength.shape, flux.shape, error.shape, mask.shape]) - if np.any(dims != 1) or len(lengths) > 1: - raise RuntimeError("Bad shapes for wavelength,flux,error,mask: %s,%s,%s,%s" % - (wavelength.shape, flux.shape, error.shape, mask.shape)) + self.checkShapes(wavelength=wavelength, + flux=flux, + error=error, + mask=mask) + self.wavelength = wavelength self.flux = flux self.error = error @@ -53,6 +53,17 @@ def __len__(self): """Return number of elements""" return len(self.wavelength) + def checkShapes(self, **kwargs): + keys = list(sorted(kwargs.keys())) + dims = np.array([len(kwargs[k].shape) for k in keys]) + lengths = set([kwargs[k].shape for k in keys]) + + 
if np.any(dims != 1) or len(lengths) > 1: + names = ','.join(keys) + shapes = ','.join([str(kwargs[k].shape) for k in keys]) + raise RuntimeError("Bad shapes for %s: %s" % + (names, shapes)) + def toFits(self, fits): """Write to a FITS file diff --git a/python/pfs/datamodel/ga.py b/python/pfs/datamodel/ga.py new file mode 100644 index 00000000..1b37b2aa --- /dev/null +++ b/python/pfs/datamodel/ga.py @@ -0,0 +1,638 @@ +import os +import numpy as np +from enum import IntFlag + +from .pfsConfig import DocEnum +from .notes import makeNotesClass, Notes +from .pfsFiberArray import PfsFiberArray +from .fluxTable import FluxTable +from .pfsTable import PfsTable, Column +from .utils import inheritDocstrings +from .utils import astropyHeaderToDict, astropyHeaderFromDict +from .utils import calculatePfsVisitHash, wraparoundNVisit +from .masks import MaskHelper +from .observations import Observations + +GA_DAMD_VER = 2 + +__all__ = [ + "TempFitFlag", + "VelocityCorrections", + "StellarParams", + "Abundances", + "MeasurementFlags", + "StarFluxTable", + "PfsStarNotes", + "PfsStar", + "StarCatalogTable", + "PfsStarCatalogNotes", + "PfsStarCatalog", +] + +class TempFitFlag(IntFlag): + """Flags for template and RV fitting.""" + + # NOTE: Keep this consistent with the flags in pfs.ga.pfsspec.tempfit.TempFitFlag + + OK = 0 # No flags + BADINIT = 1 << 0 # "Bad initial values" + NOCONVERGE = 1 << 1 # "Optimization did not convergence" + BADCONVERGE = 1 << 2 # "Bad convergence" + MAXITER = 1 << 3 # "Maximum iterations reached" + PARAMEDGE = 1 << 4 # "Template parameters at the bounds" + BADERROR = 1 << 5 # "Errors could not be calculated" + BADCOV = 1 << 6 # "Covariance matrix could not be calculated" + UNLIKELYPRIOR = 1 << 7 # "Unlikely prior" + RVGUESSMULTIMODAL = 1 << 8 # "log L(RV) during guess is multimodal" + RVFITMULTIMODAL = 1 << 9 # "log L(RV) around best fit is multimodal" + NODATA = 1 << 10 # No data to fit, possibly missing 2d output + + +class VelocityCorrections(PfsTable): 
+ """A table of velocity corrections applied to the individual visits.""" + + damdVer = GA_DAMD_VER + schema = [ + Column("visit", np.int32, "ID of the visit these corrections apply for", -1), + Column("JD", np.float32, "Julian date of the visit", -1), + Column("helio", np.float32, "Heliocentric correction", np.nan), + Column("bary", np.float32, "Barycentric correction", np.nan), + ] + fitsExtName = 'VELCORR' + + +class StellarParams(PfsTable): + """List of measured stellar parameters for a target.""" + + damdVer = GA_DAMD_VER + schema = [ + Column("method", str, "Line-of-sight velocity measurement method", ""), + Column("frame", str, "Reference frame of velocity: helio, bary", ""), + Column("param", str, "Stellar parameter: v_los, M_H, T_eff, log_g, a_M", ""), + Column("covarId", np.uint8, "Param position within covariance matrix", -1), + Column("unit", str, "Physical unit of parameter", ""), + Column("value", np.float32, "Stellar parameter value", np.nan), + Column("valueErr", np.float32, "Stellar parameter error", np.nan), + # TODO: add quantiles or similar for MCMC results + Column("flag", bool, "Measurement flag (true means bad)", False), + Column("status", str, "Measurement flags", ""), + ] + fitsExtName = 'STELLARPARAM' + + +class Abundances(PfsTable): + """List of measured abundance parameters for stellar targets.""" + + damdVer = GA_DAMD_VER + schema = [ + Column("method", str, "Abundance measurement method", ""), + Column("element", str, "Chemical element the abundance is measured for", ""), + Column("covarId", np.uint8, "Param position within covariance matrix", -1), + Column("value", np.float32, "Abundance value", np.nan), + Column("valueErr", np.float32, "Abundance error", np.nan), + # TODO: will we have systematic errors? 
+ Column("flag", bool, "Measurement flag (true means bad)", False), + Column("status", str, "Measurement flags", ""), + ] + fitsExtName = 'ABUND' + +class MeasurementFlags(PfsTable): + """List of measurement flags for a target, for each algorithm.""" + + damdVer = GA_DAMD_VER + schema = [ + Column("method", str, "Measurement method", ""), + Column("flag", bool, "Measurement flag (true means bad)", False), + Column("status", str, "Measurement flags", ""), + ] + fitsExtName = 'MEASFLAG' + +class StarFluxTable(FluxTable): + """Table of coadded fluxes at near-original sampling and model fits + + Merged and coadded spectra have been resampled to a standard wavelength + sampling. This representation provides coadded fluxes at approximately the + native wavelength sampling, for those that want the data with a minimum of + resampling. This is mostly of use for single exposures and coadds made from + back-to-back exposures with the same top-end configuration. For coadds made + from exposures with different top-end configurations, the different + wavelength samplings obtained from the different fibers means there's no + single native wavelength sampling, and so this is less useful. + + This is like a `pfs.datamodel.PfsSimpleSpectrum`, except that it includes a + variance array, and is written to a FITS HDU rather than a file (so it can + be incorporated within a `pfs.datamodel.PfsSpectrum`). + + Parameters + ---------- + wavelength : `numpy.ndarray` of `float` + Array of wavelengths. + flux : `numpy.ndarray` of `float` + Array of fluxes. + error : `numpy.ndarray` of `float` + Array of flux errors. + model : `numpy.ndarray` of `float` + Array of best-fit model flux. + cont : `numpy.ndarray` of `float` + Array of continuum model. + norm_flux : `numpy.ndarray` of `float` + Array of continuum-normalized flux. + norm_error : `numpy.ndarray` of `float` + Array of continuum-normalized flux error. + norm_model : `numpy.ndarray` of `float` + Array of continuum-normalized model. 
+ mask : `numpy.ndarray` of `int` + Array of mask pixels. + flags : `pfs.datamodel.MaskHelper` + Helper for dealing with symbolic names for mask values. + """ + _hduName = "FLUX_TABLE" # HDU name to use + + def __init__(self, wavelength, flux, error, model, cont, norm_flux, norm_error, norm_model, mask, flags): + self.checkShapes(wavelength=wavelength, + flux=flux, + error=error, + model=model, + cont=cont, + norm_flux=norm_flux, + norm_error=norm_error, + norm_model=norm_model, + mask=mask) + + self.wavelength = wavelength + self.flux = flux + self.error = error + self.model = model + self.cont = cont + self.norm_flux = norm_flux + self.norm_error = norm_error + self.norm_model = norm_model + self.mask = mask + self.flags = flags + + def toFits(self, fits): + """Write to a FITS file + + Parameters + ---------- + fits : `astropy.io.fits.HDUList` + Opened FITS file. + """ + # NOTE: When making any changes to this method that modify the output + # format, increment the DAMD_VER header value and record the change in + # the versions.txt file. + from astropy.io.fits import BinTableHDU, Column + header = astropyHeaderFromDict(self.flags.toFitsHeader()) + header['DAMD_VER'] = (GA_DAMD_VER, "StarFluxTable datamodel version") + hdu = BinTableHDU.from_columns([ + Column("wavelength", "D", array=self.wavelength), + Column("flux", "E", array=self.flux), + Column("error", "E", array=self.error), + Column("model", "E", array=self.model), + Column("cont", "E", array=self.cont), + Column("norm_flux", "E", array=self.norm_flux), + Column("norm_error", "E", array=self.norm_error), + Column("norm_model", "E", array=self.norm_model), + Column("mask", "K", array=self.mask), + ], header=header, name=self._hduName) + fits.append(hdu) + + @classmethod + def fromFits(cls, fits): + """Construct from a FITS file + + Parameters + ---------- + fits : `astropy.io.fits.HDUList` + Opened FITS file. + + Returns + ------- + self : `FluxTable` + Constructed `FluxTable`. 
+ """ + hdu = fits[cls._hduName] + header = astropyHeaderToDict(hdu.header) + flags = MaskHelper.fromFitsHeader(header) + return cls(hdu.data["wavelength"].astype(float), + hdu.data["flux"].astype(np.float32), + hdu.data["error"].astype(np.float32), + hdu.data["model"].astype(np.float32), + hdu.data["cont"].astype(np.float32), + hdu.data["norm_flux"].astype(np.float32), + hdu.data["norm_error"].astype(np.float32), + hdu.data["norm_model"].astype(np.float32), + hdu.data["mask"].astype(np.int32), + flags) + + +PfsStarNotes = makeNotesClass( + "PfsStarNotes", + [] +) + + +@inheritDocstrings +class PfsStar(PfsFiberArray): + """Coadded spectrum of a GA target with derived quantities. + + Produced by ˙˙gapipe`` + + Parameters + ---------- + target : `pfs.datamodel.Target` + Target information. + observations : `pfs.datamodel.Observations` + Observations of the target. + wavelength : `numpy.ndarray` of `float` + Array of wavelengths. + flux : `numpy.ndarray` of `float` + Array of fluxes. + mask : `numpy.ndarray` of `int` + Array of mask pixels. + sky : `numpy.ndarray` of `float` + Array of sky values. + covar : `numpy.ndarray` of `float` + Near-diagonal (diagonal and either side) part of the covariance matrix. + covar2 : `numpy.ndarray` of `float` + Low-resolution non-sparse covariance estimate. + flags : `MaskHelper` + Helper for dealing with symbolic names for mask values. + metadata : `dict` (`str`: POD), optional + Keyword-value pairs for the header. + fluxTable : `pfs.datamodel.StarFluxTable`, optional + Table of coadded fluxes and continuum-normalized flux from contributing observations. + stellarParams: `pfs.datamodel.StellarParams`, optional + Table of measured stellar parameters. + velocityCorrections: `pfs.datamodel.VelocityCorrections`, optional + Table of velocity corrections applied to the individual visits. + abundances: `pfs.datamodel.Abundances`, optional + Table of measured abundance parameters. 
+ paramsCovar: `numpy.ndarray` of `float`, optional + Covariance matrix for stellar parameters. + abundCovar: `numpy.ndarray` of `float`, optional + Covariance matrix for abundance parameters. + measurementFlags: `pfs.datamodel.MeasurementFlags`, optional + Table of measurement flags for a target, for each algorithm. + notes : `Notes`, optional + Reduction notes. + """ + + filenameFormat = ("pfsStar-%(catId)05d-%(tract)05d-%(patch)s-%(objId)016x" + "-%(nVisit)03d-0x%(pfsVisitHash)016x.fits") + filenameRegex = r"^pfsStar-(\d{5})-(\d{5})-(.*)-([0-9a-f]{16})-(\d{3})-0x([0-9a-f]{16})\.fits.*$" + filenameKeys = [("catId", int), ("tract", int), ("patch", str), ("objId", int), + ("nVisit", int), ("pfsVisitHash", int)] + NotesClass = PfsStarNotes + FluxTableClass = StarFluxTable + + StellarParamsCovarFitsExtName = "STELLARCOVAR" + AbundancesCovarFitsExtName = "ABUNDCOVAR" + + def __init__( + self, + target, + observations, + wavelength, + flux, + mask, + sky, + covar, + covar2, + flags, + metadata=None, + fluxTable=None, + stellarParams=None, + velocityCorrections=None, + abundances=None, + paramsCovar=None, + abundCovar=None, + measurementFlags=None, + notes: Notes = None, + ): + super().__init__(target, observations, wavelength, flux, mask, sky, + covar, covar2, flags, metadata=metadata, fluxTable=fluxTable, notes=notes) + + self.stellarParams = stellarParams + self.velocityCorrections = velocityCorrections + self.abundances = abundances + self.paramsCovar = paramsCovar + self.abundCovar = abundCovar + self.measurementFlags = measurementFlags + + def validate(self): + """Validate that all the arrays are of the expected shape""" + super().validate() + + # TODO: write any validation code + + @classmethod + def _readImpl(cls, fits): + data = super()._readImpl(fits) + + if VelocityCorrections.fitsExtName in fits: + data["velocityCorrections"] = VelocityCorrections.readHdu(fits) + if StellarParams.fitsExtName in fits: + data["stellarParams"] = StellarParams.readHdu(fits) + if 
<![CDATA[Column("objId", np.int64, "GA object identifier.", -1),]]>
"Proposal ID", ""), + Column("obCode", str, "Observing Block", ""), + + Column("fiberId", np.int16, "Fiber identifier", -1), + Column("spectrograph", np.int16, "Spectrograph identifier", -1), + Column("nVisit", np.int16, "Number of visits", -1), + Column("pfsVisitHash", np.int64, "PFS visit hash", -1), + + Column("nVisit_b", np.int16, "Number of visits in B", -1), + Column("nVisit_m", np.int16, "Number of visits in M", -1), + Column("nVisit_r", np.int16, "Number of visits in R", -1), + Column("nVisit_n", np.int16, "Number of visits in N", -1), + + Column("expTimeEff_b", np.float32, "Effective exposure time in B [s]", np.nan), + Column("expTimeEff_m", np.float32, "Effective exposure time in M [s]", np.nan), + Column("expTimeEff_r", np.float32, "Effective exposure time in R [s]", np.nan), + Column("expTimeEff_n", np.float32, "Effective exposure time in N [s]", np.nan), + + Column("snr_b", np.float32, "Signal-to-noise ratio in B", np.nan), + Column("snr_m", np.float32, "Signal-to-noise ratio in M", np.nan), + Column("snr_r", np.float32, "Signal-to-noise ratio in R", np.nan), + Column("snr_n", np.float32, "Signal-to-noise ratio in N", np.nan), + + Column("v_los", np.float32, "Radial velocity [km/s]", np.nan), + Column("v_losErr", np.float32, "Radial velocity error [km/s]", np.nan), + Column("v_losStatus", str, "Radial velocity flag", ""), + Column("EBV", np.float32, "E(B-V) extinction", np.nan), + Column("EBVErr", np.float32, "E(B-V) extinction error", np.nan), + Column("EBVStatus", str, "E(B-V) extinction flag", ""), + Column("T_eff", np.float32, "Effective temperature [K]", np.nan), + Column("T_effErr", np.float32, "Effective temperature error [K]", np.nan), + Column("T_effStatus", str, "Effective temperature flag", ""), + Column("M_H", np.float32, "Metallicity [dex]", np.nan), + Column("M_HErr", np.float32, "Metallicity error [dex]", np.nan), + Column("M_HStatus", str, "Metallicity flag", ""), + Column("a_M", np.float32, "Alpha abundance [dex]", np.nan), + 
Column("a_MErr", np.float32, "Alpha abundance error [dex]", np.nan), + Column("a_MStatus", str, "Alpha abundance flag", ""), + Column("C", np.float32, "Relative carbon", np.nan), + Column("CErr", np.float32, "Relative carbon error", np.nan), + Column("CStatus", str, "Relative carbon flag", ""), + Column("log_g", np.float32, "log g", np.nan), + Column("log_gErr", np.float32, "log g error", np.nan), + Column("log_gStatus", str, "log g flag", ""), + + Column("tempfitflag", bool, "Measurement flag (true means bad)", False), + Column("tempfitstatus", str, "Measurement flags", ""), + ] + fitsExtName = 'STARCATALOG' + + +PfsStarCatalogNotes = makeNotesClass( + "PfsStarCatalogNotes", + [] +) + + +class PfsStarCatalog(): + filenameFormat = ("pfsStarCatalog-%(catId)05d-%(nVisit)03d-0x%(pfsVisitHash)016x.fits") + filenameRegex = r"^pfsStarCatalog-(\d{5})-([0-9]{3})-0x([0-9a-f]{16})\.fits.*$" + filenameKeys = [("catId", int), ("nVisit", int), ("pfsVisitHash", int)] + + NotesClass = PfsStarCatalogNotes + + def __init__( + self, + catId, + observations: Observations, + catalog: StarCatalogTable, + metadata=None, + notes: Notes = None): + + self.catId = catId + self.observations = observations + self.nVisit = wraparoundNVisit(len(observations.visit)) + self.pfsVisitHash = calculatePfsVisitHash(observations.visit) + self.catalog = catalog + self.metadata = metadata if metadata is not None else {} + self.notes = notes if notes is not None else self.NotesClass() + self.length = len(catalog) + + self.validate() + + def getIdentity(self): + """Return the identity of the catalog + + Returns + ------- + identity : `dict` + Key-value pairs that identify this catalog + """ + identity = dict( + catId=self.catId, + visit=self.observations.visit, + nVisit=self.nVisit, + pfsVisitHash=self.pfsVisitHash + ) + return identity + + def validate(self): + + # TODO: Make sure catId is the same for each object + + pass + + def __len__(self): + """Return the length of the arrays""" + return self.length 
<![CDATA[+    def read(cls, catId, nVisit, pfsVisitHash, dirName="."):
+        """Read from file
+
+        This API is intended for use by science users, as it allows selection
+        of the correct file from parameters that make sense, such as which
+        catId, nVisit, etc.
+
+        Parameters
+        ----------
+        catId, nVisit, pfsVisitHash : `int`
+            Values identifying the catalog file; see ``filenameFormat``.
+        dirName : `str`, optional
+            Directory from which to read.
+
+        Returns
+        -------
+        self : ``cls``
+            Catalog read from file.
+        """
+        filename = os.path.join(dirName, cls.filenameFormat % dict(
+            catId=catId, nVisit=nVisit, pfsVisitHash=pfsVisitHash))
+        return cls.readFits(filename)]]>
+ """ + identity = self.getIdentity() + filename = os.path.join(dirName, self.filenameFormat % identity) + return self.writeFits(filename) diff --git a/python/pfs/datamodel/pfsFiberArray.py b/python/pfs/datamodel/pfsFiberArray.py index e8929f02..9c08c9e9 100644 --- a/python/pfs/datamodel/pfsFiberArray.py +++ b/python/pfs/datamodel/pfsFiberArray.py @@ -44,6 +44,7 @@ class PfsFiberArray(PfsSimpleSpectrum): """ filenameFormat = None # Subclasses should override NotesClass: Type[Notes] # Subclasses should override + FluxTableClass: Type[FluxTable] = FluxTable # Subclasses may override def __init__( self, @@ -115,7 +116,7 @@ def _readImpl(cls, fits): data["covar"] = fits["COVAR"].data.astype(np.float32) data["covar2"] = fits["COVAR2"].data.astype(np.float32) try: - fluxTable = FluxTable.fromFits(fits) + fluxTable = cls.FluxTableClass.fromFits(fits) except KeyError as exc: # Only want to catch "Extension XXX not found." if not exc.args[0].startswith("Extension"): @@ -152,3 +153,4 @@ def _writeImpl(self, fits): self.notes.writeFits(fits) if self.fluxTable is not None: self.fluxTable.toFits(fits) + return header diff --git a/python/pfs/datamodel/pfsTargetSpectra.py b/python/pfs/datamodel/pfsTargetSpectra.py index a62dcc3c..e03257fb 100644 --- a/python/pfs/datamodel/pfsTargetSpectra.py +++ b/python/pfs/datamodel/pfsTargetSpectra.py @@ -119,13 +119,18 @@ def __contains__(self, target) -> bool: return target in self.spectra @classmethod - def readFits(cls, filename: str) -> "PfsTargetSpectra": - """Read from FITS file + def readFits(cls, filename: str, **kwargs) -> "PfsTargetSpectra": + """Read a collection of spectra from a FITS file, optionally + filtered by keyword arguments. Parameters ---------- filename : `str` Filename of FITS file. + **kwargs : `dict` + Keyword arguments to filter the spectra. The keys supported depend on the + subclass of `PfsTargetSpectra`. For example, the `PfsFiberArray` class supports + filtering by `targetType`, `catId`, and `objId`. 
Returns ------- @@ -133,7 +138,7 @@ def readFits(cls, filename: str) -> "PfsTargetSpectra": Constructed instance, from FITS file. """ - def readData(hduName: str, dtype: type) -> Union[np.ndarray, List[np.ndarray]]: + def readData(hduName: str, dtype: type, mask=None) -> Union[np.ndarray, List[np.ndarray]]: """Read data from FITS file Parameters @@ -142,6 +147,8 @@ def readData(hduName: str, dtype: type) -> Union[np.ndarray, List[np.ndarray]]: Name of the HDU. dtype : `type` Data type. + mask : `np.ndarray`, optional + Mask to apply to the data. If provided, the data will be read as a masked array. Returns ------- @@ -150,35 +157,87 @@ def readData(hduName: str, dtype: type) -> Union[np.ndarray, List[np.ndarray]]: """ hdu = fits[hduName] if isinstance(hdu, (ImageHDU, CompImageHDU)): - return hdu.data.astype(dtype) - numRows = len(hdu.data.dtype) - if "row_0" in hdu.data.dtype.names: - data: List[np.ndarray] = [] - for ii in range(len(hdu.data)): - data.append(np.array([hdu.data[f"row_{jj}"][ii] for jj in range(numRows)], dtype=dtype)) - return data - if numRows == 1: - return [row["value"].astype(dtype) for row in hdu.data] - raise RuntimeError(f"Cannot read data from HDU {hduName}") + if mask is None: + return hdu.data.astype(dtype) + else: + if hdu.data.ndim == 1: + # Same for all spectra, such as wavelength, do not filter + return hdu.data.astype(dtype) + else: + # Different for each spectrum, such as flux, do filter + return hdu.data[mask].astype(dtype) + elif isinstance(hdu, BinTableHDU): + # This is a special case of storing variable length arrays in a table. + # Rows of 2D arrays are stored as columns of a table (e.g. 
covariance) + numRows = len(hdu.data.dtype) + if "row_0" in hdu.data.dtype.names: + data: List[np.ndarray] = [] + for ii in range(len(hdu.data)): + data.append(np.array([hdu.data[f"row_{jj}"][ii] for jj in range(numRows)], dtype=dtype)) + return data + if numRows == 1: + # 1D array stored as a single column + if mask is None: + return [row["value"].astype(dtype) for row in hdu.data] + else: + return [row.astype(dtype) for row in hdu.data["value"][mask]] + else: + # 2D array stored as multiple columns + data: List[np.ndarray] = [] + if mask is not None: + index = np.where(mask)[0] + else: + index = range(len(hdu.data)) + + for ii in index: + data.append(np.array( + [hdu.data[f"row_{jj}"][ii] for jj in range(numRows)], + dtype=dtype)) + + return data + # raise RuntimeError(f"Cannot read data from HDU {hduName}") + else: + raise TypeError(f"HDU '{hduName}' is not an ImageHDU, CompImageHDU or BinTableHDU.") spectra = [] with astropy.io.fits.open(filename) as fits: - header = astropyHeaderToDict(fits[0].header) + # Read the main headers and the list of targets. 
+ header = fits[0].header targetHdu = fits["TARGET"].data + + # If any keyword arguments are provided, filter the spectra + mask = None + for key, value in kwargs.items(): + if key not in targetHdu.columns.names: + raise KeyError(f"Keyword argument '{key}' not found in TARGET HDU.") + + if targetHdu.dtype[key].kind == "V": + # Handle variable length string columns + m = np.array([''.join(v) for v in targetHdu[key]]) == value + else: + m = np.array(targetHdu[key]) == value + + mask = m if mask is None else mask & m + targetFluxHdu = fits["TARGETFLUX"].data observationsHdu = fits["OBSERVATIONS"].data - wavelengthData = readData("WAVELENGTH", np.float64) - fluxData = readData("FLUX", np.float32) - maskData = readData("MASK", np.int32) - skyData = readData("SKY", np.float32) - covarData = readData("COVAR", np.float32) - covar2Hdu = fits["COVAR2"].data if "COVAR2" in fits else None + wavelengthData = readData("WAVELENGTH", np.float64, mask=mask) + fluxData = readData("FLUX", np.float32, mask=mask) + maskData = readData("MASK", np.int32, mask=mask) + skyData = readData("SKY", np.float32, mask=mask) + covarData = readData("COVAR", np.float32, mask=mask) + covar2Data = readData("COVAR2", np.float32, mask=mask) if "COVAR2" in fits else None metadataHdu = fits["METADATA"].data fluxTableHdu = fits["FLUXTABLE"].data notesTable = cls.NotesClass.readHdu(fits) - for ii, row in enumerate(targetHdu): - targetId = row["targetId"] + if mask is None: + targets = enumerate(targetHdu.targetId) + else: + targets = zip(np.where(mask)[0], targetHdu.targetId[mask]) + + for ii, (index, targetId) in enumerate(targets): + row = targetHdu[index] select = targetFluxHdu.targetId == targetId fiberFlux = dict( zip( @@ -216,7 +275,7 @@ def readData(hduName: str, dtype: type) -> Union[np.ndarray, List[np.ndarray]]: expTime, ) - metadataRow = metadataHdu[ii] + metadataRow = metadataHdu[index] assert metadataRow["targetId"] == targetId metadata = yaml.load( @@ -226,7 +285,7 @@ def readData(hduName: 
str, dtype: type) -> Union[np.ndarray, List[np.ndarray]]: ) flags = MaskHelper.fromFitsHeader(metadata, strip=True) - fluxTableRow = fluxTableHdu[ii] + fluxTableRow = fluxTableHdu[index] assert fluxTableRow["targetId"] == targetId fluxTable = FluxTable( fluxTableRow["wavelength"], @@ -237,7 +296,7 @@ def readData(hduName: str, dtype: type) -> Union[np.ndarray, List[np.ndarray]]: ) notes = cls.PfsFiberArrayClass.NotesClass( - **{col.name: getattr(notesTable, col.name)[ii] for col in notesTable.schema} + **{col.name: getattr(notesTable, col.name)[index] for col in notesTable.schema} ) # Wavelength: we might write a single array if everything has the same length and value @@ -255,7 +314,7 @@ def readData(hduName: str, dtype: type) -> Union[np.ndarray, List[np.ndarray]]: maskData[ii], skyData[ii], covarData[ii], - covar2Hdu[ii] if covar2Hdu is not None else [], + covar2Data[ii] if covar2Data is not None else [], flags, metadata, fluxTable, diff --git a/tests/test_PfsStar.py b/tests/test_PfsStar.py new file mode 100644 index 00000000..2ccc5485 --- /dev/null +++ b/tests/test_PfsStar.py @@ -0,0 +1,191 @@ +import os +import re + +import numpy as np + +import lsst.utils.tests + +from pfs.datamodel import Target, TargetType +from pfs.datamodel import Observations +from pfs.datamodel import MaskHelper +from pfs.datamodel import StarFluxTable +from pfs.datamodel import PfsStar, StellarParams, Abundances, VelocityCorrections + +class PfsStarTestCase(lsst.utils.tests.TestCase): + """ Check the format of example datamodel files are + consistent with that specified in the corresponding + datamodel classes. 
+ """ + + def makePfsStar(self): + """Construct a PfsStar with dummy values for testing.""" + + catId = 12345 + tract = 1 + patch = '1,1' + objId = 123456789 + ra = -100.63654 + dec = -68.591576 + targetType = TargetType.SCIENCE + + target = Target(catId, tract, patch, objId, ra, dec, targetType) + + visit = np.array([83219, 83220]) + arm = np.array(['b', 'm']) + spectrograph = np.array([1, 1]) + pfsDesignId = np.array([8854764194165386399, 8854764194165386400]) + fiberId = np.array([476, 476]) + pfiNominal = np.array([[ra, dec], [ra, dec]]) + pfiCenter = np.array([[ra, dec], [ra, dec]]) + + observations = Observations(visit, arm, spectrograph, pfsDesignId, fiberId, pfiNominal, pfiCenter) + + npix = 4096 + wavelength = np.concatenate([ + np.linspace(380, 650, npix, dtype=np.float32), + np.linspace(710, 885, npix, dtype=np.float32) + ]) + flux = np.zeros_like(wavelength) + error = np.zeros_like(wavelength) + model = np.zeros_like(wavelength) + cont = np.zeros_like(wavelength) + norm_flux = np.zeros_like(wavelength) + norm_error = np.zeros_like(wavelength) + norm_model = np.zeros_like(wavelength) + mask = np.zeros_like(wavelength, dtype=np.int32) + sky = np.zeros_like(wavelength) + covar = np.zeros((3, wavelength.size), dtype=np.float32) # Tridiagonal covariance matrix of flux + covar2 = np.zeros((1, 1), dtype=np.float32) # ? 
+ + flags = MaskHelper() # + metadata = {} # Key-value pairs to put in the header + fluxTable = StarFluxTable(wavelength, flux, error, model, cont, + norm_flux, norm_error, norm_model, + mask, flags) + + stellarParams = StellarParams( + method=np.array(['tempfit', 'tempfit', 'tempfit', 'tempfit', 'tempfit']), + frame=np.array(['helio', '', '', '', '']), + param=np.array(['v_los', 'Fe_H', 'T_eff', 'log_g', 'a_Fe']), + covarId=np.array([0, 1, 2, 3, 4]), + unit=np.array(['km s-1', 'dex', 'K', '', 'dex']), + value=np.array([0.0, 0.0, 0.0, 0.0, 0.0]), + valueErr=np.array([0.0, 0.0, 0.0, 0.0, 0.0]), + flag=np.array([False, False, False, False, False]), + status=np.array(['', '', '', '', '']), + ) + + velocityCorrections = VelocityCorrections( + visit=visit, + JD=np.zeros_like(visit, dtype=np.float32), + helio=np.zeros_like(visit, dtype=np.float32), + bary=np.zeros_like(visit, dtype=np.float32), + ) + + abundances = Abundances( + method=np.array(['chemfit', 'chemfit', 'chemfit']), + element=np.array(['Mg', 'Ti', 'Si']), + covarId=np.array([0, 1, 2]), + value=np.array([0.0, 0.0, 0.0]), + valueErr=np.array([0.0, 0.0, 0.0]), + flag=np.array([False, False, False]), + status=np.array(['', '', '']), + ) + + paramsCovar = np.eye(3, dtype=np.float32) + abundCovar = np.eye(4, dtype=np.float32) + notes = None + + return PfsStar(target, observations, + wavelength, flux, mask, sky, covar, covar2, + flags, metadata, + fluxTable, + stellarParams, + velocityCorrections, + abundances, + paramsCovar, + abundCovar, + notes) + + def assertPfsStar(self, lhs, rhs): + np.testing.assert_array_equal(lhs.observations.visit, rhs.observations.visit) + + # TODO: add more tests here + + def extractAttributes(self, cls, fileName): + matches = re.search(cls.filenameRegex, fileName) + if not matches: + self.fail( + "Unable to parse filename: {} using regex {}" + .format(fileName, cls.filenameRegex)) + + # Cannot use algorithm in PfsSpectra._parseFilename(), + # specifically cls.filenameKeys, due to 
ambiguity in parsing + # integers in hex format (eg objId). Need to parse cls.filenameFormat + ff = re.search(r'^[a-zA-Z]+(.*)\.fits', cls.filenameFormat)[1] + cmps = re.findall(r'-{0,1}(0x){0,1}\%\((\w+)\)\d*(\w)', ff) + fmts = [(kk, tt) for ox, kk, tt in cmps] + + d = {} + for (kk, tt), vv in zip(fmts, matches.groups()): + if tt == 'd': + ii = int(vv) + elif tt == 'x': + ii = int(vv, 16) + elif tt == 's': + ii = vv + d[kk] = ii + return d + + def testFilenameRegex(self): + d = self.extractAttributes( + PfsStar, + 'pfsStar-07621-01234-2,2-02468ace1234abcd-003-0x0123456789abcdef.fits') + self.assertEqual(d['catId'], 7621) + self.assertEqual(d['tract'], 1234) + self.assertEqual(d['patch'], '2,2') + self.assertEqual(d['objId'], 163971054118939597) + self.assertEqual(d['nVisit'], 3) + self.assertEqual(d['pfsVisitHash'], 81985529216486895) + + def testGetIdentity(self): + """Construct a PfsStar and get its identity.""" + + pfsStar = self.makePfsStar() + identity = pfsStar.getIdentity() + filename = pfsStar.filenameFormat % identity + + self.assertEqual('pfsStar-12345-00001-1,1-00000000075bcd15-002-0x05a95bc24d8ce16f.fits', filename) + + def testValidate(self): + """Construct a PfsStar and run validation.""" + + pfsStar = self.makePfsStar() + pfsStar.validate() + + def testWriteFitsFromFits(self): + """Construct a PfsStar and save it to a FITS file.""" + + pfsStar = self.makePfsStar() + + dirName = os.path.splitext(__file__)[0] + if not os.path.exists(dirName): + os.makedirs(dirName) + + id = pfsStar.getIdentity() + filename = os.path.join(dirName, pfsStar.filenameFormat % id) + if os.path.exists(filename): + os.unlink(filename) + + pfsStar.writeFits(filename) + other = PfsStar.readFits(filename) + self.assertPfsStar(pfsStar, other) + + # try: + # pfsStar.writeFits(filename) + # other = PfsStar.readFits(filename) + # self.assertPfsStar(pfsStar, other) + # except Exception as e: + # raise e + # finally: + # os.unlink(filename) diff --git a/tests/test_PfsStarCatalog.py 
b/tests/test_PfsStarCatalog.py new file mode 100644 index 00000000..bb0ce07d --- /dev/null +++ b/tests/test_PfsStarCatalog.py @@ -0,0 +1,166 @@ +import os +import re + +import numpy as np + +import lsst.utils.tests + +from pfs.datamodel import TargetType, Observations +from pfs.datamodel import PfsStarCatalog, StarCatalogTable +from pfs.datamodel.pfsTable import Column + + +class PfsStarCatalogTestCase(lsst.utils.tests.TestCase): + """ Check the format of example datamodel files are + consistent with that specified in the corresponding + datamodel classes. + """ + + def makePfsStarCatalog(self): + """Construct a PfsStarCatalog with dummy values for testing.""" + + catId = 12345 + + observations = Observations( + visit=np.array([10000, 10001, 10002, 10005]), + arm=np.array(['bmn', 'bmn', 'bmn', 'bmn']), + spectrograph=np.array(4 * [-1]), + pfsDesignId=np.array([100001, 100002, 100003, 100004]), + fiberId=np.array(4 * [-1]), + pfiNominal=np.array(4 * [2 * [np.nan]]), + pfiCenter=np.array(4 * [2 * [np.nan]]), + obsTime=np.array(['2025-01-10 12:00:00', '2025-01-10 12:15:00', + '2025-01-10 12:30:00', '2025-01-10 13:00:00']), + expTime=np.array([1800.0, 1800.0, 1800.0, 1800.0]), + ) + + catalog = StarCatalogTable( + catId=np.array([10001, 10001, 10001]), + objId=np.array([1, 2, 3]), + ra=np.array([210.01, 210.02, 210.03]), + dec=np.array([67.1, 67.2, 67.3]), + epoch=np.array(['J2016.0', 'J2016.0', 'J2016.0']), + pmRa=np.array([0.0, 0.0, 0.0]), + pmDec=np.array([0.0, 0.0, 0.0]), + parallax=np.array([0.0, 0.0, 0.0]), + targetType=np.array([TargetType.SCIENCE, TargetType.SCIENCE, TargetType.SCIENCE]), + targetPriority=np.array([1, 1, 1]), + proposalId=np.array(['A', 'A', 'A']), + obCode=np.array(['A', 'B', 'C']), + + fiberId=np.array([-1, -1, -1]), + nVisit=np.array([3, 3, 3]), + pfsVisitHash=np.array([81985529216486895, 81985529216486895, 81985529216486895]), + + nVisit_b=np.array([3, 3, 3]), + nVisit_m=np.array([3, 3, 3]), + nVisit_r=np.array([3, 3, 3]), + 
nVisit_n=np.array([3, 3, 3]), + + expTimeEff_b=np.array([5400, 5400, 5400]), + expTimeEff_m=np.array([5400, 5400, 5400]), + expTimeEff_r=np.array([5400, 5400, 5400]), + expTimeEff_n=np.array([5400, 5400, 5400]), + + snr_b=np.array([0.0, 0.0, 0.0]), + snr_m=np.array([0.0, 0.0, 0.0]), + snr_r=np.array([0.0, 0.0, 0.0]), + snr_n=np.array([0.0, 0.0, 0.0]), + + v_los=np.array([0.0, 0.0, 0.0]), + v_losErr=np.array([0.0, 0.0, 0.0]), + v_losStatus=np.array(['', '', '']), + EBV=np.array([0.0, 0.0, 0.0]), + EBVErr=np.array([0.0, 0.0, 0.0]), + EBVStatus=np.array(['', '', '']), + T_eff=np.array([0.0, 0.0, 0.0]), + T_effErr=np.array([0.0, 0.0, 0.0]), + T_effStatus=np.array(['', '', '']), + M_H=np.array([0.0, 0.0, 0.0]), + M_HErr=np.array([0.0, 0.0, 0.0]), + M_HStatus=np.array(['', '', '']), + a_M=np.array([0.0, 0.0, 0.0]), + a_MErr=np.array([0.0, 0.0, 0.0]), + a_MStatus=np.array(['', '', '']), + C=np.array([0.0, 0.0, 0.0]), + CErr=np.array([0.0, 0.0, 0.0]), + CStatus=np.array(['', '', '']), + log_g=np.array([0.0, 0.0, 0.0]), + log_gErr=np.array([0.0, 0.0, 0.0]), + log_gStatus=np.array(['', '', '']), + + flag=np.array([False, False, False]), + status=np.array(['', '', '']), + ) + + return PfsStarCatalog( + catId, observations, catalog, metadata=None, notes=None + ) + + def assertPfsStarCatalog(self, lhs, rhs): + np.testing.assert_array_equal(lhs.observations.visit, rhs.observations.visit) + + # TODO: add more tests here + + def extractAttributes(self, cls, fileName): + matches = re.search(cls.filenameRegex, fileName) + if not matches: + self.fail( + "Unable to parse filename: {} using regex {}" + .format(fileName, cls.filenameRegex)) + + # Cannot use algorithm in PfsSpectra._parseFilename(), + # specifically cls.filenameKeys, due to ambiguity in parsing + # integers in hex format (eg objId). 
Need to parse cls.filenameFormat + ff = re.search(r'^[a-zA-Z]+(.*)\.fits', cls.filenameFormat)[1] + cmps = re.findall(r'-{0,1}(0x){0,1}\%\((\w+)\)\d*(\w)', ff) + fmts = [(kk, tt) for ox, kk, tt in cmps] + + d = {} + for (kk, tt), vv in zip(fmts, matches.groups()): + if tt == 'd': + ii = int(vv) + elif tt == 'x': + ii = int(vv, 16) + elif tt == 's': + ii = vv + d[kk] = ii + return d + + def testFilenameRegex(self): + d = self.extractAttributes( + PfsStarCatalog, + 'pfsStarCatalog-07621-003-0x0123456789abcdef.fits') + self.assertEqual(d['catId'], 7621) + self.assertEqual(d['nVisit'], 3) + self.assertEqual(d['pfsVisitHash'], 81985529216486895) + + def testValidate(self): + """Construct a PfsStarCatalog and run validation.""" + + pfsStarCatalog = self.makePfsStarCatalog() + pfsStarCatalog.validate() + + def testWriteFitsFromFits(self): + """Construct a PfsStarCatalog and save it to a FITS file.""" + + pfsStarCatalog = self.makePfsStarCatalog() + + dirName = os.path.splitext(__file__)[0] + if not os.path.exists(dirName): + os.makedirs(dirName) + + id = pfsStarCatalog.getIdentity() + filename = os.path.join(dirName, pfsStarCatalog.filenameFormat % id) + if os.path.exists(filename): + os.unlink(filename) + + try: + pfsStarCatalog.writeFits(filename) + other = PfsStarCatalog.readFits(filename) + self.assertPfsStarCatalog(pfsStarCatalog, other) + except Exception as e: + raise e + finally: + if os.path.exists(filename): + os.unlink(filename) diff --git a/tests/test_PfsTargetSpectra.py b/tests/test_PfsTargetSpectra.py index 710c65ed..9933d6bc 100644 --- a/tests/test_PfsTargetSpectra.py +++ b/tests/test_PfsTargetSpectra.py @@ -349,6 +349,7 @@ def testLookup(self): def testIO(self): """Test I/O functionality""" + spectra = self.makePfsTargetSpectra(list(range(123))) with lsst.utils.tests.getTempFilePath(".fits") as filename: spectra.writeFits(filename) @@ -359,6 +360,23 @@ def testIO(self): with astropy.io.fits.open(filename) as hdus: self.assertIn("DAMD_VER", hdus[0].header) 
+ def testIOWithFilters(self): + """Test I/O functionality with filters""" + + spectra = self.makePfsTargetSpectra(list(range(123)), catId=itertools.cycle([12345, 12346, 12347])) + with lsst.utils.tests.getTempFilePath(".fits") as filename: + spectra.writeFits(filename) + + # Filter single spectrum by objId + copy = PfsCoadd.readFits(filename, objId=22) + self.assertPfsTargetSpectraEqual(copy, PfsCoadd([spectra[22]])) + + # Filter multiple spectra by catId + copy = PfsCoadd.readFits(filename, catId=12345) + for (_, left), right in zip(copy.items(), [s for t, s in spectra.items() if t.catId == 12345]): + self.assertTargetEqual(left.target, right.target) + self.assertPfsFiberArrayEqual(left, right) + def testDifferentLengths(self): """Test I/O when the spectra have different lengths""" spectra = self.makePfsTargetSpectra(list(range(123)), length=itertools.cycle([999, 1000, 1001])) @@ -367,6 +385,30 @@ def testDifferentLengths(self): copy = PfsCoadd.readFits(filename) self.assertPfsTargetSpectraEqual(copy, spectra) + def testDifferentLengthsWithFilters(self): + """Test I/O when the spectra have different lengths and filters are used""" + spectra = self.makePfsTargetSpectra( + list(range(123)), + catId=itertools.cycle([12345, 12346, 12347]), + length=itertools.cycle([999, 1000, 1001, 1002, 1003]) + ) + with lsst.utils.tests.getTempFilePath(".fits") as filename: + spectra.writeFits(filename) + + # Filter single spectrum by objId + for ii in range(22, 27): + copy = PfsCoadd.readFits(filename, objId=ii) + self.assertPfsTargetSpectraEqual(copy, PfsCoadd([spectra[ii]])) + + # Filter multiple spectra by catId + copy = PfsCoadd.readFits(filename, catId=12345) + for (_, left), right in zip( + copy.items(), + [s for t, s in spectra.items() if t.catId == 12345]): + + self.assertTargetEqual(left.target, right.target) + self.assertPfsFiberArrayEqual(left, right) + class TestMemory(lsst.utils.tests.MemoryTestCase): pass diff --git a/tests/test_docstrings.py 
b/tests/test_docstrings.py index 4e590fd9..564372e0 100644 --- a/tests/test_docstrings.py +++ b/tests/test_docstrings.py @@ -5,18 +5,21 @@ import lsst.utils.tests from pfs.datamodel.drp import PfsArm, PfsMerged, PfsReference, PfsSingle, PfsObject +from pfs.datamodel import PfsStar, PfsStarCatalog class DocstringsTestCase(unittest.TestCase): def testDocstrings(self): - for cls in (PfsArm, PfsMerged, PfsReference, PfsSingle, PfsObject): + for cls in (PfsArm, PfsMerged, PfsReference, PfsSingle, PfsObject, PfsStar, PfsStarCatalog): for name, attr in inspect.getmembers(cls): if not hasattr(attr, "__doc__") or not attr.__doc__: continue docstring = attr.__doc__ for base in cls.__mro__[1:-1]: - self.assertNotIn(base.__name__, docstring, - f"{cls.__name__}.{name}.__doc__ contains {base.__name__}: {docstring}") + if not name.endswith("Class"): + self.assertNotIn( + base.__name__, docstring, + f"{cls.__name__}.{name}.__doc__ contains {base.__name__}: {docstring}") class TestMemory(lsst.utils.tests.MemoryTestCase):