From 2ce6373f8aa36b918a421abc0e684fb5ca12339a Mon Sep 17 00:00:00 2001 From: Paul Price Date: Tue, 3 May 2022 14:13:48 -0400 Subject: [PATCH] I/O: use double precision only for wavelength Wavelength needs to be in double precision in order to preserve the precision in the measurements, but other quantities (esp. flux) don't need double precision, and reverting them to single precision saves disk space. --- python/pfs/datamodel/fluxTable.py | 8 +++--- python/pfs/datamodel/pfsFiberArray.py | 14 +++++----- python/pfs/datamodel/pfsFiberArraySet.py | 32 ++++++++++++++--------- python/pfs/datamodel/pfsFiberProfiles.py | 6 ++--- python/pfs/datamodel/pfsFluxReference.py | 2 +- python/pfs/datamodel/pfsSimpleSpectrum.py | 4 +-- 6 files changed, 38 insertions(+), 28 deletions(-) diff --git a/python/pfs/datamodel/fluxTable.py b/python/pfs/datamodel/fluxTable.py index 9ecfefdd..c60c1cd4 100644 --- a/python/pfs/datamodel/fluxTable.py +++ b/python/pfs/datamodel/fluxTable.py @@ -69,8 +69,8 @@ def toFits(self, fits): header['DAMD_VER'] = (1, "FluxTable datamodel version") hdu = BinTableHDU.from_columns([ Column("wavelength", "D", array=self.wavelength), - Column("flux", "D", array=self.flux), - Column("error", "D", array=self.error), + Column("flux", "E", array=self.flux), + Column("error", "E", array=self.error), Column("mask", "K", array=self.mask), ], header=header, name=self._hduName) fits.append(hdu) @@ -92,6 +92,6 @@ def fromFits(cls, fits): hdu = fits[cls._hduName] header = astropyHeaderToDict(hdu.header) flags = MaskHelper.fromFitsHeader(header) - return cls(hdu.data["wavelength"].astype(float), hdu.data["flux"].astype(float), - hdu.data["error"].astype(float), hdu.data["mask"].astype(np.int32), + return cls(hdu.data["wavelength"].astype(float), hdu.data["flux"].astype(np.float32), + hdu.data["error"].astype(np.float32), hdu.data["mask"].astype(np.int32), flags) diff --git a/python/pfs/datamodel/pfsFiberArray.py b/python/pfs/datamodel/pfsFiberArray.py index d6079e4a..0df6b461 100644 --- a/python/pfs/datamodel/pfsFiberArray.py +++ b/python/pfs/datamodel/pfsFiberArray.py @@ -1,3 +1,5 @@ +import numpy as np + from .pfsSimpleSpectrum import PfsSimpleSpectrum from .utils import wraparoundNVisit, inheritDocstrings from .fluxTable import FluxTable @@ -88,10 +90,10 @@ def _readImpl(cls, fits): Keyword arguments for constructing spectrum. """ data = super()._readImpl(fits) - data["sky"] = fits["SKY"].data.astype(float) + data["sky"] = fits["SKY"].data.astype(np.float32) data["observations"] = Observations.fromFits(fits) - data["covar"] = fits["COVAR"].data.astype(float) - data["covar2"] = fits["COVAR2"].data.astype(float) + data["covar"] = fits["COVAR"].data.astype(np.float32) + data["covar2"] = fits["COVAR2"].data.astype(np.float32) try: fluxTable = FluxTable.fromFits(fits) except KeyError as exc: @@ -118,9 +120,9 @@ def _writeImpl(self, fits): from astropy.io.fits import ImageHDU header = super()._writeImpl(fits) header["DAMD_VER"] = (1, "PfsFiberArray datamodel version") - fits.append(ImageHDU(self.sky, header=header, name="SKY")) - fits.append(ImageHDU(self.covar, header=header, name="COVAR")) - fits.append(ImageHDU(self.covar2, name="COVAR2")) + fits.append(ImageHDU(self.sky.astype(np.float32), header=header, name="SKY")) + fits.append(ImageHDU(self.covar.astype(np.float32), header=header, name="COVAR")) + fits.append(ImageHDU(self.covar2.astype(np.float32), name="COVAR2")) self.observations.toFits(fits) if self.fluxTable is not None: self.fluxTable.toFits(fits) diff --git a/python/pfs/datamodel/pfsFiberArraySet.py b/python/pfs/datamodel/pfsFiberArraySet.py index d944cb18..21cd3a0e 100644 --- a/python/pfs/datamodel/pfsFiberArraySet.py +++ b/python/pfs/datamodel/pfsFiberArraySet.py @@ -220,9 +220,9 @@ def readFits(cls, filename): data["metadata"] = astropyHeaderToDict(fd[0].header) for attr, dtype in (("fiberId", np.int32), ("wavelength", float), - ("flux", float), - ("mask", np.int32), - ("sky", float), + ("flux", np.float32), + ("mask", np.uint32), + ("sky", np.float32), # "norm" is treated separately, for backwards-compatibility ("covar", float) ): @@ -230,9 +230,9 @@ def readFits(cls, filename): data[attr] = fd[hduName].data.astype(dtype) version = fd[0].header["DAMD_VER"] if version >= 2: - data["norm"] = fd["NORM"].data.astype(float) + data["norm"] = fd["NORM"].data.astype(np.float32) else: - data["norm"] = np.ones_like(data["flux"], dtype=float) + data["norm"] = np.ones_like(data["flux"], dtype=np.float32) data["identity"] = Identity.fromFits(fd) data["flags"] = MaskHelper.fromFitsHeader(data["metadata"]) @@ -283,10 +283,18 @@ def writeFits(self, filename): header = astropyHeaderFromDict(header) header['DAMD_VER'] = (2, "PfsFiberArraySet datamodel version") fits.append(astropy.io.fits.PrimaryHDU(header=header)) - for attr in ("fiberId", "wavelength", "flux", "mask", "sky", "norm", "covar"): + for attr, dtype in ( + ("fiberId", np.int32), + ("wavelength", float), + ("flux", np.float32), + ("mask", np.uint32), + ("sky", np.float32), + ("norm", np.float32), + ("covar", np.float32) + ): hduName = attr.upper() data = getattr(self, attr) - fits.append(astropy.io.fits.ImageHDU(data, name=hduName)) + fits.append(astropy.io.fits.ImageHDU(data.astype(dtype), name=hduName)) self.identity.toFits(fits) with open(filename, "wb") as fd: @@ -332,11 +340,11 @@ def fromMerge(cls, spectraList, metadata=None): length = length.pop() fiberId = np.empty(num, dtype=int) wavelength = np.empty((num, length), dtype=float) - flux = np.empty((num, length), dtype=float) - mask = np.empty((num, length), dtype=int) - sky = np.empty((num, length), dtype=float) - norm = np.empty((num, length), dtype=float) - covar = np.empty((num, 3, length), dtype=float) + flux = np.empty((num, length), dtype=np.float32) + mask = np.empty((num, length), dtype=np.int32) + sky = np.empty((num, length), dtype=np.float32) + norm = np.empty((num, length), dtype=np.float32) + covar = np.empty((num, 3, length), dtype=np.float32) index = 0 for ss in spectraList: select = slice(index, index + len(ss)) diff --git a/python/pfs/datamodel/pfsFiberProfiles.py b/python/pfs/datamodel/pfsFiberProfiles.py index 78c3a383..11586f99 100644 --- a/python/pfs/datamodel/pfsFiberProfiles.py +++ b/python/pfs/datamodel/pfsFiberProfiles.py @@ -36,7 +36,7 @@ class PfsFiberProfiles: that fiber. ``P = int(2*(radius + 1)*oversample) + 1``. The profile arrays may be of type `numpy.ma.masked_array`, in order to indicate profile values that should be ignored. - norm : iterable (length ``N``) of array_like of `float` (length ``Q``) + norm : iterable (length ``N``) of array_like of `float32` (length ``Q``) Normalisation to apply when extracting spectrum from the image. ``Q`` is the height of the detector; or it may be ``0`` if no normalisation is to be applied. @@ -156,7 +156,7 @@ def _readImpl(cls, fits, identity): fiberId1 = hdu.data["fiberId"].astype(np.int32) radius = hdu.data["radius"].astype(np.int32) oversample = hdu.data["oversample"].astype(float) - norm = [nn.astype(float) for nn in hdu.data["norm"]] + norm = [nn.astype(np.float32) for nn in hdu.data["norm"]] hdu = fits["PROFILES"] fiberId2 = hdu.data["fiberId"].astype(np.int32) @@ -278,7 +278,7 @@ def _writeImpl(self): astropy.io.fits.Column("fiberId", format="J", array=self.fiberId), astropy.io.fits.Column("radius", format="J", array=self.radius), astropy.io.fits.Column("oversample", format="D", array=self.oversample), - astropy.io.fits.Column("norm", format="PD()", array=self.norm), + astropy.io.fits.Column("norm", format="PE()", array=self.norm), ], name="FIBERS") fibersHdu.header["INHERIT"] = True diff --git a/python/pfs/datamodel/pfsFluxReference.py b/python/pfs/datamodel/pfsFluxReference.py index 9588feeb..97262a02 100644 --- a/python/pfs/datamodel/pfsFluxReference.py +++ b/python/pfs/datamodel/pfsFluxReference.py @@ -204,7 +204,7 @@ def fits_getdata(hdulist, name, dtype=None, needHeader=False): with astropy.io.fits.open(filename) as fd: data["fiberId"] = fits_getdata(fd, "fiberId", dtype=np.int32) - data["flux"], wcsHeader = fits_getdata(fd, "flux", dtype=np.float, needHeader=True) + data["flux"], wcsHeader = fits_getdata(fd, "flux", dtype=np.float32, needHeader=True) data["fitFlag"], flagHeader = fits_getdata(fd, "fitFlag", dtype=np.int32, needHeader=True) data["fitParams"] = fits_getdata(fd, "fitParams") data["identity"] = Identity.fromFits(fd) diff --git a/python/pfs/datamodel/pfsSimpleSpectrum.py b/python/pfs/datamodel/pfsSimpleSpectrum.py index 9710857d..ad587c03 100644 --- a/python/pfs/datamodel/pfsSimpleSpectrum.py +++ b/python/pfs/datamodel/pfsSimpleSpectrum.py @@ -80,7 +80,7 @@ def _readImpl(cls, fits): Keyword arguments for constructing spectrum. """ data = {} - data["flux"] = fits["FLUX"].data.astype(float) + data["flux"] = fits["FLUX"].data.astype(np.float32) data["mask"] = fits["MASK"].data.astype(np.int32) # Wavelength can be specified in an explicit extension, or as a WCS in the header @@ -174,7 +174,7 @@ def _writeImpl(self, fits): if self.metadata: header.extend(astropyHeaderFromDict(self.metadata)) - fits.append(ImageHDU(self.flux, header=header, name="FLUX")) + fits.append(ImageHDU(self.flux.astype(np.float32), header=header, name="FLUX")) maskHeader = astropyHeaderFromDict(self.flags.toFitsHeader()) maskHeader.extend(header) fits.append(ImageHDU(self.mask, header=maskHeader, name="MASK"))