LSSTDESC · jtmyles · Mar 18, 2026 · Aug 11, 2025 · Aug 11, 2025 · Mar 2, 2026
diff --git a/README.md b/README.md
@@ -9,7 +9,21 @@
 **sompz** - RAIL estimator, summarizer, and classifier using the SOMPZ method described in [Buchs, Davis, et al. 2019](https://arxiv.org/pdf/1901.05005.pdf), [Sánchez, Raveri, Alarcon, Bernstein 2020](https://arxiv.org/pdf/2004.09542.pdf), [Myles, Alarcon et al. 2021](https://arxiv.org/pdf/2012.08566.pdf) and [Campos, et al. 2024](https://arxiv.org/pdf/2408.00922). 
 
 
-The main product is the galaxy ensemble tomographic redshift distributions $n(z)$, which are output for a sample as a `qp` ensemble. The code additionally saves the two Self-Organizing Maps (SOMs) constructed for $n(z)$ inference and assignment indices of the input galaxy samples to their respective SOMs.
+The main products are the galaxy ensemble tomographic bin assignments and the associated redshift distributions $n(z)$, which are output for a sample as a `qp` ensemble. The code additionally saves the two Self-Organizing Maps (SOMs) constructed for $n(z)$ inference and assignment indices of the input galaxy samples to their respective SOMs.
+
+The SOMPZ algorithm generates redshift distributions for a sample of galaxies with a multi-step inference formalism. Based on observations of a wide-field imaging dataset catalog and a deep-field imaging dataset catalog (the latter traditionally with lower-noise optical bands and additional near-infrared bands), the algorithm takes three primary tabular data inputs:
+
+- `spec_data`: a catalog with secure redshifts, deep-field photometry, and simulated wide-field photometry
+- `balrog_data`: a catalog with deep-field photometry and simulated wide-field photometry
+- `wide_data`: a catalog with wide-field photometry
+
+In practice, `spec_data` is a subset of `balrog_data`.
+
+These catalogs are used to train two SOMs: one built with deep-field photometry and the other built with wide-field photometry.
+
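+Once all samples are assigned to the wide SOM, and `spec_data` and `balrog_data` are assigned to the deep SOM, the wide SOM cells can be grouped into tomographic bins via a tomographic binning algorithm. The redshift distributions are then computed as follows, where $c$ denotes a deep SOM cell, $\hat{c}$ a wide SOM cell, $\hat{b}$ a tomographic bin, and $\hat{s}$ the sample selection: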
+
+$$ n(z|\hat{b}, \hat{s}) = \sum_{\hat{c} \in \hat{b}} \sum_{c \in \hat{c}} p(z|c, \hat{s}) p(c|\hat{c}, \hat{s}) p(\hat{c}| \hat{s}) $$
 
 # RAIL: Redshift Assessment Infrastructure Layers
 

diff --git a/src/rail/pipelines/estimation/estimate_sompz.py b/src/rail/pipelines/estimation/estimate_sompz.py
@@ -34,9 +34,6 @@ def __init__(
     ):
         RailPipeline.__init__(self)
 
-        DS = RailStage.data_store
-        DS.__class__.allow_overwrite = True
-
         wide_catalog_class = CatalogConfigBase.get_class(
             wide_catalog_tag, catalog_module
         )

diff --git a/src/rail/pipelines/estimation/inform_sompz.py b/src/rail/pipelines/estimation/inform_sompz.py
@@ -52,9 +52,6 @@ def __init__(
             som_wrap=False,
         )
 
-        DS = RailStage.data_store
-        DS.__class__.allow_overwrite = True
-
         # 1: train the deep SOM
         self.som_informer_deep = SOMPZInformer.build(
             aliases=dict(input_data="input_deep_data"),

diff --git a/tests/test_algos.py b/tests/test_algos.py
@@ -20,9 +20,6 @@
 traindata = os.path.join(RAILDIR, 'rail/examples_data/testdata/training_100gal.hdf5')
 validdata = os.path.join(RAILDIR, 'rail/examples_data/testdata/validation_10gal.hdf5')
 
-DS = RailStage.data_store
-DS.__class__.allow_overwrite = True
-
 
 @pytest.mark.parametrize(
     "ntarray",

diff --git a/tests/test_stages.py b/tests/test_stages.py
@@ -1,5 +1,5 @@
 from rail.core.stage import RailStage
-from rail.core.data import Hdf5Handle
+from rail.core.data import Hdf5Handle, DataStore
 from rail.utils.catalog_utils import CatalogConfigBase
 
 from rail.estimation.algos.sompz import (
@@ -63,9 +63,7 @@
 def test_informer_deep(get_data):
     assert get_data == 0
 
-    DS = RailStage.data_store
-    DS.__class__.allow_overwrite = True
-    DS.clear()
+    DS = DataStore()
 
     som_informer_deep = SOMPZInformer.make_stage(
         name="test_informer_deep",
@@ -83,9 +81,7 @@ def test_informer_deep(get_data):
 def test_informer_wide(get_data):
     assert get_data == 0
 
-    DS = RailStage.data_store
-    DS.__class__.allow_overwrite = True
-    DS.clear()
+    DS = DataStore()
 
     som_informer_wide = SOMPZInformer.make_stage(
         name="test_informer_wide",
@@ -104,9 +100,7 @@ def test_deepdeep_estimator(get_data, get_intermediates):
     assert get_data == 0
     assert get_intermediates == 0
 
-    DS = RailStage.data_store
-    DS.__class__.allow_overwrite = True
-    DS.clear()
+    DS = DataStore()
 
     som_deepdeep_estimator = SOMPZEstimatorDeep.make_stage(
         name="test_deepdeep_estimator",
@@ -128,9 +122,7 @@ def test_deepwide_estimator(get_data, get_intermediates):
     assert get_data == 0
     assert get_intermediates == 0
 
-    DS = RailStage.data_store
-    DS.__class__.allow_overwrite = True
-    DS.clear()
+    DS = DataStore()
 
     som_deepwide_estimator = SOMPZEstimatorWide.make_stage(
         name="test_deepwide_estimator",
@@ -152,9 +144,7 @@ def test_pz_c(get_data, get_intermediates):
     assert get_data == 0
     assert get_intermediates == 0
 
-    DS = RailStage.data_store
-    DS.__class__.allow_overwrite = True
-    DS.clear()
+    DS = DataStore()
 
     som_pzc = SOMPZPzc.make_stage(
         name="test_pzc",
@@ -184,9 +174,7 @@ def test_pc_chat(get_intermediates):
 
     assert get_intermediates == 0
 
-    DS = RailStage.data_store
-    DS.__class__.allow_overwrite = True
-    DS.clear()
+    DS = DataStore()
 
     som_pcchat = SOMPZPc_chat.make_stage(
         name="test_pcchat",
@@ -211,9 +199,7 @@ def test_pz_chat(get_data, get_intermediates):
     assert get_data == 0
     assert get_intermediates == 0
 
-    DS = RailStage.data_store
-    DS.__class__.allow_overwrite = True
-    DS.clear()
+    DS = DataStore()
 
     som_pzchat = SOMPZPzchat.make_stage(
         name="test_pzchat",
@@ -258,9 +244,7 @@ def test_tomo_bin(get_data, get_intermediates):
     assert get_data == 0
     assert get_intermediates == 0
 
-    DS = RailStage.data_store
-    DS.__class__.allow_overwrite = True
-    DS.clear()
+    DS = DataStore()
 
     som_tomobin = SOMPZTomobin.make_stage(
         name="test_tomobin",
@@ -299,9 +283,7 @@ def test_nz(get_data, get_intermediates):
     assert get_data == 0
     assert get_intermediates == 0
 
-    DS = RailStage.data_store
-    DS.__class__.allow_overwrite = True
-    DS.clear()
+    DS = DataStore()
 
     som_nz = SOMPZnz.make_stage(
         name="test_nz",