From 44f51d37929a2f986ac8f26952a351f56cc3e3c3 Mon Sep 17 00:00:00 2001
From: Kyle Harrington <czi@kyleharrington.com>
Date: Sat, 6 Apr 2024 18:29:26 -0400
Subject: [PATCH 01/30] Model fitting works lazily

---
 src/cellcanvas/data/data_manager.py           | 39 +++++++++++--------
 .../semantic/_embedding_segmentor.py          |  2 +
 .../semantic/segmentation_manager.py          |  3 +-
 3 files changed, 26 insertions(+), 18 deletions(-)

diff --git a/src/cellcanvas/data/data_manager.py b/src/cellcanvas/data/data_manager.py
index ddd0cfa..3f99ac6 100644
--- a/src/cellcanvas/data/data_manager.py
+++ b/src/cellcanvas/data/data_manager.py
@@ -3,6 +3,7 @@
 import numpy as np
 from napari.utils.events.containers import SelectableEventedList
 from zarr import Array
+import dask.array as da
 
 from cellcanvas.data.data_set import DataSet
 
@@ -30,23 +31,27 @@ def get_training_data(self) -> Tuple[Array, Array]:
         features = []
         labels = []
         for dataset in self.datasets:
-            # get the features and labels
-            # todo make lazier
-            dataset_features = np.asarray(dataset.concatenated_features)
-            dataset_labels = np.asarray(dataset.labels)
-
-            # reshape the data
-            dataset_labels = dataset_labels.flatten()
-            reshaped_features = dataset_features.reshape(
-                -1, dataset_features.shape[-1]
-            )
-
-            # Filter features where labels are greater than 0
-            valid_labels = dataset_labels > 0
-            filtered_features = reshaped_features[valid_labels, :]
-            filtered_labels = dataset_labels[valid_labels] - 1  # Adjust labels
+            dataset_features = da.asarray(dataset.concatenated_features)
+            dataset_labels = da.asarray(dataset.labels)
+
+            # Flatten labels for boolean indexing
+            flattened_labels = dataset_labels.flatten()
+
+            # Compute valid_indices based on labels > 0
+            valid_indices = da.nonzero(flattened_labels > 0)[0].compute()
+
+            # Flatten only the spatial dimensions of the dataset_features while preserving the feature dimension
+            c, h, w, d = dataset_features.shape
+            reshaped_features = dataset_features.reshape(c, h * w * d)
+
+            # We need to apply valid_indices for each feature dimension separately
+            filtered_features_list = [da.take(reshaped_features[i, :], valid_indices, axis=0) for i in range(c)]
+            filtered_features = da.stack(filtered_features_list, axis=1)
+
+            # Adjust labels
+            filtered_labels = flattened_labels[valid_indices] - 1
 
             features.append(filtered_features)
             labels.append(filtered_labels)
-
-        return np.concatenate(features), np.concatenate(labels)
+            
+        return da.concatenate(features), da.concatenate(labels)
diff --git a/src/cellcanvas/semantic/_embedding_segmentor.py b/src/cellcanvas/semantic/_embedding_segmentor.py
index c00e37a..4883cfd 100644
--- a/src/cellcanvas/semantic/_embedding_segmentor.py
+++ b/src/cellcanvas/semantic/_embedding_segmentor.py
@@ -672,6 +672,8 @@ def update_class_distribution_charts(self):
     def compute_embedding_projection(self):
         # Filter out entries where the label is 0
         filtered_features, filtered_labels = self.data.get_training_data()
+        filtered_features = filtered_features.compute()
+        filtered_labels = filtered_labels.compute()
 
         # label values are offset by 1 for training,
         # undo the offset.
diff --git a/src/cellcanvas/semantic/segmentation_manager.py b/src/cellcanvas/semantic/segmentation_manager.py
index 87b47ac..dd248ec 100644
--- a/src/cellcanvas/semantic/segmentation_manager.py
+++ b/src/cellcanvas/semantic/segmentation_manager.py
@@ -23,8 +23,9 @@ def __init__(self, data: DataManager, model: SegmentationModel):
     def fit(self):
         """Fit using the model using the data in the data manager."""
         features, labels = self.data.get_training_data()
+        features_computed, labels_computed = features.compute(), labels.compute()
 
-        self.model.fit(features, labels)
+        self.model.fit(features_computed, labels_computed)
 
     def predict(self, feature_image: np.ndarray):
         """Predict using the trained model.

From c2ba1f359ca61c9a47fc34c4431d126a3fe7264b Mon Sep 17 00:00:00 2001
From: Kyle Harrington <czi@kyleharrington.com>
Date: Sat, 6 Apr 2024 21:53:16 -0400
Subject: [PATCH 02/30] Prediction has worked with this

---
 .../semantic/_embedding_segmentor.py          | 66 +++++++++++++------
 .../semantic/segmentation_manager.py          | 10 ++-
 2 files changed, 53 insertions(+), 23 deletions(-)

diff --git a/src/cellcanvas/semantic/_embedding_segmentor.py b/src/cellcanvas/semantic/_embedding_segmentor.py
index 4883cfd..1b72b0e 100644
--- a/src/cellcanvas/semantic/_embedding_segmentor.py
+++ b/src/cellcanvas/semantic/_embedding_segmentor.py
@@ -7,6 +7,7 @@
 import matplotlib.pyplot as plt
 import napari
 import numpy as np
+import dask.array as da
 import toolz as tz
 import zarr
 from matplotlib.backends.backend_qt5agg import (
@@ -358,26 +359,49 @@ def update_model(self, model_type):
             raise ValueError(f"Unsupported model type: {model_type}")
 
     def predict(self):
-        # We shift labels + 1 because background is 0 and has special meaning
-        # prediction = (
-        #         future.predict_segmenter(
-        #             features.reshape(-1, features.shape[-1]), model
-        #         ).reshape(features.shape[:-1])
-        #         + 1
-        # )
-        prediction = (
-            self.segmentation_manager.predict(
-                np.asarray(self.data.datasets[0].concatenated_features)
-            )
-            + 1
-        )
-
-        # Compute stats in thread too
-        prediction_labels, prediction_counts = np.unique(
-            prediction, return_counts=True
-        )
-
-        return (prediction, prediction_labels, prediction_counts)
+        dataset_features = da.asarray(self.data.datasets[0].concatenated_features)
+        chunk_shape = dataset_features.chunksize
+        shape = dataset_features.shape
+        dtype = dataset_features.dtype
+        
+        # Placeholder for aggregated labels and counts
+        all_labels = []
+        all_counts = []
+        
+        # Iterate over chunks
+        for z in range(0, shape[1], chunk_shape[1]):
+            for y in range(0, shape[2], chunk_shape[2]):
+                for x in range(0, shape[3], chunk_shape[3]):
+                    # Compute the slice for the current chunk
+                    # in feature,z,y,x order
+                    chunk_slice = (
+                        slice(None),
+                        slice(z, min(z + chunk_shape[1], shape[1])),
+                        slice(y, min(y + chunk_shape[2], shape[2])),
+                        slice(x, min(x + chunk_shape[3], shape[3])),                        
+                    )
+                    print(f"Predicting on chunk {chunk_slice}")
+                    
+                    # Extract the current chunk
+                    chunk = dataset_features[chunk_slice].compute()
+                    
+                    # Predict on the chunk (adding 1 to each prediction)
+                    predicted_chunk = self.segmentation_manager.predict(chunk) + 1
+                    
+                    # Write the prediction to the corresponding region in the Zarr array
+                    self.prediction_data[chunk_slice[1:]] = predicted_chunk
+                    
+                    # Aggregate labels and counts
+                    labels, counts = np.unique(predicted_chunk, return_counts=True)
+                    all_labels.append(labels)
+                    all_labels.append(counts)
+
+        # Combine all_labels and all_counts
+        unique_labels, inverse = np.unique(np.concatenate(all_labels), return_inverse=True)
+        total_counts = np.bincount(inverse, weights=np.concatenate(all_counts))
+
+        # Now, self.prediction_data should contain the predicted labels
+        return self.prediction_data, unique_labels, total_counts
 
     @thread_worker
     def prediction_thread(self):
@@ -402,6 +426,8 @@ def start_prediction(self):
 
         # features = self.get_features()
 
+        # TODO use a yielded connect worker
+        
         self.prediction_worker = self.prediction_thread()
         self.prediction_worker.returned.connect(self.on_prediction_completed)
         self.prediction_worker.start()
diff --git a/src/cellcanvas/semantic/segmentation_manager.py b/src/cellcanvas/semantic/segmentation_manager.py
index dd248ec..fe9e706 100644
--- a/src/cellcanvas/semantic/segmentation_manager.py
+++ b/src/cellcanvas/semantic/segmentation_manager.py
@@ -1,6 +1,8 @@
 from typing import Protocol
 
 import numpy as np
+import dask.array as da
+from dask import delayed
 from sklearn.exceptions import NotFittedError
 
 from cellcanvas.data.data_manager import DataManager
@@ -27,7 +29,7 @@ def fit(self):
 
         self.model.fit(features_computed, labels_computed)
 
-    def predict(self, feature_image: np.ndarray):
+    def predict(self, feature_image):
         """Predict using the trained model.
 
         Parameters
@@ -40,7 +42,8 @@ def predict(self, feature_image: np.ndarray):
         predicted_labels : Array
             The prediction of class.
         """
-        features = feature_image.reshape((-1, feature_image.shape[-1]))
+        c, z, y, x = feature_image.shape
+        features = feature_image.transpose(1, 2, 3, 0).reshape(-1, c)
 
         try:
             predicted_labels = self.model.predict(features)
@@ -50,4 +53,5 @@ def predict(self, feature_image: np.ndarray):
                 "for example with the `fit_segmenter` function."
             ) from None
 
-        return predicted_labels.reshape(feature_image.shape[:-1])
+        return predicted_labels.reshape(feature_image.shape[1:])
+

From 71c4b713f773dc9215fb1a88b47134167e9d85b2 Mon Sep 17 00:00:00 2001
From: Kyle Harrington <czi@kyleharrington.com>
Date: Sun, 7 Apr 2024 09:36:59 -0400
Subject: [PATCH 03/30] Fix count aggregation

---
 src/cellcanvas/semantic/_embedding_segmentor.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/cellcanvas/semantic/_embedding_segmentor.py b/src/cellcanvas/semantic/_embedding_segmentor.py
index 1b72b0e..1ca2ba1 100644
--- a/src/cellcanvas/semantic/_embedding_segmentor.py
+++ b/src/cellcanvas/semantic/_embedding_segmentor.py
@@ -394,7 +394,7 @@ def predict(self):
                     # Aggregate labels and counts
                     labels, counts = np.unique(predicted_chunk, return_counts=True)
                     all_labels.append(labels)
-                    all_labels.append(counts)
+                    all_counts.append(counts)
 
         # Combine all_labels and all_counts
         unique_labels, inverse = np.unique(np.concatenate(all_labels), return_inverse=True)

From ad429ef5f754e5ce513efcf93454398b8e93e7b7 Mon Sep 17 00:00:00 2001
From: Kyle Harrington <czi@kyleharrington.com>
Date: Mon, 8 Apr 2024 17:16:28 -0400
Subject: [PATCH 04/30] Launch cellcanvas from copick explorer, legend matches

---
 examples/run_app_copick.py                    | 259 ++++++++++++++++++
 src/cellcanvas/_app/main_app.py               |   5 +
 .../semantic/_embedding_segmentor.py          |  15 +-
 .../semantic/segmentation_manager.py          |   3 +
 4 files changed, 281 insertions(+), 1 deletion(-)
 create mode 100644 examples/run_app_copick.py

diff --git a/examples/run_app_copick.py b/examples/run_app_copick.py
new file mode 100644
index 0000000..da53b53
--- /dev/null
+++ b/examples/run_app_copick.py
@@ -0,0 +1,259 @@
+"""Example of using CellCanvas to pick particles on a surface.
+
+To use:
+1. Update the copick config path passed to CopickRootFSSpec.from_file below
+2. Run the script to launch CellCanvas
+3. Paint/predict until you're happy with the result. The seeded labels are:
+    - 1: background (including inside the capsules)
+    - 2: membrane
+    - 3: spike proteins
+3b. You might want to switch the image layer into the plane
+    depiction before doing the instance segmentation.
+    Sometimes I have trouble manipulating the plane after
+    the instance segmentation - need to look into this.
+4. Once you're happy with the prediction, click the "instance segmentation" tab
+5. Set the label value to 2. This will extract the membrane and
+    make instances via connected components.
+6. Remove the small objects. Suggested threshold: 100
+7. Alt + left mouse button to select an instance to modify.
+    Once selected, you can dilate, erode, etc. to smooth it.
+8. With the segment still selected, you can then mesh it
+   using the mesh widget. You can play with the smoothing parameters.
+9. If the mesh looks good, switch to the "geometry" tab.
+    Select the mesh and start surfing!
+"""
+from collections import defaultdict
+import os
+import numpy as np
+import napari
+import cellcanvas
+from cellcanvas._app.main_app import CellCanvasApp, QtCellCanvas
+from cellcanvas.data.data_manager import DataManager
+from cellcanvas.data.data_set import DataSet
+
+import json
+import copick
+from copick.impl.filesystem import CopickRootFSSpec
+import zarr
+
+from qtpy.QtWidgets import QTreeWidget, QTreeWidgetItem, QVBoxLayout, QWidget, QComboBox, QPushButton, QLabel
+from qtpy.QtCore import Qt
+import glob  # For pattern matching of file names
+
+
+# Project root
+root = CopickRootFSSpec.from_file("/Volumes/kish@CZI.T7/working/demo_project/copick_config_kyle.json")
+
+## Root API
+root.config # CopickConfig object
+root.runs # List of run objects (lazy loading from filesystem location(s))
+
+# TODO update to use root.config.pickable_objects
+
+
+def get_labels_colormap():
+    """Return a colormap for distinct label colors based on the pickable objects."""
+    colormap = {obj.label: np.array(obj.color)/255.0 for obj in root.config.pickable_objects}
+    colormap[None] = np.array([1, 1, 1, 1])  # default is white
+    colormap[0] = np.array([0, 0, 0, 0])  # Add any special cases if needed
+    return colormap
+
+cellcanvas.utils.get_labels_colormap = get_labels_colormap
+
+# Use the function
+colormap = get_labels_colormap()
+
+# TODO set names from copick config
+# cell_canvas.semantic_segmentor.widget.class_labels_mapping = {obj.label: obj.name for obj in root.config.pickable_objects}
+
+import napari
+from qtpy.QtWidgets import QTreeWidget, QTreeWidgetItem, QVBoxLayout, QWidget
+from qtpy.QtCore import Qt
+
+class NapariCopickExplorer(QWidget):
+    def __init__(self, viewer: napari.Viewer, root):
+        super().__init__()
+        self.viewer = viewer
+        self.root = root
+        self.selected_run = None
+        self.cell_canvas_app = None
+
+        layout = QVBoxLayout()
+        self.setLayout(layout)
+
+        # Dropdowns for each data layer
+        self.dropdowns = {}
+        for layer in ["image", "features", "painting", "prediction"]:
+            layout.addWidget(QLabel(f"{layer.capitalize()} Path:"))
+            self.dropdowns[layer] = QComboBox()
+            layout.addWidget(self.dropdowns[layer])
+
+        # Button to update CellCanvas with the selected dataset
+        self.update_button = QPushButton("Initialize/Update CellCanvas")
+        self.update_button.clicked.connect(self.initialize_or_update_cell_canvas)
+        layout.addWidget(self.update_button)
+
+        self.tree = QTreeWidget()
+        self.tree.setHeaderLabel("Copick Runs")
+        self.tree.itemClicked.connect(self.on_run_clicked)
+        layout.addWidget(self.tree)
+
+        self.populate_tree()
+
+    def populate_tree(self):
+        for run in self.root.runs:
+            run_item = QTreeWidgetItem(self.tree, [run.name])
+            run_item.setData(0, Qt.UserRole, run)
+
+            for category in ["segmentations", "meshes", "picks", "voxel_spacings"]:
+                category_item = QTreeWidgetItem(run_item, [category])
+                items = getattr(run, category)
+                for item in items:
+                    if category == "picks":
+                        item_name = item.pickable_object_name
+                    else:
+                        item_name = getattr(item, 'name', 'Unnamed')
+
+                    child_item = QTreeWidgetItem(category_item, [item_name])
+                    child_item.setData(0, Qt.UserRole, item)
+
+                    # list tomograms
+                    if category == "voxel_spacings":
+                        for tomogram in item.tomograms:
+                            tomo_item = QTreeWidgetItem(child_item, [f"Tomogram: {tomogram.tomo_type}"])
+                            tomo_item.setData(0, Qt.UserRole, tomogram)
+
+    def on_run_clicked(self, item, column):
+        data = item.data(0, Qt.UserRole)
+        if not isinstance(data, copick.impl.filesystem.CopickRunFSSpec):
+            self.on_item_clicked(item, column)
+            return
+
+        self.selected_run = data
+        static_path = self.selected_run.static_path
+
+        # Clear existing items
+        for dropdown in self.dropdowns.values():
+            dropdown.clear()
+
+        # Find VoxelSpacing directories
+        voxel_spacing_dirs = glob.glob(os.path.join(static_path, "VoxelSpacing*"))
+
+        for voxel_spacing_dir in voxel_spacing_dirs:
+            # Find all Zarr datasets within the voxel spacing directory
+            zarr_datasets = glob.glob(os.path.join(voxel_spacing_dir, "*_features.zarr"))
+
+            for dataset_path in zarr_datasets:
+                # Check for the existence of 'embedding' directory within each features zarr
+                embedding_path = os.path.join(dataset_path, "*", "embedding")
+                embedding_dirs = glob.glob(embedding_path)
+
+                for embedding_dir in embedding_dirs:
+                    # Assuming 'embedding' is the desired path for features
+                    self.dropdowns["features"].addItem(embedding_dir)
+
+            zarr_datasets = glob.glob(os.path.join(voxel_spacing_dir, "*.zarr"))
+            for dataset_path in zarr_datasets:
+                # For image paths, directly add non-features zarr datasets to the image dropdown
+                if not "features" in os.path.basename(dataset_path).lower():
+                    self.dropdowns["image"].addItem(dataset_path)
+
+
+        # Set defaults for painting and prediction layers, assuming they follow a fixed naming convention
+        # and are expected to be located in a specific VoxelSpacing directory, adjusting as necessary
+        if voxel_spacing_dirs:  # Check if at least one VoxelSpacing directory was found
+            base_voxel_dir = voxel_spacing_dirs[0]  # Assuming to use the first found directory for default paths
+            self.dropdowns["painting"].addItem(os.path.join(base_voxel_dir, "painting.zarr"))
+            self.dropdowns["prediction"].addItem(os.path.join(base_voxel_dir, "prediction.zarr"))
+        else:
+            print("No Voxel Spacing directories found. Please check the directory structure.")
+
+                                    
+    def on_item_clicked(self, item, column):
+        data = item.data(0, Qt.UserRole)
+        if data:
+            if isinstance(data, copick.impl.filesystem.CopickPicksFSSpec):
+                self.open_picks(data)
+            elif isinstance(data, copick.impl.filesystem.CopickTomogramFSSpec):
+                self.open_tomogram(data)
+
+    def open_picks(self, picks):
+        with open(picks.path, 'r') as f:
+            points_data = json.load(f)
+
+        # Extracting points locations
+        points_locations = [
+            [point['location']['z'], point['location']['y'], point['location']['x']]
+            for point in points_data['points']
+        ]
+
+        points_array = np.array(points_locations)
+        
+        # Adding the points layer to the viewer, using the pickable_object_name as the layer name
+        pickable_object = [obj for obj in root.config.pickable_objects if obj.name == picks.pickable_object_name][0]
+        self.viewer.add_points(points_array, name=picks.pickable_object_name, size=100, out_of_slice_display=True, face_color=np.array(pickable_object.color)/255.0)
+
+    def open_tomogram(self, tomogram):
+        zarr_store = zarr.open(tomogram.zarr(), mode='r')
+        # TODO extract scale/transform info
+
+        # TODO scale is hard coded to 10 here
+        self.viewer.add_image(zarr_store[0], name=f"Tomogram: {tomogram.tomo_type}", scale=10)
+
+    def initialize_or_update_cell_canvas(self):
+        # Collect paths from dropdowns
+        paths = {layer: dropdown.currentText() for layer, dropdown in self.dropdowns.items()}
+
+        if not all(paths.values()):
+            print("Please ensure all paths are selected before initializing/updating CellCanvas.")
+            return
+
+        dataset = DataSet.from_paths(
+            image_path=f"{paths['image']}/0",
+            features_path=paths["features"],
+            labels_path=paths["painting"],
+            segmentation_path=paths["prediction"],
+            make_missing_datasets=True,
+        )
+
+        data_manager = DataManager(datasets=[dataset])
+        
+        if not self.cell_canvas_app:
+            self.cell_canvas_app = CellCanvasApp(data=data_manager, viewer=self.viewer, verbose=True)
+            cell_canvas_widget = QtCellCanvas(app=self.cell_canvas_app)
+            self.viewer.window.add_dock_widget(cell_canvas_widget)
+        else:
+            # Update existing CellCanvasApp's data manager
+            self.cell_canvas_app.update_data_manager(data_manager)
+
+        # TODO this has multiple copick specific hardcoded hacks
+            
+        # TODO hardcoded scale factor
+        self.viewer.layers['Image'].scale = (10, 10, 10)
+
+        # Set colormap
+        # painting_layer.colormap.color_dict
+        #  self.app.painting_labels
+        colormap = {obj.label: np.array(obj.color)/255.0 for obj in root.config.pickable_objects}
+        colormap[None] = np.array([1, 1, 1, 1])
+        colormap[9] = np.array([0, 1, 1, 1])
+        self.cell_canvas_app.semantic_segmentor.painting_layer.colormap.color_dict = colormap
+        self.cell_canvas_app.semantic_segmentor.painting_labels = [obj.label for obj in root.config.pickable_objects] + [9]
+        self.cell_canvas_app.semantic_segmentor.widget.class_labels_mapping = {obj.label: obj.name for obj in root.config.pickable_objects}
+
+        self.cell_canvas_app.semantic_segmentor.widget.class_labels_mapping[9] = 'background'
+        self.cell_canvas_app.semantic_segmentor.widget.setupLegend()
+
+viewer = napari.Viewer()
+copick_explorer_widget = NapariCopickExplorer(viewer, root)
+viewer.window.add_dock_widget(copick_explorer_widget, name="Copick Explorer", area="left")
+
+
+# napari.run()
+
+# TODO finish making the prediction computation more lazy
+# the strategy should be to start computing labels chunkwise
+# on the zarr itself
+
+# TODO check scaling between picks and zarrs
+
diff --git a/src/cellcanvas/_app/main_app.py b/src/cellcanvas/_app/main_app.py
index d814a87..b7aedc2 100644
--- a/src/cellcanvas/_app/main_app.py
+++ b/src/cellcanvas/_app/main_app.py
@@ -30,6 +30,11 @@ def __init__(
             extra_logging=self.verbose,
         )
 
+
+    def update_data_manager(self, data: DataManager):
+        self.data = data
+        self.semantic_segmentor.update_data_manager(data)
+        
     @property
     def mode(self) -> AppMode:
         return self._mode
diff --git a/src/cellcanvas/semantic/_embedding_segmentor.py b/src/cellcanvas/semantic/_embedding_segmentor.py
index 1ca2ba1..c7949f3 100644
--- a/src/cellcanvas/semantic/_embedding_segmentor.py
+++ b/src/cellcanvas/semantic/_embedding_segmentor.py
@@ -91,7 +91,7 @@ def __init__(
         #     self.logger.info(f"zarr_path: {zarr_path}")
 
         self._add_threading_workers()
-        self._init_viewer_layers()
+        self.update_data_manager(self.data)
         self._add_widget()
         self.model = None
 
@@ -99,6 +99,19 @@ def __init__(
         self.start_computing_embedding_plot()
         self.update_class_distribution_charts()
 
+    def update_data_manager(self, data: DataManager):
+        self.data = data
+        self.segmentation_manager.update_data_manager(data)
+
+        # get the image and features
+        # todo this is temporarily assuming a single dataset
+        # need to generalize
+        self.image_data = self.data.datasets[0].image
+        self.features = self.data.datasets[0].features
+
+        # TODO remove old layers
+        self._init_viewer_layers()
+
     def reshape_features(self, arr):
         return arr.reshape(-1, arr.shape[-1])
 
diff --git a/src/cellcanvas/semantic/segmentation_manager.py b/src/cellcanvas/semantic/segmentation_manager.py
index fe9e706..3d246ec 100644
--- a/src/cellcanvas/semantic/segmentation_manager.py
+++ b/src/cellcanvas/semantic/segmentation_manager.py
@@ -22,6 +22,9 @@ def __init__(self, data: DataManager, model: SegmentationModel):
         self.data = data
         self.model = model
 
+    def update_data_manager(self, data: DataManager):
+        self.data = data
+        
     def fit(self):
         """Fit using the model using the data in the data manager."""
         features, labels = self.data.get_training_data()

From 857ee01f7bb2b43b21ad06cfd87120555f59aa7b Mon Sep 17 00:00:00 2001
From: Kyle Harrington <czi@kyleharrington.com>
Date: Mon, 8 Apr 2024 17:33:11 -0400
Subject: [PATCH 05/30] Update painting and prediction colormaps

---
 examples/run_app_copick.py | 17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)

diff --git a/examples/run_app_copick.py b/examples/run_app_copick.py
index da53b53..1ff3fbc 100644
--- a/examples/run_app_copick.py
+++ b/examples/run_app_copick.py
@@ -51,17 +51,17 @@
 # TODO update to use root.config.pickable_objects
 
 
-def get_labels_colormap():
+def get_copick_colormap():
     """Return a colormap for distinct label colors based on the pickable objects."""
     colormap = {obj.label: np.array(obj.color)/255.0 for obj in root.config.pickable_objects}
-    colormap[None] = np.array([1, 1, 1, 1])  # default is white
-    colormap[0] = np.array([0, 0, 0, 0])  # Add any special cases if needed
+    colormap[None] = np.array([1, 1, 1, 1])
+    colormap[9] = np.array([0, 1, 1, 1])
     return colormap
 
-cellcanvas.utils.get_labels_colormap = get_labels_colormap
+cellcanvas.utils.get_labels_colormap = get_copick_colormap
 
 # Use the function
-colormap = get_labels_colormap()
+colormap = get_copick_colormap()
 
 # TODO set names from copick config
 # cell_canvas.semantic_segmentor.widget.class_labels_mapping = {obj.label: obj.name for obj in root.config.pickable_objects}
@@ -234,10 +234,9 @@ def initialize_or_update_cell_canvas(self):
         # Set colormap
         # painting_layer.colormap.color_dict
         #  self.app.painting_labels
-        colormap = {obj.label: np.array(obj.color)/255.0 for obj in root.config.pickable_objects}
-        colormap[None] = np.array([1, 1, 1, 1])
-        colormap[9] = np.array([0, 1, 1, 1])
+        colormap = get_copick_colormap()
         self.cell_canvas_app.semantic_segmentor.painting_layer.colormap.color_dict = colormap
+        self.cell_canvas_app.semantic_segmentor.prediction_layer.colormap.color_dict = colormap
         self.cell_canvas_app.semantic_segmentor.painting_labels = [obj.label for obj in root.config.pickable_objects] + [9]
         self.cell_canvas_app.semantic_segmentor.widget.class_labels_mapping = {obj.label: obj.name for obj in root.config.pickable_objects}
 
@@ -257,3 +256,5 @@ def initialize_or_update_cell_canvas(self):
 
 # TODO check scaling between picks and zarrs
 
+# TODO check why painting doesn't work
+# check if it is related to scaling

From 5e1208690081e80cb7518dffee58a88b37d48328 Mon Sep 17 00:00:00 2001
From: Kyle Harrington <czi@kyleharrington.com>
Date: Mon, 8 Apr 2024 17:40:02 -0400
Subject: [PATCH 06/30] Move hard coded scaling to point data, resolves
 painting issue

---
 examples/run_app_copick.py | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/examples/run_app_copick.py b/examples/run_app_copick.py
index 1ff3fbc..65ccd59 100644
--- a/examples/run_app_copick.py
+++ b/examples/run_app_copick.py
@@ -187,18 +187,19 @@ def open_picks(self, picks):
             for point in points_data['points']
         ]
 
-        points_array = np.array(points_locations)
+        # TODO hard coded scaling
+        points_array = np.array(points_locations) / 10
         
         # Adding the points layer to the viewer, using the pickable_object_name as the layer name
         pickable_object = [obj for obj in root.config.pickable_objects if obj.name == picks.pickable_object_name][0]
-        self.viewer.add_points(points_array, name=picks.pickable_object_name, size=100, out_of_slice_display=True, face_color=np.array(pickable_object.color)/255.0)
+        self.viewer.add_points(points_array, name=picks.pickable_object_name, size=25, out_of_slice_display=True, face_color=np.array(pickable_object.color)/255.0)
 
     def open_tomogram(self, tomogram):
         zarr_store = zarr.open(tomogram.zarr(), mode='r')
         # TODO extract scale/transform info
 
         # TODO scale is hard coded to 10 here
-        self.viewer.add_image(zarr_store[0], name=f"Tomogram: {tomogram.tomo_type}", scale=10)
+        self.viewer.add_image(zarr_store[0], name=f"Tomogram: {tomogram.tomo_type}")
 
     def initialize_or_update_cell_canvas(self):
         # Collect paths from dropdowns
@@ -229,7 +230,7 @@ def initialize_or_update_cell_canvas(self):
         # TODO this has multiple copick specific hardcoded hacks
             
         # TODO hardcoded scale factor
-        self.viewer.layers['Image'].scale = (10, 10, 10)
+        # self.viewer.layers['Image'].scale = (10, 10, 10)
 
         # Set colormap
         # painting_layer.colormap.color_dict
@@ -256,5 +257,6 @@ def initialize_or_update_cell_canvas(self):
 
 # TODO check scaling between picks and zarrs
 
-# TODO check why painting doesn't work
-# check if it is related to scaling
+# TODO check why painting doesn't work when using proper scaling
+
+# TODO add proper colormap support

From 0076c51758fd575666265915ad0134425696b6a5 Mon Sep 17 00:00:00 2001
From: Kyle Harrington <czi@kyleharrington.com>
Date: Tue, 9 Apr 2024 07:47:30 -0400
Subject: [PATCH 07/30] Kludge to get colormaps working

---
 examples/run_app_copick.py                    |  9 ++++----
 .../semantic/_embedding_segmentor.py          | 23 ++++++++++++++-----
 2 files changed, 22 insertions(+), 10 deletions(-)

diff --git a/examples/run_app_copick.py b/examples/run_app_copick.py
index 65ccd59..7246bd4 100644
--- a/examples/run_app_copick.py
+++ b/examples/run_app_copick.py
@@ -235,9 +235,7 @@ def initialize_or_update_cell_canvas(self):
         # Set colormap
         # painting_layer.colormap.color_dict
         #  self.app.painting_labels
-        colormap = get_copick_colormap()
-        self.cell_canvas_app.semantic_segmentor.painting_layer.colormap.color_dict = colormap
-        self.cell_canvas_app.semantic_segmentor.prediction_layer.colormap.color_dict = colormap
+        self.cell_canvas_app.semantic_segmentor.set_colormap(get_copick_colormap())
         self.cell_canvas_app.semantic_segmentor.painting_labels = [obj.label for obj in root.config.pickable_objects] + [9]
         self.cell_canvas_app.semantic_segmentor.widget.class_labels_mapping = {obj.label: obj.name for obj in root.config.pickable_objects}
 
@@ -259,4 +257,7 @@ def initialize_or_update_cell_canvas(self):
 
 # TODO check why painting doesn't work when using proper scaling
 
-# TODO add proper colormap support
+# TODO add proper colormap and legend support
+# - override exclusion of non-zero labels
+# - consistent colormap in the charts
+# - consistent colormap in the painted part of the labels image
diff --git a/src/cellcanvas/semantic/_embedding_segmentor.py b/src/cellcanvas/semantic/_embedding_segmentor.py
index c7949f3..a924a85 100644
--- a/src/cellcanvas/semantic/_embedding_segmentor.py
+++ b/src/cellcanvas/semantic/_embedding_segmentor.py
@@ -60,6 +60,7 @@ def __init__(
 
         self.extra_logging = extra_logging
         self.data = data_manager
+        self.colormap = get_labels_colormap()
         clf = RandomForestClassifier(
             n_estimators=50,
             n_jobs=-1,
@@ -99,6 +100,12 @@ def __init__(
         self.start_computing_embedding_plot()
         self.update_class_distribution_charts()
 
+    def set_colormap(self, colormap):
+        self.colormap = colormap
+        
+        self.prediction_layer.colormap = DirectLabelColormap(color_dict=colormap)
+        self.painting_layer.colormap = DirectLabelColormap(color_dict=colormap)        
+
     def update_data_manager(self, data: DataManager):
         self.data = data
         self.segmentation_manager.update_data_manager(data)
@@ -139,7 +146,7 @@ def _init_viewer_layers(self):
             name="Prediction",
             scale=self.data_layer.scale,
             opacity=0.1,
-            colormap=DirectLabelColormap(color_dict=get_labels_colormap()),
+            colormap=DirectLabelColormap(color_dict=self.colormap),
         )
 
         # self.painting_data = zarr.open(
@@ -154,7 +161,7 @@ def _init_viewer_layers(self):
             self.painting_data,
             name="Painting",
             scale=self.data_layer.scale,
-            colormap=DirectLabelColormap(color_dict=get_labels_colormap()),
+            colormap=DirectLabelColormap(color_dict=self.colormap),
         )
 
         # Set up painting logging
@@ -223,7 +230,9 @@ def on_data_change(self, event, app):
         self.corner_pixels = self.viewer.layers["Image"].corner_pixels
 
         # TODO check if this is stalling things
-        self.painting_labels, self.painting_counts = np.unique(
+        # TODO recheck this after copick
+        # self.painting_labels, self.painting_counts = np.unique(
+        _, self.painting_counts = np.unique(
             self.painting_data[:], return_counts=True
         )
 
@@ -252,7 +261,9 @@ def threaded_on_data_change(
         self.logger.info(f"Labels data has changed! {event}")  # noqa: G004
 
         # Update stats
-        self.painting_labels, self.painting_counts = np.unique(
+        # TODO check after copick
+        # self.painting_labels, self.painting_counts = np.unique(
+        _, self.painting_counts = np.unique(
             self.painting_data[:], return_counts=True
         )
 
@@ -586,7 +597,7 @@ def update_class_distribution_charts(self):
         # Example class to color mapping
         class_color_mapping = {
             label: f"#{int(rgba[0] * 255):02x}{int(rgba[1] * 255):02x}{int(rgba[2] * 255):02x}"
-            for label, rgba in get_labels_colormap().items()
+            for label, rgba in self.colormap.items()
         }
 
         self.widget.figure.clear()
@@ -774,7 +785,7 @@ def create_embedding_plot(self, result):
             label: "#{:02x}{:02x}{:02x}".format(
                 int(rgba[0] * 255), int(rgba[1] * 255), int(rgba[2] * 255)
             )
-            for label, rgba in get_labels_colormap().items()
+            for label, rgba in self.colormap.items()
         }
 
         # Convert filtered_labels to a list of colors for each point

From 6f0a0185da7572f07620b6aed541219201a76bbc Mon Sep 17 00:00:00 2001
From: Kyle Harrington <czi@kyleharrington.com>
Date: Fri, 12 Apr 2024 15:38:46 -0400
Subject: [PATCH 08/30] Add support for toggling layers

---
 examples/run_app_copick.py                    | 90 ++++++++++++-------
 src/cellcanvas/data/data_manager.py           |  1 -
 .../semantic/_embedding_segmentor.py          |  5 +-
 3 files changed, 63 insertions(+), 33 deletions(-)

diff --git a/examples/run_app_copick.py b/examples/run_app_copick.py
index 7246bd4..1a4b2b4 100644
--- a/examples/run_app_copick.py
+++ b/examples/run_app_copick.py
@@ -42,7 +42,7 @@
 
 
 # Project root
-root = CopickRootFSSpec.from_file("/Volumes/kish@CZI.T7/working/demo_project/copick_config_kyle.json")
+root = CopickRootFSSpec.from_file("/Volumes/kish@CZI.T7/demo_project/copick_config_kyle.json")
 
 ## Root API
 root.config # CopickConfig object
@@ -83,8 +83,14 @@ def __init__(self, viewer: napari.Viewer, root):
 
         # Dropdowns for each data layer
         self.dropdowns = {}
+        self.layer_buttons = {}
         for layer in ["image", "features", "painting", "prediction"]:
-            layout.addWidget(QLabel(f"{layer.capitalize()} Path:"))
+            # Make layer button
+            button = QPushButton(f"Select {layer.capitalize()} Layer")
+            button.clicked.connect(lambda checked, layer=layer: self.activate_layer(layer))
+            layout.addWidget(button)
+            self.layer_buttons[layer] = button
+            # Make layer selection dropdown
             self.dropdowns[layer] = QComboBox()
             layout.addWidget(self.dropdowns[layer])
 
@@ -123,6 +129,20 @@ def populate_tree(self):
                             tomo_item = QTreeWidgetItem(child_item, [f"Tomogram: {tomogram.tomo_type}"])
                             tomo_item.setData(0, Qt.UserRole, tomogram)
 
+    def activate_layer(self, layer):
+        print(f"Activating layer {layer}")
+        if layer == "image":
+            layer = self.cell_canvas_app.semantic_segmentor.data_layer
+        elif layer == "painting":
+            layer = self.cell_canvas_app.semantic_segmentor.painting_layer
+        elif layer == "prediction":
+            layer = self.cell_canvas_app.semantic_segmentor.prediction_layer
+        else:
+            return
+        layer.visible = True
+        layer.editable = True
+        self.viewer.layers.selection.active = layer
+
     def on_run_clicked(self, item, column):
         data = item.data(0, Qt.UserRole)
         if not isinstance(data, copick.impl.filesystem.CopickRunFSSpec):
@@ -137,36 +157,35 @@ def on_run_clicked(self, item, column):
             dropdown.clear()
 
         # Find VoxelSpacing directories
-        voxel_spacing_dirs = glob.glob(os.path.join(static_path, "VoxelSpacing*"))
+        # TODO hardcoded to match spacing = 10
+        voxel_spacing_dirs = glob.glob(os.path.join(static_path, "VoxelSpacing10*"))
+
+        if not voxel_spacing_dirs:  # Check if at least one VoxelSpacing directory was found
+            print(f"No Voxel Spacing directories found in {static_path}. Please check the directory structure.")
+            return
 
+        self.voxel_spacing_dir = voxel_spacing_dirs[0]        
+        
         for voxel_spacing_dir in voxel_spacing_dirs:
             # Find all Zarr datasets within the voxel spacing directory
-            zarr_datasets = glob.glob(os.path.join(voxel_spacing_dir, "*_features.zarr"))
-
-            for dataset_path in zarr_datasets:
-                # Check for the existence of 'embedding' directory within each features zarr
-                embedding_path = os.path.join(dataset_path, "*", "embedding")
-                embedding_dirs = glob.glob(embedding_path)
-
-                for embedding_dir in embedding_dirs:
-                    # Assuming 'embedding' is the desired path for features
-                    self.dropdowns["features"].addItem(embedding_dir)
-
             zarr_datasets = glob.glob(os.path.join(voxel_spacing_dir, "*.zarr"))
+            
+            # Filtering the paths for each dropdown category
             for dataset_path in zarr_datasets:
-                # For image paths, directly add non-features zarr datasets to the image dropdown
-                if not "features" in os.path.basename(dataset_path).lower():
-                    self.dropdowns["image"].addItem(dataset_path)
+                dataset_name = os.path.basename(dataset_path)
+                if "_features.zarr" in dataset_name.lower():
+                    self.dropdowns["features"].addItem(dataset_name, dataset_path)
+                elif "painting.zarr" in dataset_name.lower():
+                    self.dropdowns["painting"].addItem(dataset_name, dataset_path)
+                elif "prediction.zarr" in dataset_name.lower():
+                    self.dropdowns["prediction"].addItem(dataset_name, dataset_path)
+                else:
+                    # This is for the image dropdown, excluding features, painting, and prediction zarr files
+                    self.dropdowns["image"].addItem(dataset_name, dataset_path)
 
 
         # Set defaults for painting and prediction layers, assuming they follow a fixed naming convention
         # and are expected to be located in a specific VoxelSpacing directory, adjusting as necessary
-        if voxel_spacing_dirs:  # Check if at least one VoxelSpacing directory was found
-            base_voxel_dir = voxel_spacing_dirs[0]  # Assuming to use the first found directory for default paths
-            self.dropdowns["painting"].addItem(os.path.join(base_voxel_dir, "painting.zarr"))
-            self.dropdowns["prediction"].addItem(os.path.join(base_voxel_dir, "prediction.zarr"))
-        else:
-            print("No Voxel Spacing directories found. Please check the directory structure.")
 
                                     
     def on_item_clicked(self, item, column):
@@ -204,16 +223,19 @@ def open_tomogram(self, tomogram):
     def initialize_or_update_cell_canvas(self):
         # Collect paths from dropdowns
         paths = {layer: dropdown.currentText() for layer, dropdown in self.dropdowns.items()}
+        
+        if not paths["image"] or not paths["features"]:
+            print("Please ensure image and feature paths are selected before initializing/updating CellCanvas.")
+            return        
 
-        if not all(paths.values()):
-            print("Please ensure all paths are selected before initializing/updating CellCanvas.")
-            return
-
+        default_painting_path = os.path.join(self.voxel_spacing_dir, "painting_001.zarr")
+        default_prediction_path = os.path.join(self.voxel_spacing_dir, "prediction_001.zarr")
+        
         dataset = DataSet.from_paths(
-            image_path=f"{paths['image']}/0",
-            features_path=paths["features"],
-            labels_path=paths["painting"],
-            segmentation_path=paths["prediction"],
+            image_path=os.path.join(self.voxel_spacing_dir, f"{paths['image']}/0"),
+            features_path=os.path.join(self.voxel_spacing_dir, paths["features"]),
+            labels_path=default_painting_path if not paths["painting"] else os.path.join(self.voxel_spacing_dir, paths["painting"]),
+            segmentation_path=default_prediction_path if not paths["prediction"] else os.path.join(self.voxel_spacing_dir, paths["prediction"]),
             make_missing_datasets=True,
         )
 
@@ -243,6 +265,11 @@ def initialize_or_update_cell_canvas(self):
         self.cell_canvas_app.semantic_segmentor.widget.setupLegend()
 
 viewer = napari.Viewer()
+
+# Hide layer list and controls
+# viewer.window.qt_viewer.dockLayerList.setVisible(False)
+# viewer.window.qt_viewer.dockLayerControls.setVisible(False)
+
 copick_explorer_widget = NapariCopickExplorer(viewer, root)
 viewer.window.add_dock_widget(copick_explorer_widget, name="Copick Explorer", area="left")
 
@@ -261,3 +288,4 @@ def initialize_or_update_cell_canvas(self):
 # - override exclusion of non-zero labels
 # - consistent colormap in the charts
 # - consistent colormap in the painted part of the labels image
+
diff --git a/src/cellcanvas/data/data_manager.py b/src/cellcanvas/data/data_manager.py
index 3f99ac6..44e4f9f 100644
--- a/src/cellcanvas/data/data_manager.py
+++ b/src/cellcanvas/data/data_manager.py
@@ -33,7 +33,6 @@ def get_training_data(self) -> Tuple[Array, Array]:
         for dataset in self.datasets:
             dataset_features = da.asarray(dataset.concatenated_features)
             dataset_labels = da.asarray(dataset.labels)
-
             # Flatten labels for boolean indexing
             flattened_labels = dataset_labels.flatten()
 
diff --git a/src/cellcanvas/semantic/_embedding_segmentor.py b/src/cellcanvas/semantic/_embedding_segmentor.py
index a924a85..8325a0c 100644
--- a/src/cellcanvas/semantic/_embedding_segmentor.py
+++ b/src/cellcanvas/semantic/_embedding_segmentor.py
@@ -104,7 +104,8 @@ def set_colormap(self, colormap):
         self.colormap = colormap
         
         self.prediction_layer.colormap = DirectLabelColormap(color_dict=colormap)
-        self.painting_layer.colormap = DirectLabelColormap(color_dict=colormap)        
+        self.painting_layer.colormap = DirectLabelColormap(color_dict=colormap)
+        self.update_class_distribution_charts()
 
     def update_data_manager(self, data: DataManager):
         self.data = data
@@ -117,6 +118,7 @@ def update_data_manager(self, data: DataManager):
         self.features = self.data.datasets[0].features
 
         # TODO remove old layers
+        self.viewer.layers.clear()
         self._init_viewer_layers()
 
     def reshape_features(self, arr):
@@ -133,6 +135,7 @@ def _init_viewer_layers(self):
         self.data_layer = self.viewer.add_image(
             self.image_data, name="Image", projection_mode="mean"
         )
+        self.data_layer._keep_auto_contrast = True
         # self.prediction_data = zarr.open(
         #     f"{self.zarr_path}/prediction",
         #     mode="a",

From d143192c855b94aac4588420543601a75f9d1b27 Mon Sep 17 00:00:00 2001
From: Kyle Harrington <czi@kyleharrington.com>
Date: Fri, 12 Apr 2024 17:10:49 -0400
Subject: [PATCH 09/30] Train and predict on all tomograms in a copick project

---
 examples/run_app_copick.py                    | 133 ++++++++++++++++++
 .../semantic/_embedding_segmentor.py          |   1 +
 2 files changed, 134 insertions(+)

diff --git a/examples/run_app_copick.py b/examples/run_app_copick.py
index 1a4b2b4..d06366a 100644
--- a/examples/run_app_copick.py
+++ b/examples/run_app_copick.py
@@ -40,6 +40,13 @@
 from qtpy.QtCore import Qt
 import glob  # For pattern matching of file names
 
+from sklearn.ensemble import RandomForestClassifier
+
+from cellcanvas.semantic.segmentation_manager import (
+    SemanticSegmentationManager,
+)
+
+import dask.array as da
 
 # Project root
 root = CopickRootFSSpec.from_file("/Volumes/kish@CZI.T7/demo_project/copick_config_kyle.json")
@@ -81,6 +88,15 @@ def __init__(self, viewer: napari.Viewer, root):
         layout = QVBoxLayout()
         self.setLayout(layout)
 
+        # Adding new buttons for "Fit on all" and "Predict for all"
+        self.fit_all_button = QPushButton("Fit on all")
+        self.fit_all_button.clicked.connect(self.fit_on_all)
+        layout.addWidget(self.fit_all_button)
+
+        self.predict_all_button = QPushButton("Predict for all")
+        self.predict_all_button.clicked.connect(self.predict_for_all)
+        layout.addWidget(self.predict_all_button)
+        
         # Dropdowns for each data layer
         self.dropdowns = {}
         self.layer_buttons = {}
@@ -143,6 +159,123 @@ def activate_layer(self, layer):
         layer.editable = True
         self.viewer.layers.selection.active = layer
 
+    def get_complete_data_manager(self):
+        datasets = []
+        for run in self.root.runs:
+            static_path = run.static_path
+            # Assume there is a method to get the default voxel spacing directory for each run
+            voxel_spacing_dir = self.get_default_voxel_spacing_directory(static_path)
+
+            if not voxel_spacing_dir:
+                print(f"No Voxel Spacing directory found for run {run.name}.")
+                continue
+
+            # Get all Zarr datasets within the voxel spacing directory
+            zarr_datasets = glob.glob(os.path.join(voxel_spacing_dir, "*.zarr"))
+
+            # Initialize paths
+            image_path = None
+            features_path = None
+            painting_path = os.path.join(voxel_spacing_dir, "painting_001.zarr")
+            prediction_path = os.path.join(voxel_spacing_dir, "prediction_001.zarr")
+            
+            # Assign paths based on dataset names
+            for dataset_path in zarr_datasets:
+                dataset_name = os.path.basename(dataset_path)
+                if "_features.zarr" in dataset_name.lower():
+                    features_path = dataset_path
+                elif "painting" in dataset_name.lower():
+                    painting_path = dataset_path
+                elif "prediction" in dataset_name.lower():
+                    prediction_path = dataset_path
+                else:
+                    image_path = dataset_path
+
+            # Assume each dataset should be loaded with a specific method that may also handle missing datasets
+            if image_path and features_path:
+                # TODO remove hack for highest resolution
+                dataset = DataSet.from_paths(
+                    image_path=os.path.join(image_path, "0"),
+                    features_path=features_path,
+                    labels_path=painting_path,
+                    segmentation_path=prediction_path,
+                    make_missing_datasets=True
+                )
+                datasets.append(dataset)
+
+        # Create a new data manager with all datasets
+        return DataManager(datasets=datasets)
+
+    def get_default_voxel_spacing_directory(self, static_path):
+        # Find VoxelSpacing directories, assuming a hard coded match for now
+        voxel_spacing_dirs = glob.glob(os.path.join(static_path, "VoxelSpacing10*"))
+        if voxel_spacing_dirs:
+            return voxel_spacing_dirs[0]
+        return None
+
+    def fit_on_all(self):
+        print("Fitting all models to the selected dataset.")
+
+        data_manager = self.get_complete_data_manager()
+
+        clf = RandomForestClassifier(
+            n_estimators=50,
+            n_jobs=-1,
+            max_depth=10,
+            max_samples=0.05,
+        )
+        
+        segmentation_manager = SemanticSegmentationManager(
+            data=data_manager, model=clf
+        )
+        segmentation_manager.fit()
+
+        # TODO this is bad
+        self.cell_canvas_app.semantic_segmentor.segmentation_manager = segmentation_manager        
+        
+    def predict_for_all(self):
+        print("Running predictions on all datasets.")
+
+        # Check if segmentation manager is properly initialized
+        if not hasattr(self.cell_canvas_app.semantic_segmentor, 'segmentation_manager') or self.cell_canvas_app.semantic_segmentor.segmentation_manager is None:
+            print("Segmentation manager is not initialized.")
+            return
+
+        # Retrieve the complete data manager that includes all runs
+        data_manager = self.get_complete_data_manager()
+
+        # Iterate through each dataset within the data manager
+        for dataset in data_manager.datasets:
+            dataset_features = da.asarray(dataset.concatenated_features)
+            chunk_shape = dataset_features.chunksize
+            shape = dataset_features.shape
+            dtype = dataset_features.dtype
+
+            # Iterate over chunks
+            for z in range(0, shape[1], chunk_shape[1]):
+                for y in range(0, shape[2], chunk_shape[2]):
+                    for x in range(0, shape[3], chunk_shape[3]):
+                        # Compute the slice for the current chunk
+                        # in feature,z,y,x order
+                        chunk_slice = (
+                            slice(None),
+                            slice(z, min(z + chunk_shape[1], shape[1])),
+                            slice(y, min(y + chunk_shape[2], shape[2])),
+                            slice(x, min(x + chunk_shape[3], shape[3])),                        
+                        )
+                        print(f"Predicting on chunk {chunk_slice}")
+
+                        # Extract the current chunk
+                        chunk = dataset_features[chunk_slice].compute()
+
+                        # Predict on the chunk (adding 1 to each prediction)
+                        predicted_chunk = self.cell_canvas_app.semantic_segmentor.segmentation_manager.predict(chunk) + 1
+
+                        # Write the prediction to the corresponding region in the Zarr array
+                        dataset.segmentation[chunk_slice[1:]] = predicted_chunk
+
+            print(f"Predictions written")
+
     def on_run_clicked(self, item, column):
         data = item.data(0, Qt.UserRole)
         if not isinstance(data, copick.impl.filesystem.CopickRunFSSpec):
diff --git a/src/cellcanvas/semantic/_embedding_segmentor.py b/src/cellcanvas/semantic/_embedding_segmentor.py
index 8325a0c..09733e2 100644
--- a/src/cellcanvas/semantic/_embedding_segmentor.py
+++ b/src/cellcanvas/semantic/_embedding_segmentor.py
@@ -136,6 +136,7 @@ def _init_viewer_layers(self):
             self.image_data, name="Image", projection_mode="mean"
         )
         self.data_layer._keep_auto_contrast = True
+        self.data_layer.refresh()
         # self.prediction_data = zarr.open(
         #     f"{self.zarr_path}/prediction",
         #     mode="a",

From 723c53acf657f09d449deed9df54d87c88c1d75f Mon Sep 17 00:00:00 2001
From: Kyle Harrington <czi@kyleharrington.com>
Date: Mon, 15 Apr 2024 12:20:26 -0400
Subject: [PATCH 10/30] Activate label when clicked on in the legend

---
 examples/run_app_copick.py                    |  3 ++-
 .../semantic/_embedding_segmentor.py          | 20 ++++++++++++++++---
 2 files changed, 19 insertions(+), 4 deletions(-)

diff --git a/examples/run_app_copick.py b/examples/run_app_copick.py
index d06366a..4a24b0f 100644
--- a/examples/run_app_copick.py
+++ b/examples/run_app_copick.py
@@ -363,7 +363,8 @@ def initialize_or_update_cell_canvas(self):
 
         default_painting_path = os.path.join(self.voxel_spacing_dir, "painting_001.zarr")
         default_prediction_path = os.path.join(self.voxel_spacing_dir, "prediction_001.zarr")
-        
+
+        # TODO note this is hard coded to use the highest resolution of a multiscale zarr
         dataset = DataSet.from_paths(
             image_path=os.path.join(self.voxel_spacing_dir, f"{paths['image']}/0"),
             features_path=os.path.join(self.voxel_spacing_dir, paths["features"]),
diff --git a/src/cellcanvas/semantic/_embedding_segmentor.py b/src/cellcanvas/semantic/_embedding_segmentor.py
index 09733e2..a3b8bed 100644
--- a/src/cellcanvas/semantic/_embedding_segmentor.py
+++ b/src/cellcanvas/semantic/_embedding_segmentor.py
@@ -19,7 +19,7 @@
 from napari.qt.threading import thread_worker
 from napari.utils import DirectLabelColormap
 from psygnal import debounced
-from qtpy.QtCore import Qt
+from qtpy.QtCore import Qt, Signal
 from qtpy.QtGui import QColor, QPainter, QPixmap
 from qtpy.QtWidgets import (
     QCheckBox,
@@ -903,6 +903,15 @@ def paint_thread(self, lasso_path, target_label):
 
         # print(f"Painted {np.sum(contained)} pixels with label {target_label}")
 
+class ClickableLabel(QLabel):
+    clicked = Signal(int)  # Emits the label ID
+
+    def __init__(self, label_id, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self.label_id = label_id
+
+    def mousePressEvent(self, event):
+        self.clicked.emit(self.label_id)        
 
 class EmbeddingPaintingWidget(QWidget):
     def __init__(self, app, parent=None):
@@ -1096,7 +1105,7 @@ def setupLegend(self):
                 color = painting_layer.colormap.color_dict[label_id]
 
                 # Create a QLabel for color swatch
-                color_swatch = QLabel()
+                color_swatch = ClickableLabel(label_id)
                 pixmap = QPixmap(16, 16)
 
                 if color is None:
@@ -1105,6 +1114,7 @@ def setupLegend(self):
                     pixmap.fill(QColor(*[int(c * 255) for c in color]))
 
                 color_swatch.setPixmap(pixmap)
+                color_swatch.clicked.connect(self.activateLabel)
 
                 # Update the mapping with new classes or use the existing name
                 if label_id not in self.class_labels_mapping:
@@ -1117,7 +1127,7 @@ def setupLegend(self):
                 label_edit = QLineEdit(label_name)
 
                 # Highlight the label if it is currently being used
-                if label_id == painting_layer._selected_label:
+                if label_id == painting_layer.selected_label:
                     self.highlightLabel(label_edit)
 
                 # Save changes to class labels back to the mapping
@@ -1139,6 +1149,10 @@ def setupLegend(self):
             self.legend_placeholder_index, self.legend_group
         )
 
+    def activateLabel(self, label_id):
+        painting_layer = self.app.get_painting_layer()
+        painting_layer.selected_label = label_id
+        
     def updateLegendHighlighting(self, selected_label_event):
         """Update highlighting of legend"""
         current_label_id = selected_label_event.source._selected_label

From 6fab1cec44942af9d6c94de4174c9fd09ae60314 Mon Sep 17 00:00:00 2001
From: Kyle Harrington <czi@kyleharrington.com>
Date: Mon, 15 Apr 2024 13:02:43 -0400
Subject: [PATCH 11/30] Update prediction to account for chunkwise predictions

---
 examples/run_app_copick.py                    | 38 ++++++++++++++-----
 .../semantic/_embedding_segmentor.py          |  3 --
 2 files changed, 28 insertions(+), 13 deletions(-)

diff --git a/examples/run_app_copick.py b/examples/run_app_copick.py
index 4a24b0f..e6cc443 100644
--- a/examples/run_app_copick.py
+++ b/examples/run_app_copick.py
@@ -213,6 +213,12 @@ def get_default_voxel_spacing_directory(self, static_path):
             return voxel_spacing_dirs[0]
         return None
 
+    def get_segmentations_directory(self, static_path):
+        segmentation_dir = glob.glob(os.path.join(static_path, "Segmentations"))
+        if segmentation_dir:
+            return segmentation_dir[0]
+        return None
+
     def fit_on_all(self):
         print("Fitting all models to the selected dataset.")
 
@@ -290,7 +296,7 @@ def on_run_clicked(self, item, column):
             dropdown.clear()
 
         # Find VoxelSpacing directories
-        # TODO hardcoded to match spacing = 10
+        # TODO hardcoded to match spacing = 10        
         voxel_spacing_dirs = glob.glob(os.path.join(static_path, "VoxelSpacing10*"))
 
         if not voxel_spacing_dirs:  # Check if at least one VoxelSpacing directory was found
@@ -361,17 +367,29 @@ def initialize_or_update_cell_canvas(self):
             print("Please ensure image and feature paths are selected before initializing/updating CellCanvas.")
             return        
 
-        default_painting_path = os.path.join(self.voxel_spacing_dir, "painting_001.zarr")
-        default_prediction_path = os.path.join(self.voxel_spacing_dir, "prediction_001.zarr")
+        # TODO put these into the segmentations directory
+        segmentation_dir = self.get_segmentations_directory(self.selected_run.static_path)
+
+        voxel_spacing = 10
+
+        # Ensure segmentations directory exists
+        os.makedirs(segmentation_dir, exist_ok=True)
+        
+        default_painting_path = os.path.join(segmentation_dir, f'{voxel_spacing:.3f}_cellcanvas-painting_0_all-multilabel.zarr')
+        default_prediction_path = os.path.join(segmentation_dir, f'{voxel_spacing:.3f}_cellcanvas-prediction_0_all-multilabel.zarr')
 
         # TODO note this is hard coded to use the highest resolution of a multiscale zarr
-        dataset = DataSet.from_paths(
-            image_path=os.path.join(self.voxel_spacing_dir, f"{paths['image']}/0"),
-            features_path=os.path.join(self.voxel_spacing_dir, paths["features"]),
-            labels_path=default_painting_path if not paths["painting"] else os.path.join(self.voxel_spacing_dir, paths["painting"]),
-            segmentation_path=default_prediction_path if not paths["prediction"] else os.path.join(self.voxel_spacing_dir, paths["prediction"]),
-            make_missing_datasets=True,
-        )
+        try:
+            dataset = DataSet.from_paths(
+                image_path=os.path.join(self.voxel_spacing_dir, f"{paths['image']}/0"),
+                features_path=os.path.join(self.voxel_spacing_dir, paths["features"]),
+                labels_path=default_painting_path if not paths["painting"] else os.path.join(self.voxel_spacing_dir, paths["painting"]),
+                segmentation_path=default_prediction_path if not paths["prediction"] else os.path.join(self.voxel_spacing_dir, paths["prediction"]),
+                make_missing_datasets=True,
+            )
+        except FileNotFoundError:
+            print(f"File {path} not found!", file=sys.stderr)
+            return
 
         data_manager = DataManager(datasets=[dataset])
         
diff --git a/src/cellcanvas/semantic/_embedding_segmentor.py b/src/cellcanvas/semantic/_embedding_segmentor.py
index a3b8bed..2a99a1e 100644
--- a/src/cellcanvas/semantic/_embedding_segmentor.py
+++ b/src/cellcanvas/semantic/_embedding_segmentor.py
@@ -468,9 +468,6 @@ def on_prediction_completed(self, result):
         self.prediction_labels = prediction_labels
         self.prediction_counts = prediction_counts
 
-        self.get_prediction_layer().data = self.prediction_data.reshape(
-            self.get_prediction_layer().data.shape
-        )
         self.get_prediction_layer().refresh()
 
         self.update_class_distribution_charts()

From e063f0f7904e715815346110be93effef1b21d11 Mon Sep 17 00:00:00 2001
From: Kyle Harrington <czi@kyleharrington.com>
Date: Mon, 15 Apr 2024 13:35:25 -0400
Subject: [PATCH 12/30] Add support for labels with no annotations

---
 .../semantic/_embedding_segmentor.py          | 27 ++++++++++++-------
 1 file changed, 17 insertions(+), 10 deletions(-)

diff --git a/src/cellcanvas/semantic/_embedding_segmentor.py b/src/cellcanvas/semantic/_embedding_segmentor.py
index 2a99a1e..6f1462e 100644
--- a/src/cellcanvas/semantic/_embedding_segmentor.py
+++ b/src/cellcanvas/semantic/_embedding_segmentor.py
@@ -519,16 +519,23 @@ def update_class_distribution_charts(self):
             else 1
         )
 
-        painting_counts = (
-            self.painting_counts
-            if self.painting_counts is not None
-            else np.array([0])
-        )
-        painting_labels = (
-            self.painting_labels
-            if self.painting_labels is not None
-            else np.array([0])
-        )
+        # Initialize counts for all labels in painting_labels with zero
+        if self.painting_labels is not None:
+            unique_labels = np.unique(self.painting_labels)
+            painting_counts_dict = {label: 0 for label in unique_labels}
+        else:
+            unique_labels = np.array([0])
+            painting_counts_dict = {0: 0}
+
+        # Update counts from existing painting_counts if available
+        if self.painting_counts is not None and self.painting_labels is not None:
+            for label, count in zip(self.painting_labels, self.painting_counts):
+                painting_counts_dict[label] = count
+
+        # Create arrays from the dictionary
+        painting_labels = np.array(list(painting_counts_dict.keys()))
+        painting_counts = np.array(list(painting_counts_dict.values()))
+
         prediction_counts = (
             self.prediction_counts
             if self.prediction_counts is not None

From 59bda09b4cb86f7a4cbd0f42b4a3610b56c0de0c Mon Sep 17 00:00:00 2001
From: Kyle Harrington <czi@kyleharrington.com>
Date: Mon, 15 Apr 2024 17:01:19 -0400
Subject: [PATCH 13/30] Add support for importing models

---
 .../semantic/_embedding_segmentor.py          | 54 ++++++++++++++++---
 1 file changed, 47 insertions(+), 7 deletions(-)

diff --git a/src/cellcanvas/semantic/_embedding_segmentor.py b/src/cellcanvas/semantic/_embedding_segmentor.py
index 6f1462e..0868402 100644
--- a/src/cellcanvas/semantic/_embedding_segmentor.py
+++ b/src/cellcanvas/semantic/_embedding_segmentor.py
@@ -47,6 +47,8 @@
 )
 from cellcanvas.utils import get_labels_colormap, paint_maker
 
+import xgboost as xgb
+
 ACTIVE_BUTTON_COLOR = "#AF8B38"
 
 
@@ -61,11 +63,22 @@ def __init__(
         self.extra_logging = extra_logging
         self.data = data_manager
         self.colormap = get_labels_colormap()
-        clf = RandomForestClassifier(
-            n_estimators=50,
-            n_jobs=-1,
-            max_depth=10,
-            max_samples=0.05,
+        # clf = RandomForestClassifier(
+        #     n_estimators=25,
+        #     n_jobs=-1,
+        #     max_depth=10,
+        #     max_samples=0.05,
+        #     max_features='sqrt',
+        #     class_weight='balanced'
+        # )
+
+        clf = xgb.XGBClassifier(
+            objective='multi:softmax',
+            num_class=10,  # Specify number of classes if using softmax
+            n_estimators=200,
+            max_depth=20,
+            learning_rate=0.1,
+            scale_pos_weight='balanced'  # For handling imbalance
         )
         self.segmentation_manager = SemanticSegmentationManager(
             data=self.data, model=clf
@@ -160,7 +173,8 @@ def _init_viewer_layers(self):
         #     dtype="i4",
         #     dimension_separator=".",
         # )
-        self.painting_data = self.data.datasets[0].labels
+        
+        self.painting_data = self.data.datasets[0].labels.astype(int)
         self.painting_layer = self.viewer.add_labels(
             self.painting_data,
             name="Painting",
@@ -349,13 +363,14 @@ def update_model(self, model_type):
         if filtered_labels.size == 0:
             self.logger.info("No labels present. Skipping model update.")
             return None
-
+        
         # Calculate class weights
         unique_labels = np.unique(filtered_labels)
         class_weights = compute_class_weight(
             "balanced", classes=unique_labels, y=filtered_labels
         )
         weight_dict = dict(zip(unique_labels, class_weights))
+        self.logger.info(f"Class balance calculated {class_weights}")
 
         # Apply weights
         # sample_weights = np.vectorize(weight_dict.get)(filtered_labels)
@@ -370,6 +385,8 @@ def update_model(self, model_type):
                 class_weight=weight_dict,
             )
             self.segmentation_manager.model = clf
+            # self.segmentation_manager.fit()
+            self.logger.info(f"Starting model fitting")
             self.segmentation_manager.fit()
             return self.segmentation_manager.model
         elif model_type == "XGBoost":
@@ -984,10 +1001,16 @@ def initUI(self):
         live_pred_layout.addWidget(self.live_pred_button)
         controls_layout.addLayout(live_pred_layout)
 
+        # Export model
         self.export_model_button = QPushButton("Export Model")
         controls_layout.addWidget(self.export_model_button)
         self.export_model_button.clicked.connect(self.export_model)
 
+        # Import model
+        self.import_model_button = QPushButton("Import Model")
+        controls_layout.addWidget(self.import_model_button)
+        self.import_model_button.clicked.connect(self.import_model)        
+
         controls_group.setLayout(controls_layout)
         main_layout.addWidget(controls_group)
 
@@ -1061,6 +1084,23 @@ def export_model(self):
                 self, "Model Export", "No model available to export."
             )
 
+    def import_model(self):
+        filePath, _ = QFileDialog.getOpenFileName(
+            self, "Open Model", "", "Joblib Files (*.joblib)"
+        )
+        if filePath:
+            try:
+                model = joblib.load(filePath)
+                self.app.model = model
+                QMessageBox.information(
+                    self, "Model Import", "Model imported successfully!"
+                )
+                print(f"Loaded model file from: {filePath}")
+            except Exception as e:
+                QMessageBox.warning(
+                    self, "Model Import", f"Failed to import model. Error: {str(e)}"
+                )
+            
     def change_embedding_label_color(self, color):
         """Change the background color of the embedding label."""
         self.embedding_label.setStyleSheet(f"background-color: {color};")

From c9dbd233f7879997d03d587e8cebab84cf13457a Mon Sep 17 00:00:00 2001
From: Kyle Harrington <czi@kyleharrington.com>
Date: Wed, 17 Apr 2024 06:54:38 -0400
Subject: [PATCH 14/30] Better multiscale support, better UI for fit/predict on
 all

---
 examples/run_app_copick.py                    | 121 +++++++++++++++---
 src/cellcanvas/data/data_manager.py           |   1 +
 src/cellcanvas/data/data_set.py               |   8 +-
 .../semantic/_embedding_segmentor.py          |   6 +-
 .../semantic/segmentation_manager.py          |  23 +++-
 src/cellcanvas/utils.py                       |   3 +
 6 files changed, 141 insertions(+), 21 deletions(-)

diff --git a/examples/run_app_copick.py b/examples/run_app_copick.py
index e6cc443..ed2d633 100644
--- a/examples/run_app_copick.py
+++ b/examples/run_app_copick.py
@@ -30,7 +30,10 @@
 from cellcanvas._app.main_app import CellCanvasApp, QtCellCanvas
 from cellcanvas.data.data_manager import DataManager
 from cellcanvas.data.data_set import DataSet
+from napari.qt.threading import thread_worker
 
+import sys
+import logging
 import json
 import copick
 from copick.impl.filesystem import CopickRootFSSpec
@@ -45,6 +48,7 @@
 from cellcanvas.semantic.segmentation_manager import (
     SemanticSegmentationManager,
 )
+from cellcanvas.utils import get_active_button_color
 
 import dask.array as da
 
@@ -88,6 +92,8 @@ def __init__(self, viewer: napari.Viewer, root):
         layout = QVBoxLayout()
         self.setLayout(layout)
 
+        self._init_logging()
+
         # Adding new buttons for "Fit on all" and "Predict for all"
         self.fit_all_button = QPushButton("Fit on all")
         self.fit_all_button.clicked.connect(self.fit_on_all)
@@ -122,6 +128,17 @@ def __init__(self, viewer: napari.Viewer, root):
 
         self.populate_tree()
 
+    def _init_logging(self):
+        self.logger = logging.getLogger("cellcanvas")
+        self.logger.setLevel(logging.DEBUG)
+        streamHandler = logging.StreamHandler(sys.stdout)
+        formatter = logging.Formatter(
+            "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
+        )
+        streamHandler.setFormatter(formatter)
+        self.logger.addHandler(streamHandler)
+        
+
     def populate_tree(self):
         for run in self.root.runs:
             run_item = QTreeWidgetItem(self.tree, [run.name])
@@ -214,14 +231,32 @@ def get_default_voxel_spacing_directory(self, static_path):
         return None
 
     def get_segmentations_directory(self, static_path):
-        segmentation_dir = glob.glob(os.path.join(static_path, "Segmentations"))
-        if segmentation_dir:
-            return segmentation_dir[0]
-        return None
+        segmentation_dir = os.path.join(static_path, "Segmentations")
+        return segmentation_dir
 
+    def change_button_color(self, button, color):
+        button.setStyleSheet(f"background-color: {color};")
+
+    def reset_button_color(self, button):
+        self.change_button_color(button, "")
+    
     def fit_on_all(self):
+        if not self.cell_canvas_app:
+            print("Initialize cell canvas first")
+            return
+        
         print("Fitting all models to the selected dataset.")
 
+        self.change_button_color(
+            self.fit_all_button, get_active_button_color()
+        )
+        
+        self.model_fit_worker = self.threaded_fit_on_all()
+        self.model_fit_worker.returned.connect(self.on_model_fit_completed)
+        self.model_fit_worker.start()
+
+    @thread_worker
+    def threaded_fit_on_all(self):
         data_manager = self.get_complete_data_manager()
 
         clf = RandomForestClassifier(
@@ -236,10 +271,39 @@ def fit_on_all(self):
         )
         segmentation_manager.fit()
 
-        # TODO this is bad
-        self.cell_canvas_app.semantic_segmentor.segmentation_manager = segmentation_manager        
+        return segmentation_manager        
+
+    def on_model_fit_completed(self, segmentation_manager):
+        self.logger.debug("on_model_fit_completed")
+
+        self.cell_canvas_app.semantic_segmentor.segmentation_manager = segmentation_manager
+
+        # Reset color
+        self.reset_button_color(self.fit_all_button)
         
     def predict_for_all(self):
+        if not self.cell_canvas_app:
+            print("Initialize cell canvas first")
+            return
+        
+        print("Fitting all models to the selected dataset.")
+
+        self.change_button_color(
+            self.predict_all_button, get_active_button_color()
+        )
+        
+        self.predict_worker = self.threaded_predict_for_all()
+        self.predict_worker.returned.connect(self.on_predict_completed)
+        self.predict_worker.start()
+
+    def on_predict_completed(self, result):
+        self.logger.debug("on_predict_completed")
+
+        # Reset color
+        self.reset_button_color(self.predict_all_button)
+        
+    @thread_worker
+    def threaded_predict_for_all(self):
         print("Running predictions on all datasets.")
 
         # Check if segmentation manager is properly initialized
@@ -290,6 +354,7 @@ def on_run_clicked(self, item, column):
 
         self.selected_run = data
         static_path = self.selected_run.static_path
+        self.logger.info(f"Selected {static_path}")
 
         # Clear existing items
         for dropdown in self.dropdowns.values():
@@ -298,13 +363,14 @@ def on_run_clicked(self, item, column):
         # Find VoxelSpacing directories
         # TODO hardcoded to match spacing = 10        
         voxel_spacing_dirs = glob.glob(os.path.join(static_path, "VoxelSpacing10*"))
+        segmentation_dir = self.get_segmentations_directory(static_path)
 
         if not voxel_spacing_dirs:  # Check if at least one VoxelSpacing directory was found
             print(f"No Voxel Spacing directories found in {static_path}. Please check the directory structure.")
             return
 
+        # First handle image and features
         self.voxel_spacing_dir = voxel_spacing_dirs[0]        
-        
         for voxel_spacing_dir in voxel_spacing_dirs:
             # Find all Zarr datasets within the voxel spacing directory
             zarr_datasets = glob.glob(os.path.join(voxel_spacing_dir, "*.zarr"))
@@ -314,19 +380,33 @@ def on_run_clicked(self, item, column):
                 dataset_name = os.path.basename(dataset_path)
                 if "_features.zarr" in dataset_name.lower():
                     self.dropdowns["features"].addItem(dataset_name, dataset_path)
-                elif "painting.zarr" in dataset_name.lower():
-                    self.dropdowns["painting"].addItem(dataset_name, dataset_path)
-                elif "prediction.zarr" in dataset_name.lower():
-                    self.dropdowns["prediction"].addItem(dataset_name, dataset_path)
                 else:
                     # This is for the image dropdown, excluding features, painting, and prediction zarr files
                     self.dropdowns["image"].addItem(dataset_name, dataset_path)
 
 
-        # Set defaults for painting and prediction layers, assuming they follow a fixed naming convention
-        # and are expected to be located in a specific VoxelSpacing directory, adjusting as necessary
+        # Find all Zarr datasets within the Segmentations directory
+        os.makedirs(segmentation_dir, exist_ok=True)
+        zarr_datasets = glob.glob(os.path.join(segmentation_dir, "*.zarr"))
 
-                                    
+        voxel_spacing = 10
+        session_id = 0
+        
+        default_painting_path = os.path.join(segmentation_dir, f'{voxel_spacing:.3f}_cellcanvas-painting_{session_id}_all-multilabel.zarr')
+        default_prediction_path = os.path.join(segmentation_dir, f'{voxel_spacing:.3f}_cellcanvas-prediction_{session_id}_all-multilabel.zarr')        
+
+        self.dropdowns["painting"].addItem(os.path.basename(default_painting_path), default_painting_path)
+        self.dropdowns["prediction"].addItem(os.path.basename(default_prediction_path), default_prediction_path)
+
+        # Filtering the paths for each dropdown category
+        for dataset_path in zarr_datasets:
+            dataset_name = os.path.basename(dataset_path)
+            # Do not add painting or prediction to prediction or painting respectively
+            if "painting" not in dataset_name.lower():
+                self.dropdowns["prediction"].addItem(dataset_name, dataset_path)
+            if "prediction" not in dataset_name.lower():
+                self.dropdowns["painting"].addItem(dataset_name, dataset_path)
+                                                    
     def on_item_clicked(self, item, column):
         data = item.data(0, Qt.UserRole)
         if data:
@@ -334,10 +414,12 @@ def on_item_clicked(self, item, column):
                 self.open_picks(data)
             elif isinstance(data, copick.impl.filesystem.CopickTomogramFSSpec):
                 self.open_tomogram(data)
+            elif isinstance(data, copick.models.CopickSegmentation):
+                self.open_labels(data)
 
     def open_picks(self, picks):
         with open(picks.path, 'r') as f:
-            points_data = json.load(f)
+            points_data = json.load(f)            
 
         # Extracting points locations
         points_locations = [
@@ -354,11 +436,20 @@ def open_picks(self, picks):
 
     def open_tomogram(self, tomogram):
         zarr_store = zarr.open(tomogram.zarr(), mode='r')
+        print(f"open_tomogram {tomogram.zarr()}")
         # TODO extract scale/transform info
 
         # TODO scale is hard coded to 10 here
         self.viewer.add_image(zarr_store[0], name=f"Tomogram: {tomogram.tomo_type}")
 
+    def open_labels(self, tomogram):
+        zarr_store = zarr.open(tomogram.zarr(), mode='r')
+        print(f"open_labels {tomogram.zarr()}")
+        # TODO extract scale/transform info
+
+        # TODO scale is hard coded to 10 here
+        self.viewer.add_image(zarr_store[0], name=f"Tomogram: {tomogram.name}")
+
     def initialize_or_update_cell_canvas(self):
         # Collect paths from dropdowns
         paths = {layer: dropdown.currentText() for layer, dropdown in self.dropdowns.items()}
diff --git a/src/cellcanvas/data/data_manager.py b/src/cellcanvas/data/data_manager.py
index 44e4f9f..e114816 100644
--- a/src/cellcanvas/data/data_manager.py
+++ b/src/cellcanvas/data/data_manager.py
@@ -16,6 +16,7 @@ def __init__(self, datasets: Optional[List[DataSet]] = None):
             datasets = [datasets]
         self.datasets = SelectableEventedList(datasets)
 
+    # Normal version
     def get_training_data(self) -> Tuple[Array, Array]:
         """Get the pixel-wise semantic segmentation training data for datasets.
 
diff --git a/src/cellcanvas/data/data_set.py b/src/cellcanvas/data/data_set.py
index f61b423..117c42f 100644
--- a/src/cellcanvas/data/data_set.py
+++ b/src/cellcanvas/data/data_set.py
@@ -6,6 +6,8 @@
 import zarr
 from zarr import Array
 
+from ome_zarr.io import ZarrLocation
+from ome_zarr.reader import Multiscales
 
 @dataclass
 class DataSet:
@@ -62,7 +64,11 @@ def from_paths(
                 dimension_separator=".",
             )
         else:
-            labels = zarr.open(labels_path, "a")
+            if Multiscales.matches(ZarrLocation(labels_path)):
+                labels = zarr.open(os.path.join(labels_path, "0"),
+                                   "a")
+            else:
+                labels = zarr.open(labels_path, "a")
 
         # get the segmentation
         if (not os.path.isdir(segmentation_path)) and make_missing_datasets:
diff --git a/src/cellcanvas/semantic/_embedding_segmentor.py b/src/cellcanvas/semantic/_embedding_segmentor.py
index 0868402..29450de 100644
--- a/src/cellcanvas/semantic/_embedding_segmentor.py
+++ b/src/cellcanvas/semantic/_embedding_segmentor.py
@@ -173,8 +173,8 @@ def _init_viewer_layers(self):
         #     dtype="i4",
         #     dimension_separator=".",
         # )
-        
-        self.painting_data = self.data.datasets[0].labels.astype(int)
+        self.painting_data = self.data.datasets[0].labels
+        # .data.astype("i4")
         self.painting_layer = self.viewer.add_labels(
             self.painting_data,
             name="Painting",
@@ -510,7 +510,6 @@ def start_model_fit(self):
 
         self.model_fit_worker = self.model_fit_thread(self.get_model_type())
         self.model_fit_worker.returned.connect(self.on_model_fit_completed)
-        # TODO update UI to indicate that model training has started
         self.model_fit_worker.start()
 
     def on_model_fit_completed(self, model):
@@ -1196,6 +1195,7 @@ def setupLegend(self):
     def activateLabel(self, label_id):
         painting_layer = self.app.get_painting_layer()
         painting_layer.selected_label = label_id
+        self.updateLegendHighlighting()
         
     def updateLegendHighlighting(self, selected_label_event):
         """Update highlighting of legend"""
diff --git a/src/cellcanvas/semantic/segmentation_manager.py b/src/cellcanvas/semantic/segmentation_manager.py
index 3d246ec..09e5d67 100644
--- a/src/cellcanvas/semantic/segmentation_manager.py
+++ b/src/cellcanvas/semantic/segmentation_manager.py
@@ -1,12 +1,14 @@
 from typing import Protocol
 
+import sys
+import logging
 import numpy as np
 import dask.array as da
 from dask import delayed
 from sklearn.exceptions import NotFittedError
 
 from cellcanvas.data.data_manager import DataManager
-
+from tqdm import tqdm
 
 class SegmentationModel(Protocol):
     """Protocol for semantic segmentations models that are
@@ -22,14 +24,31 @@ def __init__(self, data: DataManager, model: SegmentationModel):
         self.data = data
         self.model = model
 
+        self._init_logging()
+
+    def _init_logging(self):
+        self.logger = logging.getLogger("cellcanvas")
+        self.logger.setLevel(logging.DEBUG)
+        streamHandler = logging.StreamHandler(sys.stdout)
+        formatter = logging.Formatter(
+            "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
+        )
+        streamHandler.setFormatter(formatter)
+        self.logger.addHandler(streamHandler)
+
     def update_data_manager(self, data: DataManager):
         self.data = data
-        
+
     def fit(self):
         """Fit using the model using the data in the data manager."""
+        self.logger.info("Starting to fit")
+        # Get training data from the data manager
         features, labels = self.data.get_training_data()
+
         features_computed, labels_computed = features.compute(), labels.compute()
 
+        self.logger.info("Starting the actual model fit")
+
         self.model.fit(features_computed, labels_computed)
 
     def predict(self, feature_image):
diff --git a/src/cellcanvas/utils.py b/src/cellcanvas/utils.py
index fbc7211..338ee7e 100644
--- a/src/cellcanvas/utils.py
+++ b/src/cellcanvas/utils.py
@@ -78,3 +78,6 @@ def paint(self, coord, new_label, refresh=True):
         )
 
     return paint
+
+def get_active_button_color():
+    return "#AF8B38"

From e5c106dc7a10bcbc6664561856395683a9b6706e Mon Sep 17 00:00:00 2001
From: Kyle Harrington <czi@kyleharrington.com>
Date: Wed, 17 Apr 2024 10:24:32 -0400
Subject: [PATCH 15/30] Fix default painting/prediction path, remove settings
 from UI

---
 examples/run_app_copick.py                    |  4 +-
 .../semantic/_embedding_segmentor.py          | 74 ++++++++-----------
 src/cellcanvas/utils.py                       |  6 +-
 3 files changed, 36 insertions(+), 48 deletions(-)

diff --git a/examples/run_app_copick.py b/examples/run_app_copick.py
index ed2d633..f3ad0cd 100644
--- a/examples/run_app_copick.py
+++ b/examples/run_app_copick.py
@@ -474,8 +474,8 @@ def initialize_or_update_cell_canvas(self):
             dataset = DataSet.from_paths(
                 image_path=os.path.join(self.voxel_spacing_dir, f"{paths['image']}/0"),
                 features_path=os.path.join(self.voxel_spacing_dir, paths["features"]),
-                labels_path=default_painting_path if not paths["painting"] else os.path.join(self.voxel_spacing_dir, paths["painting"]),
-                segmentation_path=default_prediction_path if not paths["prediction"] else os.path.join(self.voxel_spacing_dir, paths["prediction"]),
+                labels_path=default_painting_path if not paths["painting"] else os.path.join(segmentation_dir, paths["painting"]),
+                segmentation_path=default_prediction_path if not paths["prediction"] else os.path.join(segmentation_dir, paths["prediction"]),
                 make_missing_datasets=True,
             )
         except FileNotFoundError:
diff --git a/src/cellcanvas/semantic/_embedding_segmentor.py b/src/cellcanvas/semantic/_embedding_segmentor.py
index 29450de..1863a1e 100644
--- a/src/cellcanvas/semantic/_embedding_segmentor.py
+++ b/src/cellcanvas/semantic/_embedding_segmentor.py
@@ -19,7 +19,7 @@
 from napari.qt.threading import thread_worker
 from napari.utils import DirectLabelColormap
 from psygnal import debounced
-from qtpy.QtCore import Qt, Signal
+from qtpy.QtCore import Qt, Signal, Slot
 from qtpy.QtGui import QColor, QPainter, QPixmap
 from qtpy.QtWidgets import (
     QCheckBox,
@@ -35,6 +35,7 @@
     QVBoxLayout,
     QWidget,
 )
+from qtpy import QtCore, QtWidgets
 from sklearn.cross_decomposition import PLSRegression
 from sklearn.ensemble import RandomForestClassifier
 from sklearn.utils.class_weight import compute_class_weight
@@ -222,7 +223,7 @@ def _connect_events(self):
             listener.connect(
                 debounced(
                     ensure_main_thread(on_data_change_handler),
-                    timeout=1000,
+                    timeout=5000,
                 )
             )
 
@@ -378,10 +379,10 @@ def update_model(self, model_type):
         # Model fitting
         if model_type == "Random Forest":
             clf = RandomForestClassifier(
-                n_estimators=50,
+                n_estimators=200,
                 n_jobs=-1,
-                max_depth=10,
-                max_samples=0.05,
+                max_depth=15,
+                max_samples=0.1,
                 class_weight=weight_dict,
             )
             self.segmentation_manager.model = clf
@@ -576,9 +577,6 @@ def update_class_distribution_charts(self):
             self.logger.info(
                 f"image layer: contrast_limits = {self.viewer.layers['Image'].contrast_limits}, opacity = {self.viewer.layers['Image'].opacity}, gamma = {self.viewer.layers['Image'].gamma}"  # noqa G004
             )
-            self.logger.info(
-                f"Current model type: {self.widget.model_dropdown.currentText()}"  # noqa G004
-            )
 
         # Calculate percentages instead of raw counts
         painting_percentages = (painting_counts / total_pixels) * 100
@@ -916,10 +914,10 @@ def paint_thread(self, lasso_path, target_label):
             # Update the painting data
             self.painting_data[z, y, x] = target_label
 
-        if self.extra_logging:
-            self.logger.info(
-                f"lasso paint: label = {target_label}, indices = {paint_indices}"  # noqa G004
-            )
+        # if self.extra_logging:
+        #     self.logger.info(
+        #         f"lasso paint: label = {target_label}, indices = {paint_indices}"  # noqa G004
+        #     )
 
         # print(f"Painted {np.sum(contained)} pixels with label {target_label}")
 
@@ -932,7 +930,7 @@ def __init__(self, label_id, *args, **kwargs):
 
     def mousePressEvent(self, event):
         self.clicked.emit(self.label_id)        
-
+        
 class EmbeddingPaintingWidget(QWidget):
     def __init__(self, app, parent=None):
         super().__init__(parent=parent)
@@ -945,22 +943,6 @@ def initUI(self):
 
         self.legend_placeholder_index = 0
 
-        # Settings Group
-        settings_group = QGroupBox("Settings")
-        settings_layout = QVBoxLayout()
-
-        model_layout = QHBoxLayout()
-        model_label = QLabel("Select Model")
-        self.model_dropdown = QComboBox()
-        self.model_dropdown.addItems(["Random Forest", "XGBoost"])
-        model_layout.addWidget(model_label)
-        model_layout.addWidget(self.model_dropdown)
-        settings_layout.addLayout(model_layout)
-
-        self.add_features_button = QPushButton("Add Features")
-        self.add_features_button.clicked.connect(self.add_features)
-        settings_layout.addWidget(self.add_features_button)
-
         thickness_layout = QHBoxLayout()
         thickness_label = QLabel("Adjust Slice Thickness")
         self.thickness_slider = QSlider(Qt.Horizontal)
@@ -970,14 +952,11 @@ def initUI(self):
         self.thickness_slider.setValue(10)
         thickness_layout.addWidget(thickness_label)
         thickness_layout.addWidget(self.thickness_slider)
-        settings_layout.addLayout(thickness_layout)
-
+        main_layout.addLayout(thickness_layout)
+        
         # Update layer contrast limits after thick slices has effect
         self.app.viewer.layers["Image"].reset_contrast_limits()
 
-        settings_group.setLayout(settings_layout)
-        main_layout.addWidget(settings_group)
-
         # Controls Group
         controls_group = QGroupBox("Controls")
         controls_layout = QVBoxLayout()
@@ -1000,6 +979,14 @@ def initUI(self):
         live_pred_layout.addWidget(self.live_pred_button)
         controls_layout.addLayout(live_pred_layout)
 
+        # Connect checkbox signals to actions
+        self.live_fit_checkbox.stateChanged.connect(self.on_live_fit_changed)
+        self.live_pred_checkbox.stateChanged.connect(self.on_live_pred_changed)
+
+        # Connect button clicks to actions
+        self.live_fit_button.clicked.connect(self.app.start_model_fit)
+        self.live_pred_button.clicked.connect(self.app.start_prediction)
+        
         # Export model
         self.export_model_button = QPushButton("Export Model")
         controls_layout.addWidget(self.export_model_button)
@@ -1044,14 +1031,6 @@ def initUI(self):
 
         self.setLayout(main_layout)
 
-        # Connect checkbox signals to actions
-        self.live_fit_checkbox.stateChanged.connect(self.on_live_fit_changed)
-        self.live_pred_checkbox.stateChanged.connect(self.on_live_pred_changed)
-
-        # Connect button clicks to actions
-        self.live_fit_button.clicked.connect(self.app.start_model_fit)
-        self.live_pred_button.clicked.connect(self.app.start_prediction)
-
     def add_features(self):
         zarr_path = QFileDialog.getExistingDirectory(self, "Select Directory")
 
@@ -1192,10 +1171,15 @@ def setupLegend(self):
             self.legend_placeholder_index, self.legend_group
         )
 
-    def activateLabel(self, label_id):
+    def activateLabel(self, current_label_id):
         painting_layer = self.app.get_painting_layer()
-        painting_layer.selected_label = label_id
-        self.updateLegendHighlighting()
+        painting_layer.selected_label = current_label_id
+
+        for label_id, label_edit in self.label_edits.items():
+            if label_id == current_label_id:
+                self.highlightLabel(label_edit)
+            else:
+                self.removeHighlightLabel(label_edit)
         
     def updateLegendHighlighting(self, selected_label_event):
         """Update highlighting of legend"""
diff --git a/src/cellcanvas/utils.py b/src/cellcanvas/utils.py
index 338ee7e..0041ae2 100644
--- a/src/cellcanvas/utils.py
+++ b/src/cellcanvas/utils.py
@@ -3,6 +3,9 @@
     sphere_indices,
 )
 
+from qtpy.QtWidgets import (QApplication, QGroupBox, QVBoxLayout, QHBoxLayout,
+                            QLabel, QComboBox, QPushButton, QWidget, QCheckBox)
+from qtpy.QtCore import Slot, Qt
 
 def get_labels_colormap():
     """Return a colormap for distinct label colors based on:
@@ -71,7 +74,7 @@ def paint(self, coord, new_label, refresh=True):
             int
         )
 
-        logger.info("paint: label = %s, indices = %s", new_label, mask_indices)
+        # logger.info("paint: label = %s, indices = %s", new_label, mask_indices)
 
         self._paint_indices(
             mask_indices, new_label, shape, dims_to_paint, slice_coord, refresh
@@ -81,3 +84,4 @@ def paint(self, coord, new_label, refresh=True):
 
 def get_active_button_color():
     return "#AF8B38"
+

From ec75334bcad39b879abf3f5454ccb47b99e0a7ef Mon Sep 17 00:00:00 2001
From: Kyle Harrington <czi@kyleharrington.com>
Date: Wed, 17 Apr 2024 11:48:05 -0400
Subject: [PATCH 16/30] Clean up path handling and support configs per run

---
 examples/run_app_copick.py | 210 +++++++++++++++++++++++--------------
 1 file changed, 134 insertions(+), 76 deletions(-)

diff --git a/examples/run_app_copick.py b/examples/run_app_copick.py
index f3ad0cd..82475f3 100644
--- a/examples/run_app_copick.py
+++ b/examples/run_app_copick.py
@@ -128,6 +128,9 @@ def __init__(self, viewer: napari.Viewer, root):
 
         self.populate_tree()
 
+    def get_voxel_spacing(self):
+        return 10
+        
     def _init_logging(self):
         self.logger = logging.getLogger("cellcanvas")
         self.logger.setLevel(logging.DEBUG)
@@ -179,40 +182,67 @@ def activate_layer(self, layer):
     def get_complete_data_manager(self):
         datasets = []
         for run in self.root.runs:
-            static_path = run.static_path
-            # Assume there is a method to get the default voxel spacing directory for each run
-            voxel_spacing_dir = self.get_default_voxel_spacing_directory(static_path)
+            run_dir = run.static_path
+            config_path = os.path.join(run_dir, "dataset_config.json")
+
+            voxel_spacing_dir = self.get_default_voxel_spacing_directory(run_dir)
+            segmentation_dir = self.get_segmentations_directory(run_dir)
 
             if not voxel_spacing_dir:
                 print(f"No Voxel Spacing directory found for run {run.name}.")
                 continue
 
-            # Get all Zarr datasets within the voxel spacing directory
-            zarr_datasets = glob.glob(os.path.join(voxel_spacing_dir, "*.zarr"))
-
-            # Initialize paths
-            image_path = None
-            features_path = None
-            painting_path = os.path.join(voxel_spacing_dir, "painting_001.zarr")
-            prediction_path = os.path.join(voxel_spacing_dir, "prediction_001.zarr")
+            os.makedirs(segmentation_dir, exist_ok=True)
             
-            # Assign paths based on dataset names
-            for dataset_path in zarr_datasets:
-                dataset_name = os.path.basename(dataset_path)
-                if "_features.zarr" in dataset_name.lower():
-                    features_path = dataset_path
-                elif "painting" in dataset_name.lower():
-                    painting_path = dataset_path
-                elif "prediction" in dataset_name.lower():
-                    prediction_path = dataset_path
-                else:
-                    image_path = dataset_path
-
-            # Assume each dataset should be loaded with a specific method that may also handle missing datasets
+            if os.path.exists(config_path):
+                with open(config_path, 'r') as file:
+                    config = json.load(file)
+                    image_path = os.path.join(voxel_spacing_dir, config['image'])
+                    features_path = os.path.join(voxel_spacing_dir, config['features'])
+                    painting_path = os.path.join(segmentation_dir, config['painting'])
+                    prediction_path = os.path.join(segmentation_dir, config['prediction'])
+            else:
+                # Existing logic to find paths                
+                voxel_spacing = self.get_voxel_spacing()
+
+                zarr_datasets = glob.glob(os.path.join(voxel_spacing_dir, "*.zarr"))
+                image_path = None
+                features_path = None
+                painting_path = os.path.join(segmentation_dir, f'{voxel_spacing:.3f}_cellcanvas-painting_0_all-multilabel.zarr')
+                prediction_path = os.path.join(segmentation_dir, f'{voxel_spacing:.3f}_cellcanvas-prediction_0_all-multilabel.zarr')
+
+                for dataset_path in zarr_datasets:
+                    dataset_name = os.path.basename(dataset_path).lower()
+                    if "_features.zarr" in dataset_name:
+                        features_path = dataset_path
+                    elif "painting" in dataset_name:
+                        painting_path = dataset_path
+                    elif "prediction" in dataset_name:
+                        prediction_path = dataset_path
+                    else:
+                        # TODO hard coded to use highest resolution
+                        image_path = os.path.join(dataset_path, "0")
+
+                # Save paths to JSON
+                config = {
+                    'image': os.path.relpath(image_path, voxel_spacing_dir),
+                    'features': os.path.relpath(features_path, voxel_spacing_dir),
+                    'painting': os.path.relpath(painting_path, segmentation_dir),
+                    'prediction': os.path.relpath(prediction_path, segmentation_dir)
+                }
+                with open(config_path, 'w') as file:
+                    json.dump(config, file)
+
+            print(f"Fitting on paths:")
+            print(f"Image: {image_path}")
+            print(f"Features: {features_path}")
+            print(f"Painting: {painting_path}")
+            print(f"Prediction: {prediction_path}")
+                    
+            # Load dataset with paths
             if image_path and features_path:
-                # TODO remove hack for highest resolution
                 dataset = DataSet.from_paths(
-                    image_path=os.path.join(image_path, "0"),
+                    image_path=image_path,
                     features_path=features_path,
                     labels_path=painting_path,
                     segmentation_path=prediction_path,
@@ -220,12 +250,12 @@ def get_complete_data_manager(self):
                 )
                 datasets.append(dataset)
 
-        # Create a new data manager with all datasets
         return DataManager(datasets=datasets)
 
     def get_default_voxel_spacing_directory(self, static_path):
         # Find VoxelSpacing directories, assuming a hard coded match for now
-        voxel_spacing_dirs = glob.glob(os.path.join(static_path, "VoxelSpacing10*"))
+        voxel_spacing = self.get_voxel_spacing()
+        voxel_spacing_dirs = glob.glob(os.path.join(static_path, f'VoxelSpacing{voxel_spacing:.3f}'))
         if voxel_spacing_dirs:
             return voxel_spacing_dirs[0]
         return None
@@ -360,52 +390,57 @@ def on_run_clicked(self, item, column):
         for dropdown in self.dropdowns.values():
             dropdown.clear()
 
-        # Find VoxelSpacing directories
-        # TODO hardcoded to match spacing = 10        
-        voxel_spacing_dirs = glob.glob(os.path.join(static_path, "VoxelSpacing10*"))
-        segmentation_dir = self.get_segmentations_directory(static_path)
+        config_path = os.path.join(static_path, "dataset_config.json")
+        if os.path.exists(config_path):
+            # Use the JSON file to set up the dropdowns
+            with open(config_path, 'r') as file:
+                config = json.load(file)
+
+            # Populate dropdowns using the paths in the config
+            for key, rel_path in config.items():
+                abs_path = os.path.join(static_path, rel_path)
+                if os.path.exists(abs_path):
+                    dropdown_key = key.split('_')[0]  # 'image', 'features', 'painting', 'prediction'
+                    self.dropdowns[dropdown_key].addItem(rel_path, abs_path)
+        else:
+            # Find VoxelSpacing directories
+            # TODO hard coded voxel spacing here
+            voxel_spacing_dirs = glob.glob(os.path.join(static_path, "VoxelSpacing10*"))
+            segmentation_dir = self.get_segmentations_directory(static_path)
+
+            if not voxel_spacing_dirs:
+                print(f"No Voxel Spacing directories found in {static_path}. Please check the directory structure.")
+                return
+
+            voxel_spacing_dir = voxel_spacing_dirs[0]
+            for voxel_spacing_dir in voxel_spacing_dirs:
+                # Find all Zarr datasets within the voxel spacing directory
+                zarr_datasets = glob.glob(os.path.join(voxel_spacing_dir, "*.zarr"))
+
+                for dataset_path in zarr_datasets:
+                    dataset_name = os.path.basename(dataset_path)
+                    if "_features.zarr" in dataset_name.lower():
+                        self.dropdowns["features"].addItem(dataset_name, os.path.join(voxel_spacing_dir, dataset_path))
+                    else:
+                        self.dropdowns["image"].addItem(dataset_name + "/0", os.path.join(voxel_spacing_dir, dataset_path, "0"))
 
-        if not voxel_spacing_dirs:  # Check if at least one VoxelSpacing directory was found
-            print(f"No Voxel Spacing directories found in {static_path}. Please check the directory structure.")
-            return
+            # Handling segmentations
+            os.makedirs(segmentation_dir, exist_ok=True)
+            zarr_datasets = glob.glob(os.path.join(segmentation_dir, "*.zarr"))
+            voxel_spacing = self.get_voxel_spacing()
+            session_id = 0
+            default_painting_path = os.path.join(segmentation_dir, f'{voxel_spacing:.3f}_cellcanvas-painting_{session_id}_all-multilabel.zarr')
+            default_prediction_path = os.path.join(segmentation_dir, f'{voxel_spacing:.3f}_cellcanvas-prediction_{session_id}_all-multilabel.zarr')
+
+            self.dropdowns["painting"].addItem(os.path.basename(default_painting_path), default_painting_path)
+            self.dropdowns["prediction"].addItem(os.path.basename(default_prediction_path), default_prediction_path)
 
-        # First handle image and features
-        self.voxel_spacing_dir = voxel_spacing_dirs[0]        
-        for voxel_spacing_dir in voxel_spacing_dirs:
-            # Find all Zarr datasets within the voxel spacing directory
-            zarr_datasets = glob.glob(os.path.join(voxel_spacing_dir, "*.zarr"))
-            
-            # Filtering the paths for each dropdown category
             for dataset_path in zarr_datasets:
                 dataset_name = os.path.basename(dataset_path)
-                if "_features.zarr" in dataset_name.lower():
-                    self.dropdowns["features"].addItem(dataset_name, dataset_path)
-                else:
-                    # This is for the image dropdown, excluding features, painting, and prediction zarr files
-                    self.dropdowns["image"].addItem(dataset_name, dataset_path)
-
-
-        # Find all Zarr datasets within the Segmentations directory
-        os.makedirs(segmentation_dir, exist_ok=True)
-        zarr_datasets = glob.glob(os.path.join(segmentation_dir, "*.zarr"))
-
-        voxel_spacing = 10
-        session_id = 0
-        
-        default_painting_path = os.path.join(segmentation_dir, f'{voxel_spacing:.3f}_cellcanvas-painting_{session_id}_all-multilabel.zarr')
-        default_prediction_path = os.path.join(segmentation_dir, f'{voxel_spacing:.3f}_cellcanvas-prediction_{session_id}_all-multilabel.zarr')        
-
-        self.dropdowns["painting"].addItem(os.path.basename(default_painting_path), default_painting_path)
-        self.dropdowns["prediction"].addItem(os.path.basename(default_prediction_path), default_prediction_path)
-
-        # Filtering the paths for each dropdown category
-        for dataset_path in zarr_datasets:
-            dataset_name = os.path.basename(dataset_path)
-            # Do not add painting or prediction to prediction or painting respectively
-            if "painting" not in dataset_name.lower():
-                self.dropdowns["prediction"].addItem(dataset_name, dataset_path)
-            if "prediction" not in dataset_name.lower():
-                self.dropdowns["painting"].addItem(dataset_name, dataset_path)
+                if "painting" not in dataset_name.lower():
+                    self.dropdowns["prediction"].addItem(dataset_name, dataset_path)
+                if "prediction" not in dataset_name.lower():
+                    self.dropdowns["painting"].addItem(dataset_name, dataset_path)
                                                     
     def on_item_clicked(self, item, column):
         data = item.data(0, Qt.UserRole)
@@ -458,10 +493,11 @@ def initialize_or_update_cell_canvas(self):
             print("Please ensure image and feature paths are selected before initializing/updating CellCanvas.")
             return        
 
-        # TODO put these into the segmentations directory
+        run_dir = self.selected_run.static_path
         segmentation_dir = self.get_segmentations_directory(self.selected_run.static_path)
+        voxel_spacing_dir = self.get_default_voxel_spacing_directory(self.selected_run.static_path)
 
-        voxel_spacing = 10
+        voxel_spacing = self.get_voxel_spacing()
 
         # Ensure segmentations directory exists
         os.makedirs(segmentation_dir, exist_ok=True)
@@ -469,19 +505,41 @@ def initialize_or_update_cell_canvas(self):
         default_painting_path = os.path.join(segmentation_dir, f'{voxel_spacing:.3f}_cellcanvas-painting_0_all-multilabel.zarr')
         default_prediction_path = os.path.join(segmentation_dir, f'{voxel_spacing:.3f}_cellcanvas-prediction_0_all-multilabel.zarr')
 
+        painting_path = default_painting_path if not paths["painting"] else os.path.join(segmentation_dir, paths["painting"])
+        prediction_path = default_prediction_path if not paths["prediction"] else os.path.join(segmentation_dir, paths["prediction"])
+        image_path = os.path.join(voxel_spacing_dir, paths['image'])
+        features_path = os.path.join(voxel_spacing_dir, paths["features"])
+        
         # TODO note this is hard coded to use the highest resolution of a multiscale zarr
+        print(f"Opening paths:")
+        print(f"Image: {image_path}")
+        print(f"Features: {features_path}")
+        print(f"Painting: {painting_path}")
+        print(f"Prediction: {prediction_path}")
         try:
             dataset = DataSet.from_paths(
-                image_path=os.path.join(self.voxel_spacing_dir, f"{paths['image']}/0"),
-                features_path=os.path.join(self.voxel_spacing_dir, paths["features"]),
-                labels_path=default_painting_path if not paths["painting"] else os.path.join(segmentation_dir, paths["painting"]),
-                segmentation_path=default_prediction_path if not paths["prediction"] else os.path.join(segmentation_dir, paths["prediction"]),
+                image_path=image_path,
+                features_path=features_path,
+                labels_path=painting_path,
+                segmentation_path=prediction_path,
                 make_missing_datasets=True,
             )
         except FileNotFoundError:
             print(f"File {path} not found!", file=sys.stderr)
             return
 
+        config_path = os.path.join(run_dir, "dataset_config.json")
+
+        config = {
+            'image': os.path.relpath(os.path.join(voxel_spacing_dir, f"{paths['image']}"), voxel_spacing_dir),
+            'features': os.path.relpath(os.path.join(voxel_spacing_dir, paths["features"]), voxel_spacing_dir),
+            'painting': os.path.relpath(painting_path, segmentation_dir),
+            'prediction': os.path.relpath(prediction_path, segmentation_dir)
+        }
+
+        with open(config_path, 'w') as file:
+            json.dump(config, file)
+        
         data_manager = DataManager(datasets=[dataset])
         
         if not self.cell_canvas_app:

From b9995122866c534aa27c47aefd3f12eabe1f4ca7 Mon Sep 17 00:00:00 2001
From: Kyle Harrington <czi@kyleharrington.com>
Date: Wed, 17 Apr 2024 12:11:28 -0400
Subject: [PATCH 17/30] Make embedding computation button triggered, improved
 config support

---
 examples/run_app_copick.py                    | 83 ++++++++++---------
 .../semantic/_embedding_segmentor.py          |  7 +-
 2 files changed, 48 insertions(+), 42 deletions(-)

diff --git a/examples/run_app_copick.py b/examples/run_app_copick.py
index 82475f3..a686edd 100644
--- a/examples/run_app_copick.py
+++ b/examples/run_app_copick.py
@@ -390,58 +390,59 @@ def on_run_clicked(self, item, column):
         for dropdown in self.dropdowns.values():
             dropdown.clear()
 
+        # Define directories
+        voxel_spacing_dirs = glob.glob(os.path.join(static_path, "VoxelSpacing10*"))
+        segmentation_dir = self.get_segmentations_directory(static_path)
+        os.makedirs(segmentation_dir, exist_ok=True)
+
+        # Initialize dictionary to hold default selections from config
+        default_selections = {}
+
+        # Check for config file and load selections if present
         config_path = os.path.join(static_path, "dataset_config.json")
         if os.path.exists(config_path):
-            # Use the JSON file to set up the dropdowns
             with open(config_path, 'r') as file:
                 config = json.load(file)
-
-            # Populate dropdowns using the paths in the config
-            for key, rel_path in config.items():
-                abs_path = os.path.join(static_path, rel_path)
-                if os.path.exists(abs_path):
-                    dropdown_key = key.split('_')[0]  # 'image', 'features', 'painting', 'prediction'
-                    self.dropdowns[dropdown_key].addItem(rel_path, abs_path)
-        else:
-            # Find VoxelSpacing directories
-            # TODO hard coded voxel spacing here
-            voxel_spacing_dirs = glob.glob(os.path.join(static_path, "VoxelSpacing10*"))
-            segmentation_dir = self.get_segmentations_directory(static_path)
-
-            if not voxel_spacing_dirs:
-                print(f"No Voxel Spacing directories found in {static_path}. Please check the directory structure.")
-                return
-
-            voxel_spacing_dir = voxel_spacing_dirs[0]
+            default_selections = {
+                'image': os.path.join(voxel_spacing_dirs[0], config.get('image')),
+                'features': os.path.join(voxel_spacing_dirs[0], config.get('features')),
+                'painting': os.path.join(segmentation_dir, config.get('painting')),
+                'prediction': os.path.join(segmentation_dir, config.get('prediction'))
+            }
+
+        # Helper function to add items if not already in dropdown
+        def add_item_if_not_exists(dropdown, item_name, item_data):
+            if dropdown.findData(item_data) == -1:
+                dropdown.addItem(item_name, item_data)
+
+        # Load all zarr datasets from voxel spacing directories
+        if voxel_spacing_dirs:
             for voxel_spacing_dir in voxel_spacing_dirs:
-                # Find all Zarr datasets within the voxel spacing directory
                 zarr_datasets = glob.glob(os.path.join(voxel_spacing_dir, "*.zarr"))
-
                 for dataset_path in zarr_datasets:
                     dataset_name = os.path.basename(dataset_path)
                     if "_features.zarr" in dataset_name.lower():
-                        self.dropdowns["features"].addItem(dataset_name, os.path.join(voxel_spacing_dir, dataset_path))
+                        add_item_if_not_exists(self.dropdowns["features"], dataset_name, dataset_path)
                     else:
-                        self.dropdowns["image"].addItem(dataset_name + "/0", os.path.join(voxel_spacing_dir, dataset_path, "0"))
+                        add_item_if_not_exists(self.dropdowns["image"], dataset_name + "/0", dataset_path + "/0")
+
+        # Load all zarr datasets from segmentation directory
+        zarr_datasets = glob.glob(os.path.join(segmentation_dir, "*.zarr"))
+        for dataset_path in zarr_datasets:
+            dataset_name = os.path.basename(dataset_path)
+            if "painting" not in dataset_name.lower():
+                add_item_if_not_exists(self.dropdowns["prediction"], dataset_name, dataset_path)
+            if "prediction" not in dataset_name.lower():
+                add_item_if_not_exists(self.dropdowns["painting"], dataset_name, dataset_path)
+
+        # Set default selections in dropdowns if specified in the config
+        for key, dropdown in self.dropdowns.items():
+            if default_selections.get(key):
+                index = dropdown.findData(default_selections[key])
+                if index != -1:
+                    dropdown.setCurrentIndex(index)
+
 
-            # Handling segmentations
-            os.makedirs(segmentation_dir, exist_ok=True)
-            zarr_datasets = glob.glob(os.path.join(segmentation_dir, "*.zarr"))
-            voxel_spacing = self.get_voxel_spacing()
-            session_id = 0
-            default_painting_path = os.path.join(segmentation_dir, f'{voxel_spacing:.3f}_cellcanvas-painting_{session_id}_all-multilabel.zarr')
-            default_prediction_path = os.path.join(segmentation_dir, f'{voxel_spacing:.3f}_cellcanvas-prediction_{session_id}_all-multilabel.zarr')
-
-            self.dropdowns["painting"].addItem(os.path.basename(default_painting_path), default_painting_path)
-            self.dropdowns["prediction"].addItem(os.path.basename(default_prediction_path), default_prediction_path)
-
-            for dataset_path in zarr_datasets:
-                dataset_name = os.path.basename(dataset_path)
-                if "painting" not in dataset_name.lower():
-                    self.dropdowns["prediction"].addItem(dataset_name, dataset_path)
-                if "prediction" not in dataset_name.lower():
-                    self.dropdowns["painting"].addItem(dataset_name, dataset_path)
-                                                    
     def on_item_clicked(self, item, column):
         data = item.data(0, Qt.UserRole)
         if data:
diff --git a/src/cellcanvas/semantic/_embedding_segmentor.py b/src/cellcanvas/semantic/_embedding_segmentor.py
index 1863a1e..6f49933 100644
--- a/src/cellcanvas/semantic/_embedding_segmentor.py
+++ b/src/cellcanvas/semantic/_embedding_segmentor.py
@@ -262,7 +262,7 @@ def on_data_change(self, event, app):
         self.update_class_distribution_charts()
 
         # Update projection
-        self.start_computing_embedding_plot()
+        # self.start_computing_embedding_plot()
 
         self.widget.setupLegend()
 
@@ -1026,6 +1026,11 @@ def initUI(self):
         self.embedding_canvas = FigureCanvas(self.embedding_figure)
         self.stats_summary_layout.addWidget(self.embedding_canvas)
 
+        # Create a button for computing the embedding plot
+        self.compute_embedding_button = QPushButton("Compute Embedding Plot")
+        self.compute_embedding_button.clicked.connect(self.app.start_computing_embedding_plot)
+        self.stats_summary_layout.addWidget(self.compute_embedding_button)
+
         stats_summary_group.setLayout(self.stats_summary_layout)
         main_layout.addWidget(stats_summary_group)
 

From 942cae3f919958cef456c7e0f419c80bd24c7552 Mon Sep 17 00:00:00 2001
From: Kyle Harrington <czi@kyleharrington.com>
Date: Wed, 17 Apr 2024 12:52:16 -0400
Subject: [PATCH 18/30] Support for training on all pairs of images (denoised,
 wbp, etc.)

---
 examples/run_app_copick.py                    | 193 +++++++++++++-----
 .../semantic/_embedding_segmentor.py          |   2 +
 2 files changed, 141 insertions(+), 54 deletions(-)

diff --git a/examples/run_app_copick.py b/examples/run_app_copick.py
index a686edd..29338d5 100644
--- a/examples/run_app_copick.py
+++ b/examples/run_app_copick.py
@@ -179,12 +179,10 @@ def activate_layer(self, layer):
         layer.editable = True
         self.viewer.layers.selection.active = layer
 
-    def get_complete_data_manager(self):
+    def get_complete_data_manager(self, all_pairs=False):
         datasets = []
         for run in self.root.runs:
             run_dir = run.static_path
-            config_path = os.path.join(run_dir, "dataset_config.json")
-
             voxel_spacing_dir = self.get_default_voxel_spacing_directory(run_dir)
             segmentation_dir = self.get_segmentations_directory(run_dir)
 
@@ -193,64 +191,150 @@ def get_complete_data_manager(self):
                 continue
 
             os.makedirs(segmentation_dir, exist_ok=True)
+
+            voxel_spacing = self.get_voxel_spacing()
             
+            # Reused paths for all datasets in a run
+            painting_path = os.path.join(segmentation_dir, f'{voxel_spacing:.3f}_cellcanvas-painting_0_all-multilabel.zarr')
+            prediction_path = os.path.join(segmentation_dir, f'{voxel_spacing:.3f}_cellcanvas-prediction_0_all-multilabel.zarr')
+
+            zarr_datasets = glob.glob(os.path.join(voxel_spacing_dir, "*.zarr"))
+            image_feature_pairs = {}
+
+            # Locate all images and corresponding features
+            for dataset_path in zarr_datasets:
+                dataset_name = os.path.basename(dataset_path)
+                if dataset_name.endswith(".zarr") and not dataset_name.endswith("_features.zarr"):
+                    base_image_name = dataset_name.replace(".zarr", "")
+                    # Find corresponding feature files
+                    feature_files = [path for path in zarr_datasets if base_image_name in path and "_features.zarr" in path]
+                    for feature_path in feature_files:
+                        features_base_name = os.path.basename(feature_path).replace("_features.zarr", "")
+                        # Check if the image base name matches the start of the feature base name
+                        if features_base_name.startswith(base_image_name):
+                            image_feature_pairs[features_base_name] = {
+                                'image': os.path.join(dataset_path, "0"),  # Assuming highest resolution
+                                'features': feature_path
+                            }
+
+            # Handle either all pairs or only those specified by the configuration
+            config_path = os.path.join(run_dir, "dataset_config.json")
             if os.path.exists(config_path):
+                with open(config_path, 'r') as file:
+                    config = json.load(file)
+                    if 'painting' in config:
+                        painting_path = os.path.join(segmentation_dir, config['painting'])
+                    if 'prediction' in config:
+                        prediction_path = os.path.join(segmentation_dir, config['prediction'])
+
+            if not all_pairs:
                 with open(config_path, 'r') as file:
                     config = json.load(file)
                     image_path = os.path.join(voxel_spacing_dir, config['image'])
                     features_path = os.path.join(voxel_spacing_dir, config['features'])
-                    painting_path = os.path.join(segmentation_dir, config['painting'])
-                    prediction_path = os.path.join(segmentation_dir, config['prediction'])
+                    if 'painting' in config:
+                        painting_path = os.path.join(segmentation_dir, config['painting'])
+                    if 'prediction' in config:
+                        prediction_path = os.path.join(segmentation_dir, config['prediction'])
+
+                    # Load dataset with specific config paths
+                    dataset = DataSet.from_paths(
+                        image_path=image_path,
+                        features_path=features_path,
+                        labels_path=painting_path,
+                        segmentation_path=prediction_path,
+                        make_missing_datasets=True
+                    )
+                    datasets.append(dataset)
             else:
-                # Existing logic to find paths                
-                voxel_spacing = self.get_voxel_spacing()
-
-                zarr_datasets = glob.glob(os.path.join(voxel_spacing_dir, "*.zarr"))
-                image_path = None
-                features_path = None
-                painting_path = os.path.join(segmentation_dir, f'{voxel_spacing:.3f}_cellcanvas-painting_0_all-multilabel.zarr')
-                prediction_path = os.path.join(segmentation_dir, f'{voxel_spacing:.3f}_cellcanvas-prediction_0_all-multilabel.zarr')
-
-                for dataset_path in zarr_datasets:
-                    dataset_name = os.path.basename(dataset_path).lower()
-                    if "_features.zarr" in dataset_name:
-                        features_path = dataset_path
-                    elif "painting" in dataset_name:
-                        painting_path = dataset_path
-                    elif "prediction" in dataset_name:
-                        prediction_path = dataset_path
-                    else:
-                        # TODO hard coded to use highest resolution
-                        image_path = os.path.join(dataset_path, "0")
-
-                # Save paths to JSON
-                config = {
-                    'image': os.path.relpath(image_path, voxel_spacing_dir),
-                    'features': os.path.relpath(features_path, voxel_spacing_dir),
-                    'painting': os.path.relpath(painting_path, segmentation_dir),
-                    'prediction': os.path.relpath(prediction_path, segmentation_dir)
-                }
-                with open(config_path, 'w') as file:
-                    json.dump(config, file)
-
-            print(f"Fitting on paths:")
-            print(f"Image: {image_path}")
-            print(f"Features: {features_path}")
-            print(f"Painting: {painting_path}")
-            print(f"Prediction: {prediction_path}")
+                # Load all available pairs
+                for base_name, paths in image_feature_pairs.items():
+                    dataset = DataSet.from_paths(
+                        image_path=paths['image'],
+                        features_path=paths['features'],
+                        labels_path=painting_path,
+                        segmentation_path=prediction_path,
+                        make_missing_datasets=True
+                    )
+                    datasets.append(dataset)
+
+            print(f"Loaded datasets for run {run.name}")
+
+        return DataManager(datasets=datasets)        
+
+    # Only train on config pairs
+    # def get_complete_data_manager(self, all_pairs=False):
+    #     datasets = []
+    #     for run in self.root.runs:
+    #         run_dir = run.static_path
+    #         config_path = os.path.join(run_dir, "dataset_config.json")
+
+    #         voxel_spacing_dir = self.get_default_voxel_spacing_directory(run_dir)
+    #         segmentation_dir = self.get_segmentations_directory(run_dir)
+
+    #         if not voxel_spacing_dir:
+    #             print(f"No Voxel Spacing directory found for run {run.name}.")
+    #             continue
+
+    #         os.makedirs(segmentation_dir, exist_ok=True)
+            
+    #         if os.path.exists(config_path):
+    #             with open(config_path, 'r') as file:
+    #                 config = json.load(file)
+    #                 image_path = os.path.join(voxel_spacing_dir, config['image'])
+    #                 features_path = os.path.join(voxel_spacing_dir, config['features'])
+    #                 painting_path = os.path.join(segmentation_dir, config['painting'])
+    #                 prediction_path = os.path.join(segmentation_dir, config['prediction'])
+    #         else:
+    #             # Existing logic to find paths                
+    #             voxel_spacing = self.get_voxel_spacing()
+
+    #             zarr_datasets = glob.glob(os.path.join(voxel_spacing_dir, "*.zarr"))
+    #             image_path = None
+    #             features_path = None
+    #             painting_path = os.path.join(segmentation_dir, f'{voxel_spacing:.3f}_cellcanvas-painting_0_all-multilabel.zarr')
+    #             prediction_path = os.path.join(segmentation_dir, f'{voxel_spacing:.3f}_cellcanvas-prediction_0_all-multilabel.zarr')
+
+    #             for dataset_path in zarr_datasets:
+    #                 dataset_name = os.path.basename(dataset_path).lower()
+    #                 if "_features.zarr" in dataset_name:
+    #                     features_path = dataset_path
+    #                 elif "painting" in dataset_name:
+    #                     painting_path = dataset_path
+    #                 elif "prediction" in dataset_name:
+    #                     prediction_path = dataset_path
+    #                 else:
+    #                     # TODO hard coded to use highest resolution
+    #                     image_path = os.path.join(dataset_path, "0")
+
+    #             # Save paths to JSON
+    #             config = {
+    #                 'image': os.path.relpath(image_path, voxel_spacing_dir),
+    #                 'features': os.path.relpath(features_path, voxel_spacing_dir),
+    #                 'painting': os.path.relpath(painting_path, segmentation_dir),
+    #                 'prediction': os.path.relpath(prediction_path, segmentation_dir)
+    #             }
+    #             with open(config_path, 'w') as file:
+    #                 json.dump(config, file)
+
+    #         print(f"Fitting on paths:")
+    #         print(f"Image: {image_path}")
+    #         print(f"Features: {features_path}")
+    #         print(f"Painting: {painting_path}")
+    #         print(f"Prediction: {prediction_path}")
                     
-            # Load dataset with paths
-            if image_path and features_path:
-                dataset = DataSet.from_paths(
-                    image_path=image_path,
-                    features_path=features_path,
-                    labels_path=painting_path,
-                    segmentation_path=prediction_path,
-                    make_missing_datasets=True
-                )
-                datasets.append(dataset)
-
-        return DataManager(datasets=datasets)
+    #         # Load dataset with paths
+    #         if image_path and features_path:
+    #             dataset = DataSet.from_paths(
+    #                 image_path=image_path,
+    #                 features_path=features_path,
+    #                 labels_path=painting_path,
+    #                 segmentation_path=prediction_path,
+    #                 make_missing_datasets=True
+    #             )
+    #             datasets.append(dataset)
+
+    #     return DataManager(datasets=datasets)
 
     def get_default_voxel_spacing_directory(self, static_path):
         # Find VoxelSpacing directories, assuming a hard coded match for now
@@ -287,7 +371,8 @@ def fit_on_all(self):
 
     @thread_worker
     def threaded_fit_on_all(self):
-        data_manager = self.get_complete_data_manager()
+        # Fit model on all pairs
+        data_manager = self.get_complete_data_manager(all_pairs=True)
 
         clf = RandomForestClassifier(
             n_estimators=50,
diff --git a/src/cellcanvas/semantic/_embedding_segmentor.py b/src/cellcanvas/semantic/_embedding_segmentor.py
index 6f49933..6179b5b 100644
--- a/src/cellcanvas/semantic/_embedding_segmentor.py
+++ b/src/cellcanvas/semantic/_embedding_segmentor.py
@@ -1185,6 +1185,8 @@ def activateLabel(self, current_label_id):
                 self.highlightLabel(label_edit)
             else:
                 self.removeHighlightLabel(label_edit)
+
+        self.app.viewer.layers.selection.active = painting_layer
         
     def updateLegendHighlighting(self, selected_label_event):
         """Update highlighting of legend"""

From 2c6e7dcf9f4208bf2536a4999af7b23b58aa992e Mon Sep 17 00:00:00 2001
From: Kyle Harrington <czi@kyleharrington.com>
Date: Mon, 22 Apr 2024 10:49:43 -0400
Subject: [PATCH 19/30] Fix for removed model dropdown

---
 examples/run_app_copick.py                      | 1 +
 src/cellcanvas/semantic/_embedding_segmentor.py | 4 +---
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/examples/run_app_copick.py b/examples/run_app_copick.py
index 29338d5..b2b2d6d 100644
--- a/examples/run_app_copick.py
+++ b/examples/run_app_copick.py
@@ -54,6 +54,7 @@
 
 # Project root
 root = CopickRootFSSpec.from_file("/Volumes/kish@CZI.T7/demo_project/copick_config_kyle.json")
+# root = CopickRootFSSpec.from_file("/Volumes/kish@CZI.T7/chlamy_copick/copick_config_kyle.json")
 
 ## Root API
 root.config # CopickConfig object
diff --git a/src/cellcanvas/semantic/_embedding_segmentor.py b/src/cellcanvas/semantic/_embedding_segmentor.py
index 6179b5b..539b757 100644
--- a/src/cellcanvas/semantic/_embedding_segmentor.py
+++ b/src/cellcanvas/semantic/_embedding_segmentor.py
@@ -294,9 +294,7 @@ def threaded_on_data_change(
             self.start_prediction()
 
     def get_model_type(self):
-        if not self.model_type:
-            self.model_type = self.widget.model_dropdown.currentText()
-        return self.model_type
+        return "Random Forest"
 
     def get_corner_pixels(self):
         if self.corner_pixels is None:

From faf4f13f2aab37327c1b66719b0eae8a7e52a76b Mon Sep 17 00:00:00 2001
From: Kyle Harrington <czi@kyleharrington.com>
Date: Mon, 22 Apr 2024 10:55:17 -0400
Subject: [PATCH 20/30] Refactor into if main clause

---
 examples/run_app_copick.py | 61 ++++++++++++++++----------------------
 1 file changed, 26 insertions(+), 35 deletions(-)

diff --git a/examples/run_app_copick.py b/examples/run_app_copick.py
index b2b2d6d..f1c5b2c 100644
--- a/examples/run_app_copick.py
+++ b/examples/run_app_copick.py
@@ -52,32 +52,6 @@
 
 import dask.array as da
 
-# Project root
-root = CopickRootFSSpec.from_file("/Volumes/kish@CZI.T7/demo_project/copick_config_kyle.json")
-# root = CopickRootFSSpec.from_file("/Volumes/kish@CZI.T7/chlamy_copick/copick_config_kyle.json")
-
-## Root API
-root.config # CopickConfig object
-root.runs # List of run objects (lazy loading from filesystem location(s))
-
-# TODO update to use root.config.pickable_objects
-
-
-def get_copick_colormap():
-    """Return a colormap for distinct label colors based on the pickable objects."""
-    colormap = {obj.label: np.array(obj.color)/255.0 for obj in root.config.pickable_objects}
-    colormap[None] = np.array([1, 1, 1, 1])
-    colormap[9] = np.array([0, 1, 1, 1])
-    return colormap
-
-cellcanvas.utils.get_labels_colormap = get_copick_colormap
-
-# Use the function
-colormap = get_copick_colormap()
-
-# TODO set names from copick config
-# cell_canvas.semantic_segmentor.widget.class_labels_mapping = {obj.label: obj.name for obj in root.config.pickable_objects}
-
 import napari
 from qtpy.QtWidgets import QTreeWidget, QTreeWidgetItem, QVBoxLayout, QWidget
 from qtpy.QtCore import Qt
@@ -129,6 +103,16 @@ def __init__(self, viewer: napari.Viewer, root):
 
         self.populate_tree()
 
+        # Monkeypatch
+        cellcanvas.utils.get_labels_colormap = self.get_copick_colormap
+
+    def get_copick_colormap(self):
+        """Return a colormap for distinct label colors based on the pickable objects."""
+        colormap = {obj.label: np.array(obj.color)/255.0 for obj in root.config.pickable_objects}
+        colormap[None] = np.array([1, 1, 1, 1])
+        colormap[9] = np.array([0, 1, 1, 1])
+        return colormap
+    
     def get_voxel_spacing(self):
         return 10
         
@@ -645,24 +629,32 @@ def initialize_or_update_cell_canvas(self):
         # Set colormap
         # painting_layer.colormap.color_dict
         #  self.app.painting_labels
-        self.cell_canvas_app.semantic_segmentor.set_colormap(get_copick_colormap())
+        self.cell_canvas_app.semantic_segmentor.set_colormap(self.get_copick_colormap())
         self.cell_canvas_app.semantic_segmentor.painting_labels = [obj.label for obj in root.config.pickable_objects] + [9]
         self.cell_canvas_app.semantic_segmentor.widget.class_labels_mapping = {obj.label: obj.name for obj in root.config.pickable_objects}
 
         self.cell_canvas_app.semantic_segmentor.widget.class_labels_mapping[9] = 'background'
         self.cell_canvas_app.semantic_segmentor.widget.setupLegend()
 
-viewer = napari.Viewer()
+if __name__ == "__main__":
+    # Project root
+    root = CopickRootFSSpec.from_file("/Volumes/kish@CZI.T7/demo_project/copick_config_kyle.json")
+    # root = CopickRootFSSpec.from_file("/Volumes/kish@CZI.T7/chlamy_copick/copick_config_kyle.json")
 
-# Hide layer list and controls
-# viewer.window.qt_viewer.dockLayerList.setVisible(False)
-# viewer.window.qt_viewer.dockLayerControls.setVisible(False)
+    ## Root API
+    root.config # CopickConfig object
+    root.runs # List of run objects (lazy loading from filesystem location(s))
+        
+    viewer = napari.Viewer()
 
-copick_explorer_widget = NapariCopickExplorer(viewer, root)
-viewer.window.add_dock_widget(copick_explorer_widget, name="Copick Explorer", area="left")
+    # Hide layer list and controls
+    # viewer.window.qt_viewer.dockLayerList.setVisible(False)
+    # viewer.window.qt_viewer.dockLayerControls.setVisible(False)
 
+    copick_explorer_widget = NapariCopickExplorer(viewer, root)
+    viewer.window.add_dock_widget(copick_explorer_widget, name="Copick Explorer", area="left")
 
-# napari.run()
+    # napari.run()
 
 # TODO finish making the prediction computation more lazy
 # the strategy should be to start computing labels chunkwise
@@ -676,4 +668,3 @@ def initialize_or_update_cell_canvas(self):
 # - override exclusion of non-zero labels
 # - consistent colormap in the charts
 # - consistent colormap in the painted part of the labels image
-

From 24e2f28fbaec90376facf913b958e9b58327421e Mon Sep 17 00:00:00 2001
From: Kyle Harrington <czi@kyleharrington.com>
Date: Mon, 22 Apr 2024 11:20:37 -0400
Subject: [PATCH 21/30] Move copick widget into source tree

---
 src/cellcanvas/_copick/widget.py | 670 +++++++++++++++++++++++++++++++
 1 file changed, 670 insertions(+)
 create mode 100644 src/cellcanvas/_copick/widget.py

diff --git a/src/cellcanvas/_copick/widget.py b/src/cellcanvas/_copick/widget.py
new file mode 100644
index 0000000..f1c5b2c
--- /dev/null
+++ b/src/cellcanvas/_copick/widget.py
@@ -0,0 +1,670 @@
+"""Example of using CellCanvas to pick particles on a surface.
+
+To use:
+1. update base_file_path to point to cropped_covid.zarr example file
+2. Run the script to launch CellCanvas
+3. Paint/predict until you're happy with the result. The seeded labels are:
+    - 1: background (including inside the capsules)
+    - 2: membrane
+    - 3: spike proteins
+3b. You might want to switch the image layer into the plane
+    depiction before doing the instance segmentation.
+    Sometimes I have trouble manipulating the plane after
+    the instance segmentation - need to look into this.
+4. Once you're happy with the prediction, click the "instance segmentation" tab
+5. Set the label value to 2. This will extract the membrane and
+    make instances via connected components.
+6. Remove the small objects. Suggested threshold: 100
+7. Alt + left mouse button to select an instance to modify.
+    Once select, you can dilate, erode, etc. to smooth it.
+8. With the segment still selected, you can then mesh it
+   using the mesh widget. You can play with the smoothing parameters.
+9. If the mesh looks good, switch to the "geometry" tab.
+    Select the mesh and start surfing!
+"""
+from collections import defaultdict
+import os
+import numpy as np
+import napari
+import cellcanvas
+from cellcanvas._app.main_app import CellCanvasApp, QtCellCanvas
+from cellcanvas.data.data_manager import DataManager
+from cellcanvas.data.data_set import DataSet
+from napari.qt.threading import thread_worker
+
+import sys
+import logging
+import json
+import copick
+from copick.impl.filesystem import CopickRootFSSpec
+import zarr
+
+from qtpy.QtWidgets import QTreeWidget, QTreeWidgetItem, QVBoxLayout, QWidget, QComboBox, QPushButton, QLabel
+from qtpy.QtCore import Qt
+import glob  # For pattern matching of file names
+
+from sklearn.ensemble import RandomForestClassifier
+
+from cellcanvas.semantic.segmentation_manager import (
+    SemanticSegmentationManager,
+)
+from cellcanvas.utils import get_active_button_color
+
+import dask.array as da
+
+import napari
+from qtpy.QtWidgets import QTreeWidget, QTreeWidgetItem, QVBoxLayout, QWidget
+from qtpy.QtCore import Qt
+
+class NapariCopickExplorer(QWidget):
+    def __init__(self, viewer: napari.Viewer, root):
+        super().__init__()
+        self.viewer = viewer
+        self.root = root
+        self.selected_run = None
+        self.cell_canvas_app = None
+
+        layout = QVBoxLayout()
+        self.setLayout(layout)
+
+        self._init_logging()
+
+        # Adding new buttons for "Fit on all" and "Predict for all"
+        self.fit_all_button = QPushButton("Fit on all")
+        self.fit_all_button.clicked.connect(self.fit_on_all)
+        layout.addWidget(self.fit_all_button)
+
+        self.predict_all_button = QPushButton("Predict for all")
+        self.predict_all_button.clicked.connect(self.predict_for_all)
+        layout.addWidget(self.predict_all_button)
+        
+        # Dropdowns for each data layer
+        self.dropdowns = {}
+        self.layer_buttons = {}
+        for layer in ["image", "features", "painting", "prediction"]:
+            # Make layer button
+            button = QPushButton(f"Select {layer.capitalize()} Layer")
+            button.clicked.connect(lambda checked, layer=layer: self.activate_layer(layer))
+            layout.addWidget(button)
+            self.layer_buttons[layer] = button
+            # Make layer selection dropdown
+            self.dropdowns[layer] = QComboBox()
+            layout.addWidget(self.dropdowns[layer])
+
+        # Button to update CellCanvas with the selected dataset
+        self.update_button = QPushButton("Initialize/Update CellCanvas")
+        self.update_button.clicked.connect(self.initialize_or_update_cell_canvas)
+        layout.addWidget(self.update_button)
+
+        self.tree = QTreeWidget()
+        self.tree.setHeaderLabel("Copick Runs")
+        self.tree.itemClicked.connect(self.on_run_clicked)
+        layout.addWidget(self.tree)
+
+        self.populate_tree()
+
+        # Monkeypatch
+        cellcanvas.utils.get_labels_colormap = self.get_copick_colormap
+
+    def get_copick_colormap(self):
+        """Return a {label: color array} colormap built from this explorer's copick pickable objects."""
+        colormap = {obj.label: np.array(obj.color)/255.0 for obj in self.root.config.pickable_objects}  # self.root: no module-level `root` exists on import
+        colormap[None] = np.array([1, 1, 1, 1])
+        colormap[9] = np.array([0, 1, 1, 1])
+        return colormap
+    
+    def get_voxel_spacing(self):
+        return 10
+        
+    def _init_logging(self):
+        self.logger = logging.getLogger("cellcanvas")
+        self.logger.setLevel(logging.DEBUG)
+        streamHandler = logging.StreamHandler(sys.stdout)
+        formatter = logging.Formatter(
+            "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
+        )
+        streamHandler.setFormatter(formatter)
+        self.logger.addHandler(streamHandler)
+        
+
+    def populate_tree(self):
+        for run in self.root.runs:
+            run_item = QTreeWidgetItem(self.tree, [run.name])
+            run_item.setData(0, Qt.UserRole, run)
+
+            for category in ["segmentations", "meshes", "picks", "voxel_spacings"]:
+                category_item = QTreeWidgetItem(run_item, [category])
+                items = getattr(run, category)
+                for item in items:
+                    if category == "picks":
+                        item_name = item.pickable_object_name
+                    else:
+                        item_name = getattr(item, 'name', 'Unnamed')
+
+                    child_item = QTreeWidgetItem(category_item, [item_name])
+                    child_item.setData(0, Qt.UserRole, item)
+
+                    # list tomograms
+                    if category == "voxel_spacings":
+                        for tomogram in item.tomograms:
+                            tomo_item = QTreeWidgetItem(child_item, [f"Tomogram: {tomogram.tomo_type}"])
+                            tomo_item.setData(0, Qt.UserRole, tomogram)
+
+    def activate_layer(self, layer):
+        print(f"Activating layer {layer}")
+        if layer == "image":
+            layer = self.cell_canvas_app.semantic_segmentor.data_layer
+        elif layer == "painting":
+            layer = self.cell_canvas_app.semantic_segmentor.painting_layer
+        elif layer == "prediction":
+            layer = self.cell_canvas_app.semantic_segmentor.prediction_layer
+        else:
+            return
+        layer.visible = True
+        layer.editable = True
+        self.viewer.layers.selection.active = layer
+
+    def get_complete_data_manager(self, all_pairs=False):
+        """Build a DataManager over all runs: every image/features pair when all_pairs is
+        True, otherwise only the pair named in each run's dataset_config.json (runs without
+        a configured pair are skipped instead of raising)."""
+        datasets = []
+        for run in self.root.runs:
+            run_dir = run.static_path
+            voxel_spacing_dir = self.get_default_voxel_spacing_directory(run_dir)
+            segmentation_dir = self.get_segmentations_directory(run_dir)
+
+            if not voxel_spacing_dir:
+                print(f"No Voxel Spacing directory found for run {run.name}.")
+                continue
+
+            os.makedirs(segmentation_dir, exist_ok=True)
+
+            voxel_spacing = self.get_voxel_spacing()
+
+            # Reused paths for all datasets in a run
+            painting_path = os.path.join(segmentation_dir, f'{voxel_spacing:.3f}_cellcanvas-painting_0_all-multilabel.zarr')
+            prediction_path = os.path.join(segmentation_dir, f'{voxel_spacing:.3f}_cellcanvas-prediction_0_all-multilabel.zarr')
+
+            zarr_datasets = glob.glob(os.path.join(voxel_spacing_dir, "*.zarr"))
+            image_feature_pairs = {}
+
+            # Locate all images and corresponding features
+            for dataset_path in zarr_datasets:
+                dataset_name = os.path.basename(dataset_path)
+                if dataset_name.endswith(".zarr") and not dataset_name.endswith("_features.zarr"):
+                    base_image_name = dataset_name.replace(".zarr", "")
+                    # Find corresponding feature files
+                    feature_files = [path for path in zarr_datasets if base_image_name in path and "_features.zarr" in path]
+                    for feature_path in feature_files:
+                        features_base_name = os.path.basename(feature_path).replace("_features.zarr", "")
+                        # Check if the image base name matches the start of the feature base name
+                        if features_base_name.startswith(base_image_name):
+                            image_feature_pairs[features_base_name] = {
+                                'image': os.path.join(dataset_path, "0"),  # Assuming highest resolution
+                                'features': feature_path
+                            }
+
+            # Handle either all pairs or only those specified by the configuration
+            config_path = os.path.join(run_dir, "dataset_config.json")
+            config = {}
+            if os.path.exists(config_path):
+                with open(config_path, 'r') as file:
+                    config = json.load(file)
+            # Paths named in the config override the per-run defaults
+            if 'painting' in config:
+                painting_path = os.path.join(segmentation_dir, config['painting'])
+            if 'prediction' in config:
+                prediction_path = os.path.join(segmentation_dir, config['prediction'])
+
+            if not all_pairs:
+                # Without a config naming an image/features pair there is nothing to load
+                if 'image' not in config or 'features' not in config:
+                    print(f"No image/features configured in {config_path} for run {run.name}; skipping.")
+                    continue
+                # Load dataset with specific config paths
+                dataset = DataSet.from_paths(
+                    image_path=os.path.join(voxel_spacing_dir, config['image']),
+                    features_path=os.path.join(voxel_spacing_dir, config['features']),
+                    labels_path=painting_path,
+                    segmentation_path=prediction_path,
+                    make_missing_datasets=True
+                )
+                datasets.append(dataset)
+            else:
+                # Load all available pairs
+                for base_name, paths in image_feature_pairs.items():
+                    dataset = DataSet.from_paths(
+                        image_path=paths['image'],
+                        features_path=paths['features'],
+                        labels_path=painting_path,
+                        segmentation_path=prediction_path,
+                        make_missing_datasets=True
+                    )
+                    datasets.append(dataset)
+
+            print(f"Loaded datasets for run {run.name}")
+
+        return DataManager(datasets=datasets)
+
+    # Only train on config pairs
+    # def get_complete_data_manager(self, all_pairs=False):
+    #     datasets = []
+    #     for run in self.root.runs:
+    #         run_dir = run.static_path
+    #         config_path = os.path.join(run_dir, "dataset_config.json")
+
+    #         voxel_spacing_dir = self.get_default_voxel_spacing_directory(run_dir)
+    #         segmentation_dir = self.get_segmentations_directory(run_dir)
+
+    #         if not voxel_spacing_dir:
+    #             print(f"No Voxel Spacing directory found for run {run.name}.")
+    #             continue
+
+    #         os.makedirs(segmentation_dir, exist_ok=True)
+            
+    #         if os.path.exists(config_path):
+    #             with open(config_path, 'r') as file:
+    #                 config = json.load(file)
+    #                 image_path = os.path.join(voxel_spacing_dir, config['image'])
+    #                 features_path = os.path.join(voxel_spacing_dir, config['features'])
+    #                 painting_path = os.path.join(segmentation_dir, config['painting'])
+    #                 prediction_path = os.path.join(segmentation_dir, config['prediction'])
+    #         else:
+    #             # Existing logic to find paths                
+    #             voxel_spacing = self.get_voxel_spacing()
+
+    #             zarr_datasets = glob.glob(os.path.join(voxel_spacing_dir, "*.zarr"))
+    #             image_path = None
+    #             features_path = None
+    #             painting_path = os.path.join(segmentation_dir, f'{voxel_spacing:.3f}_cellcanvas-painting_0_all-multilabel.zarr')
+    #             prediction_path = os.path.join(segmentation_dir, f'{voxel_spacing:.3f}_cellcanvas-prediction_0_all-multilabel.zarr')
+
+    #             for dataset_path in zarr_datasets:
+    #                 dataset_name = os.path.basename(dataset_path).lower()
+    #                 if "_features.zarr" in dataset_name:
+    #                     features_path = dataset_path
+    #                 elif "painting" in dataset_name:
+    #                     painting_path = dataset_path
+    #                 elif "prediction" in dataset_name:
+    #                     prediction_path = dataset_path
+    #                 else:
+    #                     # TODO hard coded to use highest resolution
+    #                     image_path = os.path.join(dataset_path, "0")
+
+    #             # Save paths to JSON
+    #             config = {
+    #                 'image': os.path.relpath(image_path, voxel_spacing_dir),
+    #                 'features': os.path.relpath(features_path, voxel_spacing_dir),
+    #                 'painting': os.path.relpath(painting_path, segmentation_dir),
+    #                 'prediction': os.path.relpath(prediction_path, segmentation_dir)
+    #             }
+    #             with open(config_path, 'w') as file:
+    #                 json.dump(config, file)
+
+    #         print(f"Fitting on paths:")
+    #         print(f"Image: {image_path}")
+    #         print(f"Features: {features_path}")
+    #         print(f"Painting: {painting_path}")
+    #         print(f"Prediction: {prediction_path}")
+                    
+    #         # Load dataset with paths
+    #         if image_path and features_path:
+    #             dataset = DataSet.from_paths(
+    #                 image_path=image_path,
+    #                 features_path=features_path,
+    #                 labels_path=painting_path,
+    #                 segmentation_path=prediction_path,
+    #                 make_missing_datasets=True
+    #             )
+    #             datasets.append(dataset)
+
+    #     return DataManager(datasets=datasets)
+
+    def get_default_voxel_spacing_directory(self, static_path):
+        # Find VoxelSpacing directories, assuming a hard coded match for now
+        voxel_spacing = self.get_voxel_spacing()
+        voxel_spacing_dirs = glob.glob(os.path.join(static_path, f'VoxelSpacing{voxel_spacing:.3f}'))
+        if voxel_spacing_dirs:
+            return voxel_spacing_dirs[0]
+        return None
+
+    def get_segmentations_directory(self, static_path):
+        segmentation_dir = os.path.join(static_path, "Segmentations")
+        return segmentation_dir
+
+    def change_button_color(self, button, color):
+        button.setStyleSheet(f"background-color: {color};")
+
+    def reset_button_color(self, button):
+        self.change_button_color(button, "")
+    
+    def fit_on_all(self):
+        if not self.cell_canvas_app:
+            print("Initialize cell canvas first")
+            return
+        
+        print("Fitting all models to the selected dataset.")
+
+        self.change_button_color(
+            self.fit_all_button, get_active_button_color()
+        )
+        
+        self.model_fit_worker = self.threaded_fit_on_all()
+        self.model_fit_worker.returned.connect(self.on_model_fit_completed)
+        self.model_fit_worker.start()
+
+    @thread_worker
+    def threaded_fit_on_all(self):
+        # Fit model on all pairs
+        data_manager = self.get_complete_data_manager(all_pairs=True)
+
+        clf = RandomForestClassifier(
+            n_estimators=50,
+            n_jobs=-1,
+            max_depth=10,
+            max_samples=0.05,
+        )
+        
+        segmentation_manager = SemanticSegmentationManager(
+            data=data_manager, model=clf
+        )
+        segmentation_manager.fit()
+
+        return segmentation_manager        
+
+    def on_model_fit_completed(self, segmentation_manager):
+        self.logger.debug("on_model_fit_completed")
+
+        self.cell_canvas_app.semantic_segmentor.segmentation_manager = segmentation_manager
+
+        # Reset color
+        self.reset_button_color(self.fit_all_button)
+        
+    def predict_for_all(self):
+        """Run threaded_predict_for_all in a background worker; requires an initialized CellCanvas app."""
+        if not self.cell_canvas_app:
+            print("Initialize cell canvas first")
+            return
+
+        print("Starting prediction for all datasets.")
+
+        self.change_button_color(
+            self.predict_all_button, get_active_button_color()
+        )
+        self.predict_worker = self.threaded_predict_for_all()
+        self.predict_worker.returned.connect(self.on_predict_completed)
+        self.predict_worker.start()
+
+    def on_predict_completed(self, result):
+        self.logger.debug("on_predict_completed")
+
+        # Reset color
+        self.reset_button_color(self.predict_all_button)
+        
+    @thread_worker
+    def threaded_predict_for_all(self):
+        print("Running predictions on all datasets.")
+
+        # Check if segmentation manager is properly initialized
+        if not hasattr(self.cell_canvas_app.semantic_segmentor, 'segmentation_manager') or self.cell_canvas_app.semantic_segmentor.segmentation_manager is None:
+            print("Segmentation manager is not initialized.")
+            return
+
+        # Retrieve the complete data manager that includes all runs
+        data_manager = self.get_complete_data_manager()
+
+        # Iterate through each dataset within the data manager
+        for dataset in data_manager.datasets:
+            dataset_features = da.asarray(dataset.concatenated_features)
+            chunk_shape = dataset_features.chunksize
+            shape = dataset_features.shape
+            dtype = dataset_features.dtype
+
+            # Iterate over chunks
+            for z in range(0, shape[1], chunk_shape[1]):
+                for y in range(0, shape[2], chunk_shape[2]):
+                    for x in range(0, shape[3], chunk_shape[3]):
+                        # Compute the slice for the current chunk
+                        # in feature,z,y,x order
+                        chunk_slice = (
+                            slice(None),
+                            slice(z, min(z + chunk_shape[1], shape[1])),
+                            slice(y, min(y + chunk_shape[2], shape[2])),
+                            slice(x, min(x + chunk_shape[3], shape[3])),                        
+                        )
+                        print(f"Predicting on chunk {chunk_slice}")
+
+                        # Extract the current chunk
+                        chunk = dataset_features[chunk_slice].compute()
+
+                        # Predict on the chunk (adding 1 to each prediction)
+                        predicted_chunk = self.cell_canvas_app.semantic_segmentor.segmentation_manager.predict(chunk) + 1
+
+                        # Write the prediction to the corresponding region in the Zarr array
+                        dataset.segmentation[chunk_slice[1:]] = predicted_chunk
+
+            print(f"Predictions written")
+
+    def on_run_clicked(self, item, column):
+        data = item.data(0, Qt.UserRole)
+        if not isinstance(data, copick.impl.filesystem.CopickRunFSSpec):
+            self.on_item_clicked(item, column)
+            return
+
+        self.selected_run = data
+        static_path = self.selected_run.static_path
+        self.logger.info(f"Selected {static_path}")
+
+        # Clear existing items
+        for dropdown in self.dropdowns.values():
+            dropdown.clear()
+
+        # Define directories
+        voxel_spacing_dirs = glob.glob(os.path.join(static_path, "VoxelSpacing10*"))
+        segmentation_dir = self.get_segmentations_directory(static_path)
+        os.makedirs(segmentation_dir, exist_ok=True)
+
+        # Initialize dictionary to hold default selections from config
+        default_selections = {}
+
+        # Check for config file and load selections if present
+        config_path = os.path.join(static_path, "dataset_config.json")
+        if os.path.exists(config_path):
+            with open(config_path, 'r') as file:
+                config = json.load(file)
+            default_selections = {
+                'image': os.path.join(voxel_spacing_dirs[0], config.get('image')),
+                'features': os.path.join(voxel_spacing_dirs[0], config.get('features')),
+                'painting': os.path.join(segmentation_dir, config.get('painting')),
+                'prediction': os.path.join(segmentation_dir, config.get('prediction'))
+            }
+
+        # Helper function to add items if not already in dropdown
+        def add_item_if_not_exists(dropdown, item_name, item_data):
+            if dropdown.findData(item_data) == -1:
+                dropdown.addItem(item_name, item_data)
+
+        # Load all zarr datasets from voxel spacing directories
+        if voxel_spacing_dirs:
+            for voxel_spacing_dir in voxel_spacing_dirs:
+                zarr_datasets = glob.glob(os.path.join(voxel_spacing_dir, "*.zarr"))
+                for dataset_path in zarr_datasets:
+                    dataset_name = os.path.basename(dataset_path)
+                    if "_features.zarr" in dataset_name.lower():
+                        add_item_if_not_exists(self.dropdowns["features"], dataset_name, dataset_path)
+                    else:
+                        add_item_if_not_exists(self.dropdowns["image"], dataset_name + "/0", dataset_path + "/0")
+
+        # Load all zarr datasets from segmentation directory
+        zarr_datasets = glob.glob(os.path.join(segmentation_dir, "*.zarr"))
+        for dataset_path in zarr_datasets:
+            dataset_name = os.path.basename(dataset_path)
+            if "painting" not in dataset_name.lower():
+                add_item_if_not_exists(self.dropdowns["prediction"], dataset_name, dataset_path)
+            if "prediction" not in dataset_name.lower():
+                add_item_if_not_exists(self.dropdowns["painting"], dataset_name, dataset_path)
+
+        # Set default selections in dropdowns if specified in the config
+        for key, dropdown in self.dropdowns.items():
+            if default_selections.get(key):
+                index = dropdown.findData(default_selections[key])
+                if index != -1:
+                    dropdown.setCurrentIndex(index)
+
+
+    def on_item_clicked(self, item, column):
+        data = item.data(0, Qt.UserRole)
+        if data:
+            if isinstance(data, copick.impl.filesystem.CopickPicksFSSpec):
+                self.open_picks(data)
+            elif isinstance(data, copick.impl.filesystem.CopickTomogramFSSpec):
+                self.open_tomogram(data)
+            elif isinstance(data, copick.models.CopickSegmentation):
+                self.open_labels(data)
+
+    def open_picks(self, picks):
+        """Load the points from a copick picks JSON file and add them to the viewer as a points layer."""
+        with open(picks.path, 'r') as f:
+            points_data = json.load(f)
+        # Extracting points locations
+        points_locations = [
+            [point['location']['z'], point['location']['y'], point['location']['x']]
+            for point in points_data['points']
+        ]
+
+        # TODO hard coded scaling
+        points_array = np.array(points_locations) / 10
+
+        # Color the layer like its pickable object, looked up on this widget's own copick root (no module-level `root` exists on import)
+        pickable_object = [obj for obj in self.root.config.pickable_objects if obj.name == picks.pickable_object_name][0]
+        self.viewer.add_points(points_array, name=picks.pickable_object_name, size=25, out_of_slice_display=True, face_color=np.array(pickable_object.color)/255.0)
+
+    def open_tomogram(self, tomogram):
+        zarr_store = zarr.open(tomogram.zarr(), mode='r')
+        print(f"open_tomogram {tomogram.zarr()}")
+        # TODO extract scale/transform info
+
+        # TODO scale is hard coded to 10 here
+        self.viewer.add_image(zarr_store[0], name=f"Tomogram: {tomogram.tomo_type}")
+
+    def open_labels(self, tomogram):
+        zarr_store = zarr.open(tomogram.zarr(), mode='r')
+        print(f"open_labels {tomogram.zarr()}")
+        # TODO extract scale/transform info
+
+        # TODO scale is hard coded to 10 here
+        self.viewer.add_image(zarr_store[0], name=f"Tomogram: {tomogram.name}")
+
+    def initialize_or_update_cell_canvas(self):
+        # Collect paths from dropdowns
+        paths = {layer: dropdown.currentText() for layer, dropdown in self.dropdowns.items()}
+        
+        if not paths["image"] or not paths["features"]:
+            print("Please ensure image and feature paths are selected before initializing/updating CellCanvas.")
+            return        
+
+        run_dir = self.selected_run.static_path
+        segmentation_dir = self.get_segmentations_directory(self.selected_run.static_path)
+        voxel_spacing_dir = self.get_default_voxel_spacing_directory(self.selected_run.static_path)
+
+        voxel_spacing = self.get_voxel_spacing()
+
+        # Ensure segmentations directory exists
+        os.makedirs(segmentation_dir, exist_ok=True)
+        
+        default_painting_path = os.path.join(segmentation_dir, f'{voxel_spacing:.3f}_cellcanvas-painting_0_all-multilabel.zarr')
+        default_prediction_path = os.path.join(segmentation_dir, f'{voxel_spacing:.3f}_cellcanvas-prediction_0_all-multilabel.zarr')
+
+        painting_path = default_painting_path if not paths["painting"] else os.path.join(segmentation_dir, paths["painting"])
+        prediction_path = default_prediction_path if not paths["prediction"] else os.path.join(segmentation_dir, paths["prediction"])
+        image_path = os.path.join(voxel_spacing_dir, paths['image'])
+        features_path = os.path.join(voxel_spacing_dir, paths["features"])
+        
+        # TODO note this is hard coded to use the highest resolution of a multiscale zarr
+        print(f"Opening paths:")
+        print(f"Image: {image_path}")
+        print(f"Features: {features_path}")
+        print(f"Painting: {painting_path}")
+        print(f"Prediction: {prediction_path}")
+        try:
+            dataset = DataSet.from_paths(
+                image_path=image_path,
+                features_path=features_path,
+                labels_path=painting_path,
+                segmentation_path=prediction_path,
+                make_missing_datasets=True,
+            )
+        except FileNotFoundError:
+            print(f"File {path} not found!", file=sys.stderr)
+            return
+
+        config_path = os.path.join(run_dir, "dataset_config.json")
+
+        config = {
+            'image': os.path.relpath(os.path.join(voxel_spacing_dir, f"{paths['image']}"), voxel_spacing_dir),
+            'features': os.path.relpath(os.path.join(voxel_spacing_dir, paths["features"]), voxel_spacing_dir),
+            'painting': os.path.relpath(painting_path, segmentation_dir),
+            'prediction': os.path.relpath(prediction_path, segmentation_dir)
+        }
+
+        with open(config_path, 'w') as file:
+            json.dump(config, file)
+        
+        data_manager = DataManager(datasets=[dataset])
+        
+        if not self.cell_canvas_app:
+            self.cell_canvas_app = CellCanvasApp(data=data_manager, viewer=self.viewer, verbose=True)
+            cell_canvas_widget = QtCellCanvas(app=self.cell_canvas_app)
+            self.viewer.window.add_dock_widget(cell_canvas_widget)
+        else:
+            # Update existing CellCanvasApp's data manager
+            self.cell_canvas_app.update_data_manager(data_manager)
+
+        # TODO this has multiple copick specific hardcoded hacks
+            
+        # TODO hardcoded scale factor
+        # self.viewer.layers['Image'].scale = (10, 10, 10)
+
+        # Set colormap
+        # painting_layer.colormap.color_dict
+        #  self.app.painting_labels
+        self.cell_canvas_app.semantic_segmentor.set_colormap(self.get_copick_colormap())
+        self.cell_canvas_app.semantic_segmentor.painting_labels = [obj.label for obj in root.config.pickable_objects] + [9]
+        self.cell_canvas_app.semantic_segmentor.widget.class_labels_mapping = {obj.label: obj.name for obj in root.config.pickable_objects}
+
+        self.cell_canvas_app.semantic_segmentor.widget.class_labels_mapping[9] = 'background'
+        self.cell_canvas_app.semantic_segmentor.widget.setupLegend()
+
+if __name__ == "__main__":
+    # Project root
+    root = CopickRootFSSpec.from_file("/Volumes/kish@CZI.T7/demo_project/copick_config_kyle.json")
+    # root = CopickRootFSSpec.from_file("/Volumes/kish@CZI.T7/chlamy_copick/copick_config_kyle.json")
+
+    ## Root API
+    root.config # CopickConfig object
+    root.runs # List of run objects (lazy loading from filesystem location(s))
+        
+    viewer = napari.Viewer()
+
+    # Hide layer list and controls
+    # viewer.window.qt_viewer.dockLayerList.setVisible(False)
+    # viewer.window.qt_viewer.dockLayerControls.setVisible(False)
+
+    copick_explorer_widget = NapariCopickExplorer(viewer, root)
+    viewer.window.add_dock_widget(copick_explorer_widget, name="Copick Explorer", area="left")
+
+    # napari.run()
+
+# TODO finish making the prediction computation more lazy
+# the strategy should be to start computing labels chunkwise
+# on the zarr itself
+
+# TODO check scaling between picks and zarrs
+
+# TODO check why painting doesn't work when using proper scaling
+
+# TODO add proper colormap and legend support
+# - override exclusion of non-zero labels
+# - consistent colormap in the charts
+# - consistent colormap in the painted part of the labels image

From 7cdc73e10768f627c487dbdecd9267ff941ee92b Mon Sep 17 00:00:00 2001
From: Kyle Harrington <czi@kyleharrington.com>
Date: Mon, 22 Apr 2024 11:39:11 -0400
Subject: [PATCH 22/30] Add __init__ for _copick

---
 src/cellcanvas/_copick/__init__.py | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 create mode 100644 src/cellcanvas/_copick/__init__.py

diff --git a/src/cellcanvas/_copick/__init__.py b/src/cellcanvas/_copick/__init__.py
new file mode 100644
index 0000000..e69de29

From e8f0d555020d007f0db4b3143e6fb002eb47591d Mon Sep 17 00:00:00 2001
From: Kyle Harrington <czi@kyleharrington.com>
Date: Mon, 22 Apr 2024 13:38:09 -0400
Subject: [PATCH 23/30] Update attribute reference for copick project

---
 src/cellcanvas/_copick/widget.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/cellcanvas/_copick/widget.py b/src/cellcanvas/_copick/widget.py
index f1c5b2c..70ce854 100644
--- a/src/cellcanvas/_copick/widget.py
+++ b/src/cellcanvas/_copick/widget.py
@@ -108,7 +108,7 @@ def __init__(self, viewer: napari.Viewer, root):
 
     def get_copick_colormap(self):
         """Return a colormap for distinct label colors based on the pickable objects."""
-        colormap = {obj.label: np.array(obj.color)/255.0 for obj in root.config.pickable_objects}
+        colormap = {obj.label: np.array(obj.color)/255.0 for obj in self.root.config.pickable_objects}
         colormap[None] = np.array([1, 1, 1, 1])
         colormap[9] = np.array([0, 1, 1, 1])
         return colormap
@@ -537,7 +537,7 @@ def open_picks(self, picks):
         points_array = np.array(points_locations) / 10
         
         # Adding the points layer to the viewer, using the pickable_object_name as the layer name
-        pickable_object = [obj for obj in root.config.pickable_objects if obj.name == picks.pickable_object_name][0]
+        pickable_object = [obj for obj in self.root.config.pickable_objects if obj.name == picks.pickable_object_name][0]
         self.viewer.add_points(points_array, name=picks.pickable_object_name, size=25, out_of_slice_display=True, face_color=np.array(pickable_object.color)/255.0)
 
     def open_tomogram(self, tomogram):
@@ -630,8 +630,8 @@ def initialize_or_update_cell_canvas(self):
         # painting_layer.colormap.color_dict
         #  self.app.painting_labels
         self.cell_canvas_app.semantic_segmentor.set_colormap(self.get_copick_colormap())
-        self.cell_canvas_app.semantic_segmentor.painting_labels = [obj.label for obj in root.config.pickable_objects] + [9]
-        self.cell_canvas_app.semantic_segmentor.widget.class_labels_mapping = {obj.label: obj.name for obj in root.config.pickable_objects}
+        self.cell_canvas_app.semantic_segmentor.painting_labels = [obj.label for obj in self.root.config.pickable_objects] + [9]
+        self.cell_canvas_app.semantic_segmentor.widget.class_labels_mapping = {obj.label: obj.name for obj in self.root.config.pickable_objects}
 
         self.cell_canvas_app.semantic_segmentor.widget.class_labels_mapping[9] = 'background'
         self.cell_canvas_app.semantic_segmentor.widget.setupLegend()

From f3f625adcd161ca689752f7ceb12560c80cdacb4 Mon Sep 17 00:00:00 2001
From: Kyle Harrington <czi@kyleharrington.com>
Date: Mon, 22 Apr 2024 14:34:54 -0400
Subject: [PATCH 24/30] Remove hack for adding background label, now it comes
 from copick

---
 src/cellcanvas/_copick/widget.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/cellcanvas/_copick/widget.py b/src/cellcanvas/_copick/widget.py
index 70ce854..124425c 100644
--- a/src/cellcanvas/_copick/widget.py
+++ b/src/cellcanvas/_copick/widget.py
@@ -630,10 +630,10 @@ def initialize_or_update_cell_canvas(self):
         # painting_layer.colormap.color_dict
         #  self.app.painting_labels
         self.cell_canvas_app.semantic_segmentor.set_colormap(self.get_copick_colormap())
-        self.cell_canvas_app.semantic_segmentor.painting_labels = [obj.label for obj in self.root.config.pickable_objects] + [9]
+        self.cell_canvas_app.semantic_segmentor.painting_labels = [obj.label for obj in self.root.config.pickable_objects]
         self.cell_canvas_app.semantic_segmentor.widget.class_labels_mapping = {obj.label: obj.name for obj in self.root.config.pickable_objects}
 
-        self.cell_canvas_app.semantic_segmentor.widget.class_labels_mapping[9] = 'background'
+#        self.cell_canvas_app.semantic_segmentor.widget.class_labels_mapping[9] = 'background'
         self.cell_canvas_app.semantic_segmentor.widget.setupLegend()
 
 if __name__ == "__main__":

From 54e90edc6cec8bdb328b3ed7f77a9fd1da0468be Mon Sep 17 00:00:00 2001
From: Kyle Harrington <czi@kyleharrington.com>
Date: Mon, 22 Apr 2024 17:43:10 -0400
Subject: [PATCH 25/30] Update default paths, use more copick config, lighter
 model

---
 src/cellcanvas/_copick/widget.py              | 33 ++++++++++++++-----
 .../semantic/_embedding_segmentor.py          |  4 +--
 2 files changed, 26 insertions(+), 11 deletions(-)

diff --git a/src/cellcanvas/_copick/widget.py b/src/cellcanvas/_copick/widget.py
index 124425c..ed1be3d 100644
--- a/src/cellcanvas/_copick/widget.py
+++ b/src/cellcanvas/_copick/widget.py
@@ -180,8 +180,8 @@ def get_complete_data_manager(self, all_pairs=False):
             voxel_spacing = self.get_voxel_spacing()
             
             # Reused paths for all datasets in a run
-            painting_path = os.path.join(segmentation_dir, f'{voxel_spacing:.3f}_cellcanvas-painting_0_all-multilabel.zarr')
-            prediction_path = os.path.join(segmentation_dir, f'{voxel_spacing:.3f}_cellcanvas-prediction_0_all-multilabel.zarr')
+            painting_path = self.get_default_painting_path(segmentation_dir, voxel_spacing)
+            prediction_path = self.get_default_prediction_path(segmentation_dir, voxel_spacing)
 
             zarr_datasets = glob.glob(os.path.join(voxel_spacing_dir, "*.zarr"))
             image_feature_pairs = {}
@@ -203,7 +203,7 @@ def get_complete_data_manager(self, all_pairs=False):
                             }
 
             # Handle either all pairs or only those specified by the configuration
-            config_path = os.path.join(run_dir, "dataset_config.json")
+            config_path = self.get_config_path(run.static_path)
             if os.path.exists(config_path):
                 with open(config_path, 'r') as file:
                     config = json.load(file)
@@ -212,7 +212,7 @@ def get_complete_data_manager(self, all_pairs=False):
                     if 'prediction' in config:
                         prediction_path = os.path.join(segmentation_dir, config['prediction'])
 
-            if not all_pairs:
+            if os.path.exists(config_path) and not all_pairs:
                 with open(config_path, 'r') as file:
                     config = json.load(file)
                     image_path = os.path.join(voxel_spacing_dir, config['image'])
@@ -469,7 +469,7 @@ def on_run_clicked(self, item, column):
         default_selections = {}
 
         # Check for config file and load selections if present
-        config_path = os.path.join(static_path, "dataset_config.json")
+        config_path = self.get_config_path(static_path)
         if os.path.exists(config_path):
             with open(config_path, 'r') as file:
                 config = json.load(file)
@@ -556,13 +556,28 @@ def open_labels(self, tomogram):
         # TODO scale is hard coded to 10 here
         self.viewer.add_image(zarr_store[0], name=f"Tomogram: {tomogram.name}")
 
+    def get_config_path(self, run_dir):
+        return os.path.join(run_dir, f"{self.get_user_id()}_config.json")
+        
+    def get_session_id(self):
+        return 17
+
+    def get_user_id(self):
+        return self.root.user_id
+        
+    def get_default_painting_path(self, segmentation_dir, voxel_spacing):
+        return os.path.join(segmentation_dir, f'{voxel_spacing:.3f}_{self.get_user_id()}-cellcanvas-painting_{self.get_session_id()}_all-multilabel.zarr')
+
+    def get_default_prediction_path(self, segmentation_dir, voxel_spacing):
+        return os.path.join(segmentation_dir, f'{voxel_spacing:.3f}_{self.get_user_id()}-cellcanvas-prediction_{self.get_session_id()}_all-multilabel.zarr')
+        
     def initialize_or_update_cell_canvas(self):
         # Collect paths from dropdowns
         paths = {layer: dropdown.currentText() for layer, dropdown in self.dropdowns.items()}
         
         if not paths["image"] or not paths["features"]:
             print("Please ensure image and feature paths are selected before initializing/updating CellCanvas.")
-            return        
+            return
 
         run_dir = self.selected_run.static_path
         segmentation_dir = self.get_segmentations_directory(self.selected_run.static_path)
@@ -573,8 +588,8 @@ def initialize_or_update_cell_canvas(self):
         # Ensure segmentations directory exists
         os.makedirs(segmentation_dir, exist_ok=True)
         
-        default_painting_path = os.path.join(segmentation_dir, f'{voxel_spacing:.3f}_cellcanvas-painting_0_all-multilabel.zarr')
-        default_prediction_path = os.path.join(segmentation_dir, f'{voxel_spacing:.3f}_cellcanvas-prediction_0_all-multilabel.zarr')
+        default_painting_path = self.get_default_painting_path(segmentation_dir, voxel_spacing)
+        default_prediction_path = self.get_default_prediction_path(segmentation_dir, voxel_spacing)
 
         painting_path = default_painting_path if not paths["painting"] else os.path.join(segmentation_dir, paths["painting"])
         prediction_path = default_prediction_path if not paths["prediction"] else os.path.join(segmentation_dir, paths["prediction"])
@@ -599,7 +614,7 @@ def initialize_or_update_cell_canvas(self):
             print(f"File {path} not found!", file=sys.stderr)
             return
 
-        config_path = os.path.join(run_dir, "dataset_config.json")
+        config_path = self.get_config_path(run_dir)
 
         config = {
             'image': os.path.relpath(os.path.join(voxel_spacing_dir, f"{paths['image']}"), voxel_spacing_dir),
diff --git a/src/cellcanvas/semantic/_embedding_segmentor.py b/src/cellcanvas/semantic/_embedding_segmentor.py
index 539b757..0015c5f 100644
--- a/src/cellcanvas/semantic/_embedding_segmentor.py
+++ b/src/cellcanvas/semantic/_embedding_segmentor.py
@@ -377,10 +377,10 @@ def update_model(self, model_type):
         # Model fitting
         if model_type == "Random Forest":
             clf = RandomForestClassifier(
-                n_estimators=200,
+                n_estimators=100,
                 n_jobs=-1,
                 max_depth=15,
-                max_samples=0.1,
+                max_samples=0.05,
                 class_weight=weight_dict,
             )
             self.segmentation_manager.model = clf

From 51ca7bb25506e0971533c8afee48642e2ec593db Mon Sep 17 00:00:00 2001
From: Kyle Harrington <czi@kyleharrington.com>
Date: Mon, 22 Apr 2024 20:51:04 -0400
Subject: [PATCH 26/30] Fix: remove hard coded color for label 9

---
 src/cellcanvas/_copick/widget.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/cellcanvas/_copick/widget.py b/src/cellcanvas/_copick/widget.py
index ed1be3d..268e736 100644
--- a/src/cellcanvas/_copick/widget.py
+++ b/src/cellcanvas/_copick/widget.py
@@ -110,7 +110,6 @@ def get_copick_colormap(self):
         """Return a colormap for distinct label colors based on the pickable objects."""
         colormap = {obj.label: np.array(obj.color)/255.0 for obj in self.root.config.pickable_objects}
         colormap[None] = np.array([1, 1, 1, 1])
-        colormap[9] = np.array([0, 1, 1, 1])
         return colormap
     
     def get_voxel_spacing(self):

From ae0d47574b51177d0f833ed0cabdeb06cf9d2054 Mon Sep 17 00:00:00 2001
From: Kyle Harrington <czi@kyleharrington.com>
Date: Wed, 24 Apr 2024 17:20:33 -0400
Subject: [PATCH 27/30] Better lazy loading of UI, support for opening zarr
 stores

There is a known bug when opening labels, probably related to dtype.
---
 src/cellcanvas/_copick/widget.py | 316 +++++++++++--------------------
 src/cellcanvas/data/data_set.py  |  34 ++++
 2 files changed, 142 insertions(+), 208 deletions(-)

diff --git a/src/cellcanvas/_copick/widget.py b/src/cellcanvas/_copick/widget.py
index 268e736..c531552 100644
--- a/src/cellcanvas/_copick/widget.py
+++ b/src/cellcanvas/_copick/widget.py
@@ -125,30 +125,42 @@ def _init_logging(self):
         streamHandler.setFormatter(formatter)
         self.logger.addHandler(streamHandler)
         
-
     def populate_tree(self):
+        self.tree.clear()  # Clear existing items if repopulating
         for run in self.root.runs:
             run_item = QTreeWidgetItem(self.tree, [run.name])
             run_item.setData(0, Qt.UserRole, run)
+            run_item.setChildIndicatorPolicy(QTreeWidgetItem.ShowIndicator)
+
+    def setup_signals(self):
+        self.tree.itemExpanded.connect(self.on_item_expanded)
+
+    def on_item_expanded(self, item):
+        # Check if the item has already been populated
+        if not hasattr(item, 'is_populated'):
+            run = item.data(0, Qt.UserRole)
+            if isinstance(run, copick.models.CopickRun):
+                self.populate_run(item, run)
+            item.is_populated = True  # Mark as populated
+
+    def populate_run(self, run_item, run):
+        for category in ["segmentations", "meshes", "picks", "voxel_spacings"]:
+            category_item = QTreeWidgetItem(run_item, [category])
+            items = getattr(run, category, [])
+            for item in items:
+                if category == "picks":
+                    item_name = item.pickable_object_name
+                else:
+                    item_name = getattr(item, 'name', 'Unnamed')
+                child_item = QTreeWidgetItem(category_item, [item_name])
+                child_item.setData(0, Qt.UserRole, item)
+
+                if category == "voxel_spacings":
+                    for tomogram in item.tomograms:
+                        tomo_item = QTreeWidgetItem(child_item, [f"Tomogram: {tomogram.tomo_type}"])
+                        tomo_item.setData(0, Qt.UserRole, tomogram)
 
-            for category in ["segmentations", "meshes", "picks", "voxel_spacings"]:
-                category_item = QTreeWidgetItem(run_item, [category])
-                items = getattr(run, category)
-                for item in items:
-                    if category == "picks":
-                        item_name = item.pickable_object_name
-                    else:
-                        item_name = getattr(item, 'name', 'Unnamed')
-
-                    child_item = QTreeWidgetItem(category_item, [item_name])
-                    child_item.setData(0, Qt.UserRole, item)
-
-                    # list tomograms
-                    if category == "voxel_spacings":
-                        for tomogram in item.tomograms:
-                            tomo_item = QTreeWidgetItem(child_item, [f"Tomogram: {tomogram.tomo_type}"])
-                            tomo_item.setData(0, Qt.UserRole, tomogram)
-
+        
     def activate_layer(self, layer):
         print(f"Activating layer {layer}")
         if layer == "image":
@@ -167,8 +179,10 @@ def get_complete_data_manager(self, all_pairs=False):
         datasets = []
         for run in self.root.runs:
             run_dir = run.static_path
-            voxel_spacing_dir = self.get_default_voxel_spacing_directory(run_dir)
-            segmentation_dir = self.get_segmentations_directory(run_dir)
+            overlay_path = run.overlay_path
+            
+            voxel_spacing_dir = self.get_default_voxel_spacing_directory(run)
+            segmentation_dir = self.get_segmentations_directory(run)
 
             if not voxel_spacing_dir:
                 print(f"No Voxel Spacing directory found for run {run.name}.")
@@ -246,90 +260,16 @@ def get_complete_data_manager(self, all_pairs=False):
 
         return DataManager(datasets=datasets)        
 
-    # Only train on config pairs
-    # def get_complete_data_manager(self, all_pairs=False):
-    #     datasets = []
-    #     for run in self.root.runs:
-    #         run_dir = run.static_path
-    #         config_path = os.path.join(run_dir, "dataset_config.json")
-
-    #         voxel_spacing_dir = self.get_default_voxel_spacing_directory(run_dir)
-    #         segmentation_dir = self.get_segmentations_directory(run_dir)
-
-    #         if not voxel_spacing_dir:
-    #             print(f"No Voxel Spacing directory found for run {run.name}.")
-    #             continue
-
-    #         os.makedirs(segmentation_dir, exist_ok=True)
-            
-    #         if os.path.exists(config_path):
-    #             with open(config_path, 'r') as file:
-    #                 config = json.load(file)
-    #                 image_path = os.path.join(voxel_spacing_dir, config['image'])
-    #                 features_path = os.path.join(voxel_spacing_dir, config['features'])
-    #                 painting_path = os.path.join(segmentation_dir, config['painting'])
-    #                 prediction_path = os.path.join(segmentation_dir, config['prediction'])
-    #         else:
-    #             # Existing logic to find paths                
-    #             voxel_spacing = self.get_voxel_spacing()
-
-    #             zarr_datasets = glob.glob(os.path.join(voxel_spacing_dir, "*.zarr"))
-    #             image_path = None
-    #             features_path = None
-    #             painting_path = os.path.join(segmentation_dir, f'{voxel_spacing:.3f}_cellcanvas-painting_0_all-multilabel.zarr')
-    #             prediction_path = os.path.join(segmentation_dir, f'{voxel_spacing:.3f}_cellcanvas-prediction_0_all-multilabel.zarr')
-
-    #             for dataset_path in zarr_datasets:
-    #                 dataset_name = os.path.basename(dataset_path).lower()
-    #                 if "_features.zarr" in dataset_name:
-    #                     features_path = dataset_path
-    #                 elif "painting" in dataset_name:
-    #                     painting_path = dataset_path
-    #                 elif "prediction" in dataset_name:
-    #                     prediction_path = dataset_path
-    #                 else:
-    #                     # TODO hard coded to use highest resolution
-    #                     image_path = os.path.join(dataset_path, "0")
-
-    #             # Save paths to JSON
-    #             config = {
-    #                 'image': os.path.relpath(image_path, voxel_spacing_dir),
-    #                 'features': os.path.relpath(features_path, voxel_spacing_dir),
-    #                 'painting': os.path.relpath(painting_path, segmentation_dir),
-    #                 'prediction': os.path.relpath(prediction_path, segmentation_dir)
-    #             }
-    #             with open(config_path, 'w') as file:
-    #                 json.dump(config, file)
-
-    #         print(f"Fitting on paths:")
-    #         print(f"Image: {image_path}")
-    #         print(f"Features: {features_path}")
-    #         print(f"Painting: {painting_path}")
-    #         print(f"Prediction: {prediction_path}")
-                    
-    #         # Load dataset with paths
-    #         if image_path and features_path:
-    #             dataset = DataSet.from_paths(
-    #                 image_path=image_path,
-    #                 features_path=features_path,
-    #                 labels_path=painting_path,
-    #                 segmentation_path=prediction_path,
-    #                 make_missing_datasets=True
-    #             )
-    #             datasets.append(dataset)
-
-    #     return DataManager(datasets=datasets)
-
-    def get_default_voxel_spacing_directory(self, static_path):
+    def get_default_voxel_spacing_directory(self, run):
         # Find VoxelSpacing directories, assuming a hard coded match for now
         voxel_spacing = self.get_voxel_spacing()
-        voxel_spacing_dirs = glob.glob(os.path.join(static_path, f'VoxelSpacing{voxel_spacing:.3f}'))
+        voxel_spacing_dirs = glob.glob(os.path.join(run.static_path, f'VoxelSpacing{voxel_spacing:.3f}'))
         if voxel_spacing_dirs:
             return voxel_spacing_dirs[0]
         return None
 
-    def get_segmentations_directory(self, static_path):
-        segmentation_dir = os.path.join(static_path, "Segmentations")
+    def get_segmentations_directory(self, run):
+        segmentation_dir = os.path.join(run.overlay_path, "Segmentations")
         return segmentation_dir
 
     def change_button_color(self, button, color):
@@ -445,6 +385,12 @@ def threaded_predict_for_all(self):
 
             print(f"Predictions written")
 
+    def get_painting_segmentation_name(self):
+        return "cellcanvas-painting"
+
+    def get_prediction_segmentation_name(self):
+        return "cellcanvas-prediction"
+            
     def on_run_clicked(self, item, column):
         data = item.data(0, Qt.UserRole)
         if not isinstance(data, copick.impl.filesystem.CopickRunFSSpec):
@@ -453,64 +399,61 @@ def on_run_clicked(self, item, column):
 
         self.selected_run = data
         static_path = self.selected_run.static_path
-        self.logger.info(f"Selected {static_path}")
+        overlay_path = self.selected_run.overlay_path
+        self.logger.info(f"Selected static path: {static_path} overlay path: {overlay_path}")
 
         # Clear existing items
         for dropdown in self.dropdowns.values():
             dropdown.clear()
 
+        voxel_spacing = self.selected_run.get_voxel_spacing(self.get_voxel_spacing())
+        if not voxel_spacing:
+            print("Voxel spacing does not exist.")
+            return
+
+        # features = self.selected_run.get_voxel_spacing(10).tomograms[0].get_features("cellcanvas01")
+            
         # Define directories
-        voxel_spacing_dirs = glob.glob(os.path.join(static_path, "VoxelSpacing10*"))
-        segmentation_dir = self.get_segmentations_directory(static_path)
-        os.makedirs(segmentation_dir, exist_ok=True)
-
-        # Initialize dictionary to hold default selections from config
-        default_selections = {}
-
-        # Check for config file and load selections if present
-        config_path = self.get_config_path(static_path)
-        if os.path.exists(config_path):
-            with open(config_path, 'r') as file:
-                config = json.load(file)
-            default_selections = {
-                'image': os.path.join(voxel_spacing_dirs[0], config.get('image')),
-                'features': os.path.join(voxel_spacing_dirs[0], config.get('features')),
-                'painting': os.path.join(segmentation_dir, config.get('painting')),
-                'prediction': os.path.join(segmentation_dir, config.get('prediction'))
-            }
+        voxel_spacing_dirs = voxel_spacing.static_path
 
         # Helper function to add items if not already in dropdown
         def add_item_if_not_exists(dropdown, item_name, item_data):
             if dropdown.findData(item_data) == -1:
                 dropdown.addItem(item_name, item_data)
 
-        # Load all zarr datasets from voxel spacing directories
-        if voxel_spacing_dirs:
-            for voxel_spacing_dir in voxel_spacing_dirs:
-                zarr_datasets = glob.glob(os.path.join(voxel_spacing_dir, "*.zarr"))
-                for dataset_path in zarr_datasets:
-                    dataset_name = os.path.basename(dataset_path)
-                    if "_features.zarr" in dataset_name.lower():
-                        add_item_if_not_exists(self.dropdowns["features"], dataset_name, dataset_path)
-                    else:
-                        add_item_if_not_exists(self.dropdowns["image"], dataset_name + "/0", dataset_path + "/0")
-
-        # Load all zarr datasets from segmentation directory
-        zarr_datasets = glob.glob(os.path.join(segmentation_dir, "*.zarr"))
-        for dataset_path in zarr_datasets:
-            dataset_name = os.path.basename(dataset_path)
-            if "painting" not in dataset_name.lower():
-                add_item_if_not_exists(self.dropdowns["prediction"], dataset_name, dataset_path)
-            if "prediction" not in dataset_name.lower():
-                add_item_if_not_exists(self.dropdowns["painting"], dataset_name, dataset_path)
-
-        # Set default selections in dropdowns if specified in the config
-        for key, dropdown in self.dropdowns.items():
-            if default_selections.get(key):
-                index = dropdown.findData(default_selections[key])
-                if index != -1:
-                    dropdown.setCurrentIndex(index)
-
+        # Load image/tomograms
+        tomograms = voxel_spacing.tomograms
+        for tomogram in tomograms:
+            add_item_if_not_exists(self.dropdowns["image"],
+                                   tomogram.tomo_type,
+                                   tomogram)
+
+        # Load features
+        for tomogram in tomograms:
+            features = tomogram.features
+            if features:
+                feature = features[0]
+                add_item_if_not_exists(self.dropdowns["features"],
+                                       tomogram.tomo_type,
+                                       feature)
+
+        # Painting
+        painting_seg = self.selected_run.get_segmentations(user_id=self.root.user_id, is_multilabel=True, name=self.get_painting_segmentation_name(), voxel_size=10)
+        if not painting_seg:
+            # Create seg
+            painting_seg = self.selected_run.new_segmentation(10, self.get_painting_segmentation_name(), self.get_session_id(), True, user_id=self.root.user_id)
+        else:
+            painting_seg = painting_seg[0]
+        add_item_if_not_exists(self.dropdowns["painting"], painting_seg.name, painting_seg)
+
+        # Prediction
+        prediction_seg = self.selected_run.get_segmentations(user_id=self.root.user_id, is_multilabel=True, name=self.get_prediction_segmentation_name(), voxel_size=10)
+        if not prediction_seg:
+            # Create seg
+            prediction_seg = self.selected_run.new_segmentation(10, self.get_prediction_segmentation_name(), self.get_session_id(), True, user_id=self.root.user_id)
+        else:
+            prediction_seg = prediction_seg[0]
+        add_item_if_not_exists(self.dropdowns["prediction"], prediction_seg.name, prediction_seg)
 
     def on_item_clicked(self, item, column):
         data = item.data(0, Qt.UserRole)
@@ -533,7 +476,7 @@ def open_picks(self, picks):
         ]
 
         # TODO hard coded scaling
-        points_array = np.array(points_locations) / 10
+        points_array = np.array(points_locations) / self.get_voxel_spacing()
         
         # Adding the points layer to the viewer, using the pickable_object_name as the layer name
         pickable_object = [obj for obj in self.root.config.pickable_objects if obj.name == picks.pickable_object_name][0]
@@ -564,66 +507,37 @@ def get_session_id(self):
     def get_user_id(self):
         return self.root.user_id
         
-    def get_default_painting_path(self, segmentation_dir, voxel_spacing):
-        return os.path.join(segmentation_dir, f'{voxel_spacing:.3f}_{self.get_user_id()}-cellcanvas-painting_{self.get_session_id()}_all-multilabel.zarr')
-
-    def get_default_prediction_path(self, segmentation_dir, voxel_spacing):
-        return os.path.join(segmentation_dir, f'{voxel_spacing:.3f}_{self.get_user_id()}-cellcanvas-prediction_{self.get_session_id()}_all-multilabel.zarr')
-        
     def initialize_or_update_cell_canvas(self):
         # Collect paths from dropdowns
-        paths = {layer: dropdown.currentText() for layer, dropdown in self.dropdowns.items()}
+        paths = {layer: dropdown.currentData() for layer, dropdown in self.dropdowns.items()}
         
         if not paths["image"] or not paths["features"]:
             print("Please ensure image and feature paths are selected before initializing/updating CellCanvas.")
             return
 
         run_dir = self.selected_run.static_path
-        segmentation_dir = self.get_segmentations_directory(self.selected_run.static_path)
-        voxel_spacing_dir = self.get_default_voxel_spacing_directory(self.selected_run.static_path)
+        overlay_path = self.selected_run.overlay_path
+        
+        segmentation_dir = self.get_segmentations_directory(self.selected_run)
+        voxel_spacing_dir = self.get_default_voxel_spacing_directory(self.selected_run)
 
         voxel_spacing = self.get_voxel_spacing()
 
         # Ensure segmentations directory exists
-        os.makedirs(segmentation_dir, exist_ok=True)
-        
-        default_painting_path = self.get_default_painting_path(segmentation_dir, voxel_spacing)
-        default_prediction_path = self.get_default_prediction_path(segmentation_dir, voxel_spacing)
-
-        painting_path = default_painting_path if not paths["painting"] else os.path.join(segmentation_dir, paths["painting"])
-        prediction_path = default_prediction_path if not paths["prediction"] else os.path.join(segmentation_dir, paths["prediction"])
-        image_path = os.path.join(voxel_spacing_dir, paths['image'])
-        features_path = os.path.join(voxel_spacing_dir, paths["features"])
+        # os.makedirs(segmentation_dir, exist_ok=True)
         
         # TODO note this is hard coded to use the highest resolution of a multiscale zarr
         print(f"Opening paths:")
-        print(f"Image: {image_path}")
-        print(f"Features: {features_path}")
-        print(f"Painting: {painting_path}")
-        print(f"Prediction: {prediction_path}")
-        try:
-            dataset = DataSet.from_paths(
-                image_path=image_path,
-                features_path=features_path,
-                labels_path=painting_path,
-                segmentation_path=prediction_path,
-                make_missing_datasets=True,
-            )
-        except FileNotFoundError:
-            print(f"File {path} not found!", file=sys.stderr)
-            return
-
-        config_path = self.get_config_path(run_dir)
-
-        config = {
-            'image': os.path.relpath(os.path.join(voxel_spacing_dir, f"{paths['image']}"), voxel_spacing_dir),
-            'features': os.path.relpath(os.path.join(voxel_spacing_dir, paths["features"]), voxel_spacing_dir),
-            'painting': os.path.relpath(painting_path, segmentation_dir),
-            'prediction': os.path.relpath(prediction_path, segmentation_dir)
-        }
-
-        with open(config_path, 'w') as file:
-            json.dump(config, file)
+        print(f"Image: {paths['image']}")
+        print(f"Features: {paths['features']}")
+        print(f"Painting: {paths['painting']}")
+        print(f"Prediction: {paths['prediction']}")
+        dataset = DataSet.from_stores(
+            image_store=paths['image'].zarr(),
+            features_store=paths['features'].zarr(),
+            labels_store=paths['painting'].zarr(),
+            segmentation_store=paths['prediction'].zarr(),
+        )
         
         data_manager = DataManager(datasets=[dataset])
         
@@ -652,12 +566,10 @@ def initialize_or_update_cell_canvas(self):
 
 if __name__ == "__main__":
     # Project root
-    root = CopickRootFSSpec.from_file("/Volumes/kish@CZI.T7/demo_project/copick_config_kyle.json")
+    
+    # root = CopickRootFSSpec.from_file("/Volumes/kish@CZI.T7/demo_project/copick_config_kyle.json")
     # root = CopickRootFSSpec.from_file("/Volumes/kish@CZI.T7/chlamy_copick/copick_config_kyle.json")
-
-    ## Root API
-    root.config # CopickConfig object
-    root.runs # List of run objects (lazy loading from filesystem location(s))
+    root = CopickRootFSSpec.from_file("/Volumes/kish@CZI.T7/demo_project/copick_config_pickathon.json")
         
     viewer = napari.Viewer()
 
@@ -670,15 +582,3 @@ def initialize_or_update_cell_canvas(self):
 
     # napari.run()
 
-# TODO finish making the prediction computation more lazy
-# the strategy should be to start computing labels chunkwise
-# on the zarr itself
-
-# TODO check scaling between picks and zarrs
-
-# TODO check why painting doesn't work when using proper scaling
-
-# TODO add proper colormap and legend support
-# - override exclusion of non-zero labels
-# - consistent colormap in the charts
-# - consistent colormap in the painted part of the labels image
diff --git a/src/cellcanvas/data/data_set.py b/src/cellcanvas/data/data_set.py
index 117c42f..f9d0b60 100644
--- a/src/cellcanvas/data/data_set.py
+++ b/src/cellcanvas/data/data_set.py
@@ -89,3 +89,37 @@ def from_paths(
             labels=labels,
             segmentation=segmentation,
         )
+
+    @classmethod
+    def from_stores(
+        cls,
+        image_store,
+        features_store,
+        labels_store,
+        segmentation_store,
+    ):
+        """Create a DataSet from a set of zarr stores.
+
+        todo: add ability to create missing labels/segmentations
+        """
+        # get the image
+        # TODO fix hardcoded scale for pickathon
+        image = zarr.open(image_store, "r")["0"]
+
+
+        # get the features
+        features = zarr.open(features_store, "r")
+
+        # get the labels
+        labels = zarr.open(labels_store, "a")
+
+        # get the segmentation
+        segmentation = zarr.open(segmentation_store, mode="a")
+
+        return cls(
+            image=image,
+            features=features,
+            labels=labels,
+            segmentation=segmentation,
+        )
+    

From 7c41d25b35e0fae389d1775dcec2a9dd4d2756bd Mon Sep 17 00:00:00 2001
From: Kyle Harrington <czi@kyleharrington.com>
Date: Wed, 24 Apr 2024 17:51:11 -0400
Subject: [PATCH 28/30] More updates for loading from zarr stores

---
 src/cellcanvas/data/data_set.py | 20 ++++++++++++++++++--
 1 file changed, 18 insertions(+), 2 deletions(-)

diff --git a/src/cellcanvas/data/data_set.py b/src/cellcanvas/data/data_set.py
index f9d0b60..0f5498c 100644
--- a/src/cellcanvas/data/data_set.py
+++ b/src/cellcanvas/data/data_set.py
@@ -110,11 +110,27 @@ def from_stores(
         # get the features
         features = zarr.open(features_store, "r")
 
+        group_name = "labels"
+        
         # get the labels
-        labels = zarr.open(labels_store, "a")
+        labels = zarr.open_group(labels_store,
+                                 mode="a")
+        if group_name in labels:
+            labels = labels[group_name]
+        else:
+            labels = labels.create_dataset(group_name,
+                                           shape=image.shape,
+                                           dtype="i4")
 
         # get the segmentation
-        segmentation = zarr.open(segmentation_store, mode="a")
+        segmentation = zarr.open_group(segmentation_store,
+                                       mode="a")
+        if group_name in segmentation:
+            segmentation = segmentation[group_name]
+        else:
+            segmentation = segmentation.create_dataset(group_name,
+                                                       shape=image.shape,
+                                                       dtype="i4")
 
         return cls(
             image=image,

From d48c4e28b1908ad04b68f1bc4f61c044126e6a13 Mon Sep 17 00:00:00 2001
From: Kyle Harrington <czi@kyleharrington.com>
Date: Wed, 24 Apr 2024 17:55:29 -0400
Subject: [PATCH 29/30] Features as a dict

---
 src/cellcanvas/data/data_set.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/cellcanvas/data/data_set.py b/src/cellcanvas/data/data_set.py
index 0f5498c..889e081 100644
--- a/src/cellcanvas/data/data_set.py
+++ b/src/cellcanvas/data/data_set.py
@@ -108,7 +108,7 @@ def from_stores(
 
 
         # get the features
-        features = zarr.open(features_store, "r")
+        features = {"features": zarr.open(features_store, "r")}
 
         group_name = "labels"
         

From 2a5d7679d18a494341c3dc4bec910339c5a65513 Mon Sep 17 00:00:00 2001
From: Kyle Harrington <czi@kyleharrington.com>
Date: Wed, 24 Apr 2024 20:54:34 -0400
Subject: [PATCH 30/30] Add LRU cache to stores to get some speedup

---
 src/cellcanvas/data/data_set.py | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/src/cellcanvas/data/data_set.py b/src/cellcanvas/data/data_set.py
index 889e081..7021215 100644
--- a/src/cellcanvas/data/data_set.py
+++ b/src/cellcanvas/data/data_set.py
@@ -102,18 +102,20 @@ def from_stores(
 
         todo: add ability to create missing labels/segmentations
         """
+
+        # TODO rewrite this to copy everything to be local
+        
         # get the image
         # TODO fix hardcoded scale for pickathon
-        image = zarr.open(image_store, "r")["0"]
-
+        image = zarr.open(zarr.storage.LRUStoreCache(image_store, None), "r")["0"]
 
         # get the features
-        features = {"features": zarr.open(features_store, "r")}
+        features = {"features": zarr.open(zarr.storage.LRUStoreCache(features_store, None), "r")}
 
         group_name = "labels"
         
         # get the labels
-        labels = zarr.open_group(labels_store,
+        labels = zarr.open_group(zarr.storage.LRUStoreCache(labels_store, None),
                                  mode="a")
         if group_name in labels:
             labels = labels[group_name]
@@ -123,7 +125,7 @@ def from_stores(
                                            dtype="i4")
 
         # get the segmentation
-        segmentation = zarr.open_group(segmentation_store,
+        segmentation = zarr.open_group(zarr.storage.LRUStoreCache(segmentation_store, None),
                                        mode="a")
         if group_name in segmentation:
             segmentation = segmentation[group_name]
@@ -132,6 +134,8 @@ def from_stores(
                                                        shape=image.shape,
                                                        dtype="i4")
 
+        # TODO start a background thread that triggers downloads of the zarrs
+            
         return cls(
             image=image,
             features=features,