From 44f51d37929a2f986ac8f26952a351f56cc3e3c3 Mon Sep 17 00:00:00 2001 From: Kyle Harrington Date: Sat, 6 Apr 2024 18:29:26 -0400 Subject: [PATCH 01/30] Model fitting works lazily --- src/cellcanvas/data/data_manager.py | 39 +++++++++++-------- .../semantic/_embedding_segmentor.py | 2 + .../semantic/segmentation_manager.py | 3 +- 3 files changed, 26 insertions(+), 18 deletions(-) diff --git a/src/cellcanvas/data/data_manager.py b/src/cellcanvas/data/data_manager.py index ddd0cfa..3f99ac6 100644 --- a/src/cellcanvas/data/data_manager.py +++ b/src/cellcanvas/data/data_manager.py @@ -3,6 +3,7 @@ import numpy as np from napari.utils.events.containers import SelectableEventedList from zarr import Array +import dask.array as da from cellcanvas.data.data_set import DataSet @@ -30,23 +31,27 @@ def get_training_data(self) -> Tuple[Array, Array]: features = [] labels = [] for dataset in self.datasets: - # get the features and labels - # todo make lazier - dataset_features = np.asarray(dataset.concatenated_features) - dataset_labels = np.asarray(dataset.labels) - - # reshape the data - dataset_labels = dataset_labels.flatten() - reshaped_features = dataset_features.reshape( - -1, dataset_features.shape[-1] - ) - - # Filter features where labels are greater than 0 - valid_labels = dataset_labels > 0 - filtered_features = reshaped_features[valid_labels, :] - filtered_labels = dataset_labels[valid_labels] - 1 # Adjust labels + dataset_features = da.asarray(dataset.concatenated_features) + dataset_labels = da.asarray(dataset.labels) + + # Flatten labels for boolean indexing + flattened_labels = dataset_labels.flatten() + + # Compute valid_indices based on labels > 0 + valid_indices = da.nonzero(flattened_labels > 0)[0].compute() + + # Flatten only the spatial dimensions of the dataset_features while preserving the feature dimension + c, h, w, d = dataset_features.shape + reshaped_features = dataset_features.reshape(c, h * w * d) + + # We need to apply valid_indices for each 
feature dimension separately + filtered_features_list = [da.take(reshaped_features[i, :], valid_indices, axis=0) for i in range(c)] + filtered_features = da.stack(filtered_features_list, axis=1) + + # Adjust labels + filtered_labels = flattened_labels[valid_indices] - 1 features.append(filtered_features) labels.append(filtered_labels) - - return np.concatenate(features), np.concatenate(labels) + + return da.concatenate(features), da.concatenate(labels) diff --git a/src/cellcanvas/semantic/_embedding_segmentor.py b/src/cellcanvas/semantic/_embedding_segmentor.py index c00e37a..4883cfd 100644 --- a/src/cellcanvas/semantic/_embedding_segmentor.py +++ b/src/cellcanvas/semantic/_embedding_segmentor.py @@ -672,6 +672,8 @@ def update_class_distribution_charts(self): def compute_embedding_projection(self): # Filter out entries where the label is 0 filtered_features, filtered_labels = self.data.get_training_data() + filtered_features = filtered_features.compute() + filtered_labels = filtered_labels.compute() # label values are offset by 1 for training, # undo the offset. diff --git a/src/cellcanvas/semantic/segmentation_manager.py b/src/cellcanvas/semantic/segmentation_manager.py index 87b47ac..dd248ec 100644 --- a/src/cellcanvas/semantic/segmentation_manager.py +++ b/src/cellcanvas/semantic/segmentation_manager.py @@ -23,8 +23,9 @@ def __init__(self, data: DataManager, model: SegmentationModel): def fit(self): """Fit using the model using the data in the data manager.""" features, labels = self.data.get_training_data() + features_computed, labels_computed = features.compute(), labels.compute() - self.model.fit(features, labels) + self.model.fit(features_computed, labels_computed) def predict(self, feature_image: np.ndarray): """Predict using the trained model. 
From c2ba1f359ca61c9a47fc34c4431d126a3fe7264b Mon Sep 17 00:00:00 2001 From: Kyle Harrington Date: Sat, 6 Apr 2024 21:53:16 -0400 Subject: [PATCH 02/30] Prediction has worked with this --- .../semantic/_embedding_segmentor.py | 66 +++++++++++++------ .../semantic/segmentation_manager.py | 10 ++- 2 files changed, 53 insertions(+), 23 deletions(-) diff --git a/src/cellcanvas/semantic/_embedding_segmentor.py b/src/cellcanvas/semantic/_embedding_segmentor.py index 4883cfd..1b72b0e 100644 --- a/src/cellcanvas/semantic/_embedding_segmentor.py +++ b/src/cellcanvas/semantic/_embedding_segmentor.py @@ -7,6 +7,7 @@ import matplotlib.pyplot as plt import napari import numpy as np +import dask.array as da import toolz as tz import zarr from matplotlib.backends.backend_qt5agg import ( @@ -358,26 +359,49 @@ def update_model(self, model_type): raise ValueError(f"Unsupported model type: {model_type}") def predict(self): - # We shift labels + 1 because background is 0 and has special meaning - # prediction = ( - # future.predict_segmenter( - # features.reshape(-1, features.shape[-1]), model - # ).reshape(features.shape[:-1]) - # + 1 - # ) - prediction = ( - self.segmentation_manager.predict( - np.asarray(self.data.datasets[0].concatenated_features) - ) - + 1 - ) - - # Compute stats in thread too - prediction_labels, prediction_counts = np.unique( - prediction, return_counts=True - ) - - return (prediction, prediction_labels, prediction_counts) + dataset_features = da.asarray(self.data.datasets[0].concatenated_features) + chunk_shape = dataset_features.chunksize + shape = dataset_features.shape + dtype = dataset_features.dtype + + # Placeholder for aggregated labels and counts + all_labels = [] + all_counts = [] + + # Iterate over chunks + for z in range(0, shape[1], chunk_shape[1]): + for y in range(0, shape[2], chunk_shape[2]): + for x in range(0, shape[3], chunk_shape[3]): + # Compute the slice for the current chunk + # in feature,z,y,x order + chunk_slice = ( + slice(None), + 
slice(z, min(z + chunk_shape[1], shape[1])), + slice(y, min(y + chunk_shape[2], shape[2])), + slice(x, min(x + chunk_shape[3], shape[3])), + ) + print(f"Predicting on chunk {chunk_slice}") + + # Extract the current chunk + chunk = dataset_features[chunk_slice].compute() + + # Predict on the chunk (adding 1 to each prediction) + predicted_chunk = self.segmentation_manager.predict(chunk) + 1 + + # Write the prediction to the corresponding region in the Zarr array + self.prediction_data[chunk_slice[1:]] = predicted_chunk + + # Aggregate labels and counts + labels, counts = np.unique(predicted_chunk, return_counts=True) + all_labels.append(labels) + all_labels.append(counts) + + # Combine all_labels and all_counts + unique_labels, inverse = np.unique(np.concatenate(all_labels), return_inverse=True) + total_counts = np.bincount(inverse, weights=np.concatenate(all_counts)) + + # Now, self.prediction_data should contain the predicted labels + return self.prediction_data, unique_labels, total_counts @thread_worker def prediction_thread(self): @@ -402,6 +426,8 @@ def start_prediction(self): # features = self.get_features() + # TODO use a yielded connect worker + self.prediction_worker = self.prediction_thread() self.prediction_worker.returned.connect(self.on_prediction_completed) self.prediction_worker.start() diff --git a/src/cellcanvas/semantic/segmentation_manager.py b/src/cellcanvas/semantic/segmentation_manager.py index dd248ec..fe9e706 100644 --- a/src/cellcanvas/semantic/segmentation_manager.py +++ b/src/cellcanvas/semantic/segmentation_manager.py @@ -1,6 +1,8 @@ from typing import Protocol import numpy as np +import dask.array as da +from dask import delayed from sklearn.exceptions import NotFittedError from cellcanvas.data.data_manager import DataManager @@ -27,7 +29,7 @@ def fit(self): self.model.fit(features_computed, labels_computed) - def predict(self, feature_image: np.ndarray): + def predict(self, feature_image): """Predict using the trained model. 
Parameters @@ -40,7 +42,8 @@ def predict(self, feature_image: np.ndarray): predicted_labels : Array The prediction of class. """ - features = feature_image.reshape((-1, feature_image.shape[-1])) + c, z, y, x = feature_image.shape + features = feature_image.transpose(1, 2, 3, 0).reshape(-1, c) try: predicted_labels = self.model.predict(features) @@ -50,4 +53,5 @@ def predict(self, feature_image: np.ndarray): "for example with the `fit_segmenter` function." ) from None - return predicted_labels.reshape(feature_image.shape[:-1]) + return predicted_labels.reshape(feature_image.shape[1:]) + From 71c4b713f773dc9215fb1a88b47134167e9d85b2 Mon Sep 17 00:00:00 2001 From: Kyle Harrington Date: Sun, 7 Apr 2024 09:36:59 -0400 Subject: [PATCH 03/30] Fix count aggregation --- src/cellcanvas/semantic/_embedding_segmentor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cellcanvas/semantic/_embedding_segmentor.py b/src/cellcanvas/semantic/_embedding_segmentor.py index 1b72b0e..1ca2ba1 100644 --- a/src/cellcanvas/semantic/_embedding_segmentor.py +++ b/src/cellcanvas/semantic/_embedding_segmentor.py @@ -394,7 +394,7 @@ def predict(self): # Aggregate labels and counts labels, counts = np.unique(predicted_chunk, return_counts=True) all_labels.append(labels) - all_labels.append(counts) + all_counts.append(counts) # Combine all_labels and all_counts unique_labels, inverse = np.unique(np.concatenate(all_labels), return_inverse=True) From ad429ef5f754e5ce513efcf93454398b8e93e7b7 Mon Sep 17 00:00:00 2001 From: Kyle Harrington Date: Mon, 8 Apr 2024 17:16:28 -0400 Subject: [PATCH 04/30] Launch cellcanvas from copick explorer, legend matches --- examples/run_app_copick.py | 259 ++++++++++++++++++ src/cellcanvas/_app/main_app.py | 5 + .../semantic/_embedding_segmentor.py | 15 +- .../semantic/segmentation_manager.py | 3 + 4 files changed, 281 insertions(+), 1 deletion(-) create mode 100644 examples/run_app_copick.py diff --git a/examples/run_app_copick.py 
b/examples/run_app_copick.py new file mode 100644 index 0000000..da53b53 --- /dev/null +++ b/examples/run_app_copick.py @@ -0,0 +1,259 @@ +"""Example of using CellCanvas to pick particles on a surface. + +To use: +1. Update base_file_path to point to cropped_covid.zarr example file +2. Run the script to launch CellCanvas +3. Paint/predict until you're happy with the result. The seeded labels are: + - 1: background (including inside the capsules) + - 2: membrane + - 3: spike proteins +3b. You might want to switch the image layer into the plane + depiction before doing the instance segmentation. + Sometimes I have trouble manipulating the plane after + the instance segmentation - need to look into this. +4. Once you're happy with the prediction, click the "instance segmentation" tab +5. Set the label value to 2. This will extract the membrane and + make instances via connected components. +6. Remove the small objects. Suggested threshold: 100 +7. Alt + left mouse button to select an instance to modify. + Once selected, you can dilate, erode, etc. to smooth it. +8. With the segment still selected, you can then mesh it + using the mesh widget. You can play with the smoothing parameters. +9. If the mesh looks good, switch to the "geometry" tab. + Select the mesh and start surfing! 
+""" +from collections import defaultdict +import os +import numpy as np +import napari +import cellcanvas +from cellcanvas._app.main_app import CellCanvasApp, QtCellCanvas +from cellcanvas.data.data_manager import DataManager +from cellcanvas.data.data_set import DataSet + +import json +import copick +from copick.impl.filesystem import CopickRootFSSpec +import zarr + +from qtpy.QtWidgets import QTreeWidget, QTreeWidgetItem, QVBoxLayout, QWidget, QComboBox, QPushButton, QLabel +from qtpy.QtCore import Qt +import glob # For pattern matching of file names + + +# Project root +root = CopickRootFSSpec.from_file("/Volumes/kish@CZI.T7/working/demo_project/copick_config_kyle.json") + +## Root API +root.config # CopickConfig object +root.runs # List of run objects (lazy loading from filesystem location(s)) + +# TODO update to use root.config.pickable_objects + + +def get_labels_colormap(): + """Return a colormap for distinct label colors based on the pickable objects.""" + colormap = {obj.label: np.array(obj.color)/255.0 for obj in root.config.pickable_objects} + colormap[None] = np.array([1, 1, 1, 1]) # default is white + colormap[0] = np.array([0, 0, 0, 0]) # Add any special cases if needed + return colormap + +cellcanvas.utils.get_labels_colormap = get_labels_colormap + +# Use the function +colormap = get_labels_colormap() + +# TODO set names from copick config +# cell_canvas.semantic_segmentor.widget.class_labels_mapping = {obj.label: obj.name for obj in root.config.pickable_objects} + +import napari +from qtpy.QtWidgets import QTreeWidget, QTreeWidgetItem, QVBoxLayout, QWidget +from qtpy.QtCore import Qt + +class NapariCopickExplorer(QWidget): + def __init__(self, viewer: napari.Viewer, root): + super().__init__() + self.viewer = viewer + self.root = root + self.selected_run = None + self.cell_canvas_app = None + + layout = QVBoxLayout() + self.setLayout(layout) + + # Dropdowns for each data layer + self.dropdowns = {} + for layer in ["image", "features", "painting", 
"prediction"]: + layout.addWidget(QLabel(f"{layer.capitalize()} Path:")) + self.dropdowns[layer] = QComboBox() + layout.addWidget(self.dropdowns[layer]) + + # Button to update CellCanvas with the selected dataset + self.update_button = QPushButton("Initialize/Update CellCanvas") + self.update_button.clicked.connect(self.initialize_or_update_cell_canvas) + layout.addWidget(self.update_button) + + self.tree = QTreeWidget() + self.tree.setHeaderLabel("Copick Runs") + self.tree.itemClicked.connect(self.on_run_clicked) + layout.addWidget(self.tree) + + self.populate_tree() + + def populate_tree(self): + for run in self.root.runs: + run_item = QTreeWidgetItem(self.tree, [run.name]) + run_item.setData(0, Qt.UserRole, run) + + for category in ["segmentations", "meshes", "picks", "voxel_spacings"]: + category_item = QTreeWidgetItem(run_item, [category]) + items = getattr(run, category) + for item in items: + if category == "picks": + item_name = item.pickable_object_name + else: + item_name = getattr(item, 'name', 'Unnamed') + + child_item = QTreeWidgetItem(category_item, [item_name]) + child_item.setData(0, Qt.UserRole, item) + + # list tomograms + if category == "voxel_spacings": + for tomogram in item.tomograms: + tomo_item = QTreeWidgetItem(child_item, [f"Tomogram: {tomogram.tomo_type}"]) + tomo_item.setData(0, Qt.UserRole, tomogram) + + def on_run_clicked(self, item, column): + data = item.data(0, Qt.UserRole) + if not isinstance(data, copick.impl.filesystem.CopickRunFSSpec): + self.on_item_clicked(item, column) + return + + self.selected_run = data + static_path = self.selected_run.static_path + + # Clear existing items + for dropdown in self.dropdowns.values(): + dropdown.clear() + + # Find VoxelSpacing directories + voxel_spacing_dirs = glob.glob(os.path.join(static_path, "VoxelSpacing*")) + + for voxel_spacing_dir in voxel_spacing_dirs: + # Find all Zarr datasets within the voxel spacing directory + zarr_datasets = glob.glob(os.path.join(voxel_spacing_dir, 
"*_features.zarr")) + + for dataset_path in zarr_datasets: + # Check for the existence of 'embedding' directory within each features zarr + embedding_path = os.path.join(dataset_path, "*", "embedding") + embedding_dirs = glob.glob(embedding_path) + + for embedding_dir in embedding_dirs: + # Assuming 'embedding' is the desired path for features + self.dropdowns["features"].addItem(embedding_dir) + + zarr_datasets = glob.glob(os.path.join(voxel_spacing_dir, "*.zarr")) + for dataset_path in zarr_datasets: + # For image paths, directly add non-features zarr datasets to the image dropdown + if not "features" in os.path.basename(dataset_path).lower(): + self.dropdowns["image"].addItem(dataset_path) + + + # Set defaults for painting and prediction layers, assuming they follow a fixed naming convention + # and are expected to be located in a specific VoxelSpacing directory, adjusting as necessary + if voxel_spacing_dirs: # Check if at least one VoxelSpacing directory was found + base_voxel_dir = voxel_spacing_dirs[0] # Assuming to use the first found directory for default paths + self.dropdowns["painting"].addItem(os.path.join(base_voxel_dir, "painting.zarr")) + self.dropdowns["prediction"].addItem(os.path.join(base_voxel_dir, "prediction.zarr")) + else: + print("No Voxel Spacing directories found. 
Please check the directory structure.") + + + def on_item_clicked(self, item, column): + data = item.data(0, Qt.UserRole) + if data: + if isinstance(data, copick.impl.filesystem.CopickPicksFSSpec): + self.open_picks(data) + elif isinstance(data, copick.impl.filesystem.CopickTomogramFSSpec): + self.open_tomogram(data) + + def open_picks(self, picks): + with open(picks.path, 'r') as f: + points_data = json.load(f) + + # Extracting points locations + points_locations = [ + [point['location']['z'], point['location']['y'], point['location']['x']] + for point in points_data['points'] + ] + + points_array = np.array(points_locations) + + # Adding the points layer to the viewer, using the pickable_object_name as the layer name + pickable_object = [obj for obj in root.config.pickable_objects if obj.name == picks.pickable_object_name][0] + self.viewer.add_points(points_array, name=picks.pickable_object_name, size=100, out_of_slice_display=True, face_color=np.array(pickable_object.color)/255.0) + + def open_tomogram(self, tomogram): + zarr_store = zarr.open(tomogram.zarr(), mode='r') + # TODO extract scale/transform info + + # TODO scale is hard coded to 10 here + self.viewer.add_image(zarr_store[0], name=f"Tomogram: {tomogram.tomo_type}", scale=10) + + def initialize_or_update_cell_canvas(self): + # Collect paths from dropdowns + paths = {layer: dropdown.currentText() for layer, dropdown in self.dropdowns.items()} + + if not all(paths.values()): + print("Please ensure all paths are selected before initializing/updating CellCanvas.") + return + + dataset = DataSet.from_paths( + image_path=f"{paths['image']}/0", + features_path=paths["features"], + labels_path=paths["painting"], + segmentation_path=paths["prediction"], + make_missing_datasets=True, + ) + + data_manager = DataManager(datasets=[dataset]) + + if not self.cell_canvas_app: + self.cell_canvas_app = CellCanvasApp(data=data_manager, viewer=self.viewer, verbose=True) + cell_canvas_widget = 
QtCellCanvas(app=self.cell_canvas_app) + self.viewer.window.add_dock_widget(cell_canvas_widget) + else: + # Update existing CellCanvasApp's data manager + self.cell_canvas_app.update_data_manager(data_manager) + + # TODO this has multiple copick specific hardcoded hacks + + # TODO hardcoded scale factor + self.viewer.layers['Image'].scale = (10, 10, 10) + + # Set colormap + # painting_layer.colormap.color_dict + # self.app.painting_labels + colormap = {obj.label: np.array(obj.color)/255.0 for obj in root.config.pickable_objects} + colormap[None] = np.array([1, 1, 1, 1]) + colormap[9] = np.array([0, 1, 1, 1]) + self.cell_canvas_app.semantic_segmentor.painting_layer.colormap.color_dict = colormap + self.cell_canvas_app.semantic_segmentor.painting_labels = [obj.label for obj in root.config.pickable_objects] + [9] + self.cell_canvas_app.semantic_segmentor.widget.class_labels_mapping = {obj.label: obj.name for obj in root.config.pickable_objects} + + self.cell_canvas_app.semantic_segmentor.widget.class_labels_mapping[9] = 'background' + self.cell_canvas_app.semantic_segmentor.widget.setupLegend() + +viewer = napari.Viewer() +copick_explorer_widget = NapariCopickExplorer(viewer, root) +viewer.window.add_dock_widget(copick_explorer_widget, name="Copick Explorer", area="left") + + +# napari.run() + +# TODO finish making the prediction computation more lazy +# the strategy should be to start computing labels chunkwise +# on the zarr itself + +# TODO check scaling between picks and zarrs + diff --git a/src/cellcanvas/_app/main_app.py b/src/cellcanvas/_app/main_app.py index d814a87..b7aedc2 100644 --- a/src/cellcanvas/_app/main_app.py +++ b/src/cellcanvas/_app/main_app.py @@ -30,6 +30,11 @@ def __init__( extra_logging=self.verbose, ) + + def update_data_manager(self, data: DataManager): + self.data = data + self.semantic_segmentor.update_data_manager(data) + @property def mode(self) -> AppMode: return self._mode diff --git a/src/cellcanvas/semantic/_embedding_segmentor.py 
b/src/cellcanvas/semantic/_embedding_segmentor.py index 1ca2ba1..c7949f3 100644 --- a/src/cellcanvas/semantic/_embedding_segmentor.py +++ b/src/cellcanvas/semantic/_embedding_segmentor.py @@ -91,7 +91,7 @@ def __init__( # self.logger.info(f"zarr_path: {zarr_path}") self._add_threading_workers() - self._init_viewer_layers() + self.update_data_manager(self.data) self._add_widget() self.model = None @@ -99,6 +99,19 @@ def __init__( self.start_computing_embedding_plot() self.update_class_distribution_charts() + def update_data_manager(self, data: DataManager): + self.data = data + self.segmentation_manager.update_data_manager(data) + + # get the image and features + # todo this is temporarily assuming a single dataset + # need to generalize + self.image_data = self.data.datasets[0].image + self.features = self.data.datasets[0].features + + # TODO remove old layers + self._init_viewer_layers() + def reshape_features(self, arr): return arr.reshape(-1, arr.shape[-1]) diff --git a/src/cellcanvas/semantic/segmentation_manager.py b/src/cellcanvas/semantic/segmentation_manager.py index fe9e706..3d246ec 100644 --- a/src/cellcanvas/semantic/segmentation_manager.py +++ b/src/cellcanvas/semantic/segmentation_manager.py @@ -22,6 +22,9 @@ def __init__(self, data: DataManager, model: SegmentationModel): self.data = data self.model = model + def update_data_manager(self, data: DataManager): + self.data = data + def fit(self): """Fit using the model using the data in the data manager.""" features, labels = self.data.get_training_data() From 857ee01f7bb2b43b21ad06cfd87120555f59aa7b Mon Sep 17 00:00:00 2001 From: Kyle Harrington Date: Mon, 8 Apr 2024 17:33:11 -0400 Subject: [PATCH 05/30] Update painting and prediction colormaps --- examples/run_app_copick.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/examples/run_app_copick.py b/examples/run_app_copick.py index da53b53..1ff3fbc 100644 --- a/examples/run_app_copick.py +++ 
b/examples/run_app_copick.py @@ -51,17 +51,17 @@ # TODO update to use root.config.pickable_objects -def get_labels_colormap(): +def get_copick_colormap(): """Return a colormap for distinct label colors based on the pickable objects.""" colormap = {obj.label: np.array(obj.color)/255.0 for obj in root.config.pickable_objects} - colormap[None] = np.array([1, 1, 1, 1]) # default is white - colormap[0] = np.array([0, 0, 0, 0]) # Add any special cases if needed + colormap[None] = np.array([1, 1, 1, 1]) + colormap[9] = np.array([0, 1, 1, 1]) return colormap -cellcanvas.utils.get_labels_colormap = get_labels_colormap +cellcanvas.utils.get_labels_colormap = get_copick_colormap # Use the function -colormap = get_labels_colormap() +colormap = get_copick_colormap() # TODO set names from copick config # cell_canvas.semantic_segmentor.widget.class_labels_mapping = {obj.label: obj.name for obj in root.config.pickable_objects} @@ -234,10 +234,9 @@ def initialize_or_update_cell_canvas(self): # Set colormap # painting_layer.colormap.color_dict # self.app.painting_labels - colormap = {obj.label: np.array(obj.color)/255.0 for obj in root.config.pickable_objects} - colormap[None] = np.array([1, 1, 1, 1]) - colormap[9] = np.array([0, 1, 1, 1]) + colormap = get_copick_colormap() self.cell_canvas_app.semantic_segmentor.painting_layer.colormap.color_dict = colormap + self.cell_canvas_app.semantic_segmentor.prediction_layer.colormap.color_dict = colormap self.cell_canvas_app.semantic_segmentor.painting_labels = [obj.label for obj in root.config.pickable_objects] + [9] self.cell_canvas_app.semantic_segmentor.widget.class_labels_mapping = {obj.label: obj.name for obj in root.config.pickable_objects} @@ -257,3 +256,5 @@ def initialize_or_update_cell_canvas(self): # TODO check scaling between picks and zarrs +# TODO check why painting doesn't work +# check if it is related to scaling From 5e1208690081e80cb7518dffee58a88b37d48328 Mon Sep 17 00:00:00 2001 From: Kyle Harrington Date: Mon, 8 Apr 
2024 17:40:02 -0400 Subject: [PATCH 06/30] Move hard coded scaling to point data, resolves painting issue --- examples/run_app_copick.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/examples/run_app_copick.py b/examples/run_app_copick.py index 1ff3fbc..65ccd59 100644 --- a/examples/run_app_copick.py +++ b/examples/run_app_copick.py @@ -187,18 +187,19 @@ def open_picks(self, picks): for point in points_data['points'] ] - points_array = np.array(points_locations) + # TODO hard coded scaling + points_array = np.array(points_locations) / 10 # Adding the points layer to the viewer, using the pickable_object_name as the layer name pickable_object = [obj for obj in root.config.pickable_objects if obj.name == picks.pickable_object_name][0] - self.viewer.add_points(points_array, name=picks.pickable_object_name, size=100, out_of_slice_display=True, face_color=np.array(pickable_object.color)/255.0) + self.viewer.add_points(points_array, name=picks.pickable_object_name, size=25, out_of_slice_display=True, face_color=np.array(pickable_object.color)/255.0) def open_tomogram(self, tomogram): zarr_store = zarr.open(tomogram.zarr(), mode='r') # TODO extract scale/transform info # TODO scale is hard coded to 10 here - self.viewer.add_image(zarr_store[0], name=f"Tomogram: {tomogram.tomo_type}", scale=10) + self.viewer.add_image(zarr_store[0], name=f"Tomogram: {tomogram.tomo_type}") def initialize_or_update_cell_canvas(self): # Collect paths from dropdowns @@ -229,7 +230,7 @@ def initialize_or_update_cell_canvas(self): # TODO this has multiple copick specific hardcoded hacks # TODO hardcoded scale factor - self.viewer.layers['Image'].scale = (10, 10, 10) + # self.viewer.layers['Image'].scale = (10, 10, 10) # Set colormap # painting_layer.colormap.color_dict @@ -256,5 +257,6 @@ def initialize_or_update_cell_canvas(self): # TODO check scaling between picks and zarrs -# TODO check why painting doesn't work -# check if it is related to scaling +# TODO 
check why painting doesn't work when using proper scaling + +# TODO add proper colormap support From 0076c51758fd575666265915ad0134425696b6a5 Mon Sep 17 00:00:00 2001 From: Kyle Harrington Date: Tue, 9 Apr 2024 07:47:30 -0400 Subject: [PATCH 07/30] Kludge to get colormaps working --- examples/run_app_copick.py | 9 ++++---- .../semantic/_embedding_segmentor.py | 23 ++++++++++++++----- 2 files changed, 22 insertions(+), 10 deletions(-) diff --git a/examples/run_app_copick.py b/examples/run_app_copick.py index 65ccd59..7246bd4 100644 --- a/examples/run_app_copick.py +++ b/examples/run_app_copick.py @@ -235,9 +235,7 @@ def initialize_or_update_cell_canvas(self): # Set colormap # painting_layer.colormap.color_dict # self.app.painting_labels - colormap = get_copick_colormap() - self.cell_canvas_app.semantic_segmentor.painting_layer.colormap.color_dict = colormap - self.cell_canvas_app.semantic_segmentor.prediction_layer.colormap.color_dict = colormap + self.cell_canvas_app.semantic_segmentor.set_colormap(get_copick_colormap()) self.cell_canvas_app.semantic_segmentor.painting_labels = [obj.label for obj in root.config.pickable_objects] + [9] self.cell_canvas_app.semantic_segmentor.widget.class_labels_mapping = {obj.label: obj.name for obj in root.config.pickable_objects} @@ -259,4 +257,7 @@ def initialize_or_update_cell_canvas(self): # TODO check why painting doesn't work when using proper scaling -# TODO add proper colormap support +# TODO add proper colormap and legend support +# - override exclusion of non-zero labels +# - consistent colormap in the charts +# - consistent colormap in the painted part of the labels image diff --git a/src/cellcanvas/semantic/_embedding_segmentor.py b/src/cellcanvas/semantic/_embedding_segmentor.py index c7949f3..a924a85 100644 --- a/src/cellcanvas/semantic/_embedding_segmentor.py +++ b/src/cellcanvas/semantic/_embedding_segmentor.py @@ -60,6 +60,7 @@ def __init__( self.extra_logging = extra_logging self.data = data_manager + 
self.colormap = get_labels_colormap() clf = RandomForestClassifier( n_estimators=50, n_jobs=-1, @@ -99,6 +100,12 @@ def __init__( self.start_computing_embedding_plot() self.update_class_distribution_charts() + def set_colormap(self, colormap): + self.colormap = colormap + + self.prediction_layer.colormap = DirectLabelColormap(color_dict=colormap) + self.painting_layer.colormap = DirectLabelColormap(color_dict=colormap) + def update_data_manager(self, data: DataManager): self.data = data self.segmentation_manager.update_data_manager(data) @@ -139,7 +146,7 @@ def _init_viewer_layers(self): name="Prediction", scale=self.data_layer.scale, opacity=0.1, - colormap=DirectLabelColormap(color_dict=get_labels_colormap()), + colormap=DirectLabelColormap(color_dict=self.colormap), ) # self.painting_data = zarr.open( @@ -154,7 +161,7 @@ def _init_viewer_layers(self): self.painting_data, name="Painting", scale=self.data_layer.scale, - colormap=DirectLabelColormap(color_dict=get_labels_colormap()), + colormap=DirectLabelColormap(color_dict=self.colormap), ) # Set up painting logging @@ -223,7 +230,9 @@ def on_data_change(self, event, app): self.corner_pixels = self.viewer.layers["Image"].corner_pixels # TODO check if this is stalling things - self.painting_labels, self.painting_counts = np.unique( + # TODO recheck this after copick + # self.painting_labels, self.painting_counts = np.unique( + _, self.painting_counts = np.unique( self.painting_data[:], return_counts=True ) @@ -252,7 +261,9 @@ def threaded_on_data_change( self.logger.info(f"Labels data has changed! 
{event}") # noqa: G004 # Update stats - self.painting_labels, self.painting_counts = np.unique( + # TODO check after copick + # self.painting_labels, self.painting_counts = np.unique( + _, self.painting_counts = np.unique( self.painting_data[:], return_counts=True ) @@ -586,7 +597,7 @@ def update_class_distribution_charts(self): # Example class to color mapping class_color_mapping = { label: f"#{int(rgba[0] * 255):02x}{int(rgba[1] * 255):02x}{int(rgba[2] * 255):02x}" - for label, rgba in get_labels_colormap().items() + for label, rgba in self.colormap.items() } self.widget.figure.clear() @@ -774,7 +785,7 @@ def create_embedding_plot(self, result): label: "#{:02x}{:02x}{:02x}".format( int(rgba[0] * 255), int(rgba[1] * 255), int(rgba[2] * 255) ) - for label, rgba in get_labels_colormap().items() + for label, rgba in self.colormap.items() } # Convert filtered_labels to a list of colors for each point From 6f0a0185da7572f07620b6aed541219201a76bbc Mon Sep 17 00:00:00 2001 From: Kyle Harrington Date: Fri, 12 Apr 2024 15:38:46 -0400 Subject: [PATCH 08/30] Add support for toggling layers --- examples/run_app_copick.py | 90 ++++++++++++------- src/cellcanvas/data/data_manager.py | 1 - .../semantic/_embedding_segmentor.py | 5 +- 3 files changed, 63 insertions(+), 33 deletions(-) diff --git a/examples/run_app_copick.py b/examples/run_app_copick.py index 7246bd4..1a4b2b4 100644 --- a/examples/run_app_copick.py +++ b/examples/run_app_copick.py @@ -42,7 +42,7 @@ # Project root -root = CopickRootFSSpec.from_file("/Volumes/kish@CZI.T7/working/demo_project/copick_config_kyle.json") +root = CopickRootFSSpec.from_file("/Volumes/kish@CZI.T7/demo_project/copick_config_kyle.json") ## Root API root.config # CopickConfig object @@ -83,8 +83,14 @@ def __init__(self, viewer: napari.Viewer, root): # Dropdowns for each data layer self.dropdowns = {} + self.layer_buttons = {} for layer in ["image", "features", "painting", "prediction"]: - layout.addWidget(QLabel(f"{layer.capitalize()} Path:")) 
+ # Make layer button + button = QPushButton(f"Select {layer.capitalize()} Layer") + button.clicked.connect(lambda checked, layer=layer: self.activate_layer(layer)) + layout.addWidget(button) + self.layer_buttons[layer] = button + # Make layer selection dropdown self.dropdowns[layer] = QComboBox() layout.addWidget(self.dropdowns[layer]) @@ -123,6 +129,20 @@ def populate_tree(self): tomo_item = QTreeWidgetItem(child_item, [f"Tomogram: {tomogram.tomo_type}"]) tomo_item.setData(0, Qt.UserRole, tomogram) + def activate_layer(self, layer): + print(f"Activating layer {layer}") + if layer == "image": + layer = self.cell_canvas_app.semantic_segmentor.data_layer + elif layer == "painting": + layer = self.cell_canvas_app.semantic_segmentor.painting_layer + elif layer == "prediction": + layer = self.cell_canvas_app.semantic_segmentor.prediction_layer + else: + return + layer.visible = True + layer.editable = True + self.viewer.layers.selection.active = layer + def on_run_clicked(self, item, column): data = item.data(0, Qt.UserRole) if not isinstance(data, copick.impl.filesystem.CopickRunFSSpec): @@ -137,36 +157,35 @@ def on_run_clicked(self, item, column): dropdown.clear() # Find VoxelSpacing directories - voxel_spacing_dirs = glob.glob(os.path.join(static_path, "VoxelSpacing*")) + # TODO hardcoded to match spacing = 10 + voxel_spacing_dirs = glob.glob(os.path.join(static_path, "VoxelSpacing10*")) + + if not voxel_spacing_dirs: # Check if at least one VoxelSpacing directory was found + print(f"No Voxel Spacing directories found in {static_path}. 
Please check the directory structure.") + return + self.voxel_spacing_dir = voxel_spacing_dirs[0] + for voxel_spacing_dir in voxel_spacing_dirs: # Find all Zarr datasets within the voxel spacing directory - zarr_datasets = glob.glob(os.path.join(voxel_spacing_dir, "*_features.zarr")) - - for dataset_path in zarr_datasets: - # Check for the existence of 'embedding' directory within each features zarr - embedding_path = os.path.join(dataset_path, "*", "embedding") - embedding_dirs = glob.glob(embedding_path) - - for embedding_dir in embedding_dirs: - # Assuming 'embedding' is the desired path for features - self.dropdowns["features"].addItem(embedding_dir) - zarr_datasets = glob.glob(os.path.join(voxel_spacing_dir, "*.zarr")) + + # Filtering the paths for each dropdown category for dataset_path in zarr_datasets: - # For image paths, directly add non-features zarr datasets to the image dropdown - if not "features" in os.path.basename(dataset_path).lower(): - self.dropdowns["image"].addItem(dataset_path) + dataset_name = os.path.basename(dataset_path) + if "_features.zarr" in dataset_name.lower(): + self.dropdowns["features"].addItem(dataset_name, dataset_path) + elif "painting.zarr" in dataset_name.lower(): + self.dropdowns["painting"].addItem(dataset_name, dataset_path) + elif "prediction.zarr" in dataset_name.lower(): + self.dropdowns["prediction"].addItem(dataset_name, dataset_path) + else: + # This is for the image dropdown, excluding features, painting, and prediction zarr files + self.dropdowns["image"].addItem(dataset_name, dataset_path) # Set defaults for painting and prediction layers, assuming they follow a fixed naming convention # and are expected to be located in a specific VoxelSpacing directory, adjusting as necessary - if voxel_spacing_dirs: # Check if at least one VoxelSpacing directory was found - base_voxel_dir = voxel_spacing_dirs[0] # Assuming to use the first found directory for default paths - 
self.dropdowns["painting"].addItem(os.path.join(base_voxel_dir, "painting.zarr")) - self.dropdowns["prediction"].addItem(os.path.join(base_voxel_dir, "prediction.zarr")) - else: - print("No Voxel Spacing directories found. Please check the directory structure.") def on_item_clicked(self, item, column): @@ -204,16 +223,19 @@ def open_tomogram(self, tomogram): def initialize_or_update_cell_canvas(self): # Collect paths from dropdowns paths = {layer: dropdown.currentText() for layer, dropdown in self.dropdowns.items()} + + if not paths["image"] or not paths["features"]: + print("Please ensure image and feature paths are selected before initializing/updating CellCanvas.") + return - if not all(paths.values()): - print("Please ensure all paths are selected before initializing/updating CellCanvas.") - return - + default_painting_path = os.path.join(self.voxel_spacing_dir, "painting_001.zarr") + default_prediction_path = os.path.join(self.voxel_spacing_dir, "prediction_001.zarr") + dataset = DataSet.from_paths( - image_path=f"{paths['image']}/0", - features_path=paths["features"], - labels_path=paths["painting"], - segmentation_path=paths["prediction"], + image_path=os.path.join(self.voxel_spacing_dir, f"{paths['image']}/0"), + features_path=os.path.join(self.voxel_spacing_dir, paths["features"]), + labels_path=default_painting_path if not paths["painting"] else os.path.join(self.voxel_spacing_dir, paths["painting"]), + segmentation_path=default_prediction_path if not paths["prediction"] else os.path.join(self.voxel_spacing_dir, paths["prediction"]), make_missing_datasets=True, ) @@ -243,6 +265,11 @@ def initialize_or_update_cell_canvas(self): self.cell_canvas_app.semantic_segmentor.widget.setupLegend() viewer = napari.Viewer() + +# Hide layer list and controls +# viewer.window.qt_viewer.dockLayerList.setVisible(False) +# viewer.window.qt_viewer.dockLayerControls.setVisible(False) + copick_explorer_widget = NapariCopickExplorer(viewer, root) 
viewer.window.add_dock_widget(copick_explorer_widget, name="Copick Explorer", area="left") @@ -261,3 +288,4 @@ def initialize_or_update_cell_canvas(self): # - override exclusion of non-zero labels # - consistent colormap in the charts # - consistent colormap in the painted part of the labels image + diff --git a/src/cellcanvas/data/data_manager.py b/src/cellcanvas/data/data_manager.py index 3f99ac6..44e4f9f 100644 --- a/src/cellcanvas/data/data_manager.py +++ b/src/cellcanvas/data/data_manager.py @@ -33,7 +33,6 @@ def get_training_data(self) -> Tuple[Array, Array]: for dataset in self.datasets: dataset_features = da.asarray(dataset.concatenated_features) dataset_labels = da.asarray(dataset.labels) - # Flatten labels for boolean indexing flattened_labels = dataset_labels.flatten() diff --git a/src/cellcanvas/semantic/_embedding_segmentor.py b/src/cellcanvas/semantic/_embedding_segmentor.py index a924a85..8325a0c 100644 --- a/src/cellcanvas/semantic/_embedding_segmentor.py +++ b/src/cellcanvas/semantic/_embedding_segmentor.py @@ -104,7 +104,8 @@ def set_colormap(self, colormap): self.colormap = colormap self.prediction_layer.colormap = DirectLabelColormap(color_dict=colormap) - self.painting_layer.colormap = DirectLabelColormap(color_dict=colormap) + self.painting_layer.colormap = DirectLabelColormap(color_dict=colormap) + self.update_class_distribution_charts() def update_data_manager(self, data: DataManager): self.data = data @@ -117,6 +118,7 @@ def update_data_manager(self, data: DataManager): self.features = self.data.datasets[0].features # TODO remove old layers + self.viewer.layers.clear() self._init_viewer_layers() def reshape_features(self, arr): @@ -133,6 +135,7 @@ def _init_viewer_layers(self): self.data_layer = self.viewer.add_image( self.image_data, name="Image", projection_mode="mean" ) + self.data_layer._keep_auto_contrast = True # self.prediction_data = zarr.open( # f"{self.zarr_path}/prediction", # mode="a", From 
d143192c855b94aac4588420543601a75f9d1b27 Mon Sep 17 00:00:00 2001 From: Kyle Harrington Date: Fri, 12 Apr 2024 17:10:49 -0400 Subject: [PATCH 09/30] Train and predict on all tomograms in a copick project --- examples/run_app_copick.py | 133 ++++++++++++++++++ .../semantic/_embedding_segmentor.py | 1 + 2 files changed, 134 insertions(+) diff --git a/examples/run_app_copick.py b/examples/run_app_copick.py index 1a4b2b4..d06366a 100644 --- a/examples/run_app_copick.py +++ b/examples/run_app_copick.py @@ -40,6 +40,13 @@ from qtpy.QtCore import Qt import glob # For pattern matching of file names +from sklearn.ensemble import RandomForestClassifier + +from cellcanvas.semantic.segmentation_manager import ( + SemanticSegmentationManager, +) + +import dask.array as da # Project root root = CopickRootFSSpec.from_file("/Volumes/kish@CZI.T7/demo_project/copick_config_kyle.json") @@ -81,6 +88,15 @@ def __init__(self, viewer: napari.Viewer, root): layout = QVBoxLayout() self.setLayout(layout) + # Adding new buttons for "Fit on all" and "Predict for all" + self.fit_all_button = QPushButton("Fit on all") + self.fit_all_button.clicked.connect(self.fit_on_all) + layout.addWidget(self.fit_all_button) + + self.predict_all_button = QPushButton("Predict for all") + self.predict_all_button.clicked.connect(self.predict_for_all) + layout.addWidget(self.predict_all_button) + # Dropdowns for each data layer self.dropdowns = {} self.layer_buttons = {} @@ -143,6 +159,123 @@ def activate_layer(self, layer): layer.editable = True self.viewer.layers.selection.active = layer + def get_complete_data_manager(self): + datasets = [] + for run in self.root.runs: + static_path = run.static_path + # Assume there is a method to get the default voxel spacing directory for each run + voxel_spacing_dir = self.get_default_voxel_spacing_directory(static_path) + + if not voxel_spacing_dir: + print(f"No Voxel Spacing directory found for run {run.name}.") + continue + + # Get all Zarr datasets within the voxel 
spacing directory + zarr_datasets = glob.glob(os.path.join(voxel_spacing_dir, "*.zarr")) + + # Initialize paths + image_path = None + features_path = None + painting_path = os.path.join(voxel_spacing_dir, "painting_001.zarr") + prediction_path = os.path.join(voxel_spacing_dir, "prediction_001.zarr") + + # Assign paths based on dataset names + for dataset_path in zarr_datasets: + dataset_name = os.path.basename(dataset_path) + if "_features.zarr" in dataset_name.lower(): + features_path = dataset_path + elif "painting" in dataset_name.lower(): + painting_path = dataset_path + elif "prediction" in dataset_name.lower(): + prediction_path = dataset_path + else: + image_path = dataset_path + + # Assume each dataset should be loaded with a specific method that may also handle missing datasets + if image_path and features_path: + # TODO remove hack for highest resolution + dataset = DataSet.from_paths( + image_path=os.path.join(image_path, "0"), + features_path=features_path, + labels_path=painting_path, + segmentation_path=prediction_path, + make_missing_datasets=True + ) + datasets.append(dataset) + + # Create a new data manager with all datasets + return DataManager(datasets=datasets) + + def get_default_voxel_spacing_directory(self, static_path): + # Find VoxelSpacing directories, assuming a hard coded match for now + voxel_spacing_dirs = glob.glob(os.path.join(static_path, "VoxelSpacing10*")) + if voxel_spacing_dirs: + return voxel_spacing_dirs[0] + return None + + def fit_on_all(self): + print("Fitting all models to the selected dataset.") + + data_manager = self.get_complete_data_manager() + + clf = RandomForestClassifier( + n_estimators=50, + n_jobs=-1, + max_depth=10, + max_samples=0.05, + ) + + segmentation_manager = SemanticSegmentationManager( + data=data_manager, model=clf + ) + segmentation_manager.fit() + + # TODO this is bad + self.cell_canvas_app.semantic_segmentor.segmentation_manager = segmentation_manager + + def predict_for_all(self): + print("Running 
predictions on all datasets.") + + # Check if segmentation manager is properly initialized + if not hasattr(self.cell_canvas_app.semantic_segmentor, 'segmentation_manager') or self.cell_canvas_app.semantic_segmentor.segmentation_manager is None: + print("Segmentation manager is not initialized.") + return + + # Retrieve the complete data manager that includes all runs + data_manager = self.get_complete_data_manager() + + # Iterate through each dataset within the data manager + for dataset in data_manager.datasets: + dataset_features = da.asarray(dataset.concatenated_features) + chunk_shape = dataset_features.chunksize + shape = dataset_features.shape + dtype = dataset_features.dtype + + # Iterate over chunks + for z in range(0, shape[1], chunk_shape[1]): + for y in range(0, shape[2], chunk_shape[2]): + for x in range(0, shape[3], chunk_shape[3]): + # Compute the slice for the current chunk + # in feature,z,y,x order + chunk_slice = ( + slice(None), + slice(z, min(z + chunk_shape[1], shape[1])), + slice(y, min(y + chunk_shape[2], shape[2])), + slice(x, min(x + chunk_shape[3], shape[3])), + ) + print(f"Predicting on chunk {chunk_slice}") + + # Extract the current chunk + chunk = dataset_features[chunk_slice].compute() + + # Predict on the chunk (adding 1 to each prediction) + predicted_chunk = self.cell_canvas_app.semantic_segmentor.segmentation_manager.predict(chunk) + 1 + + # Write the prediction to the corresponding region in the Zarr array + dataset.segmentation[chunk_slice[1:]] = predicted_chunk + + print(f"Predictions written") + def on_run_clicked(self, item, column): data = item.data(0, Qt.UserRole) if not isinstance(data, copick.impl.filesystem.CopickRunFSSpec): diff --git a/src/cellcanvas/semantic/_embedding_segmentor.py b/src/cellcanvas/semantic/_embedding_segmentor.py index 8325a0c..09733e2 100644 --- a/src/cellcanvas/semantic/_embedding_segmentor.py +++ b/src/cellcanvas/semantic/_embedding_segmentor.py @@ -136,6 +136,7 @@ def _init_viewer_layers(self): 
self.image_data, name="Image", projection_mode="mean" ) self.data_layer._keep_auto_contrast = True + self.data_layer.refresh() # self.prediction_data = zarr.open( # f"{self.zarr_path}/prediction", # mode="a", From 723c53acf657f09d449deed9df54d87c88c1d75f Mon Sep 17 00:00:00 2001 From: Kyle Harrington Date: Mon, 15 Apr 2024 12:20:26 -0400 Subject: [PATCH 10/30] Activate label when clicked on in the legend --- examples/run_app_copick.py | 3 ++- .../semantic/_embedding_segmentor.py | 20 ++++++++++++++++--- 2 files changed, 19 insertions(+), 4 deletions(-) diff --git a/examples/run_app_copick.py b/examples/run_app_copick.py index d06366a..4a24b0f 100644 --- a/examples/run_app_copick.py +++ b/examples/run_app_copick.py @@ -363,7 +363,8 @@ def initialize_or_update_cell_canvas(self): default_painting_path = os.path.join(self.voxel_spacing_dir, "painting_001.zarr") default_prediction_path = os.path.join(self.voxel_spacing_dir, "prediction_001.zarr") - + + # TODO note this is hard coded to use the highest resolution of a multiscale zarr dataset = DataSet.from_paths( image_path=os.path.join(self.voxel_spacing_dir, f"{paths['image']}/0"), features_path=os.path.join(self.voxel_spacing_dir, paths["features"]), diff --git a/src/cellcanvas/semantic/_embedding_segmentor.py b/src/cellcanvas/semantic/_embedding_segmentor.py index 09733e2..a3b8bed 100644 --- a/src/cellcanvas/semantic/_embedding_segmentor.py +++ b/src/cellcanvas/semantic/_embedding_segmentor.py @@ -19,7 +19,7 @@ from napari.qt.threading import thread_worker from napari.utils import DirectLabelColormap from psygnal import debounced -from qtpy.QtCore import Qt +from qtpy.QtCore import Qt, Signal from qtpy.QtGui import QColor, QPainter, QPixmap from qtpy.QtWidgets import ( QCheckBox, @@ -903,6 +903,15 @@ def paint_thread(self, lasso_path, target_label): # print(f"Painted {np.sum(contained)} pixels with label {target_label}") +class ClickableLabel(QLabel): + clicked = Signal(int) # Emits the label ID + + def 
__init__(self, label_id, *args, **kwargs): + super().__init__(*args, **kwargs) + self.label_id = label_id + + def mousePressEvent(self, event): + self.clicked.emit(self.label_id) class EmbeddingPaintingWidget(QWidget): def __init__(self, app, parent=None): @@ -1096,7 +1105,7 @@ def setupLegend(self): color = painting_layer.colormap.color_dict[label_id] # Create a QLabel for color swatch - color_swatch = QLabel() + color_swatch = ClickableLabel(label_id) pixmap = QPixmap(16, 16) if color is None: @@ -1105,6 +1114,7 @@ def setupLegend(self): pixmap.fill(QColor(*[int(c * 255) for c in color])) color_swatch.setPixmap(pixmap) + color_swatch.clicked.connect(self.activateLabel) # Update the mapping with new classes or use the existing name if label_id not in self.class_labels_mapping: @@ -1117,7 +1127,7 @@ def setupLegend(self): label_edit = QLineEdit(label_name) # Highlight the label if it is currently being used - if label_id == painting_layer._selected_label: + if label_id == painting_layer.selected_label: self.highlightLabel(label_edit) # Save changes to class labels back to the mapping @@ -1139,6 +1149,10 @@ def setupLegend(self): self.legend_placeholder_index, self.legend_group ) + def activateLabel(self, label_id): + painting_layer = self.app.get_painting_layer() + painting_layer.selected_label = label_id + def updateLegendHighlighting(self, selected_label_event): """Update highlighting of legend""" current_label_id = selected_label_event.source._selected_label From 6fab1cec44942af9d6c94de4174c9fd09ae60314 Mon Sep 17 00:00:00 2001 From: Kyle Harrington Date: Mon, 15 Apr 2024 13:02:43 -0400 Subject: [PATCH 11/30] Update prediction to account for chunkwise predictions --- examples/run_app_copick.py | 38 ++++++++++++++----- .../semantic/_embedding_segmentor.py | 3 -- 2 files changed, 28 insertions(+), 13 deletions(-) diff --git a/examples/run_app_copick.py b/examples/run_app_copick.py index 4a24b0f..e6cc443 100644 --- a/examples/run_app_copick.py +++ 
b/examples/run_app_copick.py @@ -213,6 +213,12 @@ def get_default_voxel_spacing_directory(self, static_path): return voxel_spacing_dirs[0] return None + def get_segmentations_directory(self, static_path): + segmentation_dir = glob.glob(os.path.join(static_path, "Segmentations")) + if segmentation_dir: + return segmentation_dir[0] + return None + def fit_on_all(self): print("Fitting all models to the selected dataset.") @@ -290,7 +296,7 @@ def on_run_clicked(self, item, column): dropdown.clear() # Find VoxelSpacing directories - # TODO hardcoded to match spacing = 10 + # TODO hardcoded to match spacing = 10 voxel_spacing_dirs = glob.glob(os.path.join(static_path, "VoxelSpacing10*")) if not voxel_spacing_dirs: # Check if at least one VoxelSpacing directory was found @@ -361,17 +367,29 @@ def initialize_or_update_cell_canvas(self): print("Please ensure image and feature paths are selected before initializing/updating CellCanvas.") return - default_painting_path = os.path.join(self.voxel_spacing_dir, "painting_001.zarr") - default_prediction_path = os.path.join(self.voxel_spacing_dir, "prediction_001.zarr") + # TODO put these into the segmentations directory + segmentation_dir = self.get_segmentations_directory(self.selected_run.static_path) + + voxel_spacing = 10 + + # Ensure segmentations directory exists + os.makedirs(segmentation_dir, exist_ok=True) + + default_painting_path = os.path.join(segmentation_dir, f'{voxel_spacing:.3f}_cellcanvas-painting_0_all-multilabel.zarr') + default_prediction_path = os.path.join(segmentation_dir, f'{voxel_spacing:.3f}_cellcanvas-prediction_0_all-multilabel.zarr') # TODO note this is hard coded to use the highest resolution of a multiscale zarr - dataset = DataSet.from_paths( - image_path=os.path.join(self.voxel_spacing_dir, f"{paths['image']}/0"), - features_path=os.path.join(self.voxel_spacing_dir, paths["features"]), - labels_path=default_painting_path if not paths["painting"] else os.path.join(self.voxel_spacing_dir, 
paths["painting"]), - segmentation_path=default_prediction_path if not paths["prediction"] else os.path.join(self.voxel_spacing_dir, paths["prediction"]), - make_missing_datasets=True, - ) + try: + dataset = DataSet.from_paths( + image_path=os.path.join(self.voxel_spacing_dir, f"{paths['image']}/0"), + features_path=os.path.join(self.voxel_spacing_dir, paths["features"]), + labels_path=default_painting_path if not paths["painting"] else os.path.join(self.voxel_spacing_dir, paths["painting"]), + segmentation_path=default_prediction_path if not paths["prediction"] else os.path.join(self.voxel_spacing_dir, paths["prediction"]), + make_missing_datasets=True, + ) + except FileNotFoundError: + print(f"File {path} not found!", file=sys.stderr) + return data_manager = DataManager(datasets=[dataset]) diff --git a/src/cellcanvas/semantic/_embedding_segmentor.py b/src/cellcanvas/semantic/_embedding_segmentor.py index a3b8bed..2a99a1e 100644 --- a/src/cellcanvas/semantic/_embedding_segmentor.py +++ b/src/cellcanvas/semantic/_embedding_segmentor.py @@ -468,9 +468,6 @@ def on_prediction_completed(self, result): self.prediction_labels = prediction_labels self.prediction_counts = prediction_counts - self.get_prediction_layer().data = self.prediction_data.reshape( - self.get_prediction_layer().data.shape - ) self.get_prediction_layer().refresh() self.update_class_distribution_charts() From e063f0f7904e715815346110be93effef1b21d11 Mon Sep 17 00:00:00 2001 From: Kyle Harrington Date: Mon, 15 Apr 2024 13:35:25 -0400 Subject: [PATCH 12/30] Add support for labels with no annotations --- .../semantic/_embedding_segmentor.py | 27 ++++++++++++------- 1 file changed, 17 insertions(+), 10 deletions(-) diff --git a/src/cellcanvas/semantic/_embedding_segmentor.py b/src/cellcanvas/semantic/_embedding_segmentor.py index 2a99a1e..6f1462e 100644 --- a/src/cellcanvas/semantic/_embedding_segmentor.py +++ b/src/cellcanvas/semantic/_embedding_segmentor.py @@ -519,16 +519,23 @@ def 
update_class_distribution_charts(self): else 1 ) - painting_counts = ( - self.painting_counts - if self.painting_counts is not None - else np.array([0]) - ) - painting_labels = ( - self.painting_labels - if self.painting_labels is not None - else np.array([0]) - ) + # Initialize counts for all labels in painting_labels with zero + if self.painting_labels is not None: + unique_labels = np.unique(self.painting_labels) + painting_counts_dict = {label: 0 for label in unique_labels} + else: + unique_labels = np.array([0]) + painting_counts_dict = {0: 0} + + # Update counts from existing painting_counts if available + if self.painting_counts is not None and self.painting_labels is not None: + for label, count in zip(self.painting_labels, self.painting_counts): + painting_counts_dict[label] = count + + # Create arrays from the dictionary + painting_labels = np.array(list(painting_counts_dict.keys())) + painting_counts = np.array(list(painting_counts_dict.values())) + prediction_counts = ( self.prediction_counts if self.prediction_counts is not None From 59bda09b4cb86f7a4cbd0f42b4a3610b56c0de0c Mon Sep 17 00:00:00 2001 From: Kyle Harrington Date: Mon, 15 Apr 2024 17:01:19 -0400 Subject: [PATCH 13/30] Add support for importing models --- .../semantic/_embedding_segmentor.py | 54 ++++++++++++++++--- 1 file changed, 47 insertions(+), 7 deletions(-) diff --git a/src/cellcanvas/semantic/_embedding_segmentor.py b/src/cellcanvas/semantic/_embedding_segmentor.py index 6f1462e..0868402 100644 --- a/src/cellcanvas/semantic/_embedding_segmentor.py +++ b/src/cellcanvas/semantic/_embedding_segmentor.py @@ -47,6 +47,8 @@ ) from cellcanvas.utils import get_labels_colormap, paint_maker +import xgboost as xgb + ACTIVE_BUTTON_COLOR = "#AF8B38" @@ -61,11 +63,22 @@ def __init__( self.extra_logging = extra_logging self.data = data_manager self.colormap = get_labels_colormap() - clf = RandomForestClassifier( - n_estimators=50, - n_jobs=-1, - max_depth=10, - max_samples=0.05, + # clf = 
RandomForestClassifier( + # n_estimators=25, + # n_jobs=-1, + # max_depth=10, + # max_samples=0.05, + # max_features='sqrt', + # class_weight='balanced' + # ) + + clf = xgb.XGBClassifier( + objective='multi:softmax', + num_class=10, # Specify number of classes if using softmax + n_estimators=200, + max_depth=20, + learning_rate=0.1, + scale_pos_weight='balanced' # For handling imbalance ) self.segmentation_manager = SemanticSegmentationManager( data=self.data, model=clf @@ -160,7 +173,8 @@ def _init_viewer_layers(self): # dtype="i4", # dimension_separator=".", # ) - self.painting_data = self.data.datasets[0].labels + + self.painting_data = self.data.datasets[0].labels.astype(int) self.painting_layer = self.viewer.add_labels( self.painting_data, name="Painting", @@ -349,13 +363,14 @@ def update_model(self, model_type): if filtered_labels.size == 0: self.logger.info("No labels present. Skipping model update.") return None - + # Calculate class weights unique_labels = np.unique(filtered_labels) class_weights = compute_class_weight( "balanced", classes=unique_labels, y=filtered_labels ) weight_dict = dict(zip(unique_labels, class_weights)) + self.logger.info(f"Class balance calculated {class_weights}") # Apply weights # sample_weights = np.vectorize(weight_dict.get)(filtered_labels) @@ -370,6 +385,8 @@ def update_model(self, model_type): class_weight=weight_dict, ) self.segmentation_manager.model = clf + # self.segmentation_manager.fit() + self.logger.info(f"Starting model fitting") self.segmentation_manager.fit() return self.segmentation_manager.model elif model_type == "XGBoost": @@ -984,10 +1001,16 @@ def initUI(self): live_pred_layout.addWidget(self.live_pred_button) controls_layout.addLayout(live_pred_layout) + # Export model self.export_model_button = QPushButton("Export Model") controls_layout.addWidget(self.export_model_button) self.export_model_button.clicked.connect(self.export_model) + # Import model + self.import_model_button = QPushButton("Import Model") + 
controls_layout.addWidget(self.import_model_button) + self.import_model_button.clicked.connect(self.import_model) + controls_group.setLayout(controls_layout) main_layout.addWidget(controls_group) @@ -1061,6 +1084,23 @@ def export_model(self): self, "Model Export", "No model available to export." ) + def import_model(self): + filePath, _ = QFileDialog.getOpenFileName( + self, "Open Model", "", "Joblib Files (*.joblib)" + ) + if filePath: + try: + model = joblib.load(filePath) + self.app.model = model + QMessageBox.information( + self, "Model Import", "Model imported successfully!" + ) + print(f"Loaded model file from: {filePath}") + except Exception as e: + QMessageBox.warning( + self, "Model Import", f"Failed to import model. Error: {str(e)}" + ) + def change_embedding_label_color(self, color): """Change the background color of the embedding label.""" self.embedding_label.setStyleSheet(f"background-color: {color};") From c9dbd233f7879997d03d587e8cebab84cf13457a Mon Sep 17 00:00:00 2001 From: Kyle Harrington Date: Wed, 17 Apr 2024 06:54:38 -0400 Subject: [PATCH 14/30] Better multiscale support, better UI for fit/predict on al --- examples/run_app_copick.py | 121 +++++++++++++++--- src/cellcanvas/data/data_manager.py | 1 + src/cellcanvas/data/data_set.py | 8 +- .../semantic/_embedding_segmentor.py | 6 +- .../semantic/segmentation_manager.py | 23 +++- src/cellcanvas/utils.py | 3 + 6 files changed, 141 insertions(+), 21 deletions(-) diff --git a/examples/run_app_copick.py b/examples/run_app_copick.py index e6cc443..ed2d633 100644 --- a/examples/run_app_copick.py +++ b/examples/run_app_copick.py @@ -30,7 +30,10 @@ from cellcanvas._app.main_app import CellCanvasApp, QtCellCanvas from cellcanvas.data.data_manager import DataManager from cellcanvas.data.data_set import DataSet +from napari.qt.threading import thread_worker +import sys +import logging import json import copick from copick.impl.filesystem import CopickRootFSSpec @@ -45,6 +48,7 @@ from 
cellcanvas.semantic.segmentation_manager import ( SemanticSegmentationManager, ) +from cellcanvas.utils import get_active_button_color import dask.array as da @@ -88,6 +92,8 @@ def __init__(self, viewer: napari.Viewer, root): layout = QVBoxLayout() self.setLayout(layout) + self._init_logging() + # Adding new buttons for "Fit on all" and "Predict for all" self.fit_all_button = QPushButton("Fit on all") self.fit_all_button.clicked.connect(self.fit_on_all) @@ -122,6 +128,17 @@ def __init__(self, viewer: napari.Viewer, root): self.populate_tree() + def _init_logging(self): + self.logger = logging.getLogger("cellcanvas") + self.logger.setLevel(logging.DEBUG) + streamHandler = logging.StreamHandler(sys.stdout) + formatter = logging.Formatter( + "%(asctime)s - %(name)s - %(levelname)s - %(message)s" + ) + streamHandler.setFormatter(formatter) + self.logger.addHandler(streamHandler) + + def populate_tree(self): for run in self.root.runs: run_item = QTreeWidgetItem(self.tree, [run.name]) @@ -214,14 +231,32 @@ def get_default_voxel_spacing_directory(self, static_path): return None def get_segmentations_directory(self, static_path): - segmentation_dir = glob.glob(os.path.join(static_path, "Segmentations")) - if segmentation_dir: - return segmentation_dir[0] - return None + segmentation_dir = os.path.join(static_path, "Segmentations") + return segmentation_dir + def change_button_color(self, button, color): + button.setStyleSheet(f"background-color: {color};") + + def reset_button_color(self, button): + self.change_button_color(button, "") + def fit_on_all(self): + if not self.cell_canvas_app: + print("Initialize cell canvas first") + return + print("Fitting all models to the selected dataset.") + self.change_button_color( + self.fit_all_button, get_active_button_color() + ) + + self.model_fit_worker = self.threaded_fit_on_all() + self.model_fit_worker.returned.connect(self.on_model_fit_completed) + self.model_fit_worker.start() + + @thread_worker + def 
threaded_fit_on_all(self): data_manager = self.get_complete_data_manager() clf = RandomForestClassifier( @@ -236,10 +271,39 @@ def fit_on_all(self): ) segmentation_manager.fit() - # TODO this is bad - self.cell_canvas_app.semantic_segmentor.segmentation_manager = segmentation_manager + return segmentation_manager + + def on_model_fit_completed(self, segmentation_manager): + self.logger.debug("on_model_fit_completed") + + self.cell_canvas_app.semantic_segmentor.segmentation_manager = segmentation_manager + + # Reset color + self.reset_button_color(self.fit_all_button) def predict_for_all(self): + if not self.cell_canvas_app: + print("Initialize cell canvas first") + return + + print("Fitting all models to the selected dataset.") + + self.change_button_color( + self.predict_all_button, get_active_button_color() + ) + + self.predict_worker = self.threaded_predict_for_all() + self.predict_worker.returned.connect(self.on_predict_completed) + self.predict_worker.start() + + def on_predict_completed(self, result): + self.logger.debug("on_predict_completed") + + # Reset color + self.reset_button_color(self.predict_all_button) + + @thread_worker + def threaded_predict_for_all(self): print("Running predictions on all datasets.") # Check if segmentation manager is properly initialized @@ -290,6 +354,7 @@ def on_run_clicked(self, item, column): self.selected_run = data static_path = self.selected_run.static_path + self.logger.info(f"Selected {static_path}") # Clear existing items for dropdown in self.dropdowns.values(): @@ -298,13 +363,14 @@ def on_run_clicked(self, item, column): # Find VoxelSpacing directories # TODO hardcoded to match spacing = 10 voxel_spacing_dirs = glob.glob(os.path.join(static_path, "VoxelSpacing10*")) + segmentation_dir = self.get_segmentations_directory(static_path) if not voxel_spacing_dirs: # Check if at least one VoxelSpacing directory was found print(f"No Voxel Spacing directories found in {static_path}. 
Please check the directory structure.") return + # First handle image and features self.voxel_spacing_dir = voxel_spacing_dirs[0] - for voxel_spacing_dir in voxel_spacing_dirs: # Find all Zarr datasets within the voxel spacing directory zarr_datasets = glob.glob(os.path.join(voxel_spacing_dir, "*.zarr")) @@ -314,19 +380,33 @@ def on_run_clicked(self, item, column): dataset_name = os.path.basename(dataset_path) if "_features.zarr" in dataset_name.lower(): self.dropdowns["features"].addItem(dataset_name, dataset_path) - elif "painting.zarr" in dataset_name.lower(): - self.dropdowns["painting"].addItem(dataset_name, dataset_path) - elif "prediction.zarr" in dataset_name.lower(): - self.dropdowns["prediction"].addItem(dataset_name, dataset_path) else: # This is for the image dropdown, excluding features, painting, and prediction zarr files self.dropdowns["image"].addItem(dataset_name, dataset_path) - # Set defaults for painting and prediction layers, assuming they follow a fixed naming convention - # and are expected to be located in a specific VoxelSpacing directory, adjusting as necessary + # Find all Zarr datasets within the Segmentations directory + os.makedirs(segmentation_dir, exist_ok=True) + zarr_datasets = glob.glob(os.path.join(segmentation_dir, "*.zarr")) - + voxel_spacing = 10 + session_id = 0 + + default_painting_path = os.path.join(segmentation_dir, f'{voxel_spacing:.3f}_cellcanvas-painting_{session_id}_all-multilabel.zarr') + default_prediction_path = os.path.join(segmentation_dir, f'{voxel_spacing:.3f}_cellcanvas-prediction_{session_id}_all-multilabel.zarr') + + self.dropdowns["painting"].addItem(os.path.basename(default_painting_path), default_painting_path) + self.dropdowns["prediction"].addItem(os.path.basename(default_prediction_path), default_prediction_path) + + # Filtering the paths for each dropdown category + for dataset_path in zarr_datasets: + dataset_name = os.path.basename(dataset_path) + # Do not add painting or prediction to prediction or 
painting respectively + if "painting" not in dataset_name.lower(): + self.dropdowns["prediction"].addItem(dataset_name, dataset_path) + if "prediction" not in dataset_name.lower(): + self.dropdowns["painting"].addItem(dataset_name, dataset_path) + def on_item_clicked(self, item, column): data = item.data(0, Qt.UserRole) if data: @@ -334,10 +414,12 @@ def on_item_clicked(self, item, column): self.open_picks(data) elif isinstance(data, copick.impl.filesystem.CopickTomogramFSSpec): self.open_tomogram(data) + elif isinstance(data, copick.models.CopickSegmentation): + self.open_labels(data) def open_picks(self, picks): with open(picks.path, 'r') as f: - points_data = json.load(f) + points_data = json.load(f) # Extracting points locations points_locations = [ @@ -354,11 +436,20 @@ def open_picks(self, picks): def open_tomogram(self, tomogram): zarr_store = zarr.open(tomogram.zarr(), mode='r') + print(f"open_tomogram {tomogram.zarr()}") # TODO extract scale/transform info # TODO scale is hard coded to 10 here self.viewer.add_image(zarr_store[0], name=f"Tomogram: {tomogram.tomo_type}") + def open_labels(self, tomogram): + zarr_store = zarr.open(tomogram.zarr(), mode='r') + print(f"open_labels {tomogram.zarr()}") + # TODO extract scale/transform info + + # TODO scale is hard coded to 10 here + self.viewer.add_image(zarr_store[0], name=f"Tomogram: {tomogram.name}") + def initialize_or_update_cell_canvas(self): # Collect paths from dropdowns paths = {layer: dropdown.currentText() for layer, dropdown in self.dropdowns.items()} diff --git a/src/cellcanvas/data/data_manager.py b/src/cellcanvas/data/data_manager.py index 44e4f9f..e114816 100644 --- a/src/cellcanvas/data/data_manager.py +++ b/src/cellcanvas/data/data_manager.py @@ -16,6 +16,7 @@ def __init__(self, datasets: Optional[List[DataSet]] = None): datasets = [datasets] self.datasets = SelectableEventedList(datasets) + # Normal version def get_training_data(self) -> Tuple[Array, Array]: """Get the pixel-wise semantic 
segmentation training data for datasets. diff --git a/src/cellcanvas/data/data_set.py b/src/cellcanvas/data/data_set.py index f61b423..117c42f 100644 --- a/src/cellcanvas/data/data_set.py +++ b/src/cellcanvas/data/data_set.py @@ -6,6 +6,8 @@ import zarr from zarr import Array +from ome_zarr.io import ZarrLocation +from ome_zarr.reader import Multiscales @dataclass class DataSet: @@ -62,7 +64,11 @@ def from_paths( dimension_separator=".", ) else: - labels = zarr.open(labels_path, "a") + if Multiscales.matches(ZarrLocation(labels_path)): + labels = zarr.open(os.path.join(labels_path, "0"), + "a") + else: + labels = zarr.open(labels_path, "a") # get the segmentation if (not os.path.isdir(segmentation_path)) and make_missing_datasets: diff --git a/src/cellcanvas/semantic/_embedding_segmentor.py b/src/cellcanvas/semantic/_embedding_segmentor.py index 0868402..29450de 100644 --- a/src/cellcanvas/semantic/_embedding_segmentor.py +++ b/src/cellcanvas/semantic/_embedding_segmentor.py @@ -173,8 +173,8 @@ def _init_viewer_layers(self): # dtype="i4", # dimension_separator=".", # ) - - self.painting_data = self.data.datasets[0].labels.astype(int) + self.painting_data = self.data.datasets[0].labels + # .data.astype("i4") self.painting_layer = self.viewer.add_labels( self.painting_data, name="Painting", @@ -510,7 +510,6 @@ def start_model_fit(self): self.model_fit_worker = self.model_fit_thread(self.get_model_type()) self.model_fit_worker.returned.connect(self.on_model_fit_completed) - # TODO update UI to indicate that model training has started self.model_fit_worker.start() def on_model_fit_completed(self, model): @@ -1196,6 +1195,7 @@ def setupLegend(self): def activateLabel(self, label_id): painting_layer = self.app.get_painting_layer() painting_layer.selected_label = label_id + self.updateLegendHighlighting() def updateLegendHighlighting(self, selected_label_event): """Update highlighting of legend""" diff --git a/src/cellcanvas/semantic/segmentation_manager.py 
b/src/cellcanvas/semantic/segmentation_manager.py index 3d246ec..09e5d67 100644 --- a/src/cellcanvas/semantic/segmentation_manager.py +++ b/src/cellcanvas/semantic/segmentation_manager.py @@ -1,12 +1,14 @@ from typing import Protocol +import sys +import logging import numpy as np import dask.array as da from dask import delayed from sklearn.exceptions import NotFittedError from cellcanvas.data.data_manager import DataManager - +from tqdm import tqdm class SegmentationModel(Protocol): """Protocol for semantic segmentations models that are @@ -22,14 +24,31 @@ def __init__(self, data: DataManager, model: SegmentationModel): self.data = data self.model = model + self._init_logging() + + def _init_logging(self): + self.logger = logging.getLogger("cellcanvas") + self.logger.setLevel(logging.DEBUG) + streamHandler = logging.StreamHandler(sys.stdout) + formatter = logging.Formatter( + "%(asctime)s - %(name)s - %(levelname)s - %(message)s" + ) + streamHandler.setFormatter(formatter) + self.logger.addHandler(streamHandler) + def update_data_manager(self, data: DataManager): self.data = data - + def fit(self): """Fit using the model using the data in the data manager.""" + self.logger.info("Starting to fit") + # Get training data from the data manager features, labels = self.data.get_training_data() + features_computed, labels_computed = features.compute(), labels.compute() + self.logger.info("Starting the actual model fit") + self.model.fit(features_computed, labels_computed) def predict(self, feature_image): diff --git a/src/cellcanvas/utils.py b/src/cellcanvas/utils.py index fbc7211..338ee7e 100644 --- a/src/cellcanvas/utils.py +++ b/src/cellcanvas/utils.py @@ -78,3 +78,6 @@ def paint(self, coord, new_label, refresh=True): ) return paint + +def get_active_button_color(): + return "#AF8B38" From e5c106dc7a10bcbc6664561856395683a9b6706e Mon Sep 17 00:00:00 2001 From: Kyle Harrington Date: Wed, 17 Apr 2024 10:24:32 -0400 Subject: [PATCH 15/30] Fix default painting/prediction 
path, remove settings from UI --- examples/run_app_copick.py | 4 +- .../semantic/_embedding_segmentor.py | 74 ++++++++----------- src/cellcanvas/utils.py | 6 +- 3 files changed, 36 insertions(+), 48 deletions(-) diff --git a/examples/run_app_copick.py b/examples/run_app_copick.py index ed2d633..f3ad0cd 100644 --- a/examples/run_app_copick.py +++ b/examples/run_app_copick.py @@ -474,8 +474,8 @@ def initialize_or_update_cell_canvas(self): dataset = DataSet.from_paths( image_path=os.path.join(self.voxel_spacing_dir, f"{paths['image']}/0"), features_path=os.path.join(self.voxel_spacing_dir, paths["features"]), - labels_path=default_painting_path if not paths["painting"] else os.path.join(self.voxel_spacing_dir, paths["painting"]), - segmentation_path=default_prediction_path if not paths["prediction"] else os.path.join(self.voxel_spacing_dir, paths["prediction"]), + labels_path=default_painting_path if not paths["painting"] else os.path.join(segmentation_dir, paths["painting"]), + segmentation_path=default_prediction_path if not paths["prediction"] else os.path.join(segmentation_dir, paths["prediction"]), make_missing_datasets=True, ) except FileNotFoundError: diff --git a/src/cellcanvas/semantic/_embedding_segmentor.py b/src/cellcanvas/semantic/_embedding_segmentor.py index 29450de..1863a1e 100644 --- a/src/cellcanvas/semantic/_embedding_segmentor.py +++ b/src/cellcanvas/semantic/_embedding_segmentor.py @@ -19,7 +19,7 @@ from napari.qt.threading import thread_worker from napari.utils import DirectLabelColormap from psygnal import debounced -from qtpy.QtCore import Qt, Signal +from qtpy.QtCore import Qt, Signal, Slot from qtpy.QtGui import QColor, QPainter, QPixmap from qtpy.QtWidgets import ( QCheckBox, @@ -35,6 +35,7 @@ QVBoxLayout, QWidget, ) +from qtpy import QtCore, QtWidgets from sklearn.cross_decomposition import PLSRegression from sklearn.ensemble import RandomForestClassifier from sklearn.utils.class_weight import compute_class_weight @@ -222,7 +223,7 @@ def 
_connect_events(self): listener.connect( debounced( ensure_main_thread(on_data_change_handler), - timeout=1000, + timeout=5000, ) ) @@ -378,10 +379,10 @@ def update_model(self, model_type): # Model fitting if model_type == "Random Forest": clf = RandomForestClassifier( - n_estimators=50, + n_estimators=200, n_jobs=-1, - max_depth=10, - max_samples=0.05, + max_depth=15, + max_samples=0.1, class_weight=weight_dict, ) self.segmentation_manager.model = clf @@ -576,9 +577,6 @@ def update_class_distribution_charts(self): self.logger.info( f"image layer: contrast_limits = {self.viewer.layers['Image'].contrast_limits}, opacity = {self.viewer.layers['Image'].opacity}, gamma = {self.viewer.layers['Image'].gamma}" # noqa G004 ) - self.logger.info( - f"Current model type: {self.widget.model_dropdown.currentText()}" # noqa G004 - ) # Calculate percentages instead of raw counts painting_percentages = (painting_counts / total_pixels) * 100 @@ -916,10 +914,10 @@ def paint_thread(self, lasso_path, target_label): # Update the painting data self.painting_data[z, y, x] = target_label - if self.extra_logging: - self.logger.info( - f"lasso paint: label = {target_label}, indices = {paint_indices}" # noqa G004 - ) + # if self.extra_logging: + # self.logger.info( + # f"lasso paint: label = {target_label}, indices = {paint_indices}" # noqa G004 + # ) # print(f"Painted {np.sum(contained)} pixels with label {target_label}") @@ -932,7 +930,7 @@ def __init__(self, label_id, *args, **kwargs): def mousePressEvent(self, event): self.clicked.emit(self.label_id) - + class EmbeddingPaintingWidget(QWidget): def __init__(self, app, parent=None): super().__init__(parent=parent) @@ -945,22 +943,6 @@ def initUI(self): self.legend_placeholder_index = 0 - # Settings Group - settings_group = QGroupBox("Settings") - settings_layout = QVBoxLayout() - - model_layout = QHBoxLayout() - model_label = QLabel("Select Model") - self.model_dropdown = QComboBox() - self.model_dropdown.addItems(["Random Forest", 
"XGBoost"]) - model_layout.addWidget(model_label) - model_layout.addWidget(self.model_dropdown) - settings_layout.addLayout(model_layout) - - self.add_features_button = QPushButton("Add Features") - self.add_features_button.clicked.connect(self.add_features) - settings_layout.addWidget(self.add_features_button) - thickness_layout = QHBoxLayout() thickness_label = QLabel("Adjust Slice Thickness") self.thickness_slider = QSlider(Qt.Horizontal) @@ -970,14 +952,11 @@ def initUI(self): self.thickness_slider.setValue(10) thickness_layout.addWidget(thickness_label) thickness_layout.addWidget(self.thickness_slider) - settings_layout.addLayout(thickness_layout) - + main_layout.addLayout(thickness_layout) + # Update layer contrast limits after thick slices has effect self.app.viewer.layers["Image"].reset_contrast_limits() - settings_group.setLayout(settings_layout) - main_layout.addWidget(settings_group) - # Controls Group controls_group = QGroupBox("Controls") controls_layout = QVBoxLayout() @@ -1000,6 +979,14 @@ def initUI(self): live_pred_layout.addWidget(self.live_pred_button) controls_layout.addLayout(live_pred_layout) + # Connect checkbox signals to actions + self.live_fit_checkbox.stateChanged.connect(self.on_live_fit_changed) + self.live_pred_checkbox.stateChanged.connect(self.on_live_pred_changed) + + # Connect button clicks to actions + self.live_fit_button.clicked.connect(self.app.start_model_fit) + self.live_pred_button.clicked.connect(self.app.start_prediction) + # Export model self.export_model_button = QPushButton("Export Model") controls_layout.addWidget(self.export_model_button) @@ -1044,14 +1031,6 @@ def initUI(self): self.setLayout(main_layout) - # Connect checkbox signals to actions - self.live_fit_checkbox.stateChanged.connect(self.on_live_fit_changed) - self.live_pred_checkbox.stateChanged.connect(self.on_live_pred_changed) - - # Connect button clicks to actions - self.live_fit_button.clicked.connect(self.app.start_model_fit) - 
self.live_pred_button.clicked.connect(self.app.start_prediction) - def add_features(self): zarr_path = QFileDialog.getExistingDirectory(self, "Select Directory") @@ -1192,10 +1171,15 @@ def setupLegend(self): self.legend_placeholder_index, self.legend_group ) - def activateLabel(self, label_id): + def activateLabel(self, current_label_id): painting_layer = self.app.get_painting_layer() - painting_layer.selected_label = label_id - self.updateLegendHighlighting() + painting_layer.selected_label = current_label_id + + for label_id, label_edit in self.label_edits.items(): + if label_id == current_label_id: + self.highlightLabel(label_edit) + else: + self.removeHighlightLabel(label_edit) def updateLegendHighlighting(self, selected_label_event): """Update highlighting of legend""" diff --git a/src/cellcanvas/utils.py b/src/cellcanvas/utils.py index 338ee7e..0041ae2 100644 --- a/src/cellcanvas/utils.py +++ b/src/cellcanvas/utils.py @@ -3,6 +3,9 @@ sphere_indices, ) +from qtpy.QtWidgets import (QApplication, QGroupBox, QVBoxLayout, QHBoxLayout, + QLabel, QComboBox, QPushButton, QWidget, QCheckBox) +from qtpy.QtCore import Slot, Qt def get_labels_colormap(): """Return a colormap for distinct label colors based on: @@ -71,7 +74,7 @@ def paint(self, coord, new_label, refresh=True): int ) - logger.info("paint: label = %s, indices = %s", new_label, mask_indices) + # logger.info("paint: label = %s, indices = %s", new_label, mask_indices) self._paint_indices( mask_indices, new_label, shape, dims_to_paint, slice_coord, refresh @@ -81,3 +84,4 @@ def paint(self, coord, new_label, refresh=True): def get_active_button_color(): return "#AF8B38" + From ec75334bcad39b879abf3f5454ccb47b99e0a7ef Mon Sep 17 00:00:00 2001 From: Kyle Harrington Date: Wed, 17 Apr 2024 11:48:05 -0400 Subject: [PATCH 16/30] Clean up path handling and support configs per run --- examples/run_app_copick.py | 210 +++++++++++++++++++++++-------------- 1 file changed, 134 insertions(+), 76 deletions(-) diff --git 
a/examples/run_app_copick.py b/examples/run_app_copick.py index f3ad0cd..82475f3 100644 --- a/examples/run_app_copick.py +++ b/examples/run_app_copick.py @@ -128,6 +128,9 @@ def __init__(self, viewer: napari.Viewer, root): self.populate_tree() + def get_voxel_spacing(self): + return 10 + def _init_logging(self): self.logger = logging.getLogger("cellcanvas") self.logger.setLevel(logging.DEBUG) @@ -179,40 +182,67 @@ def activate_layer(self, layer): def get_complete_data_manager(self): datasets = [] for run in self.root.runs: - static_path = run.static_path - # Assume there is a method to get the default voxel spacing directory for each run - voxel_spacing_dir = self.get_default_voxel_spacing_directory(static_path) + run_dir = run.static_path + config_path = os.path.join(run_dir, "dataset_config.json") + + voxel_spacing_dir = self.get_default_voxel_spacing_directory(run_dir) + segmentation_dir = self.get_segmentations_directory(run_dir) if not voxel_spacing_dir: print(f"No Voxel Spacing directory found for run {run.name}.") continue - # Get all Zarr datasets within the voxel spacing directory - zarr_datasets = glob.glob(os.path.join(voxel_spacing_dir, "*.zarr")) - - # Initialize paths - image_path = None - features_path = None - painting_path = os.path.join(voxel_spacing_dir, "painting_001.zarr") - prediction_path = os.path.join(voxel_spacing_dir, "prediction_001.zarr") + os.makedirs(segmentation_dir, exist_ok=True) - # Assign paths based on dataset names - for dataset_path in zarr_datasets: - dataset_name = os.path.basename(dataset_path) - if "_features.zarr" in dataset_name.lower(): - features_path = dataset_path - elif "painting" in dataset_name.lower(): - painting_path = dataset_path - elif "prediction" in dataset_name.lower(): - prediction_path = dataset_path - else: - image_path = dataset_path - - # Assume each dataset should be loaded with a specific method that may also handle missing datasets + if os.path.exists(config_path): + with open(config_path, 'r') as 
file: + config = json.load(file) + image_path = os.path.join(voxel_spacing_dir, config['image']) + features_path = os.path.join(voxel_spacing_dir, config['features']) + painting_path = os.path.join(segmentation_dir, config['painting']) + prediction_path = os.path.join(segmentation_dir, config['prediction']) + else: + # Existing logic to find paths + voxel_spacing = self.get_voxel_spacing() + + zarr_datasets = glob.glob(os.path.join(voxel_spacing_dir, "*.zarr")) + image_path = None + features_path = None + painting_path = os.path.join(segmentation_dir, f'{voxel_spacing:.3f}_cellcanvas-painting_0_all-multilabel.zarr') + prediction_path = os.path.join(segmentation_dir, f'{voxel_spacing:.3f}_cellcanvas-prediction_0_all-multilabel.zarr') + + for dataset_path in zarr_datasets: + dataset_name = os.path.basename(dataset_path).lower() + if "_features.zarr" in dataset_name: + features_path = dataset_path + elif "painting" in dataset_name: + painting_path = dataset_path + elif "prediction" in dataset_name: + prediction_path = dataset_path + else: + # TODO hard coded to use highest resolution + image_path = os.path.join(dataset_path, "0") + + # Save paths to JSON + config = { + 'image': os.path.relpath(image_path, voxel_spacing_dir), + 'features': os.path.relpath(features_path, voxel_spacing_dir), + 'painting': os.path.relpath(painting_path, segmentation_dir), + 'prediction': os.path.relpath(prediction_path, segmentation_dir) + } + with open(config_path, 'w') as file: + json.dump(config, file) + + print(f"Fitting on paths:") + print(f"Image: {image_path}") + print(f"Features: {features_path}") + print(f"Painting: {painting_path}") + print(f"Prediction: {prediction_path}") + + # Load dataset with paths if image_path and features_path: - # TODO remove hack for highest resolution dataset = DataSet.from_paths( - image_path=os.path.join(image_path, "0"), + image_path=image_path, features_path=features_path, labels_path=painting_path, segmentation_path=prediction_path, @@ -220,12 
+250,12 @@ def get_complete_data_manager(self): ) datasets.append(dataset) - # Create a new data manager with all datasets return DataManager(datasets=datasets) def get_default_voxel_spacing_directory(self, static_path): # Find VoxelSpacing directories, assuming a hard coded match for now - voxel_spacing_dirs = glob.glob(os.path.join(static_path, "VoxelSpacing10*")) + voxel_spacing = self.get_voxel_spacing() + voxel_spacing_dirs = glob.glob(os.path.join(static_path, f'VoxelSpacing{voxel_spacing:.3f}')) if voxel_spacing_dirs: return voxel_spacing_dirs[0] return None @@ -360,52 +390,57 @@ def on_run_clicked(self, item, column): for dropdown in self.dropdowns.values(): dropdown.clear() - # Find VoxelSpacing directories - # TODO hardcoded to match spacing = 10 - voxel_spacing_dirs = glob.glob(os.path.join(static_path, "VoxelSpacing10*")) - segmentation_dir = self.get_segmentations_directory(static_path) + config_path = os.path.join(static_path, "dataset_config.json") + if os.path.exists(config_path): + # Use the JSON file to set up the dropdowns + with open(config_path, 'r') as file: + config = json.load(file) + + # Populate dropdowns using the paths in the config + for key, rel_path in config.items(): + abs_path = os.path.join(static_path, rel_path) + if os.path.exists(abs_path): + dropdown_key = key.split('_')[0] # 'image', 'features', 'painting', 'prediction' + self.dropdowns[dropdown_key].addItem(rel_path, abs_path) + else: + # Find VoxelSpacing directories + # TODO hard coded voxel spacing here + voxel_spacing_dirs = glob.glob(os.path.join(static_path, "VoxelSpacing10*")) + segmentation_dir = self.get_segmentations_directory(static_path) + + if not voxel_spacing_dirs: + print(f"No Voxel Spacing directories found in {static_path}. 
Please check the directory structure.") + return + + voxel_spacing_dir = voxel_spacing_dirs[0] + for voxel_spacing_dir in voxel_spacing_dirs: + # Find all Zarr datasets within the voxel spacing directory + zarr_datasets = glob.glob(os.path.join(voxel_spacing_dir, "*.zarr")) + + for dataset_path in zarr_datasets: + dataset_name = os.path.basename(dataset_path) + if "_features.zarr" in dataset_name.lower(): + self.dropdowns["features"].addItem(dataset_name, os.path.join(voxel_spacing_dir, dataset_path)) + else: + self.dropdowns["image"].addItem(dataset_name + "/0", os.path.join(voxel_spacing_dir, dataset_path, "0")) - if not voxel_spacing_dirs: # Check if at least one VoxelSpacing directory was found - print(f"No Voxel Spacing directories found in {static_path}. Please check the directory structure.") - return + # Handling segmentations + os.makedirs(segmentation_dir, exist_ok=True) + zarr_datasets = glob.glob(os.path.join(segmentation_dir, "*.zarr")) + voxel_spacing = self.get_voxel_spacing() + session_id = 0 + default_painting_path = os.path.join(segmentation_dir, f'{voxel_spacing:.3f}_cellcanvas-painting_{session_id}_all-multilabel.zarr') + default_prediction_path = os.path.join(segmentation_dir, f'{voxel_spacing:.3f}_cellcanvas-prediction_{session_id}_all-multilabel.zarr') + + self.dropdowns["painting"].addItem(os.path.basename(default_painting_path), default_painting_path) + self.dropdowns["prediction"].addItem(os.path.basename(default_prediction_path), default_prediction_path) - # First handle image and features - self.voxel_spacing_dir = voxel_spacing_dirs[0] - for voxel_spacing_dir in voxel_spacing_dirs: - # Find all Zarr datasets within the voxel spacing directory - zarr_datasets = glob.glob(os.path.join(voxel_spacing_dir, "*.zarr")) - - # Filtering the paths for each dropdown category for dataset_path in zarr_datasets: dataset_name = os.path.basename(dataset_path) - if "_features.zarr" in dataset_name.lower(): - 
self.dropdowns["features"].addItem(dataset_name, dataset_path) - else: - # This is for the image dropdown, excluding features, painting, and prediction zarr files - self.dropdowns["image"].addItem(dataset_name, dataset_path) - - - # Find all Zarr datasets within the Segmentations directory - os.makedirs(segmentation_dir, exist_ok=True) - zarr_datasets = glob.glob(os.path.join(segmentation_dir, "*.zarr")) - - voxel_spacing = 10 - session_id = 0 - - default_painting_path = os.path.join(segmentation_dir, f'{voxel_spacing:.3f}_cellcanvas-painting_{session_id}_all-multilabel.zarr') - default_prediction_path = os.path.join(segmentation_dir, f'{voxel_spacing:.3f}_cellcanvas-prediction_{session_id}_all-multilabel.zarr') - - self.dropdowns["painting"].addItem(os.path.basename(default_painting_path), default_painting_path) - self.dropdowns["prediction"].addItem(os.path.basename(default_prediction_path), default_prediction_path) - - # Filtering the paths for each dropdown category - for dataset_path in zarr_datasets: - dataset_name = os.path.basename(dataset_path) - # Do not add painting or prediction to prediction or painting respectively - if "painting" not in dataset_name.lower(): - self.dropdowns["prediction"].addItem(dataset_name, dataset_path) - if "prediction" not in dataset_name.lower(): - self.dropdowns["painting"].addItem(dataset_name, dataset_path) + if "painting" not in dataset_name.lower(): + self.dropdowns["prediction"].addItem(dataset_name, dataset_path) + if "prediction" not in dataset_name.lower(): + self.dropdowns["painting"].addItem(dataset_name, dataset_path) def on_item_clicked(self, item, column): data = item.data(0, Qt.UserRole) @@ -458,10 +493,11 @@ def initialize_or_update_cell_canvas(self): print("Please ensure image and feature paths are selected before initializing/updating CellCanvas.") return - # TODO put these into the segmentations directory + run_dir = self.selected_run.static_path segmentation_dir = 
self.get_segmentations_directory(self.selected_run.static_path) + voxel_spacing_dir = self.get_default_voxel_spacing_directory(self.selected_run.static_path) - voxel_spacing = 10 + voxel_spacing = self.get_voxel_spacing() # Ensure segmentations directory exists os.makedirs(segmentation_dir, exist_ok=True) @@ -469,19 +505,41 @@ def initialize_or_update_cell_canvas(self): default_painting_path = os.path.join(segmentation_dir, f'{voxel_spacing:.3f}_cellcanvas-painting_0_all-multilabel.zarr') default_prediction_path = os.path.join(segmentation_dir, f'{voxel_spacing:.3f}_cellcanvas-prediction_0_all-multilabel.zarr') + painting_path = default_painting_path if not paths["painting"] else os.path.join(segmentation_dir, paths["painting"]) + prediction_path = default_prediction_path if not paths["prediction"] else os.path.join(segmentation_dir, paths["prediction"]) + image_path = os.path.join(voxel_spacing_dir, paths['image']) + features_path = os.path.join(voxel_spacing_dir, paths["features"]) + # TODO note this is hard coded to use the highest resolution of a multiscale zarr + print(f"Opening paths:") + print(f"Image: {image_path}") + print(f"Features: {features_path}") + print(f"Painting: {painting_path}") + print(f"Prediction: {prediction_path}") try: dataset = DataSet.from_paths( - image_path=os.path.join(self.voxel_spacing_dir, f"{paths['image']}/0"), - features_path=os.path.join(self.voxel_spacing_dir, paths["features"]), - labels_path=default_painting_path if not paths["painting"] else os.path.join(segmentation_dir, paths["painting"]), - segmentation_path=default_prediction_path if not paths["prediction"] else os.path.join(segmentation_dir, paths["prediction"]), + image_path=image_path, + features_path=features_path, + labels_path=painting_path, + segmentation_path=prediction_path, make_missing_datasets=True, ) except FileNotFoundError: print(f"File {path} not found!", file=sys.stderr) return + config_path = os.path.join(run_dir, "dataset_config.json") + + config = { 
+ 'image': os.path.relpath(os.path.join(voxel_spacing_dir, f"{paths['image']}"), voxel_spacing_dir), + 'features': os.path.relpath(os.path.join(voxel_spacing_dir, paths["features"]), voxel_spacing_dir), + 'painting': os.path.relpath(painting_path, segmentation_dir), + 'prediction': os.path.relpath(prediction_path, segmentation_dir) + } + + with open(config_path, 'w') as file: + json.dump(config, file) + data_manager = DataManager(datasets=[dataset]) if not self.cell_canvas_app: From b9995122866c534aa27c47aefd3f12eabe1f4ca7 Mon Sep 17 00:00:00 2001 From: Kyle Harrington Date: Wed, 17 Apr 2024 12:11:28 -0400 Subject: [PATCH 17/30] Make embedding computation button triggered, improved config supp --- examples/run_app_copick.py | 83 ++++++++++--------- .../semantic/_embedding_segmentor.py | 7 +- 2 files changed, 48 insertions(+), 42 deletions(-) diff --git a/examples/run_app_copick.py b/examples/run_app_copick.py index 82475f3..a686edd 100644 --- a/examples/run_app_copick.py +++ b/examples/run_app_copick.py @@ -390,58 +390,59 @@ def on_run_clicked(self, item, column): for dropdown in self.dropdowns.values(): dropdown.clear() + # Define directories + voxel_spacing_dirs = glob.glob(os.path.join(static_path, "VoxelSpacing10*")) + segmentation_dir = self.get_segmentations_directory(static_path) + os.makedirs(segmentation_dir, exist_ok=True) + + # Initialize dictionary to hold default selections from config + default_selections = {} + + # Check for config file and load selections if present config_path = os.path.join(static_path, "dataset_config.json") if os.path.exists(config_path): - # Use the JSON file to set up the dropdowns with open(config_path, 'r') as file: config = json.load(file) - - # Populate dropdowns using the paths in the config - for key, rel_path in config.items(): - abs_path = os.path.join(static_path, rel_path) - if os.path.exists(abs_path): - dropdown_key = key.split('_')[0] # 'image', 'features', 'painting', 'prediction' - 
self.dropdowns[dropdown_key].addItem(rel_path, abs_path) - else: - # Find VoxelSpacing directories - # TODO hard coded voxel spacing here - voxel_spacing_dirs = glob.glob(os.path.join(static_path, "VoxelSpacing10*")) - segmentation_dir = self.get_segmentations_directory(static_path) - - if not voxel_spacing_dirs: - print(f"No Voxel Spacing directories found in {static_path}. Please check the directory structure.") - return - - voxel_spacing_dir = voxel_spacing_dirs[0] + default_selections = { + 'image': os.path.join(voxel_spacing_dirs[0], config.get('image')), + 'features': os.path.join(voxel_spacing_dirs[0], config.get('features')), + 'painting': os.path.join(segmentation_dir, config.get('painting')), + 'prediction': os.path.join(segmentation_dir, config.get('prediction')) + } + + # Helper function to add items if not already in dropdown + def add_item_if_not_exists(dropdown, item_name, item_data): + if dropdown.findData(item_data) == -1: + dropdown.addItem(item_name, item_data) + + # Load all zarr datasets from voxel spacing directories + if voxel_spacing_dirs: for voxel_spacing_dir in voxel_spacing_dirs: - # Find all Zarr datasets within the voxel spacing directory zarr_datasets = glob.glob(os.path.join(voxel_spacing_dir, "*.zarr")) - for dataset_path in zarr_datasets: dataset_name = os.path.basename(dataset_path) if "_features.zarr" in dataset_name.lower(): - self.dropdowns["features"].addItem(dataset_name, os.path.join(voxel_spacing_dir, dataset_path)) + add_item_if_not_exists(self.dropdowns["features"], dataset_name, dataset_path) else: - self.dropdowns["image"].addItem(dataset_name + "/0", os.path.join(voxel_spacing_dir, dataset_path, "0")) + add_item_if_not_exists(self.dropdowns["image"], dataset_name + "/0", dataset_path + "/0") + + # Load all zarr datasets from segmentation directory + zarr_datasets = glob.glob(os.path.join(segmentation_dir, "*.zarr")) + for dataset_path in zarr_datasets: + dataset_name = os.path.basename(dataset_path) + if "painting" not 
in dataset_name.lower(): + add_item_if_not_exists(self.dropdowns["prediction"], dataset_name, dataset_path) + if "prediction" not in dataset_name.lower(): + add_item_if_not_exists(self.dropdowns["painting"], dataset_name, dataset_path) + + # Set default selections in dropdowns if specified in the config + for key, dropdown in self.dropdowns.items(): + if default_selections.get(key): + index = dropdown.findData(default_selections[key]) + if index != -1: + dropdown.setCurrentIndex(index) + - # Handling segmentations - os.makedirs(segmentation_dir, exist_ok=True) - zarr_datasets = glob.glob(os.path.join(segmentation_dir, "*.zarr")) - voxel_spacing = self.get_voxel_spacing() - session_id = 0 - default_painting_path = os.path.join(segmentation_dir, f'{voxel_spacing:.3f}_cellcanvas-painting_{session_id}_all-multilabel.zarr') - default_prediction_path = os.path.join(segmentation_dir, f'{voxel_spacing:.3f}_cellcanvas-prediction_{session_id}_all-multilabel.zarr') - - self.dropdowns["painting"].addItem(os.path.basename(default_painting_path), default_painting_path) - self.dropdowns["prediction"].addItem(os.path.basename(default_prediction_path), default_prediction_path) - - for dataset_path in zarr_datasets: - dataset_name = os.path.basename(dataset_path) - if "painting" not in dataset_name.lower(): - self.dropdowns["prediction"].addItem(dataset_name, dataset_path) - if "prediction" not in dataset_name.lower(): - self.dropdowns["painting"].addItem(dataset_name, dataset_path) - def on_item_clicked(self, item, column): data = item.data(0, Qt.UserRole) if data: diff --git a/src/cellcanvas/semantic/_embedding_segmentor.py b/src/cellcanvas/semantic/_embedding_segmentor.py index 1863a1e..6f49933 100644 --- a/src/cellcanvas/semantic/_embedding_segmentor.py +++ b/src/cellcanvas/semantic/_embedding_segmentor.py @@ -262,7 +262,7 @@ def on_data_change(self, event, app): self.update_class_distribution_charts() # Update projection - self.start_computing_embedding_plot() + # 
self.start_computing_embedding_plot() self.widget.setupLegend() @@ -1026,6 +1026,11 @@ def initUI(self): self.embedding_canvas = FigureCanvas(self.embedding_figure) self.stats_summary_layout.addWidget(self.embedding_canvas) + # Create a button for computing the embedding plot + self.compute_embedding_button = QPushButton("Compute Embedding Plot") + self.compute_embedding_button.clicked.connect(self.app.start_computing_embedding_plot) + self.stats_summary_layout.addWidget(self.compute_embedding_button) + stats_summary_group.setLayout(self.stats_summary_layout) main_layout.addWidget(stats_summary_group) From 942cae3f919958cef456c7e0f419c80bd24c7552 Mon Sep 17 00:00:00 2001 From: Kyle Harrington Date: Wed, 17 Apr 2024 12:52:16 -0400 Subject: [PATCH 18/30] Support for training on all pairs of images (denoised, wbp, etc.) --- examples/run_app_copick.py | 193 +++++++++++++----- .../semantic/_embedding_segmentor.py | 2 + 2 files changed, 141 insertions(+), 54 deletions(-) diff --git a/examples/run_app_copick.py b/examples/run_app_copick.py index a686edd..29338d5 100644 --- a/examples/run_app_copick.py +++ b/examples/run_app_copick.py @@ -179,12 +179,10 @@ def activate_layer(self, layer): layer.editable = True self.viewer.layers.selection.active = layer - def get_complete_data_manager(self): + def get_complete_data_manager(self, all_pairs=False): datasets = [] for run in self.root.runs: run_dir = run.static_path - config_path = os.path.join(run_dir, "dataset_config.json") - voxel_spacing_dir = self.get_default_voxel_spacing_directory(run_dir) segmentation_dir = self.get_segmentations_directory(run_dir) @@ -193,64 +191,150 @@ def get_complete_data_manager(self): continue os.makedirs(segmentation_dir, exist_ok=True) + + voxel_spacing = self.get_voxel_spacing() + # Reused paths for all datasets in a run + painting_path = os.path.join(segmentation_dir, f'{voxel_spacing:.3f}_cellcanvas-painting_0_all-multilabel.zarr') + prediction_path = os.path.join(segmentation_dir, 
f'{voxel_spacing:.3f}_cellcanvas-prediction_0_all-multilabel.zarr') + + zarr_datasets = glob.glob(os.path.join(voxel_spacing_dir, "*.zarr")) + image_feature_pairs = {} + + # Locate all images and corresponding features + for dataset_path in zarr_datasets: + dataset_name = os.path.basename(dataset_path) + if dataset_name.endswith(".zarr") and not dataset_name.endswith("_features.zarr"): + base_image_name = dataset_name.replace(".zarr", "") + # Find corresponding feature files + feature_files = [path for path in zarr_datasets if base_image_name in path and "_features.zarr" in path] + for feature_path in feature_files: + features_base_name = os.path.basename(feature_path).replace("_features.zarr", "") + # Check if the image base name matches the start of the feature base name + if features_base_name.startswith(base_image_name): + image_feature_pairs[features_base_name] = { + 'image': os.path.join(dataset_path, "0"), # Assuming highest resolution + 'features': feature_path + } + + # Handle either all pairs or only those specified by the configuration + config_path = os.path.join(run_dir, "dataset_config.json") if os.path.exists(config_path): + with open(config_path, 'r') as file: + config = json.load(file) + if 'painting' in config: + painting_path = os.path.join(segmentation_dir, config['painting']) + if 'prediction' in config: + prediction_path = os.path.join(segmentation_dir, config['prediction']) + + if not all_pairs: with open(config_path, 'r') as file: config = json.load(file) image_path = os.path.join(voxel_spacing_dir, config['image']) features_path = os.path.join(voxel_spacing_dir, config['features']) - painting_path = os.path.join(segmentation_dir, config['painting']) - prediction_path = os.path.join(segmentation_dir, config['prediction']) + if 'painting' in config: + painting_path = os.path.join(segmentation_dir, config['painting']) + if 'prediction' in config: + prediction_path = os.path.join(segmentation_dir, config['prediction']) + + # Load dataset with 
specific config paths + dataset = DataSet.from_paths( + image_path=image_path, + features_path=features_path, + labels_path=painting_path, + segmentation_path=prediction_path, + make_missing_datasets=True + ) + datasets.append(dataset) else: - # Existing logic to find paths - voxel_spacing = self.get_voxel_spacing() - - zarr_datasets = glob.glob(os.path.join(voxel_spacing_dir, "*.zarr")) - image_path = None - features_path = None - painting_path = os.path.join(segmentation_dir, f'{voxel_spacing:.3f}_cellcanvas-painting_0_all-multilabel.zarr') - prediction_path = os.path.join(segmentation_dir, f'{voxel_spacing:.3f}_cellcanvas-prediction_0_all-multilabel.zarr') - - for dataset_path in zarr_datasets: - dataset_name = os.path.basename(dataset_path).lower() - if "_features.zarr" in dataset_name: - features_path = dataset_path - elif "painting" in dataset_name: - painting_path = dataset_path - elif "prediction" in dataset_name: - prediction_path = dataset_path - else: - # TODO hard coded to use highest resolution - image_path = os.path.join(dataset_path, "0") - - # Save paths to JSON - config = { - 'image': os.path.relpath(image_path, voxel_spacing_dir), - 'features': os.path.relpath(features_path, voxel_spacing_dir), - 'painting': os.path.relpath(painting_path, segmentation_dir), - 'prediction': os.path.relpath(prediction_path, segmentation_dir) - } - with open(config_path, 'w') as file: - json.dump(config, file) - - print(f"Fitting on paths:") - print(f"Image: {image_path}") - print(f"Features: {features_path}") - print(f"Painting: {painting_path}") - print(f"Prediction: {prediction_path}") + # Load all available pairs + for base_name, paths in image_feature_pairs.items(): + dataset = DataSet.from_paths( + image_path=paths['image'], + features_path=paths['features'], + labels_path=painting_path, + segmentation_path=prediction_path, + make_missing_datasets=True + ) + datasets.append(dataset) + + print(f"Loaded datasets for run {run.name}") + + return 
DataManager(datasets=datasets) + + # Only train on config pairs + # def get_complete_data_manager(self, all_pairs=False): + # datasets = [] + # for run in self.root.runs: + # run_dir = run.static_path + # config_path = os.path.join(run_dir, "dataset_config.json") + + # voxel_spacing_dir = self.get_default_voxel_spacing_directory(run_dir) + # segmentation_dir = self.get_segmentations_directory(run_dir) + + # if not voxel_spacing_dir: + # print(f"No Voxel Spacing directory found for run {run.name}.") + # continue + + # os.makedirs(segmentation_dir, exist_ok=True) + + # if os.path.exists(config_path): + # with open(config_path, 'r') as file: + # config = json.load(file) + # image_path = os.path.join(voxel_spacing_dir, config['image']) + # features_path = os.path.join(voxel_spacing_dir, config['features']) + # painting_path = os.path.join(segmentation_dir, config['painting']) + # prediction_path = os.path.join(segmentation_dir, config['prediction']) + # else: + # # Existing logic to find paths + # voxel_spacing = self.get_voxel_spacing() + + # zarr_datasets = glob.glob(os.path.join(voxel_spacing_dir, "*.zarr")) + # image_path = None + # features_path = None + # painting_path = os.path.join(segmentation_dir, f'{voxel_spacing:.3f}_cellcanvas-painting_0_all-multilabel.zarr') + # prediction_path = os.path.join(segmentation_dir, f'{voxel_spacing:.3f}_cellcanvas-prediction_0_all-multilabel.zarr') + + # for dataset_path in zarr_datasets: + # dataset_name = os.path.basename(dataset_path).lower() + # if "_features.zarr" in dataset_name: + # features_path = dataset_path + # elif "painting" in dataset_name: + # painting_path = dataset_path + # elif "prediction" in dataset_name: + # prediction_path = dataset_path + # else: + # # TODO hard coded to use highest resolution + # image_path = os.path.join(dataset_path, "0") + + # # Save paths to JSON + # config = { + # 'image': os.path.relpath(image_path, voxel_spacing_dir), + # 'features': os.path.relpath(features_path, 
voxel_spacing_dir), + # 'painting': os.path.relpath(painting_path, segmentation_dir), + # 'prediction': os.path.relpath(prediction_path, segmentation_dir) + # } + # with open(config_path, 'w') as file: + # json.dump(config, file) + + # print(f"Fitting on paths:") + # print(f"Image: {image_path}") + # print(f"Features: {features_path}") + # print(f"Painting: {painting_path}") + # print(f"Prediction: {prediction_path}") - # Load dataset with paths - if image_path and features_path: - dataset = DataSet.from_paths( - image_path=image_path, - features_path=features_path, - labels_path=painting_path, - segmentation_path=prediction_path, - make_missing_datasets=True - ) - datasets.append(dataset) - - return DataManager(datasets=datasets) + # # Load dataset with paths + # if image_path and features_path: + # dataset = DataSet.from_paths( + # image_path=image_path, + # features_path=features_path, + # labels_path=painting_path, + # segmentation_path=prediction_path, + # make_missing_datasets=True + # ) + # datasets.append(dataset) + + # return DataManager(datasets=datasets) def get_default_voxel_spacing_directory(self, static_path): # Find VoxelSpacing directories, assuming a hard coded match for now @@ -287,7 +371,8 @@ def fit_on_all(self): @thread_worker def threaded_fit_on_all(self): - data_manager = self.get_complete_data_manager() + # Fit model on all pairs + data_manager = self.get_complete_data_manager(all_pairs=True) clf = RandomForestClassifier( n_estimators=50, diff --git a/src/cellcanvas/semantic/_embedding_segmentor.py b/src/cellcanvas/semantic/_embedding_segmentor.py index 6f49933..6179b5b 100644 --- a/src/cellcanvas/semantic/_embedding_segmentor.py +++ b/src/cellcanvas/semantic/_embedding_segmentor.py @@ -1185,6 +1185,8 @@ def activateLabel(self, current_label_id): self.highlightLabel(label_edit) else: self.removeHighlightLabel(label_edit) + + self.app.viewer.layers.selection.active = painting_layer def updateLegendHighlighting(self, selected_label_event): 
"""Update highlighting of legend""" From 2c6e7dcf9f4208bf2536a4999af7b23b58aa992e Mon Sep 17 00:00:00 2001 From: Kyle Harrington Date: Mon, 22 Apr 2024 10:49:43 -0400 Subject: [PATCH 19/30] Fix for removed model dropdown --- examples/run_app_copick.py | 1 + src/cellcanvas/semantic/_embedding_segmentor.py | 4 +--- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/examples/run_app_copick.py b/examples/run_app_copick.py index 29338d5..b2b2d6d 100644 --- a/examples/run_app_copick.py +++ b/examples/run_app_copick.py @@ -54,6 +54,7 @@ # Project root root = CopickRootFSSpec.from_file("/Volumes/kish@CZI.T7/demo_project/copick_config_kyle.json") +# root = CopickRootFSSpec.from_file("/Volumes/kish@CZI.T7/chlamy_copick/copick_config_kyle.json") ## Root API root.config # CopickConfig object diff --git a/src/cellcanvas/semantic/_embedding_segmentor.py b/src/cellcanvas/semantic/_embedding_segmentor.py index 6179b5b..539b757 100644 --- a/src/cellcanvas/semantic/_embedding_segmentor.py +++ b/src/cellcanvas/semantic/_embedding_segmentor.py @@ -294,9 +294,7 @@ def threaded_on_data_change( self.start_prediction() def get_model_type(self): - if not self.model_type: - self.model_type = self.widget.model_dropdown.currentText() - return self.model_type + return "Random Forest" def get_corner_pixels(self): if self.corner_pixels is None: From faf4f13f2aab37327c1b66719b0eae8a7e52a76b Mon Sep 17 00:00:00 2001 From: Kyle Harrington Date: Mon, 22 Apr 2024 10:55:17 -0400 Subject: [PATCH 20/30] Refactor into if main clause --- examples/run_app_copick.py | 61 ++++++++++++++++---------------------- 1 file changed, 26 insertions(+), 35 deletions(-) diff --git a/examples/run_app_copick.py b/examples/run_app_copick.py index b2b2d6d..f1c5b2c 100644 --- a/examples/run_app_copick.py +++ b/examples/run_app_copick.py @@ -52,32 +52,6 @@ import dask.array as da -# Project root -root = CopickRootFSSpec.from_file("/Volumes/kish@CZI.T7/demo_project/copick_config_kyle.json") -# root = 
CopickRootFSSpec.from_file("/Volumes/kish@CZI.T7/chlamy_copick/copick_config_kyle.json") - -## Root API -root.config # CopickConfig object -root.runs # List of run objects (lazy loading from filesystem location(s)) - -# TODO update to use root.config.pickable_objects - - -def get_copick_colormap(): - """Return a colormap for distinct label colors based on the pickable objects.""" - colormap = {obj.label: np.array(obj.color)/255.0 for obj in root.config.pickable_objects} - colormap[None] = np.array([1, 1, 1, 1]) - colormap[9] = np.array([0, 1, 1, 1]) - return colormap - -cellcanvas.utils.get_labels_colormap = get_copick_colormap - -# Use the function -colormap = get_copick_colormap() - -# TODO set names from copick config -# cell_canvas.semantic_segmentor.widget.class_labels_mapping = {obj.label: obj.name for obj in root.config.pickable_objects} - import napari from qtpy.QtWidgets import QTreeWidget, QTreeWidgetItem, QVBoxLayout, QWidget from qtpy.QtCore import Qt @@ -129,6 +103,16 @@ def __init__(self, viewer: napari.Viewer, root): self.populate_tree() + # Monkeypatch + cellcanvas.utils.get_labels_colormap = self.get_copick_colormap + + def get_copick_colormap(self): + """Return a colormap for distinct label colors based on the pickable objects.""" + colormap = {obj.label: np.array(obj.color)/255.0 for obj in root.config.pickable_objects} + colormap[None] = np.array([1, 1, 1, 1]) + colormap[9] = np.array([0, 1, 1, 1]) + return colormap + def get_voxel_spacing(self): return 10 @@ -645,24 +629,32 @@ def initialize_or_update_cell_canvas(self): # Set colormap # painting_layer.colormap.color_dict # self.app.painting_labels - self.cell_canvas_app.semantic_segmentor.set_colormap(get_copick_colormap()) + self.cell_canvas_app.semantic_segmentor.set_colormap(self.get_copick_colormap()) self.cell_canvas_app.semantic_segmentor.painting_labels = [obj.label for obj in root.config.pickable_objects] + [9] self.cell_canvas_app.semantic_segmentor.widget.class_labels_mapping = 
{obj.label: obj.name for obj in root.config.pickable_objects} self.cell_canvas_app.semantic_segmentor.widget.class_labels_mapping[9] = 'background' self.cell_canvas_app.semantic_segmentor.widget.setupLegend() -viewer = napari.Viewer() +if __name__ == "__main__": + # Project root + root = CopickRootFSSpec.from_file("/Volumes/kish@CZI.T7/demo_project/copick_config_kyle.json") + # root = CopickRootFSSpec.from_file("/Volumes/kish@CZI.T7/chlamy_copick/copick_config_kyle.json") -# Hide layer list and controls -# viewer.window.qt_viewer.dockLayerList.setVisible(False) -# viewer.window.qt_viewer.dockLayerControls.setVisible(False) + ## Root API + root.config # CopickConfig object + root.runs # List of run objects (lazy loading from filesystem location(s)) + + viewer = napari.Viewer() -copick_explorer_widget = NapariCopickExplorer(viewer, root) -viewer.window.add_dock_widget(copick_explorer_widget, name="Copick Explorer", area="left") + # Hide layer list and controls + # viewer.window.qt_viewer.dockLayerList.setVisible(False) + # viewer.window.qt_viewer.dockLayerControls.setVisible(False) + copick_explorer_widget = NapariCopickExplorer(viewer, root) + viewer.window.add_dock_widget(copick_explorer_widget, name="Copick Explorer", area="left") -# napari.run() + # napari.run() # TODO finish making the prediction computation more lazy # the strategy should be to start computing labels chunkwise @@ -676,4 +668,3 @@ def initialize_or_update_cell_canvas(self): # - override exclusion of non-zero labels # - consistent colormap in the charts # - consistent colormap in the painted part of the labels image - From 24e2f28fbaec90376facf913b958e9b58327421e Mon Sep 17 00:00:00 2001 From: Kyle Harrington Date: Mon, 22 Apr 2024 11:20:37 -0400 Subject: [PATCH 21/30] Move copick widget into source tree --- src/cellcanvas/_copick/widget.py | 670 +++++++++++++++++++++++++++++++ 1 file changed, 670 insertions(+) create mode 100644 src/cellcanvas/_copick/widget.py diff --git 
a/src/cellcanvas/_copick/widget.py b/src/cellcanvas/_copick/widget.py new file mode 100644 index 0000000..f1c5b2c --- /dev/null +++ b/src/cellcanvas/_copick/widget.py @@ -0,0 +1,670 @@ +"""Example of using CellCanvas to pick particles on a surface. + +To use: +1. update base_file_path to point to cropped_covid.zarr example file +2. Run the script to launch CellCanvas +3. Paint/predict until you're happy with the result. The seeded labels are: + - 1: background (including inside the capsules) + - 2: membrane + - 3: spike proteins +3b. You might want to switch the image layer into the plane + depiction before doing the instance segmentation. + Sometimes I have trouble manipulating the plane after + the instance segmentation - need to look into this. +4. Once you're happy with the prediction, click the "instance segmentation" tab +5. Set the label value to 2. This will extract the membrane and + make instances via connected components. +6. Remove the small objects. Suggested threshold: 100 +7. Alt + left mouse button to select an instance to modify. + Once select, you can dilate, erode, etc. to smooth it. +8. With the segment still selected, you can then mesh it + using the mesh widget. You can play with the smoothing parameters. +9. If the mesh looks good, switch to the "geometry" tab. + Select the mesh and start surfing! 
+""" +from collections import defaultdict +import os +import numpy as np +import napari +import cellcanvas +from cellcanvas._app.main_app import CellCanvasApp, QtCellCanvas +from cellcanvas.data.data_manager import DataManager +from cellcanvas.data.data_set import DataSet +from napari.qt.threading import thread_worker + +import sys +import logging +import json +import copick +from copick.impl.filesystem import CopickRootFSSpec +import zarr + +from qtpy.QtWidgets import QTreeWidget, QTreeWidgetItem, QVBoxLayout, QWidget, QComboBox, QPushButton, QLabel +from qtpy.QtCore import Qt +import glob # For pattern matching of file names + +from sklearn.ensemble import RandomForestClassifier + +from cellcanvas.semantic.segmentation_manager import ( + SemanticSegmentationManager, +) +from cellcanvas.utils import get_active_button_color + +import dask.array as da + +import napari +from qtpy.QtWidgets import QTreeWidget, QTreeWidgetItem, QVBoxLayout, QWidget +from qtpy.QtCore import Qt + +class NapariCopickExplorer(QWidget): + def __init__(self, viewer: napari.Viewer, root): + super().__init__() + self.viewer = viewer + self.root = root + self.selected_run = None + self.cell_canvas_app = None + + layout = QVBoxLayout() + self.setLayout(layout) + + self._init_logging() + + # Adding new buttons for "Fit on all" and "Predict for all" + self.fit_all_button = QPushButton("Fit on all") + self.fit_all_button.clicked.connect(self.fit_on_all) + layout.addWidget(self.fit_all_button) + + self.predict_all_button = QPushButton("Predict for all") + self.predict_all_button.clicked.connect(self.predict_for_all) + layout.addWidget(self.predict_all_button) + + # Dropdowns for each data layer + self.dropdowns = {} + self.layer_buttons = {} + for layer in ["image", "features", "painting", "prediction"]: + # Make layer button + button = QPushButton(f"Select {layer.capitalize()} Layer") + button.clicked.connect(lambda checked, layer=layer: self.activate_layer(layer)) + layout.addWidget(button) + 
self.layer_buttons[layer] = button + # Make layer selection dropdown + self.dropdowns[layer] = QComboBox() + layout.addWidget(self.dropdowns[layer]) + + # Button to update CellCanvas with the selected dataset + self.update_button = QPushButton("Initialize/Update CellCanvas") + self.update_button.clicked.connect(self.initialize_or_update_cell_canvas) + layout.addWidget(self.update_button) + + self.tree = QTreeWidget() + self.tree.setHeaderLabel("Copick Runs") + self.tree.itemClicked.connect(self.on_run_clicked) + layout.addWidget(self.tree) + + self.populate_tree() + + # Monkeypatch + cellcanvas.utils.get_labels_colormap = self.get_copick_colormap + + def get_copick_colormap(self): + """Return a colormap for distinct label colors based on the pickable objects.""" + colormap = {obj.label: np.array(obj.color)/255.0 for obj in root.config.pickable_objects} + colormap[None] = np.array([1, 1, 1, 1]) + colormap[9] = np.array([0, 1, 1, 1]) + return colormap + + def get_voxel_spacing(self): + return 10 + + def _init_logging(self): + self.logger = logging.getLogger("cellcanvas") + self.logger.setLevel(logging.DEBUG) + streamHandler = logging.StreamHandler(sys.stdout) + formatter = logging.Formatter( + "%(asctime)s - %(name)s - %(levelname)s - %(message)s" + ) + streamHandler.setFormatter(formatter) + self.logger.addHandler(streamHandler) + + + def populate_tree(self): + for run in self.root.runs: + run_item = QTreeWidgetItem(self.tree, [run.name]) + run_item.setData(0, Qt.UserRole, run) + + for category in ["segmentations", "meshes", "picks", "voxel_spacings"]: + category_item = QTreeWidgetItem(run_item, [category]) + items = getattr(run, category) + for item in items: + if category == "picks": + item_name = item.pickable_object_name + else: + item_name = getattr(item, 'name', 'Unnamed') + + child_item = QTreeWidgetItem(category_item, [item_name]) + child_item.setData(0, Qt.UserRole, item) + + # list tomograms + if category == "voxel_spacings": + for tomogram in 
item.tomograms: + tomo_item = QTreeWidgetItem(child_item, [f"Tomogram: {tomogram.tomo_type}"]) + tomo_item.setData(0, Qt.UserRole, tomogram) + + def activate_layer(self, layer): + print(f"Activating layer {layer}") + if layer == "image": + layer = self.cell_canvas_app.semantic_segmentor.data_layer + elif layer == "painting": + layer = self.cell_canvas_app.semantic_segmentor.painting_layer + elif layer == "prediction": + layer = self.cell_canvas_app.semantic_segmentor.prediction_layer + else: + return + layer.visible = True + layer.editable = True + self.viewer.layers.selection.active = layer + + def get_complete_data_manager(self, all_pairs=False): + datasets = [] + for run in self.root.runs: + run_dir = run.static_path + voxel_spacing_dir = self.get_default_voxel_spacing_directory(run_dir) + segmentation_dir = self.get_segmentations_directory(run_dir) + + if not voxel_spacing_dir: + print(f"No Voxel Spacing directory found for run {run.name}.") + continue + + os.makedirs(segmentation_dir, exist_ok=True) + + voxel_spacing = self.get_voxel_spacing() + + # Reused paths for all datasets in a run + painting_path = os.path.join(segmentation_dir, f'{voxel_spacing:.3f}_cellcanvas-painting_0_all-multilabel.zarr') + prediction_path = os.path.join(segmentation_dir, f'{voxel_spacing:.3f}_cellcanvas-prediction_0_all-multilabel.zarr') + + zarr_datasets = glob.glob(os.path.join(voxel_spacing_dir, "*.zarr")) + image_feature_pairs = {} + + # Locate all images and corresponding features + for dataset_path in zarr_datasets: + dataset_name = os.path.basename(dataset_path) + if dataset_name.endswith(".zarr") and not dataset_name.endswith("_features.zarr"): + base_image_name = dataset_name.replace(".zarr", "") + # Find corresponding feature files + feature_files = [path for path in zarr_datasets if base_image_name in path and "_features.zarr" in path] + for feature_path in feature_files: + features_base_name = os.path.basename(feature_path).replace("_features.zarr", "") + # Check if 
the image base name matches the start of the feature base name + if features_base_name.startswith(base_image_name): + image_feature_pairs[features_base_name] = { + 'image': os.path.join(dataset_path, "0"), # Assuming highest resolution + 'features': feature_path + } + + # Handle either all pairs or only those specified by the configuration + config_path = os.path.join(run_dir, "dataset_config.json") + if os.path.exists(config_path): + with open(config_path, 'r') as file: + config = json.load(file) + if 'painting' in config: + painting_path = os.path.join(segmentation_dir, config['painting']) + if 'prediction' in config: + prediction_path = os.path.join(segmentation_dir, config['prediction']) + + if not all_pairs: + with open(config_path, 'r') as file: + config = json.load(file) + image_path = os.path.join(voxel_spacing_dir, config['image']) + features_path = os.path.join(voxel_spacing_dir, config['features']) + if 'painting' in config: + painting_path = os.path.join(segmentation_dir, config['painting']) + if 'prediction' in config: + prediction_path = os.path.join(segmentation_dir, config['prediction']) + + # Load dataset with specific config paths + dataset = DataSet.from_paths( + image_path=image_path, + features_path=features_path, + labels_path=painting_path, + segmentation_path=prediction_path, + make_missing_datasets=True + ) + datasets.append(dataset) + else: + # Load all available pairs + for base_name, paths in image_feature_pairs.items(): + dataset = DataSet.from_paths( + image_path=paths['image'], + features_path=paths['features'], + labels_path=painting_path, + segmentation_path=prediction_path, + make_missing_datasets=True + ) + datasets.append(dataset) + + print(f"Loaded datasets for run {run.name}") + + return DataManager(datasets=datasets) + + # Only train on config pairs + # def get_complete_data_manager(self, all_pairs=False): + # datasets = [] + # for run in self.root.runs: + # run_dir = run.static_path + # config_path = os.path.join(run_dir, 
"dataset_config.json") + + # voxel_spacing_dir = self.get_default_voxel_spacing_directory(run_dir) + # segmentation_dir = self.get_segmentations_directory(run_dir) + + # if not voxel_spacing_dir: + # print(f"No Voxel Spacing directory found for run {run.name}.") + # continue + + # os.makedirs(segmentation_dir, exist_ok=True) + + # if os.path.exists(config_path): + # with open(config_path, 'r') as file: + # config = json.load(file) + # image_path = os.path.join(voxel_spacing_dir, config['image']) + # features_path = os.path.join(voxel_spacing_dir, config['features']) + # painting_path = os.path.join(segmentation_dir, config['painting']) + # prediction_path = os.path.join(segmentation_dir, config['prediction']) + # else: + # # Existing logic to find paths + # voxel_spacing = self.get_voxel_spacing() + + # zarr_datasets = glob.glob(os.path.join(voxel_spacing_dir, "*.zarr")) + # image_path = None + # features_path = None + # painting_path = os.path.join(segmentation_dir, f'{voxel_spacing:.3f}_cellcanvas-painting_0_all-multilabel.zarr') + # prediction_path = os.path.join(segmentation_dir, f'{voxel_spacing:.3f}_cellcanvas-prediction_0_all-multilabel.zarr') + + # for dataset_path in zarr_datasets: + # dataset_name = os.path.basename(dataset_path).lower() + # if "_features.zarr" in dataset_name: + # features_path = dataset_path + # elif "painting" in dataset_name: + # painting_path = dataset_path + # elif "prediction" in dataset_name: + # prediction_path = dataset_path + # else: + # # TODO hard coded to use highest resolution + # image_path = os.path.join(dataset_path, "0") + + # # Save paths to JSON + # config = { + # 'image': os.path.relpath(image_path, voxel_spacing_dir), + # 'features': os.path.relpath(features_path, voxel_spacing_dir), + # 'painting': os.path.relpath(painting_path, segmentation_dir), + # 'prediction': os.path.relpath(prediction_path, segmentation_dir) + # } + # with open(config_path, 'w') as file: + # json.dump(config, file) + + # print(f"Fitting on 
paths:") + # print(f"Image: {image_path}") + # print(f"Features: {features_path}") + # print(f"Painting: {painting_path}") + # print(f"Prediction: {prediction_path}") + + # # Load dataset with paths + # if image_path and features_path: + # dataset = DataSet.from_paths( + # image_path=image_path, + # features_path=features_path, + # labels_path=painting_path, + # segmentation_path=prediction_path, + # make_missing_datasets=True + # ) + # datasets.append(dataset) + + # return DataManager(datasets=datasets) + + def get_default_voxel_spacing_directory(self, static_path): + # Find VoxelSpacing directories, assuming a hard coded match for now + voxel_spacing = self.get_voxel_spacing() + voxel_spacing_dirs = glob.glob(os.path.join(static_path, f'VoxelSpacing{voxel_spacing:.3f}')) + if voxel_spacing_dirs: + return voxel_spacing_dirs[0] + return None + + def get_segmentations_directory(self, static_path): + segmentation_dir = os.path.join(static_path, "Segmentations") + return segmentation_dir + + def change_button_color(self, button, color): + button.setStyleSheet(f"background-color: {color};") + + def reset_button_color(self, button): + self.change_button_color(button, "") + + def fit_on_all(self): + if not self.cell_canvas_app: + print("Initialize cell canvas first") + return + + print("Fitting all models to the selected dataset.") + + self.change_button_color( + self.fit_all_button, get_active_button_color() + ) + + self.model_fit_worker = self.threaded_fit_on_all() + self.model_fit_worker.returned.connect(self.on_model_fit_completed) + self.model_fit_worker.start() + + @thread_worker + def threaded_fit_on_all(self): + # Fit model on all pairs + data_manager = self.get_complete_data_manager(all_pairs=True) + + clf = RandomForestClassifier( + n_estimators=50, + n_jobs=-1, + max_depth=10, + max_samples=0.05, + ) + + segmentation_manager = SemanticSegmentationManager( + data=data_manager, model=clf + ) + segmentation_manager.fit() + + return segmentation_manager + + def 
on_model_fit_completed(self, segmentation_manager): + self.logger.debug("on_model_fit_completed") + + self.cell_canvas_app.semantic_segmentor.segmentation_manager = segmentation_manager + + # Reset color + self.reset_button_color(self.fit_all_button) + + def predict_for_all(self): + if not self.cell_canvas_app: + print("Initialize cell canvas first") + return + + print("Fitting all models to the selected dataset.") + + self.change_button_color( + self.predict_all_button, get_active_button_color() + ) + + self.predict_worker = self.threaded_predict_for_all() + self.predict_worker.returned.connect(self.on_predict_completed) + self.predict_worker.start() + + def on_predict_completed(self, result): + self.logger.debug("on_predict_completed") + + # Reset color + self.reset_button_color(self.predict_all_button) + + @thread_worker + def threaded_predict_for_all(self): + print("Running predictions on all datasets.") + + # Check if segmentation manager is properly initialized + if not hasattr(self.cell_canvas_app.semantic_segmentor, 'segmentation_manager') or self.cell_canvas_app.semantic_segmentor.segmentation_manager is None: + print("Segmentation manager is not initialized.") + return + + # Retrieve the complete data manager that includes all runs + data_manager = self.get_complete_data_manager() + + # Iterate through each dataset within the data manager + for dataset in data_manager.datasets: + dataset_features = da.asarray(dataset.concatenated_features) + chunk_shape = dataset_features.chunksize + shape = dataset_features.shape + dtype = dataset_features.dtype + + # Iterate over chunks + for z in range(0, shape[1], chunk_shape[1]): + for y in range(0, shape[2], chunk_shape[2]): + for x in range(0, shape[3], chunk_shape[3]): + # Compute the slice for the current chunk + # in feature,z,y,x order + chunk_slice = ( + slice(None), + slice(z, min(z + chunk_shape[1], shape[1])), + slice(y, min(y + chunk_shape[2], shape[2])), + slice(x, min(x + chunk_shape[3], shape[3])), + ) 
+ print(f"Predicting on chunk {chunk_slice}") + + # Extract the current chunk + chunk = dataset_features[chunk_slice].compute() + + # Predict on the chunk (adding 1 to each prediction) + predicted_chunk = self.cell_canvas_app.semantic_segmentor.segmentation_manager.predict(chunk) + 1 + + # Write the prediction to the corresponding region in the Zarr array + dataset.segmentation[chunk_slice[1:]] = predicted_chunk + + print(f"Predictions written") + + def on_run_clicked(self, item, column): + data = item.data(0, Qt.UserRole) + if not isinstance(data, copick.impl.filesystem.CopickRunFSSpec): + self.on_item_clicked(item, column) + return + + self.selected_run = data + static_path = self.selected_run.static_path + self.logger.info(f"Selected {static_path}") + + # Clear existing items + for dropdown in self.dropdowns.values(): + dropdown.clear() + + # Define directories + voxel_spacing_dirs = glob.glob(os.path.join(static_path, "VoxelSpacing10*")) + segmentation_dir = self.get_segmentations_directory(static_path) + os.makedirs(segmentation_dir, exist_ok=True) + + # Initialize dictionary to hold default selections from config + default_selections = {} + + # Check for config file and load selections if present + config_path = os.path.join(static_path, "dataset_config.json") + if os.path.exists(config_path): + with open(config_path, 'r') as file: + config = json.load(file) + default_selections = { + 'image': os.path.join(voxel_spacing_dirs[0], config.get('image')), + 'features': os.path.join(voxel_spacing_dirs[0], config.get('features')), + 'painting': os.path.join(segmentation_dir, config.get('painting')), + 'prediction': os.path.join(segmentation_dir, config.get('prediction')) + } + + # Helper function to add items if not already in dropdown + def add_item_if_not_exists(dropdown, item_name, item_data): + if dropdown.findData(item_data) == -1: + dropdown.addItem(item_name, item_data) + + # Load all zarr datasets from voxel spacing directories + if voxel_spacing_dirs: + for 
voxel_spacing_dir in voxel_spacing_dirs: + zarr_datasets = glob.glob(os.path.join(voxel_spacing_dir, "*.zarr")) + for dataset_path in zarr_datasets: + dataset_name = os.path.basename(dataset_path) + if "_features.zarr" in dataset_name.lower(): + add_item_if_not_exists(self.dropdowns["features"], dataset_name, dataset_path) + else: + add_item_if_not_exists(self.dropdowns["image"], dataset_name + "/0", dataset_path + "/0") + + # Load all zarr datasets from segmentation directory + zarr_datasets = glob.glob(os.path.join(segmentation_dir, "*.zarr")) + for dataset_path in zarr_datasets: + dataset_name = os.path.basename(dataset_path) + if "painting" not in dataset_name.lower(): + add_item_if_not_exists(self.dropdowns["prediction"], dataset_name, dataset_path) + if "prediction" not in dataset_name.lower(): + add_item_if_not_exists(self.dropdowns["painting"], dataset_name, dataset_path) + + # Set default selections in dropdowns if specified in the config + for key, dropdown in self.dropdowns.items(): + if default_selections.get(key): + index = dropdown.findData(default_selections[key]) + if index != -1: + dropdown.setCurrentIndex(index) + + + def on_item_clicked(self, item, column): + data = item.data(0, Qt.UserRole) + if data: + if isinstance(data, copick.impl.filesystem.CopickPicksFSSpec): + self.open_picks(data) + elif isinstance(data, copick.impl.filesystem.CopickTomogramFSSpec): + self.open_tomogram(data) + elif isinstance(data, copick.models.CopickSegmentation): + self.open_labels(data) + + def open_picks(self, picks): + with open(picks.path, 'r') as f: + points_data = json.load(f) + + # Extracting points locations + points_locations = [ + [point['location']['z'], point['location']['y'], point['location']['x']] + for point in points_data['points'] + ] + + # TODO hard coded scaling + points_array = np.array(points_locations) / 10 + + # Adding the points layer to the viewer, using the pickable_object_name as the layer name + pickable_object = [obj for obj in 
root.config.pickable_objects if obj.name == picks.pickable_object_name][0] + self.viewer.add_points(points_array, name=picks.pickable_object_name, size=25, out_of_slice_display=True, face_color=np.array(pickable_object.color)/255.0) + + def open_tomogram(self, tomogram): + zarr_store = zarr.open(tomogram.zarr(), mode='r') + print(f"open_tomogram {tomogram.zarr()}") + # TODO extract scale/transform info + + # TODO scale is hard coded to 10 here + self.viewer.add_image(zarr_store[0], name=f"Tomogram: {tomogram.tomo_type}") + + def open_labels(self, tomogram): + zarr_store = zarr.open(tomogram.zarr(), mode='r') + print(f"open_labels {tomogram.zarr()}") + # TODO extract scale/transform info + + # TODO scale is hard coded to 10 here + self.viewer.add_image(zarr_store[0], name=f"Tomogram: {tomogram.name}") + + def initialize_or_update_cell_canvas(self): + # Collect paths from dropdowns + paths = {layer: dropdown.currentText() for layer, dropdown in self.dropdowns.items()} + + if not paths["image"] or not paths["features"]: + print("Please ensure image and feature paths are selected before initializing/updating CellCanvas.") + return + + run_dir = self.selected_run.static_path + segmentation_dir = self.get_segmentations_directory(self.selected_run.static_path) + voxel_spacing_dir = self.get_default_voxel_spacing_directory(self.selected_run.static_path) + + voxel_spacing = self.get_voxel_spacing() + + # Ensure segmentations directory exists + os.makedirs(segmentation_dir, exist_ok=True) + + default_painting_path = os.path.join(segmentation_dir, f'{voxel_spacing:.3f}_cellcanvas-painting_0_all-multilabel.zarr') + default_prediction_path = os.path.join(segmentation_dir, f'{voxel_spacing:.3f}_cellcanvas-prediction_0_all-multilabel.zarr') + + painting_path = default_painting_path if not paths["painting"] else os.path.join(segmentation_dir, paths["painting"]) + prediction_path = default_prediction_path if not paths["prediction"] else os.path.join(segmentation_dir, 
paths["prediction"]) + image_path = os.path.join(voxel_spacing_dir, paths['image']) + features_path = os.path.join(voxel_spacing_dir, paths["features"]) + + # TODO note this is hard coded to use the highest resolution of a multiscale zarr + print(f"Opening paths:") + print(f"Image: {image_path}") + print(f"Features: {features_path}") + print(f"Painting: {painting_path}") + print(f"Prediction: {prediction_path}") + try: + dataset = DataSet.from_paths( + image_path=image_path, + features_path=features_path, + labels_path=painting_path, + segmentation_path=prediction_path, + make_missing_datasets=True, + ) + except FileNotFoundError: + print(f"File {path} not found!", file=sys.stderr) + return + + config_path = os.path.join(run_dir, "dataset_config.json") + + config = { + 'image': os.path.relpath(os.path.join(voxel_spacing_dir, f"{paths['image']}"), voxel_spacing_dir), + 'features': os.path.relpath(os.path.join(voxel_spacing_dir, paths["features"]), voxel_spacing_dir), + 'painting': os.path.relpath(painting_path, segmentation_dir), + 'prediction': os.path.relpath(prediction_path, segmentation_dir) + } + + with open(config_path, 'w') as file: + json.dump(config, file) + + data_manager = DataManager(datasets=[dataset]) + + if not self.cell_canvas_app: + self.cell_canvas_app = CellCanvasApp(data=data_manager, viewer=self.viewer, verbose=True) + cell_canvas_widget = QtCellCanvas(app=self.cell_canvas_app) + self.viewer.window.add_dock_widget(cell_canvas_widget) + else: + # Update existing CellCanvasApp's data manager + self.cell_canvas_app.update_data_manager(data_manager) + + # TODO this has multiple copick specific hardcoded hacks + + # TODO hardcoded scale factor + # self.viewer.layers['Image'].scale = (10, 10, 10) + + # Set colormap + # painting_layer.colormap.color_dict + # self.app.painting_labels + self.cell_canvas_app.semantic_segmentor.set_colormap(self.get_copick_colormap()) + self.cell_canvas_app.semantic_segmentor.painting_labels = [obj.label for obj in 
root.config.pickable_objects] + [9] + self.cell_canvas_app.semantic_segmentor.widget.class_labels_mapping = {obj.label: obj.name for obj in root.config.pickable_objects} + + self.cell_canvas_app.semantic_segmentor.widget.class_labels_mapping[9] = 'background' + self.cell_canvas_app.semantic_segmentor.widget.setupLegend() + +if __name__ == "__main__": + # Project root + root = CopickRootFSSpec.from_file("/Volumes/kish@CZI.T7/demo_project/copick_config_kyle.json") + # root = CopickRootFSSpec.from_file("/Volumes/kish@CZI.T7/chlamy_copick/copick_config_kyle.json") + + ## Root API + root.config # CopickConfig object + root.runs # List of run objects (lazy loading from filesystem location(s)) + + viewer = napari.Viewer() + + # Hide layer list and controls + # viewer.window.qt_viewer.dockLayerList.setVisible(False) + # viewer.window.qt_viewer.dockLayerControls.setVisible(False) + + copick_explorer_widget = NapariCopickExplorer(viewer, root) + viewer.window.add_dock_widget(copick_explorer_widget, name="Copick Explorer", area="left") + + # napari.run() + +# TODO finish making the prediction computation more lazy +# the strategy should be to start computing labels chunkwise +# on the zarr itself + +# TODO check scaling between picks and zarrs + +# TODO check why painting doesn't work when using proper scaling + +# TODO add proper colormap and legend support +# - override exclusion of non-zero labels +# - consistent colormap in the charts +# - consistent colormap in the painted part of the labels image From 7cdc73e10768f627c487dbdecd9267ff941ee92b Mon Sep 17 00:00:00 2001 From: Kyle Harrington Date: Mon, 22 Apr 2024 11:39:11 -0400 Subject: [PATCH 22/30] Add __init__ for _copick --- src/cellcanvas/_copick/__init__.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 src/cellcanvas/_copick/__init__.py diff --git a/src/cellcanvas/_copick/__init__.py b/src/cellcanvas/_copick/__init__.py new file mode 100644 index 0000000..e69de29 From 
e8f0d555020d007f0db4b3143e6fb002eb47591d Mon Sep 17 00:00:00 2001 From: Kyle Harrington Date: Mon, 22 Apr 2024 13:38:09 -0400 Subject: [PATCH 23/30] Update attribute reference for copick project --- src/cellcanvas/_copick/widget.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/cellcanvas/_copick/widget.py b/src/cellcanvas/_copick/widget.py index f1c5b2c..70ce854 100644 --- a/src/cellcanvas/_copick/widget.py +++ b/src/cellcanvas/_copick/widget.py @@ -108,7 +108,7 @@ def __init__(self, viewer: napari.Viewer, root): def get_copick_colormap(self): """Return a colormap for distinct label colors based on the pickable objects.""" - colormap = {obj.label: np.array(obj.color)/255.0 for obj in root.config.pickable_objects} + colormap = {obj.label: np.array(obj.color)/255.0 for obj in self.root.config.pickable_objects} colormap[None] = np.array([1, 1, 1, 1]) colormap[9] = np.array([0, 1, 1, 1]) return colormap @@ -537,7 +537,7 @@ def open_picks(self, picks): points_array = np.array(points_locations) / 10 # Adding the points layer to the viewer, using the pickable_object_name as the layer name - pickable_object = [obj for obj in root.config.pickable_objects if obj.name == picks.pickable_object_name][0] + pickable_object = [obj for obj in self.root.config.pickable_objects if obj.name == picks.pickable_object_name][0] self.viewer.add_points(points_array, name=picks.pickable_object_name, size=25, out_of_slice_display=True, face_color=np.array(pickable_object.color)/255.0) def open_tomogram(self, tomogram): @@ -630,8 +630,8 @@ def initialize_or_update_cell_canvas(self): # painting_layer.colormap.color_dict # self.app.painting_labels self.cell_canvas_app.semantic_segmentor.set_colormap(self.get_copick_colormap()) - self.cell_canvas_app.semantic_segmentor.painting_labels = [obj.label for obj in root.config.pickable_objects] + [9] - self.cell_canvas_app.semantic_segmentor.widget.class_labels_mapping = {obj.label: obj.name for obj in 
root.config.pickable_objects} + self.cell_canvas_app.semantic_segmentor.painting_labels = [obj.label for obj in self.root.config.pickable_objects] + [9] + self.cell_canvas_app.semantic_segmentor.widget.class_labels_mapping = {obj.label: obj.name for obj in self.root.config.pickable_objects} self.cell_canvas_app.semantic_segmentor.widget.class_labels_mapping[9] = 'background' self.cell_canvas_app.semantic_segmentor.widget.setupLegend() From f3f625adcd161ca689752f7ceb12560c80cdacb4 Mon Sep 17 00:00:00 2001 From: Kyle Harrington Date: Mon, 22 Apr 2024 14:34:54 -0400 Subject: [PATCH 24/30] Remove hack for adding background label, now it comes from copick --- src/cellcanvas/_copick/widget.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/cellcanvas/_copick/widget.py b/src/cellcanvas/_copick/widget.py index 70ce854..124425c 100644 --- a/src/cellcanvas/_copick/widget.py +++ b/src/cellcanvas/_copick/widget.py @@ -630,10 +630,10 @@ def initialize_or_update_cell_canvas(self): # painting_layer.colormap.color_dict # self.app.painting_labels self.cell_canvas_app.semantic_segmentor.set_colormap(self.get_copick_colormap()) - self.cell_canvas_app.semantic_segmentor.painting_labels = [obj.label for obj in self.root.config.pickable_objects] + [9] + self.cell_canvas_app.semantic_segmentor.painting_labels = [obj.label for obj in self.root.config.pickable_objects] self.cell_canvas_app.semantic_segmentor.widget.class_labels_mapping = {obj.label: obj.name for obj in self.root.config.pickable_objects} - self.cell_canvas_app.semantic_segmentor.widget.class_labels_mapping[9] = 'background' +# self.cell_canvas_app.semantic_segmentor.widget.class_labels_mapping[9] = 'background' self.cell_canvas_app.semantic_segmentor.widget.setupLegend() if __name__ == "__main__": From 54e90edc6cec8bdb328b3ed7f77a9fd1da0468be Mon Sep 17 00:00:00 2001 From: Kyle Harrington Date: Mon, 22 Apr 2024 17:43:10 -0400 Subject: [PATCH 25/30] Update default paths, use more copick config, 
lighter model --- src/cellcanvas/_copick/widget.py | 33 ++++++++++++++----- .../semantic/_embedding_segmentor.py | 4 +-- 2 files changed, 26 insertions(+), 11 deletions(-) diff --git a/src/cellcanvas/_copick/widget.py b/src/cellcanvas/_copick/widget.py index 124425c..ed1be3d 100644 --- a/src/cellcanvas/_copick/widget.py +++ b/src/cellcanvas/_copick/widget.py @@ -180,8 +180,8 @@ def get_complete_data_manager(self, all_pairs=False): voxel_spacing = self.get_voxel_spacing() # Reused paths for all datasets in a run - painting_path = os.path.join(segmentation_dir, f'{voxel_spacing:.3f}_cellcanvas-painting_0_all-multilabel.zarr') - prediction_path = os.path.join(segmentation_dir, f'{voxel_spacing:.3f}_cellcanvas-prediction_0_all-multilabel.zarr') + painting_path = self.get_default_painting_path(segmentation_dir, voxel_spacing) + prediction_path = self.get_default_prediction_path(segmentation_dir, voxel_spacing) zarr_datasets = glob.glob(os.path.join(voxel_spacing_dir, "*.zarr")) image_feature_pairs = {} @@ -203,7 +203,7 @@ def get_complete_data_manager(self, all_pairs=False): } # Handle either all pairs or only those specified by the configuration - config_path = os.path.join(run_dir, "dataset_config.json") + config_path = self.get_config_path(run.static_path) if os.path.exists(config_path): with open(config_path, 'r') as file: config = json.load(file) @@ -212,7 +212,7 @@ def get_complete_data_manager(self, all_pairs=False): if 'prediction' in config: prediction_path = os.path.join(segmentation_dir, config['prediction']) - if not all_pairs: + if os.path.exists(config_path) and not all_pairs: with open(config_path, 'r') as file: config = json.load(file) image_path = os.path.join(voxel_spacing_dir, config['image']) @@ -469,7 +469,7 @@ def on_run_clicked(self, item, column): default_selections = {} # Check for config file and load selections if present - config_path = os.path.join(static_path, "dataset_config.json") + config_path = self.get_config_path(static_path) if 
os.path.exists(config_path): with open(config_path, 'r') as file: config = json.load(file) @@ -556,13 +556,28 @@ def open_labels(self, tomogram): # TODO scale is hard coded to 10 here self.viewer.add_image(zarr_store[0], name=f"Tomogram: {tomogram.name}") + def get_config_path(self, run_dir): + return os.path.join(run_dir, f"{self.get_user_id()}_config.json") + + def get_session_id(self): + return 17 + + def get_user_id(self): + return self.root.user_id + + def get_default_painting_path(self, segmentation_dir, voxel_spacing): + return os.path.join(segmentation_dir, f'{voxel_spacing:.3f}_{self.get_user_id()}-cellcanvas-painting_{self.get_session_id()}_all-multilabel.zarr') + + def get_default_prediction_path(self, segmentation_dir, voxel_spacing): + return os.path.join(segmentation_dir, f'{voxel_spacing:.3f}_{self.get_user_id()}-cellcanvas-prediction_{self.get_session_id()}_all-multilabel.zarr') + def initialize_or_update_cell_canvas(self): # Collect paths from dropdowns paths = {layer: dropdown.currentText() for layer, dropdown in self.dropdowns.items()} if not paths["image"] or not paths["features"]: print("Please ensure image and feature paths are selected before initializing/updating CellCanvas.") - return + return run_dir = self.selected_run.static_path segmentation_dir = self.get_segmentations_directory(self.selected_run.static_path) @@ -573,8 +588,8 @@ def initialize_or_update_cell_canvas(self): # Ensure segmentations directory exists os.makedirs(segmentation_dir, exist_ok=True) - default_painting_path = os.path.join(segmentation_dir, f'{voxel_spacing:.3f}_cellcanvas-painting_0_all-multilabel.zarr') - default_prediction_path = os.path.join(segmentation_dir, f'{voxel_spacing:.3f}_cellcanvas-prediction_0_all-multilabel.zarr') + default_painting_path = self.get_default_painting_path(segmentation_dir, voxel_spacing) + default_prediction_path = self.get_default_prediction_path(segmentation_dir, voxel_spacing) painting_path = default_painting_path if not 
paths["painting"] else os.path.join(segmentation_dir, paths["painting"]) prediction_path = default_prediction_path if not paths["prediction"] else os.path.join(segmentation_dir, paths["prediction"]) @@ -599,7 +614,7 @@ def initialize_or_update_cell_canvas(self): print(f"File {path} not found!", file=sys.stderr) return - config_path = os.path.join(run_dir, "dataset_config.json") + config_path = self.get_config_path(run_dir) config = { 'image': os.path.relpath(os.path.join(voxel_spacing_dir, f"{paths['image']}"), voxel_spacing_dir), diff --git a/src/cellcanvas/semantic/_embedding_segmentor.py b/src/cellcanvas/semantic/_embedding_segmentor.py index 539b757..0015c5f 100644 --- a/src/cellcanvas/semantic/_embedding_segmentor.py +++ b/src/cellcanvas/semantic/_embedding_segmentor.py @@ -377,10 +377,10 @@ def update_model(self, model_type): # Model fitting if model_type == "Random Forest": clf = RandomForestClassifier( - n_estimators=200, + n_estimators=100, n_jobs=-1, max_depth=15, - max_samples=0.1, + max_samples=0.05, class_weight=weight_dict, ) self.segmentation_manager.model = clf From 51ca7bb25506e0971533c8afee48642e2ec593db Mon Sep 17 00:00:00 2001 From: Kyle Harrington Date: Mon, 22 Apr 2024 20:51:04 -0400 Subject: [PATCH 26/30] Fix: remove hard coded color for label 9 --- src/cellcanvas/_copick/widget.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/cellcanvas/_copick/widget.py b/src/cellcanvas/_copick/widget.py index ed1be3d..268e736 100644 --- a/src/cellcanvas/_copick/widget.py +++ b/src/cellcanvas/_copick/widget.py @@ -110,7 +110,6 @@ def get_copick_colormap(self): """Return a colormap for distinct label colors based on the pickable objects.""" colormap = {obj.label: np.array(obj.color)/255.0 for obj in self.root.config.pickable_objects} colormap[None] = np.array([1, 1, 1, 1]) - colormap[9] = np.array([0, 1, 1, 1]) return colormap def get_voxel_spacing(self): From ae0d47574b51177d0f833ed0cabdeb06cf9d2054 Mon Sep 17 00:00:00 2001 From: Kyle Harrington 
Date: Wed, 24 Apr 2024 17:20:33 -0400 Subject: [PATCH 27/30] Better lazy loading of UI, support for opening zarr stores There is a bug with opening labels probably dtype --- src/cellcanvas/_copick/widget.py | 316 +++++++++++-------------------- src/cellcanvas/data/data_set.py | 34 ++++ 2 files changed, 142 insertions(+), 208 deletions(-) diff --git a/src/cellcanvas/_copick/widget.py b/src/cellcanvas/_copick/widget.py index 268e736..c531552 100644 --- a/src/cellcanvas/_copick/widget.py +++ b/src/cellcanvas/_copick/widget.py @@ -125,30 +125,42 @@ def _init_logging(self): streamHandler.setFormatter(formatter) self.logger.addHandler(streamHandler) - def populate_tree(self): + self.tree.clear() # Clear existing items if repopulating for run in self.root.runs: run_item = QTreeWidgetItem(self.tree, [run.name]) run_item.setData(0, Qt.UserRole, run) + run_item.setChildIndicatorPolicy(QTreeWidgetItem.ShowIndicator) + + def setup_signals(self): + self.tree.itemExpanded.connect(self.on_item_expanded) + + def on_item_expanded(self, item): + # Check if the item has already been populated + if not hasattr(item, 'is_populated'): + run = item.data(0, Qt.UserRole) + if isinstance(run, copick.models.CopickRun): + self.populate_run(item, run) + item.is_populated = True # Mark as populated + + def populate_run(self, run_item, run): + for category in ["segmentations", "meshes", "picks", "voxel_spacings"]: + category_item = QTreeWidgetItem(run_item, [category]) + items = getattr(run, category, []) + for item in items: + if category == "picks": + item_name = item.pickable_object_name + else: + item_name = getattr(item, 'name', 'Unnamed') + child_item = QTreeWidgetItem(category_item, [item_name]) + child_item.setData(0, Qt.UserRole, item) + + if category == "voxel_spacings": + for tomogram in item.tomograms: + tomo_item = QTreeWidgetItem(child_item, [f"Tomogram: {tomogram.tomo_type}"]) + tomo_item.setData(0, Qt.UserRole, tomogram) - for category in ["segmentations", "meshes", "picks", 
"voxel_spacings"]: - category_item = QTreeWidgetItem(run_item, [category]) - items = getattr(run, category) - for item in items: - if category == "picks": - item_name = item.pickable_object_name - else: - item_name = getattr(item, 'name', 'Unnamed') - - child_item = QTreeWidgetItem(category_item, [item_name]) - child_item.setData(0, Qt.UserRole, item) - - # list tomograms - if category == "voxel_spacings": - for tomogram in item.tomograms: - tomo_item = QTreeWidgetItem(child_item, [f"Tomogram: {tomogram.tomo_type}"]) - tomo_item.setData(0, Qt.UserRole, tomogram) - + def activate_layer(self, layer): print(f"Activating layer {layer}") if layer == "image": @@ -167,8 +179,10 @@ def get_complete_data_manager(self, all_pairs=False): datasets = [] for run in self.root.runs: run_dir = run.static_path - voxel_spacing_dir = self.get_default_voxel_spacing_directory(run_dir) - segmentation_dir = self.get_segmentations_directory(run_dir) + overlay_path = run.overlay_path + + voxel_spacing_dir = self.get_default_voxel_spacing_directory(run) + segmentation_dir = self.get_segmentations_directory(run) if not voxel_spacing_dir: print(f"No Voxel Spacing directory found for run {run.name}.") @@ -246,90 +260,16 @@ def get_complete_data_manager(self, all_pairs=False): return DataManager(datasets=datasets) - # Only train on config pairs - # def get_complete_data_manager(self, all_pairs=False): - # datasets = [] - # for run in self.root.runs: - # run_dir = run.static_path - # config_path = os.path.join(run_dir, "dataset_config.json") - - # voxel_spacing_dir = self.get_default_voxel_spacing_directory(run_dir) - # segmentation_dir = self.get_segmentations_directory(run_dir) - - # if not voxel_spacing_dir: - # print(f"No Voxel Spacing directory found for run {run.name}.") - # continue - - # os.makedirs(segmentation_dir, exist_ok=True) - - # if os.path.exists(config_path): - # with open(config_path, 'r') as file: - # config = json.load(file) - # image_path = os.path.join(voxel_spacing_dir, 
config['image']) - # features_path = os.path.join(voxel_spacing_dir, config['features']) - # painting_path = os.path.join(segmentation_dir, config['painting']) - # prediction_path = os.path.join(segmentation_dir, config['prediction']) - # else: - # # Existing logic to find paths - # voxel_spacing = self.get_voxel_spacing() - - # zarr_datasets = glob.glob(os.path.join(voxel_spacing_dir, "*.zarr")) - # image_path = None - # features_path = None - # painting_path = os.path.join(segmentation_dir, f'{voxel_spacing:.3f}_cellcanvas-painting_0_all-multilabel.zarr') - # prediction_path = os.path.join(segmentation_dir, f'{voxel_spacing:.3f}_cellcanvas-prediction_0_all-multilabel.zarr') - - # for dataset_path in zarr_datasets: - # dataset_name = os.path.basename(dataset_path).lower() - # if "_features.zarr" in dataset_name: - # features_path = dataset_path - # elif "painting" in dataset_name: - # painting_path = dataset_path - # elif "prediction" in dataset_name: - # prediction_path = dataset_path - # else: - # # TODO hard coded to use highest resolution - # image_path = os.path.join(dataset_path, "0") - - # # Save paths to JSON - # config = { - # 'image': os.path.relpath(image_path, voxel_spacing_dir), - # 'features': os.path.relpath(features_path, voxel_spacing_dir), - # 'painting': os.path.relpath(painting_path, segmentation_dir), - # 'prediction': os.path.relpath(prediction_path, segmentation_dir) - # } - # with open(config_path, 'w') as file: - # json.dump(config, file) - - # print(f"Fitting on paths:") - # print(f"Image: {image_path}") - # print(f"Features: {features_path}") - # print(f"Painting: {painting_path}") - # print(f"Prediction: {prediction_path}") - - # # Load dataset with paths - # if image_path and features_path: - # dataset = DataSet.from_paths( - # image_path=image_path, - # features_path=features_path, - # labels_path=painting_path, - # segmentation_path=prediction_path, - # make_missing_datasets=True - # ) - # datasets.append(dataset) - - # return 
DataManager(datasets=datasets) - - def get_default_voxel_spacing_directory(self, static_path): + def get_default_voxel_spacing_directory(self, run): # Find VoxelSpacing directories, assuming a hard coded match for now voxel_spacing = self.get_voxel_spacing() - voxel_spacing_dirs = glob.glob(os.path.join(static_path, f'VoxelSpacing{voxel_spacing:.3f}')) + voxel_spacing_dirs = glob.glob(os.path.join(run.static_path, f'VoxelSpacing{voxel_spacing:.3f}')) if voxel_spacing_dirs: return voxel_spacing_dirs[0] return None - def get_segmentations_directory(self, static_path): - segmentation_dir = os.path.join(static_path, "Segmentations") + def get_segmentations_directory(self, run): + segmentation_dir = os.path.join(run.overlay_path, "Segmentations") return segmentation_dir def change_button_color(self, button, color): @@ -445,6 +385,12 @@ def threaded_predict_for_all(self): print(f"Predictions written") + def get_painting_segmentation_name(self): + return "cellcanvas-painting" + + def get_prediction_segmentation_name(self): + return "cellcanvas-prediction" + def on_run_clicked(self, item, column): data = item.data(0, Qt.UserRole) if not isinstance(data, copick.impl.filesystem.CopickRunFSSpec): @@ -453,64 +399,61 @@ def on_run_clicked(self, item, column): self.selected_run = data static_path = self.selected_run.static_path - self.logger.info(f"Selected {static_path}") + overlay_path = self.selected_run.overlay_path + self.logger.info(f"Selected static path: {static_path} overlay path: {overlay_path}") # Clear existing items for dropdown in self.dropdowns.values(): dropdown.clear() + voxel_spacing = self.selected_run.get_voxel_spacing(self.get_voxel_spacing()) + if not voxel_spacing: + print("Voxel spacing does not exist.") + return + + # features = self.selected_run.get_voxel_spacing(10).tomograms[0].get_features("cellcanvas01") + # Define directories - voxel_spacing_dirs = glob.glob(os.path.join(static_path, "VoxelSpacing10*")) - segmentation_dir = 
self.get_segmentations_directory(static_path) - os.makedirs(segmentation_dir, exist_ok=True) - - # Initialize dictionary to hold default selections from config - default_selections = {} - - # Check for config file and load selections if present - config_path = self.get_config_path(static_path) - if os.path.exists(config_path): - with open(config_path, 'r') as file: - config = json.load(file) - default_selections = { - 'image': os.path.join(voxel_spacing_dirs[0], config.get('image')), - 'features': os.path.join(voxel_spacing_dirs[0], config.get('features')), - 'painting': os.path.join(segmentation_dir, config.get('painting')), - 'prediction': os.path.join(segmentation_dir, config.get('prediction')) - } + voxel_spacing_dirs = voxel_spacing.static_path # Helper function to add items if not already in dropdown def add_item_if_not_exists(dropdown, item_name, item_data): if dropdown.findData(item_data) == -1: dropdown.addItem(item_name, item_data) - # Load all zarr datasets from voxel spacing directories - if voxel_spacing_dirs: - for voxel_spacing_dir in voxel_spacing_dirs: - zarr_datasets = glob.glob(os.path.join(voxel_spacing_dir, "*.zarr")) - for dataset_path in zarr_datasets: - dataset_name = os.path.basename(dataset_path) - if "_features.zarr" in dataset_name.lower(): - add_item_if_not_exists(self.dropdowns["features"], dataset_name, dataset_path) - else: - add_item_if_not_exists(self.dropdowns["image"], dataset_name + "/0", dataset_path + "/0") - - # Load all zarr datasets from segmentation directory - zarr_datasets = glob.glob(os.path.join(segmentation_dir, "*.zarr")) - for dataset_path in zarr_datasets: - dataset_name = os.path.basename(dataset_path) - if "painting" not in dataset_name.lower(): - add_item_if_not_exists(self.dropdowns["prediction"], dataset_name, dataset_path) - if "prediction" not in dataset_name.lower(): - add_item_if_not_exists(self.dropdowns["painting"], dataset_name, dataset_path) - - # Set default selections in dropdowns if specified in the 
config - for key, dropdown in self.dropdowns.items(): - if default_selections.get(key): - index = dropdown.findData(default_selections[key]) - if index != -1: - dropdown.setCurrentIndex(index) - + # Load image/tomograms + tomograms = voxel_spacing.tomograms + for tomogram in tomograms: + add_item_if_not_exists(self.dropdowns["image"], + tomogram.tomo_type, + tomogram) + + # Load features + for tomogram in tomograms: + features = tomogram.features + if features: + feature = features[0] + add_item_if_not_exists(self.dropdowns["features"], + tomogram.tomo_type, + feature) + + # Painting + painting_seg = self.selected_run.get_segmentations(user_id=self.root.user_id, is_multilabel=True, name=self.get_painting_segmentation_name(), voxel_size=10) + if not painting_seg: + # Create seg + painting_seg = self.selected_run.new_segmentation(10, self.get_painting_segmentation_name(), self.get_session_id(), True, user_id=self.root.user_id) + else: + painting_seg = painting_seg[0] + add_item_if_not_exists(self.dropdowns["painting"], painting_seg.name, painting_seg) + + # Prediction + prediction_seg = self.selected_run.get_segmentations(user_id=self.root.user_id, is_multilabel=True, name=self.get_prediction_segmentation_name(), voxel_size=10) + if not prediction_seg: + # Create seg + prediction_seg = self.selected_run.new_segmentation(10, self.get_prediction_segmentation_name(), self.get_session_id(), True, user_id=self.root.user_id) + else: + prediction_seg = prediction_seg[0] + add_item_if_not_exists(self.dropdowns["prediction"], prediction_seg.name, prediction_seg) def on_item_clicked(self, item, column): data = item.data(0, Qt.UserRole) @@ -533,7 +476,7 @@ def open_picks(self, picks): ] # TODO hard coded scaling - points_array = np.array(points_locations) / 10 + points_array = np.array(points_locations) / self.get_voxel_spacing() # Adding the points layer to the viewer, using the pickable_object_name as the layer name pickable_object = [obj for obj in 
self.root.config.pickable_objects if obj.name == picks.pickable_object_name][0] @@ -564,66 +507,37 @@ def get_session_id(self): def get_user_id(self): return self.root.user_id - def get_default_painting_path(self, segmentation_dir, voxel_spacing): - return os.path.join(segmentation_dir, f'{voxel_spacing:.3f}_{self.get_user_id()}-cellcanvas-painting_{self.get_session_id()}_all-multilabel.zarr') - - def get_default_prediction_path(self, segmentation_dir, voxel_spacing): - return os.path.join(segmentation_dir, f'{voxel_spacing:.3f}_{self.get_user_id()}-cellcanvas-prediction_{self.get_session_id()}_all-multilabel.zarr') - def initialize_or_update_cell_canvas(self): # Collect paths from dropdowns - paths = {layer: dropdown.currentText() for layer, dropdown in self.dropdowns.items()} + paths = {layer: dropdown.currentData() for layer, dropdown in self.dropdowns.items()} if not paths["image"] or not paths["features"]: print("Please ensure image and feature paths are selected before initializing/updating CellCanvas.") return run_dir = self.selected_run.static_path - segmentation_dir = self.get_segmentations_directory(self.selected_run.static_path) - voxel_spacing_dir = self.get_default_voxel_spacing_directory(self.selected_run.static_path) + overlay_path = self.selected_run.overlay_path + + segmentation_dir = self.get_segmentations_directory(self.selected_run) + voxel_spacing_dir = self.get_default_voxel_spacing_directory(self.selected_run) voxel_spacing = self.get_voxel_spacing() # Ensure segmentations directory exists - os.makedirs(segmentation_dir, exist_ok=True) - - default_painting_path = self.get_default_painting_path(segmentation_dir, voxel_spacing) - default_prediction_path = self.get_default_prediction_path(segmentation_dir, voxel_spacing) - - painting_path = default_painting_path if not paths["painting"] else os.path.join(segmentation_dir, paths["painting"]) - prediction_path = default_prediction_path if not paths["prediction"] else os.path.join(segmentation_dir, 
paths["prediction"]) - image_path = os.path.join(voxel_spacing_dir, paths['image']) - features_path = os.path.join(voxel_spacing_dir, paths["features"]) + # os.makedirs(segmentation_dir, exist_ok=True) # TODO note this is hard coded to use the highest resolution of a multiscale zarr print(f"Opening paths:") - print(f"Image: {image_path}") - print(f"Features: {features_path}") - print(f"Painting: {painting_path}") - print(f"Prediction: {prediction_path}") - try: - dataset = DataSet.from_paths( - image_path=image_path, - features_path=features_path, - labels_path=painting_path, - segmentation_path=prediction_path, - make_missing_datasets=True, - ) - except FileNotFoundError: - print(f"File {path} not found!", file=sys.stderr) - return - - config_path = self.get_config_path(run_dir) - - config = { - 'image': os.path.relpath(os.path.join(voxel_spacing_dir, f"{paths['image']}"), voxel_spacing_dir), - 'features': os.path.relpath(os.path.join(voxel_spacing_dir, paths["features"]), voxel_spacing_dir), - 'painting': os.path.relpath(painting_path, segmentation_dir), - 'prediction': os.path.relpath(prediction_path, segmentation_dir) - } - - with open(config_path, 'w') as file: - json.dump(config, file) + print(f"Image: {paths['image']}") + print(f"Features: {paths['features']}") + print(f"Painting: {paths['painting']}") + print(f"Prediction: {paths['prediction']}") + dataset = DataSet.from_stores( + image_store=paths['image'].zarr(), + features_store=paths['features'].zarr(), + labels_store=paths['painting'].zarr(), + segmentation_store=paths['prediction'].zarr(), + ) data_manager = DataManager(datasets=[dataset]) @@ -652,12 +566,10 @@ def initialize_or_update_cell_canvas(self): if __name__ == "__main__": # Project root - root = CopickRootFSSpec.from_file("/Volumes/kish@CZI.T7/demo_project/copick_config_kyle.json") + + # root = CopickRootFSSpec.from_file("/Volumes/kish@CZI.T7/demo_project/copick_config_kyle.json") # root = 
CopickRootFSSpec.from_file("/Volumes/kish@CZI.T7/chlamy_copick/copick_config_kyle.json") - - ## Root API - root.config # CopickConfig object - root.runs # List of run objects (lazy loading from filesystem location(s)) + root = CopickRootFSSpec.from_file("/Volumes/kish@CZI.T7/demo_project/copick_config_pickathon.json") viewer = napari.Viewer() @@ -670,15 +582,3 @@ def initialize_or_update_cell_canvas(self): # napari.run() -# TODO finish making the prediction computation more lazy -# the strategy should be to start computing labels chunkwise -# on the zarr itself - -# TODO check scaling between picks and zarrs - -# TODO check why painting doesn't work when using proper scaling - -# TODO add proper colormap and legend support -# - override exclusion of non-zero labels -# - consistent colormap in the charts -# - consistent colormap in the painted part of the labels image diff --git a/src/cellcanvas/data/data_set.py b/src/cellcanvas/data/data_set.py index 117c42f..f9d0b60 100644 --- a/src/cellcanvas/data/data_set.py +++ b/src/cellcanvas/data/data_set.py @@ -89,3 +89,37 @@ def from_paths( labels=labels, segmentation=segmentation, ) + + @classmethod + def from_stores( + cls, + image_store, + features_store, + labels_store, + segmentation_store, + ): + """Create a DataSet from a set of paths. 
+ + todo: add ability to create missing labels/segmentations + """ + # get the image + # TODO fix hardcoded scale for pickathon + image = zarr.open(image_store, "r")["0"] + + + # get the features + features = zarr.open(features_store, "r") + + # get the labels + labels = zarr.open(labels_store, "a") + + # get the segmentation + segmentation = zarr.open(segmentation_store, mode="a") + + return cls( + image=image, + features=features, + labels=labels, + segmentation=segmentation, + ) + From 7c41d25b35e0fae389d1775dcec2a9dd4d2756bd Mon Sep 17 00:00:00 2001 From: Kyle Harrington Date: Wed, 24 Apr 2024 17:51:11 -0400 Subject: [PATCH 28/30] More updates for loading from zarr stores --- src/cellcanvas/data/data_set.py | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/src/cellcanvas/data/data_set.py b/src/cellcanvas/data/data_set.py index f9d0b60..0f5498c 100644 --- a/src/cellcanvas/data/data_set.py +++ b/src/cellcanvas/data/data_set.py @@ -110,11 +110,27 @@ def from_stores( # get the features features = zarr.open(features_store, "r") + group_name = "labels" + # get the labels - labels = zarr.open(labels_store, "a") + labels = zarr.open_group(labels_store, + mode="a") + if group_name in labels: + labels = labels[group_name] + else: + labels = labels.create_dataset(group_name, + shape=image.shape, + dtype="i4") # get the segmentation - segmentation = zarr.open(segmentation_store, mode="a") + segmentation = zarr.open_group(segmentation_store, + mode="a") + if group_name in segmentation: + segmentation = segmentation[group_name] + else: + segmentation = segmentation.create_dataset(group_name, + shape=image.shape, + dtype="i4") return cls( image=image, From d48c4e28b1908ad04b68f1bc4f61c044126e6a13 Mon Sep 17 00:00:00 2001 From: Kyle Harrington Date: Wed, 24 Apr 2024 17:55:29 -0400 Subject: [PATCH 29/30] Features as a dict --- src/cellcanvas/data/data_set.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/src/cellcanvas/data/data_set.py b/src/cellcanvas/data/data_set.py index 0f5498c..889e081 100644 --- a/src/cellcanvas/data/data_set.py +++ b/src/cellcanvas/data/data_set.py @@ -108,7 +108,7 @@ def from_stores( # get the features - features = zarr.open(features_store, "r") + features = {"features": zarr.open(features_store, "r")} group_name = "labels" From 2a5d7679d18a494341c3dc4bec910339c5a65513 Mon Sep 17 00:00:00 2001 From: Kyle Harrington Date: Wed, 24 Apr 2024 20:54:34 -0400 Subject: [PATCH 30/30] Add LRU cache to stores to get some speedup --- src/cellcanvas/data/data_set.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/src/cellcanvas/data/data_set.py b/src/cellcanvas/data/data_set.py index 889e081..7021215 100644 --- a/src/cellcanvas/data/data_set.py +++ b/src/cellcanvas/data/data_set.py @@ -102,18 +102,20 @@ def from_stores( todo: add ability to create missing labels/segmentations """ + + # TODO rewrite this to copy everything to be local + # get the image # TODO fix hardcoded scale for pickathon - image = zarr.open(image_store, "r")["0"] - + image = zarr.open(zarr.storage.LRUStoreCache(image_store, None), "r")["0"] # get the features - features = {"features": zarr.open(features_store, "r")} + features = {"features": zarr.open(zarr.storage.LRUStoreCache(features_store, None), "r")} group_name = "labels" # get the labels - labels = zarr.open_group(labels_store, + labels = zarr.open_group(zarr.storage.LRUStoreCache(labels_store, None), mode="a") if group_name in labels: labels = labels[group_name] @@ -123,7 +125,7 @@ def from_stores( dtype="i4") # get the segmentation - segmentation = zarr.open_group(segmentation_store, + segmentation = zarr.open_group(zarr.storage.LRUStoreCache(segmentation_store, None), mode="a") if group_name in segmentation: segmentation = segmentation[group_name] @@ -132,6 +134,8 @@ def from_stores( shape=image.shape, dtype="i4") + # TODO start a background thread that triggers downloads of the zarrs + 
return cls( image=image, features=features,