diff --git a/CMakeLists.txt b/CMakeLists.txt index 426a550c8d..d599f45a14 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -537,6 +537,7 @@ set(SIMPLNX_HDRS ${SIMPLNX_SOURCE_DIR}/Utilities/DataGroupUtilities.hpp ${SIMPLNX_SOURCE_DIR}/Utilities/DataObjectUtilities.hpp ${SIMPLNX_SOURCE_DIR}/Utilities/DataStoreUtilities.hpp + ${SIMPLNX_SOURCE_DIR}/Utilities/AlgorithmDispatch.hpp ${SIMPLNX_SOURCE_DIR}/Utilities/FilePathGenerator.hpp ${SIMPLNX_SOURCE_DIR}/Utilities/ColorTableUtilities.hpp ${SIMPLNX_SOURCE_DIR}/Utilities/FileUtilities.hpp @@ -558,6 +559,7 @@ set(SIMPLNX_HDRS ${SIMPLNX_SOURCE_DIR}/Utilities/ParallelTaskAlgorithm.hpp ${SIMPLNX_SOURCE_DIR}/Utilities/SamplingUtils.hpp ${SIMPLNX_SOURCE_DIR}/Utilities/SegmentFeatures.hpp + ${SIMPLNX_SOURCE_DIR}/Utilities/UnionFind.hpp ${SIMPLNX_SOURCE_DIR}/Utilities/TimeUtilities.hpp ${SIMPLNX_SOURCE_DIR}/Utilities/TooltipGenerator.hpp ${SIMPLNX_SOURCE_DIR}/Utilities/TooltipRowItem.hpp diff --git a/src/Plugins/OrientationAnalysis/src/OrientationAnalysis/Filters/Algorithms/CAxisSegmentFeatures.cpp b/src/Plugins/OrientationAnalysis/src/OrientationAnalysis/Filters/Algorithms/CAxisSegmentFeatures.cpp index 587ecbf6c3..07d27afad6 100644 --- a/src/Plugins/OrientationAnalysis/src/OrientationAnalysis/Filters/Algorithms/CAxisSegmentFeatures.cpp +++ b/src/Plugins/OrientationAnalysis/src/OrientationAnalysis/Filters/Algorithms/CAxisSegmentFeatures.cpp @@ -5,6 +5,7 @@ #include "simplnx/Common/Constants.hpp" #include "simplnx/DataStructure/DataArray.hpp" #include "simplnx/DataStructure/Geometry/ImageGeom.hpp" +#include "simplnx/Utilities/AlgorithmDispatch.hpp" #include "simplnx/Utilities/ClusteringUtilities.hpp" #include @@ -68,8 +69,16 @@ Result<> CAxisSegmentFeatures::operator()() auto* active = m_DataStructure.getDataAs(m_InputValues->ActiveArrayPath); active->fill(1); - // Run the segmentation algorithm - execute(imageGeometry); + // Dispatch between DFS (in-core) and CCL (OOC) algorithms + if(IsOutOfCore(*m_FeatureIdsArray) || ForceOocAlgorithm()) + { + auto& featureIdsStore = m_FeatureIdsArray->getDataStoreRef(); + executeCCL(imageGeometry, featureIdsStore); + } + else + { + execute(imageGeometry); + } // Sanity check the result. if(this->m_FoundFeatures < 1) { @@ -127,10 +136,7 @@ int64 CAxisSegmentFeatures::getSeed(int32 gnum, int64 nextSeed) const } if(seed >= 0) { - auto& cellFeatureAM = m_DataStructure.getDataRefAs(m_InputValues->CellFeatureAttributeMatrixPath); featureIds[static_cast(seed)] = gnum; - const ShapeType tDims = {static_cast(gnum) + 1}; - cellFeatureAM.resizeTuples(tDims); // This will resize the active array } return seed; } @@ -182,3 +188,59 @@ bool CAxisSegmentFeatures::determineGrouping(int64 referencepoint, int64 neighbo } return group; } + +// ----------------------------------------------------------------------------- +bool CAxisSegmentFeatures::isValidVoxel(int64 point) const +{ + // Check mask + if(m_InputValues->UseMask && !m_GoodVoxelsArray->isTrue(point)) + { + return false; + } + // Check that the voxel has a valid phase (> 0) + Int32Array& cellPhases = *m_CellPhases; + if(cellPhases[point] <= 0) + { + return false; + } + return true; +} + +// ----------------------------------------------------------------------------- +bool CAxisSegmentFeatures::areNeighborsSimilar(int64 point1, int64 point2) const +{ + // The neighbor must also be valid + if(!isValidVoxel(point2)) + { + return false; + } + + Int32Array& cellPhases = *m_CellPhases; + + // Must be same phase + if(cellPhases[point1] != cellPhases[point2]) + { + return false; + } + + // Calculate c-axis misalignment + const Eigen::Vector3f cAxis{0.0f, 0.0f, 1.0f}; + Float32Array& quats = *m_QuatsArray; + + const ebsdlib::QuatF q1(quats[point1 * 4], quats[point1 * 4 + 1], quats[point1 * 4 + 2], quats[point1 * 4 + 3]); + const ebsdlib::QuatF q2(quats[point2 * 4], quats[point2 * 4 + 1], quats[point2 * 4 + 2], quats[point2 * 4 + 3]); + + const ebsdlib::OrientationMatrixFType oMatrix1 = q1.toOrientationMatrix(); + const ebsdlib::OrientationMatrixFType oMatrix2 = q2.toOrientationMatrix(); + + Eigen::Vector3f c1 = oMatrix1.transpose() * cAxis; + Eigen::Vector3f c2 = oMatrix2.transpose() * cAxis; + + c1.normalize(); + c2.normalize(); + + float32 w = std::clamp(((c1[0] * c2[0]) + (c1[1] * c2[1]) + (c1[2] * c2[2])), -1.0F, 1.0F); + w = std::acos(w); + + return w <= m_InputValues->MisorientationTolerance || (Constants::k_PiD - w) <= m_InputValues->MisorientationTolerance; +} diff --git a/src/Plugins/OrientationAnalysis/src/OrientationAnalysis/Filters/Algorithms/CAxisSegmentFeatures.hpp b/src/Plugins/OrientationAnalysis/src/OrientationAnalysis/Filters/Algorithms/CAxisSegmentFeatures.hpp index b1d0fe9d88..3a93d89972 100644 --- a/src/Plugins/OrientationAnalysis/src/OrientationAnalysis/Filters/Algorithms/CAxisSegmentFeatures.hpp +++ b/src/Plugins/OrientationAnalysis/src/OrientationAnalysis/Filters/Algorithms/CAxisSegmentFeatures.hpp @@ -49,6 +49,21 @@ class ORIENTATIONANALYSIS_EXPORT CAxisSegmentFeatures : public SegmentFeatures int64 getSeed(int32 gnum, int64 nextSeed) const override; bool determineGrouping(int64 referencePoint, int64 neighborPoint, int32 gnum) const override; + /** + * @brief Checks whether a voxel can participate in C-axis segmentation based on mask and phase. + * @param point Linear voxel index. + * @return true if the voxel passes mask and phase checks. + */ + bool isValidVoxel(int64 point) const override; + + /** + * @brief Determines whether two neighboring voxels belong to the same C-axis segment. + * @param point1 First voxel index. + * @param point2 Second (neighbor) voxel index. + * @return true if both voxels share the same phase and their C-axis misalignment is within tolerance. + */ + bool areNeighborsSimilar(int64 point1, int64 point2) const override; + private: const CAxisSegmentFeaturesInputValues* m_InputValues = nullptr; diff --git a/src/Plugins/OrientationAnalysis/src/OrientationAnalysis/Filters/Algorithms/EBSDSegmentFeatures.cpp b/src/Plugins/OrientationAnalysis/src/OrientationAnalysis/Filters/Algorithms/EBSDSegmentFeatures.cpp index 000b1e28b3..4cbe5862c3 100644 --- a/src/Plugins/OrientationAnalysis/src/OrientationAnalysis/Filters/Algorithms/EBSDSegmentFeatures.cpp +++ b/src/Plugins/OrientationAnalysis/src/OrientationAnalysis/Filters/Algorithms/EBSDSegmentFeatures.cpp @@ -2,6 +2,7 @@ #include "simplnx/DataStructure/DataStore.hpp" #include "simplnx/DataStructure/Geometry/IGridGeometry.hpp" +#include "simplnx/Utilities/AlgorithmDispatch.hpp" using namespace nx::core; @@ -43,8 +44,16 @@ Result<> EBSDSegmentFeatures::operator()() m_FeatureIdsArray = m_DataStructure.getDataAs(m_InputValues->FeatureIdsArrayPath); m_FeatureIdsArray->fill(0); // initialize the output array with zeros - // Run the segmentation algorithm - execute(gridGeom); + // Dispatch between DFS (in-core) and CCL (OOC) algorithms + if(IsOutOfCore(*m_FeatureIdsArray) || ForceOocAlgorithm()) + { + auto& featureIdsStore = m_FeatureIdsArray->getDataStoreRef(); + executeCCL(gridGeom, featureIdsStore); + } + else + { + execute(gridGeom); + } // Sanity check the result. if(this->m_FoundFeatures < 1) { @@ -152,3 +161,55 @@ bool EBSDSegmentFeatures::determineGrouping(int64 referencePoint, int64 neighbor return group; } + +// ----------------------------------------------------------------------------- +bool EBSDSegmentFeatures::isValidVoxel(int64 point) const +{ + // Check mask + if(m_InputValues->UseMask && !m_GoodVoxelsArray->isTrue(point)) + { + return false; + } + // Check that the voxel has a valid phase (> 0) + AbstractDataStore& cellPhases = m_CellPhases->getDataStoreRef(); + if(cellPhases[point] <= 0) + { + return false; + } + return true; +} + +// ----------------------------------------------------------------------------- +bool EBSDSegmentFeatures::areNeighborsSimilar(int64 point1, int64 point2) const +{ + // The neighbor must also be valid + if(!isValidVoxel(point2)) + { + return false; + } + + AbstractDataStore& cellPhases = m_CellPhases->getDataStoreRef(); + + // Must be same phase + if(cellPhases[point1] != cellPhases[point2]) + { + return false; + } + + // Check crystal structure validity + int32 laueClass = (*m_CrystalStructures)[cellPhases[point1]]; + if(static_cast(laueClass) >= m_OrientationOps.size()) + { + return false; + } + + // Calculate misorientation + Float32Array& quats = *m_QuatsArray; + const ebsdlib::QuatD q1(quats[point1 * 4], quats[point1 * 4 + 1], quats[point1 * 4 + 2], quats[point1 * 4 + 3]); + const ebsdlib::QuatD q2(quats[point2 * 4], quats[point2 * 4 + 1], quats[point2 * 4 + 2], quats[point2 * 4 + 3]); + + ebsdlib::AxisAngleDType axisAngle = m_OrientationOps[laueClass]->calculateMisorientation(q1, q2); + float w = static_cast(axisAngle[3]); + + return w < m_InputValues->MisorientationTolerance; +} diff --git a/src/Plugins/OrientationAnalysis/src/OrientationAnalysis/Filters/Algorithms/EBSDSegmentFeatures.hpp b/src/Plugins/OrientationAnalysis/src/OrientationAnalysis/Filters/Algorithms/EBSDSegmentFeatures.hpp index d1db6caada..fe92c6c7f7 100644 --- a/src/Plugins/OrientationAnalysis/src/OrientationAnalysis/Filters/Algorithms/EBSDSegmentFeatures.hpp +++ b/src/Plugins/OrientationAnalysis/src/OrientationAnalysis/Filters/Algorithms/EBSDSegmentFeatures.hpp @@ -56,26 +56,23 @@ class ORIENTATIONANALYSIS_EXPORT EBSDSegmentFeatures : public SegmentFeatures Result<> operator()(); protected: + int64_t getSeed(int32 gnum, int64 nextSeed) const override; + bool determineGrouping(int64 referencePoint, int64 neighborPoint, int32 gnum) const override; + /** - * @brief - * @param data - * @param args - * @param gnum - * @param nextSeed - * @return int64 + * @brief Checks whether a voxel can participate in EBSD segmentation based on mask and phase. + * @param point Linear voxel index. + * @return true if the voxel passes mask and phase checks. */ - int64_t getSeed(int32 gnum, int64 nextSeed) const override; + bool isValidVoxel(int64 point) const override; /** - * @brief - * @param data - * @param args - * @param referencepoint - * @param neighborpoint - * @param gnum - * @return bool + * @brief Determines whether two neighboring voxels belong to the same EBSD segment. + * @param point1 First voxel index. + * @param point2 Second (neighbor) voxel index. + * @return true if both voxels share the same phase and their misorientation is within tolerance. */ - bool determineGrouping(int64 referencePoint, int64 neighborPoint, int32 gnum) const override; + bool areNeighborsSimilar(int64 point1, int64 point2) const override; private: const EBSDSegmentFeaturesInputValues* m_InputValues = nullptr; diff --git a/src/Plugins/OrientationAnalysis/test/CAxisSegmentFeaturesTest.cpp b/src/Plugins/OrientationAnalysis/test/CAxisSegmentFeaturesTest.cpp index 5a6e1a4681..1f67e223cb 100644 --- a/src/Plugins/OrientationAnalysis/test/CAxisSegmentFeaturesTest.cpp +++ b/src/Plugins/OrientationAnalysis/test/CAxisSegmentFeaturesTest.cpp @@ -5,11 +5,15 @@ #include "OrientationAnalysisTestUtils.hpp" #include "simplnx/Core/Application.hpp" +#include "simplnx/DataStructure/AttributeMatrix.hpp" +#include "simplnx/DataStructure/Geometry/ImageGeom.hpp" #include "simplnx/Parameters/ArrayCreationParameter.hpp" #include "simplnx/Parameters/Dream3dImportParameter.hpp" #include "simplnx/Parameters/GeometrySelectionParameter.hpp" #include "simplnx/UnitTest/UnitTestCommon.hpp" +#include "simplnx/Utilities/AlgorithmDispatch.hpp" +#include #include namespace fs = std::filesystem; @@ -39,6 +43,12 @@ inline const DataPath k_FeatureIdsMaskAllPath = k_InputGeometryPath.createChildP TEST_CASE("OrientationAnalysis::CAxisSegmentFeatures:Face", "[OrientationAnalysis][CAxisSegmentFeaturesFilter]") { + UnitTest::LoadPlugins(); + bool forceOocAlgo = GENERATE(false, true); + const nx::core::ForceOocAlgorithmGuard guard(forceOocAlgo); + // segment_features_test_data: 3x144x144, Quats (float32, 4-comp) => 144*144*4*4 = 331,776 bytes/slice + const UnitTest::PreferencesSentinel prefsSentinel("Zarr", 331776, true); + const nx::core::UnitTest::TestFileSentinel testDataSentinel(nx::core::unit_test::k_TestFilesDir, "segment_features_test_data.tar.gz", "segment_features_test_data"); // Read Exemplar DREAM3D File Filter auto exemplarFilePath = fs::path(fmt::format("{}/segment_features_test_data/segment_features_test_data.dream3d", unit_test::k_TestFilesDir)); @@ -94,6 +104,12 @@ TEST_CASE("OrientationAnalysis::CAxisSegmentFeatures:Face", "[OrientationAnalysi TEST_CASE("OrientationAnalysis::CAxisSegmentFeatures:All", "[OrientationAnalysis][CAxisSegmentFeaturesFilter]") { + UnitTest::LoadPlugins(); + bool forceOocAlgo = GENERATE(false, true); + const nx::core::ForceOocAlgorithmGuard guard(forceOocAlgo); + // segment_features_test_data: 3x144x144, Quats (float32, 4-comp) => 144*144*4*4 = 331,776 bytes/slice + const UnitTest::PreferencesSentinel prefsSentinel("Zarr", 331776, true); + const nx::core::UnitTest::TestFileSentinel testDataSentinel(nx::core::unit_test::k_TestFilesDir, "segment_features_test_data.tar.gz", "segment_features_test_data"); // Read Exemplar DREAM3D File Filter auto exemplarFilePath = fs::path(fmt::format("{}/segment_features_test_data/segment_features_test_data.dream3d", unit_test::k_TestFilesDir)); @@ -149,6 +165,12 @@ TEST_CASE("OrientationAnalysis::CAxisSegmentFeatures:All", "[OrientationAnalysis TEST_CASE("OrientationAnalysis::CAxisSegmentFeatures:MaskFace", "[OrientationAnalysis][CAxisSegmentFeaturesFilter]") { + UnitTest::LoadPlugins(); + bool forceOocAlgo = GENERATE(false, true); + const nx::core::ForceOocAlgorithmGuard guard(forceOocAlgo); + // segment_features_test_data: 3x144x144, Quats (float32, 4-comp) => 144*144*4*4 = 331,776 bytes/slice + const UnitTest::PreferencesSentinel prefsSentinel("Zarr", 331776, true); + const nx::core::UnitTest::TestFileSentinel testDataSentinel(nx::core::unit_test::k_TestFilesDir, "segment_features_test_data.tar.gz", "segment_features_test_data"); // Read Exemplar DREAM3D File Filter auto exemplarFilePath = fs::path(fmt::format("{}/segment_features_test_data/segment_features_test_data.dream3d", unit_test::k_TestFilesDir)); @@ -204,6 +226,12 @@ TEST_CASE("OrientationAnalysis::CAxisSegmentFeatures:MaskFace", "[OrientationAna TEST_CASE("OrientationAnalysis::CAxisSegmentFeatures:MaskAll", "[OrientationAnalysis][CAxisSegmentFeaturesFilter]") { + UnitTest::LoadPlugins(); + bool forceOocAlgo = GENERATE(false, true); + const nx::core::ForceOocAlgorithmGuard guard(forceOocAlgo); + // segment_features_test_data: 3x144x144, Quats (float32, 4-comp) => 144*144*4*4 = 331,776 bytes/slice + const UnitTest::PreferencesSentinel prefsSentinel("Zarr", 331776, true); + const nx::core::UnitTest::TestFileSentinel testDataSentinel(nx::core::unit_test::k_TestFilesDir, "segment_features_test_data.tar.gz", "segment_features_test_data"); // Read Exemplar DREAM3D File Filter auto exemplarFilePath = fs::path(fmt::format("{}/segment_features_test_data/segment_features_test_data.dream3d", unit_test::k_TestFilesDir)); @@ -256,3 +284,98 @@ TEST_CASE("OrientationAnalysis::CAxisSegmentFeatures:MaskAll", "[OrientationAnal UnitTest::CheckArraysInheritTupleDims(dataStructure, SmallIn100::k_TupleCheckIgnoredPaths); } + +TEST_CASE("OrientationAnalysis::CAxisSegmentFeatures: Benchmark 200x200x200", "[OrientationAnalysis][CAxisSegmentFeaturesFilter][Benchmark]") +{ + UnitTest::LoadPlugins(); + // 200x200x200, Quats float32 4-comp => 200*200*4*4 = 640,000 bytes/slice + const UnitTest::PreferencesSentinel prefsSentinel("Zarr", 640000, true); + + constexpr usize k_DimX = 200; + constexpr usize k_DimY = 200; + constexpr usize k_DimZ = 200; + const ShapeType cellTupleShape = {k_DimZ, k_DimY, k_DimX}; + const auto benchmarkFile = fs::path(fmt::format("{}/caxis_segment_features_benchmark.dream3d", unit_test::k_BinaryTestOutputDir)); + + // Stage 1: Build data programmatically and write to .dream3d + { + DataStructure buildDS; + auto* imageGeom = ImageGeom::Create(buildDS, "DataContainer"); + imageGeom->setDimensions({k_DimX, k_DimY, k_DimZ}); + imageGeom->setSpacing({1.0f, 1.0f, 1.0f}); + imageGeom->setOrigin({0.0f, 0.0f, 0.0f}); + + auto* cellAM = AttributeMatrix::Create(buildDS, "CellData", cellTupleShape, imageGeom->getId()); + imageGeom->setCellData(*cellAM); + + // Create Quats array (float32, 4-component) with block grain pattern + auto* quatsArray = UnitTest::CreateTestDataArray(buildDS, "Quats", cellTupleShape, {4}, cellAM->getId()); + auto& quatsStore = quatsArray->getDataStoreRef(); + + // Create Phases array (int32, 1-component) - all phase 1 + auto* phasesArray = UnitTest::CreateTestDataArray(buildDS, "Phases", cellTupleShape, {1}, cellAM->getId()); + auto& phasesStore = phasesArray->getDataStoreRef(); + + // Fill quaternions: divide into 25-voxel blocks, each block gets a distinct orientation + constexpr usize k_BlockSize = 25; + for(usize z = 0; z < k_DimZ; z++) + { + for(usize y = 0; y < k_DimY; y++) + { + for(usize x = 0; x < k_DimX; x++) + { + const usize idx = z * k_DimX * k_DimY + y * k_DimX + x; + phasesStore[idx] = 1; + + usize bx = x / k_BlockSize; + usize by = y / k_BlockSize; + usize bz = z / k_BlockSize; + float angle = static_cast((bx * 73 + by * 137 + bz * 251) % 360) * (3.14159265f / 180.0f); + float halfAngle = angle * 0.5f; + quatsStore[idx * 4 + 0] = std::cos(halfAngle); + quatsStore[idx * 4 + 1] = 0.0f; + quatsStore[idx * 4 + 2] = 0.0f; + quatsStore[idx * 4 + 3] = std::sin(halfAngle); + } + } + } + + // Create CellEnsembleData with CrystalStructures + const ShapeType ensembleTupleShape = {2}; + auto* ensembleAM = AttributeMatrix::Create(buildDS, "CellEnsembleData", ensembleTupleShape, imageGeom->getId()); + auto* crystalStructsArray = UnitTest::CreateTestDataArray(buildDS, "CrystalStructures", ensembleTupleShape, {1}, ensembleAM->getId()); + auto& crystalStructsStore = crystalStructsArray->getDataStoreRef(); + crystalStructsStore[0] = 999; // Phase 0: Unknown + crystalStructsStore[1] = 0; // Phase 1: Hexagonal_High (required for CAxis) + + UnitTest::WriteTestDataStructure(buildDS, benchmarkFile); + } + + // Stage 2: Reload (arrays become ZarrStore in OOC) and run filter + DataStructure dataStructure = UnitTest::LoadDataStructure(benchmarkFile); + + { + CAxisSegmentFeaturesFilter filter; + Arguments args; + + args.insertOrAssign(CAxisSegmentFeaturesFilter::k_MisorientationTolerance_Key, std::make_any(5.0F)); + args.insertOrAssign(CAxisSegmentFeaturesFilter::k_NeighborScheme_Key, std::make_any(0)); + args.insertOrAssign(CAxisSegmentFeaturesFilter::k_UseMask_Key, std::make_any(false)); + args.insertOrAssign(CAxisSegmentFeaturesFilter::k_MaskArrayPath_Key, std::make_any(DataPath{})); + args.insertOrAssign(CAxisSegmentFeaturesFilter::k_SelectedImageGeometryPath_Key, std::make_any(DataPath({"DataContainer"}))); + args.insertOrAssign(CAxisSegmentFeaturesFilter::k_QuatsArrayPath_Key, std::make_any(DataPath({"DataContainer", "CellData", "Quats"}))); + args.insertOrAssign(CAxisSegmentFeaturesFilter::k_CellPhasesArrayPath_Key, std::make_any(DataPath({"DataContainer", "CellData", "Phases"}))); + args.insertOrAssign(CAxisSegmentFeaturesFilter::k_CrystalStructuresArrayPath_Key, std::make_any(DataPath({"DataContainer", "CellEnsembleData", "CrystalStructures"}))); + args.insertOrAssign(CAxisSegmentFeaturesFilter::k_FeatureIdsArrayName_Key, std::make_any("FeatureIds")); + args.insertOrAssign(CAxisSegmentFeaturesFilter::k_CellFeatureAttributeMatrixName_Key, std::make_any("Grain Data")); + args.insertOrAssign(CAxisSegmentFeaturesFilter::k_ActiveArrayName_Key, std::make_any("Active")); + args.insertOrAssign(CAxisSegmentFeaturesFilter::k_RandomizeFeatureIds_Key, std::make_any(false)); + + auto preflightResult = filter.preflight(dataStructure, args); + SIMPLNX_RESULT_REQUIRE_VALID(preflightResult.outputActions); + auto executeResult = filter.execute(dataStructure, args); + SIMPLNX_RESULT_REQUIRE_VALID(executeResult.result); + } + + fs::remove(benchmarkFile); +} diff --git a/src/Plugins/OrientationAnalysis/test/EBSDSegmentFeaturesFilterTest.cpp b/src/Plugins/OrientationAnalysis/test/EBSDSegmentFeaturesFilterTest.cpp index 3765e2f6f1..568c8135cc 100644 --- a/src/Plugins/OrientationAnalysis/test/EBSDSegmentFeaturesFilterTest.cpp +++ b/src/Plugins/OrientationAnalysis/test/EBSDSegmentFeaturesFilterTest.cpp @@ -5,13 +5,17 @@ #include "OrientationAnalysisTestUtils.hpp" #include "simplnx/Core/Application.hpp" +#include "simplnx/DataStructure/AttributeMatrix.hpp" +#include "simplnx/DataStructure/Geometry/ImageGeom.hpp" #include "simplnx/Parameters/ArrayCreationParameter.hpp" #include "simplnx/Parameters/Dream3dImportParameter.hpp" #include "simplnx/Parameters/GeometrySelectionParameter.hpp" #include "simplnx/UnitTest/UnitTestCommon.hpp" +#include "simplnx/Utilities/AlgorithmDispatch.hpp" #include +#include #include namespace fs = std::filesystem; @@ -43,6 +47,10 @@ inline const DataPath k_FeatureIdsMaskAllPath = k_InputGeometryPath.createChildP TEST_CASE("OrientationAnalysis::EBSDSegmentFeatures:Face", "[OrientationAnalysis][EBSDSegmentFeatures]") { UnitTest::LoadPlugins(); + bool forceOocAlgo = GENERATE(false, true); + const nx::core::ForceOocAlgorithmGuard guard(forceOocAlgo); + // segment_features_test_data: 3x144x144, Quats (float32, 4-comp) => 144*144*4*4 = 331,776 bytes/slice + const UnitTest::PreferencesSentinel prefsSentinel("Zarr", 331776, true); const nx::core::UnitTest::TestFileSentinel testDataSentinel(nx::core::unit_test::k_TestFilesDir, "segment_features_test_data.tar.gz", "segment_features_test_data"); // Read Exemplar DREAM3D File Filter @@ -100,12 +108,15 @@ TEST_CASE("OrientationAnalysis::EBSDSegmentFeatures:Face", "[OrientationAnalysis TEST_CASE("OrientationAnalysis::EBSDSegmentFeatures:All", "[OrientationAnalysis][EBSDSegmentFeatures]") { UnitTest::LoadPlugins(); + bool forceOocAlgo = GENERATE(false, true); + const nx::core::ForceOocAlgorithmGuard guard(forceOocAlgo); + // segment_features_test_data: 3x144x144, Quats (float32, 4-comp) => 144*144*4*4 = 331,776 bytes/slice + const UnitTest::PreferencesSentinel prefsSentinel("Zarr", 331776, true); const nx::core::UnitTest::TestFileSentinel testDataSentinel(nx::core::unit_test::k_TestFilesDir, "segment_features_test_data.tar.gz", "segment_features_test_data"); // Read Exemplar DREAM3D File Filter auto exemplarFilePath = fs::path(fmt::format("{}/segment_features_test_data/segment_features_test_data.dream3d", unit_test::k_TestFilesDir)); DataStructure dataStructure = UnitTest::LoadDataStructure(exemplarFilePath); - // EBSD Segment Features/Semgent Features (Misorientation) Filter { EBSDSegmentFeaturesFilter filter; @@ -157,6 +168,10 @@ TEST_CASE("OrientationAnalysis::EBSDSegmentFeatures:All", "[OrientationAnalysis] TEST_CASE("OrientationAnalysis::EBSDSegmentFeatures:MaskFace", "[OrientationAnalysis][EBSDSegmentFeatures]") { UnitTest::LoadPlugins(); + bool forceOocAlgo = GENERATE(false, true); + const nx::core::ForceOocAlgorithmGuard guard(forceOocAlgo); + // segment_features_test_data: 3x144x144, Quats (float32, 4-comp) => 144*144*4*4 = 331,776 bytes/slice + const UnitTest::PreferencesSentinel prefsSentinel("Zarr", 331776, true); const nx::core::UnitTest::TestFileSentinel testDataSentinel(nx::core::unit_test::k_TestFilesDir, "segment_features_test_data.tar.gz", "segment_features_test_data"); // Read Exemplar DREAM3D File Filter @@ -214,6 +229,10 @@ TEST_CASE("OrientationAnalysis::EBSDSegmentFeatures:MaskFace", "[OrientationAnal TEST_CASE("OrientationAnalysis::EBSDSegmentFeatures:MaskAll", "[OrientationAnalysis][EBSDSegmentFeatures]") { UnitTest::LoadPlugins(); + bool forceOocAlgo = GENERATE(false, true); + const nx::core::ForceOocAlgorithmGuard guard(forceOocAlgo); + // segment_features_test_data: 3x144x144, Quats (float32, 4-comp) => 144*144*4*4 = 331,776 bytes/slice + const UnitTest::PreferencesSentinel prefsSentinel("Zarr", 331776, true); const nx::core::UnitTest::TestFileSentinel testDataSentinel(nx::core::unit_test::k_TestFilesDir, "segment_features_test_data.tar.gz", "segment_features_test_data"); // Read Exemplar DREAM3D File Filter @@ -267,3 +286,98 @@ TEST_CASE("OrientationAnalysis::EBSDSegmentFeatures:MaskAll", "[OrientationAnaly UnitTest::CheckArraysInheritTupleDims(dataStructure, SmallIn100::k_TupleCheckIgnoredPaths); } + +TEST_CASE("OrientationAnalysis::EBSDSegmentFeatures: Benchmark 200x200x200", "[OrientationAnalysis][EBSDSegmentFeatures][Benchmark]") +{ + UnitTest::LoadPlugins(); + // 200x200x200, Quats float32 4-comp => 200*200*4*4 = 640,000 bytes/slice + const UnitTest::PreferencesSentinel prefsSentinel("Zarr", 640000, true); + + constexpr usize k_DimX = 200; + constexpr usize k_DimY = 200; + constexpr usize k_DimZ = 200; + const ShapeType cellTupleShape = {k_DimZ, k_DimY, k_DimX}; + const auto benchmarkFile = fs::path(fmt::format("{}/ebsd_segment_features_benchmark.dream3d", unit_test::k_BinaryTestOutputDir)); + + // Stage 1: Build data programmatically and write to .dream3d + { + DataStructure buildDS; + auto* imageGeom = ImageGeom::Create(buildDS, "DataContainer"); + imageGeom->setDimensions({k_DimX, k_DimY, k_DimZ}); + imageGeom->setSpacing({1.0f, 1.0f, 1.0f}); + imageGeom->setOrigin({0.0f, 0.0f, 0.0f}); + + auto* cellAM = AttributeMatrix::Create(buildDS, "CellData", cellTupleShape, imageGeom->getId()); + imageGeom->setCellData(*cellAM); + + // Create Quats array (float32, 4-component) with block grain pattern + auto* quatsArray = UnitTest::CreateTestDataArray(buildDS, "Quats", cellTupleShape, {4}, cellAM->getId()); + auto& quatsStore = quatsArray->getDataStoreRef(); + + // Create Phases array (int32, 1-component) - all phase 1 + auto* phasesArray = UnitTest::CreateTestDataArray(buildDS, "Phases", cellTupleShape, {1}, cellAM->getId()); + auto& phasesStore = phasesArray->getDataStoreRef(); + + // Fill quaternions: divide into 25-voxel blocks, each block gets a distinct orientation + constexpr usize k_BlockSize = 25; + for(usize z = 0; z < k_DimZ; z++) + { + for(usize y = 0; y < k_DimY; y++) + { + for(usize x = 0; x < k_DimX; x++) + { + const usize idx = z * k_DimX * k_DimY + y * k_DimX + x; + phasesStore[idx] = 1; + + usize bx = x / k_BlockSize; + usize by = y / k_BlockSize; + usize bz = z / k_BlockSize; + float angle = static_cast((bx * 73 + by * 137 + bz * 251) % 360) * (3.14159265f / 180.0f); + float halfAngle = angle * 0.5f; + quatsStore[idx * 4 + 0] = std::cos(halfAngle); + quatsStore[idx * 4 + 1] = 0.0f; + quatsStore[idx * 4 + 2] = 0.0f; + quatsStore[idx * 4 + 3] = std::sin(halfAngle); + } + } + } + + // Create CellEnsembleData with CrystalStructures + const ShapeType ensembleTupleShape = {2}; + auto* ensembleAM = AttributeMatrix::Create(buildDS, "CellEnsembleData", ensembleTupleShape, imageGeom->getId()); + auto* crystalStructsArray = UnitTest::CreateTestDataArray(buildDS, "CrystalStructures", ensembleTupleShape, {1}, ensembleAM->getId()); + auto& crystalStructsStore = crystalStructsArray->getDataStoreRef(); + crystalStructsStore[0] = 999; // Phase 0: Unknown + crystalStructsStore[1] = 1; // Phase 1: Cubic_High + + UnitTest::WriteTestDataStructure(buildDS, benchmarkFile); + } + + // Stage 2: Reload (arrays become ZarrStore in OOC) and run filter + DataStructure dataStructure = UnitTest::LoadDataStructure(benchmarkFile); + + { + EBSDSegmentFeaturesFilter filter; + Arguments args; + + args.insertOrAssign(EBSDSegmentFeaturesFilter::k_MisorientationTolerance_Key, std::make_any(5.0F)); + args.insertOrAssign(EBSDSegmentFeaturesFilter::k_NeighborScheme_Key, std::make_any(0)); + args.insertOrAssign(EBSDSegmentFeaturesFilter::k_UseMask_Key, std::make_any(false)); + args.insertOrAssign(EBSDSegmentFeaturesFilter::k_MaskArrayPath_Key, std::make_any(DataPath{})); + args.insertOrAssign(EBSDSegmentFeaturesFilter::k_SelectedImageGeometryPath_Key, std::make_any(DataPath({"DataContainer"}))); + args.insertOrAssign(EBSDSegmentFeaturesFilter::k_QuatsArrayPath_Key, std::make_any(DataPath({"DataContainer", "CellData", "Quats"}))); + args.insertOrAssign(EBSDSegmentFeaturesFilter::k_CellPhasesArrayPath_Key, std::make_any(DataPath({"DataContainer", "CellData", "Phases"}))); + args.insertOrAssign(EBSDSegmentFeaturesFilter::k_CrystalStructuresArrayPath_Key, std::make_any(DataPath({"DataContainer", "CellEnsembleData", "CrystalStructures"}))); + args.insertOrAssign(EBSDSegmentFeaturesFilter::k_FeatureIdsArrayName_Key, std::make_any("FeatureIds")); + args.insertOrAssign(EBSDSegmentFeaturesFilter::k_CellFeatureAttributeMatrixName_Key, std::make_any("Grain Data")); + args.insertOrAssign(EBSDSegmentFeaturesFilter::k_ActiveArrayName_Key, std::make_any("Active")); + args.insertOrAssign(EBSDSegmentFeaturesFilter::k_RandomizeFeatureIds_Key, std::make_any(false)); + + auto preflightResult = filter.preflight(dataStructure, args); + SIMPLNX_RESULT_REQUIRE_VALID(preflightResult.outputActions); + auto executeResult = filter.execute(dataStructure, args); + SIMPLNX_RESULT_REQUIRE_VALID(executeResult.result); + } + + fs::remove(benchmarkFile); +} diff --git a/src/Plugins/SimplnxCore/CMakeLists.txt b/src/Plugins/SimplnxCore/CMakeLists.txt index 8175d378fe..5227fdfc73 100644 --- a/src/Plugins/SimplnxCore/CMakeLists.txt +++ b/src/Plugins/SimplnxCore/CMakeLists.txt @@ -247,11 +247,15 @@ set(AlgorithmList ExtractVertexGeometry FeatureFaceCurvature FillBadData + FillBadDataBFS + FillBadDataCCL FindNRingNeighbors FlyingEdges3D HierarchicalSmooth IdentifyDuplicateVertices IdentifySample + IdentifySampleBFS + IdentifySampleCCL InitializeData InitializeImageGeomCellData InterpolatePointCloudToRegularGrid diff --git a/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/Algorithms/FillBadData.cpp b/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/Algorithms/FillBadData.cpp index 5cd59ae953..e9f4521ff0 100644 --- a/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/Algorithms/FillBadData.cpp +++ b/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/Algorithms/FillBadData.cpp @@ -1,265 +1,15 @@ #include "FillBadData.hpp" -#include "simplnx/DataStructure/DataArray.hpp" -#include "simplnx/DataStructure/Geometry/ImageGeom.hpp" -#include "simplnx/Utilities/DataGroupUtilities.hpp" -#include "simplnx/Utilities/FilterUtilities.hpp" -#include "simplnx/Utilities/MessageHelper.hpp" -#include "simplnx/Utilities/NeighborUtilities.hpp" +#include "FillBadDataBFS.hpp" +#include "FillBadDataCCL.hpp" -#include -#include +#include "simplnx/DataStructure/DataArray.hpp" +#include "simplnx/Utilities/AlgorithmDispatch.hpp" using namespace nx::core; -// ============================================================================= -// FillBadData Algorithm Overview -// ============================================================================= -// -// This file implements an optimized algorithm for filling bad data (voxels with -// FeatureId == 0) in image geometries. The algorithm handles out-of-core datasets -// efficiently by processing data in chunks and uses a four-phase approach: -// -// Phase 1: Chunk-Sequential Connected Component Labeling (CCL) -// - Process chunks sequentially, assigning provisional labels to bad data regions -// - Use Union-Find to track equivalences between labels across chunk boundaries -// - Track size of each connected component -// -// Phase 2: Global Resolution -// - Flatten Union-Find structure to resolve all equivalences -// - Accumulate region sizes to root labels -// -// Phase 3: Region Classification and Relabeling -// - Classify regions as "small" (below threshold) or "large" (above threshold) -// - Small regions: mark with -1 for filling in Phase 4 -// - Large regions: keep as 0 or assign to new phase (if requested) -// -// Phase 4: Iterative Morphological Fill -// - Iteratively fill -1 voxels by assigning them to the most common neighbor -// - Update all cell data arrays to match the filled voxels -// -// ============================================================================= - -namespace -{ -// ----------------------------------------------------------------------------- -// Helper function: Update data array tuples based on neighbor assignments -// ----------------------------------------------------------------------------- -// Copies data from neighbor voxels to fill bad data voxels (-1 values) -// This is used to propagate cell data attributes during the filling process -// -// @param featureIds The feature IDs array indicating which voxels are bad data -// @param outputDataStore The data array to update -// @param neighbors The neighbor assignments (index of the neighbor to copy from) -template -void FillBadDataUpdateTuples(const Int32AbstractDataStore& featureIds, AbstractDataStore& outputDataStore, const std::vector& neighbors) -{ - usize start = 0; - usize stop = outputDataStore.getNumberOfTuples(); - const usize numComponents = outputDataStore.getNumberOfComponents(); - - // Loop through all tuples in the data array - for(usize tupleIndex = start; tupleIndex < stop; tupleIndex++) - { - const int32 featureName = featureIds[tupleIndex]; - const int32 neighbor = neighbors[tupleIndex]; - - // Skip if no neighbor assignment - if(neighbor == tupleIndex) - { - continue; - } - - // Copy data from the valid neighbor to bad data voxel - // Only copy if the current voxel is bad data (-1) and the neighbor is valid (>0) - if(featureName < 0 && neighbor != -1 && featureIds[static_cast(neighbor)] > 0) - { - // Copy all components from neighbor tuple to current tuple - for(usize i = 0; i < numComponents; i++) - { - auto value = outputDataStore[neighbor * numComponents + i]; - outputDataStore[tupleIndex * numComponents + i] = value; - } - } - } -} - -// ----------------------------------------------------------------------------- -// Functor for type-dispatched tuple updates -// ----------------------------------------------------------------------------- -// Allows the FillBadDataUpdateTuples function to be called with runtime type dispatch -struct FillBadDataUpdateTuplesFunctor -{ - template - void operator()(const Int32AbstractDataStore& featureIds, IDataArray* outputIDataArray, const std::vector& neighbors) - { - auto& outputStore = outputIDataArray->template getIDataStoreRefAs>(); - FillBadDataUpdateTuples(featureIds, outputStore, neighbors); - } -}; -} // namespace - -// ============================================================================= -// ChunkAwareUnionFind Implementation -// ============================================================================= -// -// A Union-Find (Disjoint Set) data structure optimized for tracking connected -// component equivalences during chunk-sequential processing. Uses union-by-rank -// for efficient merging and defers path compression to a single flatten() pass -// to avoid redundant updates during construction. -// -// Key features: -// - Lazily creates entries as labels are encountered -// - Tracks rank for balanced union operations -// - Accumulates sizes at each label (not root) during construction -// - Single-pass path compression and size accumulation in flatten() -// ============================================================================= - -// ----------------------------------------------------------------------------- -// Find the root representative of a label's equivalence class -// ----------------------------------------------------------------------------- -// This performs a simple root lookup without path compression. Path compression -// is deferred to the flatten() method to avoid wasting cycles updating paths -// that will be modified again during later merges. -// -// @param x The label to find the root for -// @return The root label of the equivalence class -int64 ChunkAwareUnionFind::find(int64 x) -{ - // Create a parent entry if it doesn't exist (lazy initialization) - if(!m_Parent.contains(x)) - { - m_Parent[x] = x; - m_Rank[x] = 0; - m_Size[x] = 0; - } - - // Find root iteratively without using the path compression algorithm - // Path compression is deferred to flatten() to avoid wasting cycles - // during frequent merges where paths would be updated repeatedly - int64 root = x; - while(m_Parent[root] != root) - { - root = m_Parent[root]; - } - - return root; -} - -// ----------------------------------------------------------------------------- -// Unite two labels into the same equivalence class -// ----------------------------------------------------------------------------- -// Merges the sets containing labels a and b using union-by-rank heuristic. -// This keeps the tree balanced for better performance. -// -// @param a First label -// @param b Second label -void ChunkAwareUnionFind::unite(int64 a, int64 b) -{ - int64 rootA = find(a); - int64 rootB = find(b); - - // Already in the same set - if(rootA == rootB) - { - return; - } - - // Union by rank: attach the smaller tree object under the root of the larger tree - // This keeps the tree height logarithmic for better find() performance - if(m_Rank[rootA] < m_Rank[rootB]) - { - m_Parent[rootA] = rootB; - } - else if(m_Rank[rootA] > m_Rank[rootB]) - { - m_Parent[rootB] = rootA; - } - else - { - // Equal rank: arbitrarily choose rootA as the parent and increment its rank - m_Parent[rootB] = rootA; - m_Rank[rootA]++; - } -} - -// ----------------------------------------------------------------------------- -// Add voxel count to a label's size -// ----------------------------------------------------------------------------- -// During construction, sizes are accumulated at each label (not root). -// This allows concurrent size updates without needing to find roots. -// All sizes will be accumulated to roots during flatten(). -// -// @param label The label to add size to -// @param count Number of voxels to add -void ChunkAwareUnionFind::addSize(int64 label, uint64 count) -{ - // Add size to the label itself, not the root - // Sizes will be accumulated to roots during flatten() - m_Size[label] += count; -} - // ----------------------------------------------------------------------------- -// Get the total size of a label's equivalence class -// ----------------------------------------------------------------------------- -// Returns the accumulated size for a label's root. Should only be called -// after flatten() has been executed to get accurate totals. -// -// @param label The label to query -// @return Total number of voxels in the equivalence class -uint64 ChunkAwareUnionFind::getSize(int64 label) -{ - int64 root = find(label); - auto it = m_Size.find(root); - if(it == m_Size.end()) - { - return 0; - } - return it->second; -} - -// ----------------------------------------------------------------------------- -// Flatten the Union-Find structure with path compression -// ----------------------------------------------------------------------------- -// Performs a single-pass path compression and size accumulation after all -// merges are complete. This is more efficient than doing path compression -// during every find() operation when there are frequent merges. -// -// After flatten(): -// - Every label points directly to its root (fully compressed paths) -// - All sizes are accumulated at root labels -// - Subsequent find() and getSize() operations are O(1) -void ChunkAwareUnionFind::flatten() -{ - // First pass: flatten all parents with path compression - // Make every label point directly to its root for O(1) lookups - // This is done in a single pass after all merges to avoid wasting - // cycles updating paths repeatedly during construction - std::unordered_map finalRoots; - for(auto& [label, parent] : m_Parent) - { - int64 root = find(label); - finalRoots[label] = root; - } - - // Second pass: accumulate sizes to roots - // Sum up all the sizes from individual labels to their root representatives - std::unordered_map rootSizes; - for(const auto& [label, root] : finalRoots) - { - rootSizes[root] += m_Size[label]; - } - - // Replace maps with flattened versions for O(1) access - m_Parent = finalRoots; - m_Size = rootSizes; -} - -// ============================================================================= -// FillBadData Implementation -// ============================================================================= - -FillBadData::FillBadData(DataStructure& dataStructure, const IFilter::MessageHandler& mesgHandler, const std::atomic_bool& shouldCancel, FillBadDataInputValues* inputValues) +FillBadData::FillBadData(DataStructure& dataStructure, const IFilter::MessageHandler& mesgHandler, const std::atomic_bool& shouldCancel, const FillBadDataInputValues* inputValues) : m_DataStructure(dataStructure) , m_InputValues(inputValues) , m_ShouldCancel(shouldCancel) @@ -271,493 +21,9 @@ FillBadData::FillBadData(DataStructure& dataStructure, const IFilter::MessageHan FillBadData::~FillBadData() noexcept = default; // ----------------------------------------------------------------------------- -const std::atomic_bool& FillBadData::getCancel() const -{ - return m_ShouldCancel; -} - -// ============================================================================= -// PHASE 1: Chunk-Sequential Connected Component Labeling (CCL) -// ============================================================================= -// -// Performs connected component labeling on bad data voxels (FeatureId == 0) -// using a chunk-sequential scanline algorithm. This approach is optimized for -// out-of-core datasets where data is stored in chunks on the disk. -// -// Algorithm: -// 1. Process chunks sequentially, loading one chunk at a time -// 2. For each bad data voxel, check already-processed neighbors (-X, -Y, -Z) -// 3. If neighbors exist, reuse their label; otherwise assign new label -// 4. Track label equivalences in Union-Find structure -// 5. Track size of each connected component -// -// The scanline order ensures we only need to check 3 neighbors (previous in -// X, Y, and Z directions) instead of all 6 face neighbors, because later -// neighbors haven't been processed yet. -// -// @param featureIdsStore The feature IDs data store (maybe out-of-core) -// @param unionFind Union-Find structure for tracking label equivalences -// @param provisionalLabels Map from voxel index to assigned provisional label -// @param dims Image dimensions [X, Y, Z] -// ============================================================================= -void FillBadData::phaseOneCCL(Int32AbstractDataStore& featureIdsStore, ChunkAwareUnionFind& unionFind, std::unordered_map& provisionalLabels, const std::array& dims) -{ - // Use negative labels for bad data regions to distinguish from positive feature IDs - int64 nextLabel = -1; - - const uint64 numChunks = featureIdsStore.getNumberOfChunks(); - - // Process each chunk sequentially (load, process, unload) - for(uint64 chunkIdx = 0; chunkIdx < numChunks; chunkIdx++) - { - // Load the current chunk into memory - featureIdsStore.loadChunk(chunkIdx); - - // Get chunk bounds (INCLUSIVE ranges in [Z, Y, X] order) - const auto chunkLowerBounds = featureIdsStore.getChunkLowerBounds(chunkIdx); - const auto chunkUpperBounds = featureIdsStore.getChunkUpperBounds(chunkIdx); - - // Process voxels in this chunk using scanline algorithm - // Iterate in Z-Y-X order (slowest to fastest) to maintain scanline consistency - // Note: chunk bounds are INCLUSIVE and in [Z, Y, X] order (slowest to fastest) - for(usize z = chunkLowerBounds[0]; z <= chunkUpperBounds[0]; z++) - { - for(usize y = chunkLowerBounds[1]; y <= chunkUpperBounds[1]; y++) - { - for(usize x = chunkLowerBounds[2]; x <= chunkUpperBounds[2]; x++) - { - // Calculate linear index for current voxel - const usize index = z * dims[0] * dims[1] + y * dims[0] + x; - - // Only process bad data voxels (FeatureId == 0) - // Skip valid feature voxels (FeatureId > 0) - if(featureIdsStore[index] != 0) - { - continue; - } - - // Check already-processed neighbors (scanline order: -Z, -Y, -X) - // We only check "backward" neighbors because "forward" neighbors - // haven't been processed yet in the scanline order - std::vector neighborLabels; - - // Check -X neighbor - if(x > 0) - { - const usize neighborIdx = index - 1; - if(provisionalLabels.contains(neighborIdx) && featureIdsStore[neighborIdx] == 0) - { - neighborLabels.push_back(provisionalLabels[neighborIdx]); - } - } - - // Check -Y neighbor - if(y > 0) - { - const usize neighborIdx = index - dims[0]; - if(provisionalLabels.contains(neighborIdx) && featureIdsStore[neighborIdx] == 0) - { - neighborLabels.push_back(provisionalLabels[neighborIdx]); - } - } - - // Check -Z neighbor - if(z > 0) - { - const usize neighborIdx = index - dims[0] * dims[1]; - if(provisionalLabels.contains(neighborIdx) && featureIdsStore[neighborIdx] == 0) - { - neighborLabels.push_back(provisionalLabels[neighborIdx]); - } - } - - // Assign label based on neighbors - int64 assignedLabel; - if(neighborLabels.empty()) - { - // No labeled neighbors found - this is a new connected component - // Assign a new negative label and initialize in union-find - assignedLabel = nextLabel--; - unionFind.find(assignedLabel); // Initialize in union-find (creates entry) - } - else - { - // One or more labeled neighbors found - join their equivalence class - // Use the first neighbor's label as the representative - assignedLabel = neighborLabels[0]; - - // If multiple neighbors have different labels, unite them - // This handles the case where different regions merge at this voxel - for(usize i = 1; i < neighborLabels.size(); i++) - { - if(neighborLabels[i] != assignedLabel) - { - unionFind.unite(assignedLabel, neighborLabels[i]); - } - } - } - - // Store the assigned label for this voxel - provisionalLabels[index] = assignedLabel; - - // Increment the size count for this label (will be accumulated to root in flatten()) - unionFind.addSize(assignedLabel, 1); - } - } - } - } - - // Flush to ensure all chunks are written back to storage - featureIdsStore.flush(); -} - -// ============================================================================= -// PHASE 2: Global Resolution of Equivalences -// ============================================================================= -// -// Resolves all label equivalences from Phase 1 and accumulates region sizes. -// After this phase: -// - All labels point directly to their root representatives -// - All sizes are accumulated at root labels -// - Region sizes can be queried in O(1) time -// -// @param unionFind Union-Find structure containing label equivalences -// @param smallRegions Unused in current implementation (kept for interface compatibility) -// ============================================================================= -void FillBadData::phaseTwoGlobalResolution(ChunkAwareUnionFind& unionFind, std::unordered_set& smallRegions) -{ - // Flatten the union-find structure to: - // 1. Compress all paths (make every label point directly to root) - // 2. Accumulate all sizes to root labels - unionFind.flatten(); -} - -// ============================================================================= -// PHASE 3: Region Classification and Relabeling -// ============================================================================= -// -// Classifies bad data regions as "small" or "large" based on size threshold: -// - Small regions (< minAllowedDefectSize): marked with -1 for filling in Phase 4 -// - Large regions (>= minAllowedDefectSize): kept as 0 (or assigned new phase) -// -// This phase processes chunks to relabel voxels based on their region classification. -// Large regions may optionally be assigned to a new phase (if storeAsNewPhase is true). -// -// @param featureIdsStore The feature IDs data store -// @param cellPhasesPtr Cell phases array (maybe null) -// @param provisionalLabels Map from voxel index to provisional label (from Phase 1) -// @param smallRegions Unused in current implementation (kept for interface compatibility) -// @param unionFind Union-Find structure with resolved equivalences (from Phase 2) -// @param maxPhase Maximum existing phase value (for new phase assignment) -// ============================================================================= -void FillBadData::phaseThreeRelabeling(Int32AbstractDataStore& featureIdsStore, Int32Array* cellPhasesPtr, const std::unordered_map& provisionalLabels, - const std::unordered_set& smallRegions, ChunkAwareUnionFind& unionFind, usize maxPhase) const -{ - const auto& selectedImageGeom = m_DataStructure.getDataRefAs(m_InputValues->inputImageGeometry); - const SizeVec3 udims = selectedImageGeom.getDimensions(); - const uint64 numChunks = featureIdsStore.getNumberOfChunks(); - - // Collect all unique root labels and their sizes - // After flatten(), all labels point to roots and sizes are accumulated - std::unordered_map rootSizes; - for(const auto& [index, label] : provisionalLabels) - { - int64 root = unionFind.find(label); - if(!rootSizes.contains(root)) - { - rootSizes[root] = unionFind.getSize(root); - } - } - - // Classify regions as small (need filling) or large (keep or assign to a new phase) - std::unordered_set localSmallRegions; - for(const auto& [root, size] : rootSizes) - { - if(static_cast(size) < m_InputValues->minAllowedDefectSizeValue) - { - localSmallRegions.insert(root); - } - } - - // Process each chunk to relabel voxels based on region classification - for(uint64 chunkIdx = 0; chunkIdx < numChunks; chunkIdx++) - { - // Load chunk into memory - featureIdsStore.loadChunk(chunkIdx); - - // Get chunk bounds (INCLUSIVE ranges in [Z, Y, X] order) - const auto chunkLowerBounds = featureIdsStore.getChunkLowerBounds(chunkIdx); - const auto chunkUpperBounds = featureIdsStore.getChunkUpperBounds(chunkIdx); - - // Iterate through all voxels in this chunk - // Note: chunk bounds are INCLUSIVE and in [Z, Y, X] order (slowest to fastest) - for(usize z = chunkLowerBounds[0]; z <= chunkUpperBounds[0]; z++) - { - for(usize y = chunkLowerBounds[1]; y <= chunkUpperBounds[1]; y++) - { - for(usize x = chunkLowerBounds[2]; x <= chunkUpperBounds[2]; x++) - { - const usize index = z * udims[0] * udims[1] + y * udims[0] + x; - - // Check if this voxel was labeled as bad data in Phase 1 - auto labelIter = provisionalLabels.find(index); - if(labelIter != provisionalLabels.end()) - { - // Find the root label for this voxel's connected component - int64 root = unionFind.find(labelIter->second); - - if(localSmallRegions.contains(root)) - { - // Small region - mark with -1 for filling in Phase 4 - featureIdsStore[index] = -1; - } - else - { - // Large region - keep as bad data (0) or assign to a new phase - featureIdsStore[index] = 0; - - // Optionally assign large bad data regions to a new phase - if(m_InputValues->storeAsNewPhase && cellPhasesPtr != nullptr) - { - (*cellPhasesPtr)[index] = static_cast(maxPhase) + 1; - } - } - } - } - } - } - } - - // Write all chunks back to storage - featureIdsStore.flush(); -} - -// ============================================================================= -// PHASE 4: Iterative Morphological Fill -// ============================================================================= -// -// Fills small bad data regions (marked with -1 in Phase 3) using iterative -// morphological dilation. Each iteration: -// 1. For each -1 voxel, find the most common positive feature among its neighbors -// 2. Assign that voxel to the most common neighbor's feature -// 3. Update all cell data arrays to match the filled voxels -// -// This process repeats until all -1 voxels have been filled. The algorithm -// gradually fills small defects from the edges inward, ensuring smooth boundaries. -// -// @param featureIdsStore The feature IDs data store -// @param dims Image dimensions [X, Y, Z] -// @param numFeatures Number of features in the dataset -// ============================================================================= -void FillBadData::phaseFourIterativeFill(Int32AbstractDataStore& featureIdsStore, const std::array& dims, usize numFeatures) const -{ - const auto& selectedImageGeom = m_DataStructure.getDataRefAs(m_InputValues->inputImageGeometry); - const usize totalPoints = featureIdsStore.getNumberOfTuples(); - - std::array neighborVoxelIndexOffsets = initializeFaceNeighborOffsets(dims); - std::array faceNeighborInternalIdx = initializeFaceNeighborInternalIdx(); - - // Neighbor assignment array: neighbors[i] = index of the neighbor to copy from - std::vector neighbors(totalPoints, -1); - - // Feature vote counter: tracks how many times each feature appears as the neighbor - std::vector featureNumber(numFeatures + 1, 0); - - // Get a list of all cell arrays that need to be updated during filling - // Exclude arrays specified in ignoredDataArrayPaths - std::optional> allChildArrays = GetAllChildDataPaths(m_DataStructure, selectedImageGeom.getCellDataPath(), DataObject::Type::DataArray, m_InputValues->ignoredDataArrayPaths); - std::vector voxelArrayNames; - if(allChildArrays.has_value()) - { - voxelArrayNames = allChildArrays.value(); - } - - // Create a message helper for throttled progress updates (1 update per second) - MessageHelper messageHelper(m_MessageHandler, std::chrono::milliseconds(1000)); - auto throttledMessenger = messageHelper.createThrottledMessenger(std::chrono::milliseconds(1000)); - - usize count = 1; // Number of voxels with -1 value that remain - usize iteration = 0; // Current iteration number - - // Iteratively fill until no voxels with -1 value remain - while(count != 0) - { - iteration++; - count = 0; // Reset count of voxels with a -1 value for this iteration - - // Pass 1: Determine neighbor assignments for all -1 voxels - // For each -1 voxel, find the most common positive feature among neighbors - for(int64 voxelIndex = 0; voxelIndex < totalPoints; voxelIndex++) - { - int32 featureName = featureIdsStore[voxelIndex]; - - // Only process voxels marked for filling (-1) - if(featureName < 0) - { - count++; // Count this voxel as needing filling - int32 most = 0; // Highest vote count seen so far - - // Compute 3D position from the linear index - int64 xIdx = voxelIndex % dims[0]; - int64 yIdx = (voxelIndex / dims[0]) % dims[1]; - int64 zIdx = voxelIndex / (dims[0] * dims[1]); - - // Vote for the most common positive neighbor feature - // Loop over the 6 face neighbors of the voxel - std::array isValidFaceNeighbor = computeValidFaceNeighbors(xIdx, yIdx, zIdx, dims); - for(const auto& faceIndex : faceNeighborInternalIdx) - { - // Skip neighbors outside image bounds - if(!isValidFaceNeighbor[faceIndex]) - { - continue; - } - - auto neighborPoint = voxelIndex + neighborVoxelIndexOffsets[faceIndex]; - int32 feature = featureIdsStore[neighborPoint]; - - // Only vote for positive features (valid data) - if(feature > 0) - { - // Increment vote count for this feature - featureNumber[feature]++; - int32 current = featureNumber[feature]; - - // Track the feature with the most votes - if(current > most) - { - most = current; - neighbors[voxelIndex] = static_cast(neighborPoint); // Store neighbor to copy from - } - } - } - - // Reset vote counters for next voxel - // Only reset features that were actually counted to save time - // Loop over the 6 face neighbors of the voxel - isValidFaceNeighbor = computeValidFaceNeighbors(xIdx, yIdx, zIdx, dims); - for(const auto& faceIndex : faceNeighborInternalIdx) - { - if(!isValidFaceNeighbor[faceIndex]) - { - continue; - } - - int64 neighborPoint = voxelIndex + neighborVoxelIndexOffsets[faceIndex]; - int32 feature = featureIdsStore[neighborPoint]; - - if(feature > 0) - { - featureNumber[feature] = 0; - } - } - } - } - - // Pass 2: Update all cell data arrays based on neighbor assignments - // This propagates all cell data attributes (not just feature IDs) to filled voxels - for(const auto& cellArrayPath : voxelArrayNames) - { - // Skip the feature IDs array (will be updated separately below) - if(cellArrayPath == m_InputValues->featureIdsArrayPath) - { - continue; - } - - auto* oldCellArray = m_DataStructure.getDataAs(cellArrayPath); - - // Use the type-dispatched update function to handle all data types - ExecuteDataFunction(FillBadDataUpdateTuplesFunctor{}, oldCellArray->getDataType(), featureIdsStore, oldCellArray, neighbors); - } - - // Update FeatureIds array last to finalize the iteration - FillBadDataUpdateTuples(featureIdsStore, featureIdsStore, neighbors); - - // Send throttled progress update (max 1 per second) - throttledMessenger.sendThrottledMessage([iteration, count]() { return fmt::format(" Iteration {}: {} voxels remaining to fill", iteration, count); }); - } - - // Send final completion summary - m_MessageHandler({IFilter::Message::Type::Info, fmt::format(" Completed in {} iteration{}", iteration, iteration == 1 ? "" : "s")}); -} - -// ============================================================================= -// Main Algorithm Entry Point -// ============================================================================= -// -// Executes the four-phase bad data filling algorithm: -// 1. Chunk-Sequential CCL: Label connected components of bad data -// 2. Global Resolution: Resolve equivalences and accumulate sizes -// 3. Region Classification: Classify regions as small or large -// 4. Iterative Fill: Fill small regions using morphological dilation -// -// @return Result indicating success or failure -// ============================================================================= -Result<> FillBadData::operator()() const +Result<> FillBadData::operator()() { - // Get feature IDs array and image geometry - auto& featureIdsStore = m_DataStructure.getDataAs(m_InputValues->featureIdsArrayPath)->getDataStoreRef(); - const auto& selectedImageGeom = m_DataStructure.getDataRefAs(m_InputValues->inputImageGeometry); - const SizeVec3 udims = selectedImageGeom.getDimensions(); - - // Convert dimensions to signed integers for offset calculations - std::array dims = { - static_cast(udims[0]), - static_cast(udims[1]), - static_cast(udims[2]), - }; - - const usize totalPoints = featureIdsStore.getNumberOfTuples(); - - // Get cell phases array if we need to assign large regions to a new phase - Int32Array* cellPhasesPtr = nullptr; - usize maxPhase = 0; - - if(m_InputValues->storeAsNewPhase) - { - cellPhasesPtr = m_DataStructure.getDataAs(m_InputValues->cellPhasesArrayPath); - - // Find the maximum existing phase value - for(usize i = 0; i < totalPoints; i++) - { - if((*cellPhasesPtr)[i] > maxPhase) - { - maxPhase = (*cellPhasesPtr)[i]; - } - } - } - - // Count the number of existing features for array sizing - usize numFeatures = 0; - for(usize i = 0; i < totalPoints; i++) - { - int32 featureName = featureIdsStore[i]; - if(featureName > numFeatures) - { - numFeatures = featureName; - } - } - - // Initialize data structures for chunk-aware connected component labeling - ChunkAwareUnionFind unionFind; // Tracks label equivalences and sizes - std::unordered_map provisionalLabels; // Maps voxel index to provisional label - std::unordered_set smallRegions; // Set of small region roots (unused currently) - - // Phase 1: Chunk-Sequential Connected Component Labeling - m_MessageHandler({IFilter::Message::Type::Info, "Phase 1/4: Labeling connected components..."}); - phaseOneCCL(featureIdsStore, unionFind, provisionalLabels, dims); - - // Phase 2: Global Resolution of equivalences - m_MessageHandler({IFilter::Message::Type::Info, "Phase 2/4: Resolving region equivalences..."}); - phaseTwoGlobalResolution(unionFind, smallRegions); - - // Phase 3: Relabeling based on region size classification - m_MessageHandler({IFilter::Message::Type::Info, "Phase 3/4: Classifying region sizes..."}); - phaseThreeRelabeling(featureIdsStore, cellPhasesPtr, provisionalLabels, smallRegions, unionFind, maxPhase); - - // Phase 4: Iterative morphological fill - m_MessageHandler({IFilter::Message::Type::Info, "Phase 4/4: Filling small defects..."}); - phaseFourIterativeFill(featureIdsStore, dims, numFeatures); + auto* featureIdsArray = m_DataStructure.getDataAs(m_InputValues->featureIdsArrayPath); - return {}; + return DispatchAlgorithm({featureIdsArray}, m_DataStructure, m_MessageHandler, m_ShouldCancel, m_InputValues); } diff --git a/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/Algorithms/FillBadData.hpp b/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/Algorithms/FillBadData.hpp index 1e994f2948..db21d91419 100644 --- a/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/Algorithms/FillBadData.hpp +++ b/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/Algorithms/FillBadData.hpp @@ -1,4 +1,3 @@ - #pragma once #include "SimplnxCore/SimplnxCore_export.hpp" @@ -7,71 +6,11 @@ #include "simplnx/DataStructure/DataStructure.hpp" #include "simplnx/Filter/IFilter.hpp" -#include -#include #include namespace nx::core { -// Forward declarations -template -class DataArray; -using Int32Array = DataArray; - -template -class AbstractDataStore; -using Int32AbstractDataStore = AbstractDataStore; - -/** - * @class ChunkAwareUnionFind - * @brief Union-Find data structure for tracking connected component equivalences across chunks - */ -class SIMPLNXCORE_EXPORT ChunkAwareUnionFind -{ -public: - ChunkAwareUnionFind() = default; - ~ChunkAwareUnionFind() = default; - - /** - * @brief Find the root label with path compression - * @param x Label to find - * @return Root label - */ - int64 find(int64 x); - - /** - * @brief Unite two labels into the same equivalence class - * @param a First label - * @param b Second label - */ - void unite(int64 a, int64 b); - - /** - * @brief Add to the size count for a label - * @param label Label to update - * @param count Number of voxels to add - */ - void addSize(int64 label, uint64 count); - - /** - * @brief Get the total size of a label's equivalence class - * @param label Label to query - * @return Total number of voxels in the equivalence class - */ - uint64 getSize(int64 label); - - /** - * @brief Flatten the union-find structure and sum sizes to roots - */ - void flatten(); - -private: - std::unordered_map m_Parent; - std::unordered_map m_Rank; - std::unordered_map m_Size; -}; - struct SIMPLNXCORE_EXPORT FillBadDataInputValues { int32 minAllowedDefectSizeValue; @@ -84,12 +23,23 @@ struct SIMPLNXCORE_EXPORT FillBadDataInputValues /** * @class FillBadData - + * @brief Dispatcher that selects between BFS (in-core) and CCL (out-of-core) algorithms. + * + * @see FillBadDataBFS for the in-core-optimized implementation. + * @see FillBadDataCCL for the out-of-core-optimized implementation. + * @see AlgorithmDispatch.hpp for the dispatch mechanism. */ class SIMPLNXCORE_EXPORT FillBadData { public: - FillBadData(DataStructure& dataStructure, const IFilter::MessageHandler& mesgHandler, const std::atomic_bool& shouldCancel, FillBadDataInputValues* inputValues); + /** + * @brief Constructs the dispatcher with the required context for algorithm selection. + * @param dataStructure The data structure containing the arrays to process. + * @param mesgHandler Handler for progress and informational messages. + * @param shouldCancel Cancellation flag checked during execution. + * @param inputValues Filter parameter values controlling fill behavior. + */ + FillBadData(DataStructure& dataStructure, const IFilter::MessageHandler& mesgHandler, const std::atomic_bool& shouldCancel, const FillBadDataInputValues* inputValues); ~FillBadData() noexcept; FillBadData(const FillBadData&) = delete; @@ -97,47 +47,13 @@ class SIMPLNXCORE_EXPORT FillBadData FillBadData& operator=(const FillBadData&) = delete; FillBadData& operator=(FillBadData&&) noexcept = delete; - Result<> operator()() const; - - const std::atomic_bool& getCancel() const; - -private: /** - * @brief Phase 1: Chunk-sequential connected component labeling - * @param featureIdsStore Feature IDs data store - * @param unionFind Union-find structure for tracking equivalences - * @param provisionalLabels Map from voxel index to provisional label - * @param dims Image geometry dimensions + * @brief Dispatches to either BFS or CCL algorithm based on data residency. + * @return Result indicating success or an error with a descriptive message. */ - static void phaseOneCCL(Int32AbstractDataStore& featureIdsStore, ChunkAwareUnionFind& unionFind, std::unordered_map& provisionalLabels, const std::array& dims); - - /** - * @brief Phase 2: Global resolution of equivalences and region classification - * @param unionFind Union-find structure to flatten - * @param smallRegions Output set of labels for small regions that need filling - */ - static void phaseTwoGlobalResolution(ChunkAwareUnionFind& unionFind, std::unordered_set& smallRegions); - - /** - * @brief Phase 3: Relabel voxels based on region classification - * @param featureIdsStore Feature IDs data store - * @param cellPhasesPtr Cell phases array (could be null) - * @param provisionalLabels Map from voxel index to provisional label - * @param smallRegions Set of labels for small regions - * @param unionFind Union-find for looking up equivalences - * @param maxPhase Maximum phase value (for new phase assignment) - */ - void phaseThreeRelabeling(Int32AbstractDataStore& featureIdsStore, Int32Array* cellPhasesPtr, const std::unordered_map& provisionalLabels, - const std::unordered_set& smallRegions, ChunkAwareUnionFind& unionFind, size_t maxPhase) const; - - /** - * @brief Phase 4: Iterative morphological fill - * @param featureIdsStore Feature IDs data store - * @param dims Image geometry dimensions - * @param numFeatures Number of features - */ - void phaseFourIterativeFill(Int32AbstractDataStore& featureIdsStore, const std::array& dims, size_t numFeatures) const; + Result<> operator()(); +private: DataStructure& m_DataStructure; const FillBadDataInputValues* m_InputValues = nullptr; const std::atomic_bool& m_ShouldCancel; diff --git a/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/Algorithms/FillBadDataBFS.cpp b/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/Algorithms/FillBadDataBFS.cpp new file mode 100644 index 0000000000..ed42dcc247 --- /dev/null +++ b/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/Algorithms/FillBadDataBFS.cpp @@ -0,0 +1,307 @@ +#include "FillBadDataBFS.hpp" + +#include "FillBadData.hpp" + +#include "simplnx/DataStructure/DataArray.hpp" +#include "simplnx/DataStructure/Geometry/ImageGeom.hpp" +#include "simplnx/Utilities/DataGroupUtilities.hpp" +#include "simplnx/Utilities/FilterUtilities.hpp" + +using namespace nx::core; + +namespace +{ +template +void FillBadDataUpdateTuples(const Int32AbstractDataStore& featureIds, AbstractDataStore& outputDataStore, const std::vector& neighbors) +{ + usize start = 0; + usize stop = outputDataStore.getNumberOfTuples(); + const usize numComponents = outputDataStore.getNumberOfComponents(); + for(usize tupleIndex = start; tupleIndex < stop; tupleIndex++) + { + const int32 featureName = featureIds[tupleIndex]; + const int32 neighbor = neighbors[tupleIndex]; + if(neighbor == tupleIndex) + { + continue; + } + + if(featureName < 0 && neighbor != -1 && featureIds[static_cast(neighbor)] > 0) + { + for(usize i = 0; i < numComponents; i++) + { + auto value = outputDataStore[neighbor * numComponents + i]; + outputDataStore[tupleIndex * numComponents + i] = value; + } + } + } +} + +struct FillBadDataUpdateTuplesFunctor +{ + template + void operator()(const Int32AbstractDataStore& featureIds, IDataArray* outputIDataArray, const std::vector& neighbors) + { + auto& outputStore = outputIDataArray->template getIDataStoreRefAs>(); + FillBadDataUpdateTuples(featureIds, outputStore, neighbors); + } +}; +} // namespace + +// ============================================================================= +FillBadDataBFS::FillBadDataBFS(DataStructure& dataStructure, const IFilter::MessageHandler& mesgHandler, const std::atomic_bool& shouldCancel, const FillBadDataInputValues* inputValues) +: m_DataStructure(dataStructure) +, m_InputValues(inputValues) +, m_ShouldCancel(shouldCancel) +, m_MessageHandler(mesgHandler) +{ +} + +// ----------------------------------------------------------------------------- +FillBadDataBFS::~FillBadDataBFS() noexcept = default; + +// ============================================================================= +Result<> FillBadDataBFS::operator()() +{ + auto& featureIdsStore = m_DataStructure.getDataAs(m_InputValues->featureIdsArrayPath)->getDataStoreRef(); + const size_t totalPoints = featureIdsStore.getNumberOfTuples(); + + std::vector neighbors(totalPoints, -1); + std::vector alreadyChecked(totalPoints, false); + + const auto& selectedImageGeom = m_DataStructure.getDataRefAs(m_InputValues->inputImageGeometry); + const SizeVec3 udims = selectedImageGeom.getDimensions(); + + Int32Array* cellPhasesPtr = nullptr; + + if(m_InputValues->storeAsNewPhase) + { + cellPhasesPtr = m_DataStructure.getDataAs(m_InputValues->cellPhasesArrayPath); + } + + std::array dims = { + static_cast(udims[0]), + static_cast(udims[1]), + static_cast(udims[2]), + }; + + size_t count = 1; + size_t numFeatures = 0; + size_t maxPhase = 0; + + for(size_t i = 0; i < totalPoints; i++) + { + int32 featureName = featureIdsStore[i]; + if(featureName > numFeatures) + { + numFeatures = featureName; + } + } + + if(m_InputValues->storeAsNewPhase) + { + for(size_t i = 0; i < totalPoints; i++) + { + if((*cellPhasesPtr)[i] > maxPhase) + { + maxPhase = (*cellPhasesPtr)[i]; + } + } + } + + std::array neighborPoints = {-dims[0] * dims[1], -dims[0], -1, 1, dims[0], dims[0] * dims[1]}; + std::vector currentVisitedList; + + for(size_t iter = 0; iter < totalPoints; iter++) + { + alreadyChecked[iter] = false; + if(featureIdsStore[iter] != 0) + { + alreadyChecked[iter] = true; + } + } + + for(size_t i = 0; i < totalPoints; i++) + { + if(!alreadyChecked[i] && featureIdsStore[i] == 0) + { + currentVisitedList.push_back(static_cast(i)); + count = 0; + while(count < currentVisitedList.size()) + { + int64_t index = currentVisitedList[count]; + int64 column = index % dims[0]; + int64 row = (index / dims[0]) % dims[1]; + int64 plane = index / (dims[0] * dims[1]); + for(int32_t j = 0; j < 6; j++) + { + int64_t neighbor = index + neighborPoints[j]; + if(j == 0 && plane == 0) + { + continue; + } + if(j == 5 && plane == (dims[2] - 1)) + { + continue; + } + if(j == 1 && row == 0) + { + continue; + } + if(j == 4 && row == (dims[1] - 1)) + { + continue; + } + if(j == 2 && column == 0) + { + continue; + } + if(j == 3 && column == (dims[0] - 1)) + { + continue; + } + if(featureIdsStore[neighbor] == 0 && !alreadyChecked[neighbor]) + { + currentVisitedList.push_back(neighbor); + alreadyChecked[neighbor] = true; + } + } + count++; + } + if((int32_t)currentVisitedList.size() >= m_InputValues->minAllowedDefectSizeValue) + { + for(const auto& currentIndex : currentVisitedList) + { + featureIdsStore[currentIndex] = 0; + if(m_InputValues->storeAsNewPhase) + { + (*cellPhasesPtr)[currentIndex] = static_cast(maxPhase) + 1; + } + } + } + if((int32_t)currentVisitedList.size() < m_InputValues->minAllowedDefectSizeValue) + { + for(const auto& currentIndex : currentVisitedList) + { + featureIdsStore[currentIndex] = -1; + } + } + currentVisitedList.clear(); + } + } + + std::vector featureNumber(numFeatures + 1, 0); + + std::optional> allChildArrays = GetAllChildDataPaths(m_DataStructure, selectedImageGeom.getCellDataPath(), DataObject::Type::DataArray, m_InputValues->ignoredDataArrayPaths); + std::vector voxelArrayNames; + if(allChildArrays.has_value()) + { + voxelArrayNames = allChildArrays.value(); + } + + while(count != 0) + { + count = 0; + for(size_t i = 0; i < totalPoints; i++) + { + int32 featureName = featureIdsStore[i]; + if(featureName < 0) + { + count++; + int32 most = 0; + int64 xIndex = static_cast(i % dims[0]); + int64 yIndex = static_cast((i / dims[0]) % dims[1]); + int64 zIndex = static_cast(i / (dims[0] * dims[1])); + for(int32_t j = 0; j < 6; j++) + { + auto neighborPoint = static_cast(i + neighborPoints[j]); + if(j == 0 && zIndex == 0) + { + continue; + } + if(j == 5 && zIndex == (dims[2] - 1)) + { + continue; + } + if(j == 1 && yIndex == 0) + { + continue; + } + if(j == 4 && yIndex == (dims[1] - 1)) + { + continue; + } + if(j == 2 && xIndex == 0) + { + continue; + } + if(j == 3 && xIndex == (dims[0] - 1)) + { + continue; + } + + int32 feature = featureIdsStore[neighborPoint]; + if(feature > 0) + { + featureNumber[feature]++; + int32 current = featureNumber[feature]; + if(current > most) + { + most = current; + neighbors[i] = static_cast(neighborPoint); + } + } + } + for(int32_t j = 0; j < 6; j++) + { + int64 neighborPoint = static_cast(i) + neighborPoints[j]; + if(j == 0 && zIndex == 0) + { + continue; + } + if(j == 5 && zIndex == (dims[2] - 1)) + { + continue; + } + if(j == 1 && yIndex == 0) + { + continue; + } + if(j == 4 && yIndex == (dims[1] - 1)) + { + continue; + } + if(j == 2 && xIndex == 0) + { + continue; + } + if(j == 3 && xIndex == (dims[0] - 1)) + { + continue; + } + + int32 feature = featureIdsStore[neighborPoint]; + if(feature > 0) + { + featureNumber[feature] = 0; + } + } + } + } + + for(const auto& cellArrayPath : voxelArrayNames) + { + if(cellArrayPath == m_InputValues->featureIdsArrayPath) + { + continue; + } + auto* oldCellArray = m_DataStructure.getDataAs(cellArrayPath); + + ExecuteDataFunction(FillBadDataUpdateTuplesFunctor{}, oldCellArray->getDataType(), featureIdsStore, oldCellArray, neighbors); + } + + // We need to update the FeatureIds array _LAST_ since the above operations depend on that values in that array + FillBadDataUpdateTuples(featureIdsStore, featureIdsStore, neighbors); + } + return {}; +} diff --git a/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/Algorithms/FillBadDataBFS.hpp b/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/Algorithms/FillBadDataBFS.hpp new file mode 100644 index 0000000000..6291ce0e3c --- /dev/null +++ b/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/Algorithms/FillBadDataBFS.hpp @@ -0,0 +1,57 @@ +#pragma once + +#include "SimplnxCore/SimplnxCore_export.hpp" + +#include "simplnx/DataStructure/DataPath.hpp" +#include "simplnx/DataStructure/DataStructure.hpp" +#include "simplnx/Filter/IFilter.hpp" + +namespace nx::core +{ + +struct FillBadDataInputValues; + +/** + * @class FillBadDataBFS + * @brief BFS flood-fill algorithm for filling bad data regions. + * + * This is the in-core-optimized implementation. It uses BFS (breadth-first search) + * to identify connected components of bad data, then iteratively fills small regions + * by voting among face neighbors. Uses O(N) temporary buffers (neighbors, alreadyChecked) + * which is efficient when data fits in RAM. + * + * @see FillBadDataCCL for the out-of-core-optimized alternative. + * @see AlgorithmDispatch.hpp for the dispatch mechanism that selects between them. + */ +class SIMPLNXCORE_EXPORT FillBadDataBFS +{ +public: + /** + * @brief Constructs the BFS fill algorithm with the required context. + * @param dataStructure The data structure containing the arrays to process. + * @param mesgHandler Handler for progress and informational messages. + * @param shouldCancel Cancellation flag checked during execution. + * @param inputValues Filter parameter values controlling fill behavior. + */ + FillBadDataBFS(DataStructure& dataStructure, const IFilter::MessageHandler& mesgHandler, const std::atomic_bool& shouldCancel, const FillBadDataInputValues* inputValues); + ~FillBadDataBFS() noexcept; + + FillBadDataBFS(const FillBadDataBFS&) = delete; + FillBadDataBFS(FillBadDataBFS&&) noexcept = delete; + FillBadDataBFS& operator=(const FillBadDataBFS&) = delete; + FillBadDataBFS& operator=(FillBadDataBFS&&) noexcept = delete; + + /** + * @brief Executes the BFS flood-fill algorithm to identify and fill bad data regions. + * @return Result indicating success or an error with a descriptive message. + */ + Result<> operator()(); + +private: + DataStructure& m_DataStructure; + const FillBadDataInputValues* m_InputValues = nullptr; + const std::atomic_bool& m_ShouldCancel; + const IFilter::MessageHandler& m_MessageHandler; +}; + +} // namespace nx::core diff --git a/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/Algorithms/FillBadDataCCL.cpp b/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/Algorithms/FillBadDataCCL.cpp new file mode 100644 index 0000000000..fc66055139 --- /dev/null +++ b/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/Algorithms/FillBadDataCCL.cpp @@ -0,0 +1,565 @@ +#include "FillBadDataCCL.hpp" + +#include "FillBadData.hpp" + +#include "simplnx/DataStructure/DataArray.hpp" +#include "simplnx/DataStructure/Geometry/ImageGeom.hpp" +#include "simplnx/Utilities/DataGroupUtilities.hpp" +#include "simplnx/Utilities/FilterUtilities.hpp" +#include "simplnx/Utilities/MessageHelper.hpp" +#include "simplnx/Utilities/NeighborUtilities.hpp" + +#include + +using namespace nx::core; + +// ============================================================================= +// FillBadData Algorithm Overview +// ============================================================================= +// +// This file implements an optimized algorithm for filling bad data (voxels with +// FeatureId == 0) in image geometries. The algorithm handles out-of-core datasets +// efficiently by processing data in chunks and uses a four-phase approach: +// +// Phase 1: Chunk-Sequential Connected Component Labeling (CCL) +// - Process chunks sequentially, assigning provisional labels to bad data regions +// - Use Union-Find to track equivalences between labels across chunk boundaries +// - Track size of each connected component +// +// Phase 2: Global Resolution +// - Flatten Union-Find structure to resolve all equivalences +// - Accumulate region sizes to root labels +// +// Phase 3: Region Classification and Relabeling +// - Classify regions as "small" (below threshold) or "large" (above threshold) +// - Small regions: mark with -1 for filling in Phase 4 +// - Large regions: keep as 0 or assign to new phase (if requested) +// +// Phase 4: Iterative Morphological Fill (On-Disk Deferred) +// - Uses a temporary file to defer fills: Pass 1 writes (dest, src) pairs, +// Pass 2 reads them back and applies fills. +// - No O(N) memory allocations — uses O(features) vote counters + temp file I/O. +// +// ============================================================================= + +namespace +{ +// ----------------------------------------------------------------------------- +// Helper: Copy all components of a single tuple from src to dest in a data store. +// ----------------------------------------------------------------------------- +template +void copyTuple(AbstractDataStore& store, int64 dest, int64 src) +{ + const usize numComp = store.getNumberOfComponents(); + for(usize c = 0; c < numComp; c++) + { + store[dest * numComp + c] = store[src * numComp + c]; + } +} + +// Functor for type-dispatched single-tuple copy +struct CopyTupleFunctor +{ + template + void operator()(IDataArray* dataArray, int64 dest, int64 src) + { + auto& store = dataArray->template getIDataStoreRefAs>(); + copyTuple(store, dest, src); + } +}; + +// RAII wrapper for std::FILE* that auto-closes on destruction +struct TempFileGuard +{ + std::FILE* file = nullptr; + + TempFileGuard() = default; + ~TempFileGuard() + { + if(file != nullptr) + { + std::fclose(file); + } + } + + TempFileGuard(const TempFileGuard&) = delete; + TempFileGuard& operator=(const TempFileGuard&) = delete; +}; +} // namespace + +// ============================================================================= +// FillBadData Implementation +// ============================================================================= + +FillBadDataCCL::FillBadDataCCL(DataStructure& dataStructure, const IFilter::MessageHandler& mesgHandler, const std::atomic_bool& shouldCancel, const FillBadDataInputValues* inputValues) +: m_DataStructure(dataStructure) +, m_InputValues(inputValues) +, m_ShouldCancel(shouldCancel) +, m_MessageHandler(mesgHandler) +{ +} + +// ----------------------------------------------------------------------------- +FillBadDataCCL::~FillBadDataCCL() noexcept = default; + +// ----------------------------------------------------------------------------- +const std::atomic_bool& FillBadDataCCL::getCancel() const +{ + return m_ShouldCancel; +} + +// ============================================================================= +// PHASE 1: Chunk-Sequential Connected Component Labeling (CCL) +// ============================================================================= +// +// Performs connected component labeling on bad data voxels (FeatureId == 0) +// using a chunk-sequential scanline algorithm. Uses positive labels and an +// in-memory provisional labels buffer to avoid cross-chunk OOC reads. +// +// @param featureIdsStore The feature IDs data store (maybe out-of-core) +// @param unionFind Union-Find structure for tracking label equivalences +// @param nextLabel Next label to assign (incremented as new labels are created) +// @param dims Image dimensions [X, Y, Z] +// ============================================================================= +void FillBadDataCCL::phaseOneCCL(Int32AbstractDataStore& featureIdsStore, UnionFind& unionFind, int32& nextLabel, const std::array& dims) +{ + const uint64 numChunks = featureIdsStore.getNumberOfChunks(); + const usize sliceSize = static_cast(dims[0]) * static_cast(dims[1]); + + // Rolling 2-slice buffer for backward neighbor label reads. + // Only current + previous Z-slice are needed. O(slice) memory. + std::vector labelBuffer(2 * sliceSize, 0); + + // Track last cleared Z-slice to avoid re-clearing when a Z-slice spans multiple chunks + int64 lastClearedZ = -1; + + // Process each chunk sequentially + for(uint64 chunkIdx = 0; chunkIdx < numChunks; chunkIdx++) + { + featureIdsStore.loadChunk(chunkIdx); + + const auto chunkLowerBounds = featureIdsStore.getChunkLowerBounds(chunkIdx); + const auto chunkUpperBounds = featureIdsStore.getChunkUpperBounds(chunkIdx); + + for(usize z = chunkLowerBounds[0]; z <= chunkUpperBounds[0]; z++) + { + // Clear current slice in rolling buffer only when entering a NEW z value. + // A single Z-slice may span multiple chunks, so we must not re-clear + // data written by a previous chunk for the same z. + const usize curOff = (z % 2) * sliceSize; + if(static_cast(z) != lastClearedZ) + { + std::fill(labelBuffer.begin() + curOff, labelBuffer.begin() + curOff + sliceSize, 0); + lastClearedZ = static_cast(z); + } + const usize prevOff = ((z + 1) % 2) * sliceSize; + + for(usize y = chunkLowerBounds[1]; y <= chunkUpperBounds[1]; y++) + { + for(usize x = chunkLowerBounds[2]; x <= chunkUpperBounds[2]; x++) + { + const usize index = z * sliceSize + y * static_cast(dims[0]) + x; + const usize inSlice = y * static_cast(dims[0]) + x; + + // Only process bad data voxels (FeatureId == 0) + if(featureIdsStore[index] != 0) + { + continue; + } + + // Check backward neighbors using rolling buffer + int32 assignedLabel = 0; + + if(x > 0) + { + int32 neighLabel = labelBuffer[curOff + inSlice - 1]; + if(neighLabel > 0) + { + assignedLabel = neighLabel; + } + } + + if(y > 0) + { + int32 neighLabel = labelBuffer[curOff + inSlice - static_cast(dims[0])]; + if(neighLabel > 0) + { + if(assignedLabel == 0) + { + assignedLabel = neighLabel; + } + else if(assignedLabel != neighLabel) + { + unionFind.unite(assignedLabel, neighLabel); + } + } + } + + if(z > 0) + { + int32 neighLabel = labelBuffer[prevOff + inSlice]; + if(neighLabel > 0) + { + if(assignedLabel == 0) + { + assignedLabel = neighLabel; + } + else if(assignedLabel != neighLabel) + { + unionFind.unite(assignedLabel, neighLabel); + } + } + } + + if(assignedLabel == 0) + { + assignedLabel = nextLabel++; + unionFind.find(assignedLabel); + } + + // Write to rolling buffer AND featureIds store + labelBuffer[curOff + inSlice] = assignedLabel; + featureIdsStore[index] = assignedLabel; + + unionFind.addSize(assignedLabel, 1); + } + } + } + } + + featureIdsStore.flush(); +} + +// ============================================================================= +// PHASE 2: Global Resolution of Equivalences +// ============================================================================= +void FillBadDataCCL::phaseTwoGlobalResolution(UnionFind& unionFind) +{ + unionFind.flatten(); +} + +// ============================================================================= +// PHASE 3: Region Classification and Relabeling +// ============================================================================= +// +// Classifies bad data regions as "small" or "large" based on size threshold: +// - Small regions (< minAllowedDefectSize): marked with -1 for filling in Phase 4 +// - Large regions (>= minAllowedDefectSize): kept as 0 (or assigned new phase) +// ============================================================================= +void FillBadDataCCL::phaseThreeRelabeling(Int32AbstractDataStore& featureIdsStore, Int32Array* cellPhasesPtr, int32 startLabel, int32 nextLabel, UnionFind& unionFind, usize maxPhase) const +{ + const auto& selectedImageGeom = m_DataStructure.getDataRefAs(m_InputValues->inputImageGeometry); + const SizeVec3 udims = selectedImageGeom.getDimensions(); + const uint64 numChunks = featureIdsStore.getNumberOfChunks(); + + // Build a vector-based classification: isSmallRoot[label] = 1 if small, 0 if large + // Only provisional labels [startLabel, nextLabel) are CCL labels; others are original feature IDs. + std::vector isSmallRoot(static_cast(nextLabel), 0); + for(int32 label = startLabel; label < nextLabel; label++) + { + int64 root = unionFind.find(label); + if(root == label) + { + uint64 regionSize = unionFind.getSize(root); + if(regionSize < static_cast(m_InputValues->minAllowedDefectSizeValue)) + { + isSmallRoot[root] = 1; + } + } + } + + // Read provisional labels from featureIds store (written during Phase 1) + // and relabel based on region classification. + // Only voxels with label >= startLabel are provisional CCL labels (bad data). + // Voxels with label in [1, startLabel) are original good feature IDs — leave them alone. + for(uint64 chunkIdx = 0; chunkIdx < numChunks; chunkIdx++) + { + featureIdsStore.loadChunk(chunkIdx); + + const auto chunkLowerBounds = featureIdsStore.getChunkLowerBounds(chunkIdx); + const auto chunkUpperBounds = featureIdsStore.getChunkUpperBounds(chunkIdx); + + for(usize z = chunkLowerBounds[0]; z <= chunkUpperBounds[0]; z++) + { + for(usize y = chunkLowerBounds[1]; y <= chunkUpperBounds[1]; y++) + { + for(usize x = chunkLowerBounds[2]; x <= chunkUpperBounds[2]; x++) + { + const usize index = z * udims[0] * udims[1] + y * udims[0] + x; + + int32 label = featureIdsStore[index]; + if(label >= startLabel) + { + int64 root = unionFind.find(label); + + if(isSmallRoot[root] != 0) + { + featureIdsStore[index] = -1; + } + else + { + featureIdsStore[index] = 0; + + if(m_InputValues->storeAsNewPhase && cellPhasesPtr != nullptr) + { + (*cellPhasesPtr)[index] = static_cast(maxPhase) + 1; + } + } + } + } + } + } + } + + featureIdsStore.flush(); +} + +// ============================================================================= +// PHASE 4: Iterative Morphological Fill (On-Disk Deferred) +// ============================================================================= +// +// Uses a temporary file to avoid O(N) memory allocations. Each iteration: +// Pass 1 (Vote): Scan voxels chunk-sequentially. For each -1 voxel, find the +// best positive-featureId neighbor via majority vote. Write (dest, src) pairs +// to a temp file. featureIds is read-only during this pass. +// Pass 2 (Apply): Read pairs back from the temp file. Copy all cell data array +// components from src to dest. Update featureIds last. +// ============================================================================= +Result<> FillBadDataCCL::phaseFourIterativeFill(Int32AbstractDataStore& featureIdsStore, const std::array& dims, usize numFeatures) const +{ + const auto& selectedImageGeom = m_DataStructure.getDataRefAs(m_InputValues->inputImageGeometry); + + std::array neighborVoxelIndexOffsets = initializeFaceNeighborOffsets(dims); + std::array faceNeighborInternalIdx = initializeFaceNeighborInternalIdx(); + + // Feature vote counter: O(features) not O(voxels) + std::vector featureNumber(numFeatures + 1, 0); + + // Get cell arrays that need updating during filling + std::optional> allChildArrays = GetAllChildDataPaths(m_DataStructure, selectedImageGeom.getCellDataPath(), DataObject::Type::DataArray, m_InputValues->ignoredDataArrayPaths); + std::vector voxelArrayNames; + if(allChildArrays.has_value()) + { + voxelArrayNames = allChildArrays.value(); + } + + // Open temp file for deferred fill pairs + TempFileGuard tmpGuard; + tmpGuard.file = std::tmpfile(); + if(tmpGuard.file == nullptr) + { + return MakeErrorResult(-87010, "Phase 4/4: Failed to create temporary file for deferred fill"); + } + + MessageHelper messageHelper(m_MessageHandler, std::chrono::milliseconds(1000)); + auto throttledMessenger = messageHelper.createThrottledMessenger(std::chrono::milliseconds(1000)); + + usize count = 1; + usize iteration = 0; + const uint64 numChunks = featureIdsStore.getNumberOfChunks(); + + while(count != 0) + { + iteration++; + count = 0; + + // Rewind for this iteration's writes + std::rewind(tmpGuard.file); + + // Pass 1 (Vote): Chunk-sequential scan writing (dest, src) pairs to temp file. + // featureIds is read-only during this pass — two-pass semantics are automatic. + for(uint64 chunkIdx = 0; chunkIdx < numChunks; chunkIdx++) + { + featureIdsStore.loadChunk(chunkIdx); + const auto lower = featureIdsStore.getChunkLowerBounds(chunkIdx); + const auto upper = featureIdsStore.getChunkUpperBounds(chunkIdx); + + for(usize z = lower[0]; z <= upper[0]; z++) + { + for(usize y = lower[1]; y <= upper[1]; y++) + { + for(usize x = lower[2]; x <= upper[2]; x++) + { + const int64 voxelIndex = static_cast(z) * dims[0] * dims[1] + static_cast(y) * dims[0] + static_cast(x); + int32 featureName = featureIdsStore[voxelIndex]; + + if(featureName < 0) + { + count++; + int32 most = 0; + int64 bestNeighbor = -1; + + std::array isValidFaceNeighbor = computeValidFaceNeighbors(static_cast(x), static_cast(y), static_cast(z), dims); + for(const auto& faceIndex : faceNeighborInternalIdx) + { + if(!isValidFaceNeighbor[faceIndex]) + { + continue; + } + + auto neighborPoint = voxelIndex + neighborVoxelIndexOffsets[faceIndex]; + int32 feature = featureIdsStore[neighborPoint]; + + if(feature > 0) + { + featureNumber[feature]++; + int32 current = featureNumber[feature]; + if(current > most) + { + most = current; + bestNeighbor = neighborPoint; + } + } + } + + // Reset vote counters + for(const auto& faceIndex : faceNeighborInternalIdx) + { + if(!isValidFaceNeighbor[faceIndex]) + { + continue; + } + auto neighborPoint = voxelIndex + neighborVoxelIndexOffsets[faceIndex]; + int32 feature = featureIdsStore[neighborPoint]; + if(feature > 0) + { + featureNumber[feature] = 0; + } + } + + // Write (dest, src) pair to temp file if a valid neighbor was found + if(bestNeighbor >= 0) + { + std::array pair = {voxelIndex, bestNeighbor}; + std::fwrite(pair.data(), sizeof(int64), 2, tmpGuard.file); + } + } + } + } + } + } + + // Record file position after Pass 1 writes so Pass 2 doesn't read + // stale pairs from a previous iteration (rewind doesn't truncate). + long writeEnd = std::ftell(tmpGuard.file); + if(writeEnd < 0) + { + return MakeErrorResult(-87011, "Phase 4/4: Failed to get file position for temporary file"); + } + + if(count == 0) + { + break; + } + + // Pass 2 (Apply): Read (dest, src) pairs from temp file and apply fills. + // Update all cell arrays except featureIds first, then featureIds last. + std::rewind(tmpGuard.file); + std::array pair; + + // First pass over pairs: update all non-featureIds cell arrays + while(std::ftell(tmpGuard.file) < writeEnd && std::fread(pair.data(), sizeof(int64), 2, tmpGuard.file) == 2) + { + int64 dest = pair[0]; + int64 src = pair[1]; + + for(const auto& cellArrayPath : voxelArrayNames) + { + if(cellArrayPath == m_InputValues->featureIdsArrayPath) + { + continue; + } + auto* cellArray = m_DataStructure.getDataAs(cellArrayPath); + ExecuteDataFunction(CopyTupleFunctor{}, cellArray->getDataType(), cellArray, dest, src); + } + } + + // Second pass over pairs: update featureIds last + std::rewind(tmpGuard.file); + while(std::ftell(tmpGuard.file) < writeEnd && std::fread(pair.data(), sizeof(int64), 2, tmpGuard.file) == 2) + { + int64 dest = pair[0]; + int64 src = pair[1]; + featureIdsStore[dest] = featureIdsStore[src]; + } + + featureIdsStore.flush(); + + throttledMessenger.sendThrottledMessage([iteration, count]() { return fmt::format(" Iteration {}: {} voxels remaining to fill", iteration, count); }); + } + + m_MessageHandler({IFilter::Message::Type::Info, fmt::format(" Completed in {} iteration{}", iteration, iteration == 1 ? "" : "s")}); + return {}; +} + +// ============================================================================= +// Main Algorithm Entry Point +// ============================================================================= +Result<> FillBadDataCCL::operator()() +{ + auto& featureIdsStore = m_DataStructure.getDataAs(m_InputValues->featureIdsArrayPath)->getDataStoreRef(); + const auto& selectedImageGeom = m_DataStructure.getDataRefAs(m_InputValues->inputImageGeometry); + const SizeVec3 udims = selectedImageGeom.getDimensions(); + + std::array dims = { + static_cast(udims[0]), + static_cast(udims[1]), + static_cast(udims[2]), + }; + + const usize totalPoints = featureIdsStore.getNumberOfTuples(); + + // Get cell phases array if we need to assign large regions to a new phase + Int32Array* cellPhasesPtr = nullptr; + usize maxPhase = 0; + + if(m_InputValues->storeAsNewPhase) + { + cellPhasesPtr = m_DataStructure.getDataAs(m_InputValues->cellPhasesArrayPath); + + for(usize i = 0; i < totalPoints; i++) + { + if((*cellPhasesPtr)[i] > maxPhase) + { + maxPhase = (*cellPhasesPtr)[i]; + } + } + } + + // Count the number of existing features for array sizing + usize numFeatures = 0; + for(usize i = 0; i < totalPoints; i++) + { + int32 featureName = featureIdsStore[i]; + if(featureName > numFeatures) + { + numFeatures = featureName; + } + } + + // Initialize data structures for connected component labeling. + // Start provisional labels AFTER the max existing feature ID to avoid collisions. + // Existing feature IDs are in [1, numFeatures], so provisional labels start at numFeatures+1. + UnionFind unionFind; + const int32 startLabel = static_cast(numFeatures) + 1; + int32 nextLabel = startLabel; + + // Phase 1: Chunk-Sequential Connected Component Labeling + // Uses a 2-slice rolling buffer (O(slice) memory) for backward neighbor reads. + // Writes provisional labels to featureIds store for Phases 2-3. + m_MessageHandler({IFilter::Message::Type::Info, "Phase 1/4: Labeling connected components..."}); + phaseOneCCL(featureIdsStore, unionFind, nextLabel, dims); + + // Phase 2: Global Resolution of equivalences + m_MessageHandler({IFilter::Message::Type::Info, "Phase 2/4: Resolving region equivalences..."}); + phaseTwoGlobalResolution(unionFind); + + // Phase 3: Relabeling based on region size classification + // Reads provisional labels from featureIds store (written during Phase 1) + m_MessageHandler({IFilter::Message::Type::Info, "Phase 3/4: Classifying region sizes..."}); + phaseThreeRelabeling(featureIdsStore, cellPhasesPtr, startLabel, nextLabel, unionFind, maxPhase); + + // Phase 4: Iterative morphological fill + m_MessageHandler({IFilter::Message::Type::Info, "Phase 4/4: Filling small defects..."}); + return phaseFourIterativeFill(featureIdsStore, dims, numFeatures); +} diff --git a/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/Algorithms/FillBadDataCCL.hpp b/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/Algorithms/FillBadDataCCL.hpp new file mode 100644 index 0000000000..c11cc99356 --- /dev/null +++ b/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/Algorithms/FillBadDataCCL.hpp @@ -0,0 +1,76 @@ +#pragma once + +#include "SimplnxCore/SimplnxCore_export.hpp" + +#include "simplnx/DataStructure/DataPath.hpp" +#include "simplnx/DataStructure/DataStructure.hpp" +#include "simplnx/Filter/IFilter.hpp" +#include "simplnx/Utilities/UnionFind.hpp" + +namespace nx::core +{ + +// Forward declarations +template +class DataArray; +using Int32Array = DataArray; + +template +class AbstractDataStore; +using Int32AbstractDataStore = AbstractDataStore; + +struct FillBadDataInputValues; + +/** + * @class FillBadDataCCL + * @brief CCL-based algorithm for filling bad data regions, optimized for out-of-core. + * + * Uses chunk-sequential connected component labeling with a 2-slice rolling buffer + * to avoid O(N) memory allocations. Designed for datasets that may exceed available RAM. + * + * @see FillBadDataBFS for the in-core-optimized alternative. + * @see AlgorithmDispatch.hpp for the dispatch mechanism that selects between them. + */ +class SIMPLNXCORE_EXPORT FillBadDataCCL +{ +public: + /** + * @brief Constructs the CCL fill algorithm with the required context. + * @param dataStructure The data structure containing the arrays to process. + * @param mesgHandler Handler for progress and informational messages. + * @param shouldCancel Cancellation flag checked during execution. + * @param inputValues Filter parameter values controlling fill behavior. + */ + FillBadDataCCL(DataStructure& dataStructure, const IFilter::MessageHandler& mesgHandler, const std::atomic_bool& shouldCancel, const FillBadDataInputValues* inputValues); + ~FillBadDataCCL() noexcept; + + FillBadDataCCL(const FillBadDataCCL&) = delete; + FillBadDataCCL(FillBadDataCCL&&) noexcept = delete; + FillBadDataCCL& operator=(const FillBadDataCCL&) = delete; + FillBadDataCCL& operator=(FillBadDataCCL&&) noexcept = delete; + + /** + * @brief Executes the CCL-based algorithm to identify and fill bad data regions. + * @return Result indicating success or an error with a descriptive message. + */ + Result<> operator()(); + + /** + * @brief Returns the cancellation flag reference. + * @return Reference to the atomic cancellation flag. + */ + const std::atomic_bool& getCancel() const; + +private: + static void phaseOneCCL(Int32AbstractDataStore& featureIdsStore, UnionFind& unionFind, int32& nextLabel, const std::array& dims); + static void phaseTwoGlobalResolution(UnionFind& unionFind); + void phaseThreeRelabeling(Int32AbstractDataStore& featureIdsStore, Int32Array* cellPhasesPtr, int32 startLabel, int32 nextLabel, UnionFind& unionFind, size_t maxPhase) const; + Result<> phaseFourIterativeFill(Int32AbstractDataStore& featureIdsStore, const std::array& dims, size_t numFeatures) const; + + DataStructure& m_DataStructure; + const FillBadDataInputValues* m_InputValues = nullptr; + const std::atomic_bool& m_ShouldCancel; + const IFilter::MessageHandler& m_MessageHandler; +}; + +} // namespace nx::core diff --git a/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/Algorithms/IdentifySample.cpp b/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/Algorithms/IdentifySample.cpp index c96d0e33ff..8988072a47 100644 --- a/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/Algorithms/IdentifySample.cpp +++ b/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/Algorithms/IdentifySample.cpp @@ -1,390 +1,13 @@ #include "IdentifySample.hpp" +#include "IdentifySampleBFS.hpp" +#include "IdentifySampleCCL.hpp" + #include "simplnx/DataStructure/DataArray.hpp" -#include "simplnx/DataStructure/Geometry/ImageGeom.hpp" -#include "simplnx/Utilities/FilterUtilities.hpp" -#include "simplnx/Utilities/NeighborUtilities.hpp" +#include "simplnx/Utilities/AlgorithmDispatch.hpp" using namespace nx::core; -namespace -{ -struct IdentifySampleFunctor -{ - template - void operator()(const ImageGeom* imageGeom, IDataArray* goodVoxelsPtr, bool fillHoles, const IFilter::MessageHandler& messageHandler, const std::atomic_bool& shouldCancel) - { - ShapeType cDims = {1}; - auto& goodVoxels = goodVoxelsPtr->template getIDataStoreRefAs>(); - - const auto totalPoints = static_cast(goodVoxelsPtr->getNumberOfTuples()); - - SizeVec3 udims = imageGeom->getDimensions(); - - std::array dims = { - static_cast(udims[0]), - static_cast(udims[1]), - static_cast(udims[2]), - }; - - int64_t neighborPoint = 0; - std::array neighborVoxelIndexOffsets = initializeFaceNeighborOffsets(dims); - std::array faceNeighborInternalIdx = initializeFaceNeighborInternalIdx(); - - std::vector currentVList; - std::vector checked(totalPoints, false); - std::vector sample(totalPoints, false); - int64 biggestBlock = 0; - - // In this loop over the data we are finding the biggest contiguous set of GoodVoxels and calling that the 'sample' All GoodVoxels that do not touch the 'sample' - // are flipped to be called 'bad' voxels or 'not sample' - float threshold = 0.0f; - for(int64 voxelIndex = 0; voxelIndex < totalPoints; voxelIndex++) - { - if(shouldCancel) - { - return; - } - const float percentIncrement = static_cast(voxelIndex) / static_cast(totalPoints) * 100.0f; - if(percentIncrement > threshold) - { - messageHandler(IFilter::Message::Type::Info, fmt::format("Completed: {}", percentIncrement)); - threshold = threshold + 5.0f; - if(threshold < percentIncrement) - { - threshold = percentIncrement; - } - } - - if(!checked[voxelIndex] && goodVoxels.getValue(voxelIndex)) - { - currentVList.push_back(voxelIndex); - usize count = 0; - while(count < currentVList.size()) - { - int64 index = currentVList[count]; - int64 xIdx = index % dims[0]; - int64 yIdx = (index / dims[0]) % dims[1]; - int64 zIdx = index / (dims[0] * dims[1]); - std::array isValidFaceNeighbor = computeValidFaceNeighbors(xIdx, yIdx, zIdx, dims); - for(const auto& faceIndex : faceNeighborInternalIdx) - { - if(!isValidFaceNeighbor[faceIndex]) - { - continue; - } - neighborPoint = index + neighborVoxelIndexOffsets[faceIndex]; - - if(!checked[neighborPoint] && goodVoxels.getValue(neighborPoint)) - { - currentVList.push_back(neighborPoint); - checked[neighborPoint] = true; - } - } - count++; - } - if(static_cast(currentVList.size()) >= biggestBlock) - { - biggestBlock = currentVList.size(); - sample.assign(totalPoints, false); - for(int64 j = 0; j < biggestBlock; j++) - { - sample[currentVList[j]] = true; - } - } - currentVList.clear(); - } - } - for(int64 i = 0; i < totalPoints; i++) - { - if(!sample[i] && goodVoxels.getValue(i)) - { - goodVoxels.setValue(i, false); - } - } - sample.clear(); - checked.assign(totalPoints, false); - - // In this loop we are going to 'close' all the 'holes' inside the region already identified as the 'sample' if the user chose to do so. - // This is done by flipping all 'bad' voxel features that do not touch the outside of the sample (i.e. they are fully contained inside the 'sample'). - threshold = 0.0F; - if(fillHoles) - { - messageHandler(IFilter::Message::Type::Info, fmt::format("Filling holes in sample...")); - - bool touchesBoundary = false; - for(int64 voxelIndex = 0; voxelIndex < totalPoints; voxelIndex++) - { - if(shouldCancel) - { - return; - } - const float percentIncrement = static_cast(voxelIndex) / static_cast(totalPoints) * 100.0f; - if(percentIncrement > threshold) - { - threshold = threshold + 5.0f; - if(threshold < percentIncrement) - { - threshold = percentIncrement; - } - } - - if(!checked[voxelIndex] && !goodVoxels.getValue(voxelIndex)) - { - currentVList.push_back(voxelIndex); - usize count = 0; - touchesBoundary = false; - while(count < currentVList.size()) - { - int64 index = currentVList[count]; - int64 xIdx = index % dims[0]; - int64 yIdx = (index / dims[0]) % dims[1]; - int64 zIdx = index / (dims[0] * dims[1]); - if(xIdx == 0 || xIdx == (dims[0] - 1) || yIdx == 0 || yIdx == (dims[1] - 1) || zIdx == 0 || zIdx == (dims[2] - 1)) - { - touchesBoundary = true; - } - // Loop over the 6 face neighbors of the voxel - std::array isValidFaceNeighbor = computeValidFaceNeighbors(xIdx, yIdx, zIdx, dims); - for(const auto& faceIndex : faceNeighborInternalIdx) - { - if(!isValidFaceNeighbor[faceIndex]) - { - continue; - } - neighborPoint = index + neighborVoxelIndexOffsets[faceIndex]; - - if(!checked[neighborPoint] && !goodVoxels.getValue(neighborPoint)) - { - currentVList.push_back(neighborPoint); - checked[neighborPoint] = true; - } - } - count++; - } - if(!touchesBoundary) - { - for(int64_t j : currentVList) - { - goodVoxels.setValue(j, true); - } - } - currentVList.clear(); - } - } - } - checked.clear(); - } -}; - -struct IdentifySampleSliceBySliceFunctor -{ - enum class Plane - { - XY, - XZ, - YZ - }; - - template - void operator()(const ImageGeom* imageGeom, IDataArray* goodVoxelsPtr, bool fillHoles, Plane plane, const IFilter::MessageHandler& messageHandler, const std::atomic_bool& shouldCancel) - { - auto& goodVoxels = goodVoxelsPtr->template getIDataStoreRefAs>(); - - SizeVec3 uDims = imageGeom->getDimensions(); - const int64 dimX = static_cast(uDims[0]); - const int64 dimY = static_cast(uDims[1]); - const int64 dimZ = static_cast(uDims[2]); - - int64 planeDim1, planeDim2, fixedDim; - int64 stride1, stride2, fixedStride; - - switch(plane) - { - case Plane::XY: - planeDim1 = dimX; - planeDim2 = dimY; - fixedDim = dimZ; - stride1 = 1; - stride2 = dimX; - fixedStride = dimX * dimY; - break; - - case Plane::XZ: - planeDim1 = dimX; - planeDim2 = dimZ; - fixedDim = dimY; - stride1 = 1; - stride2 = dimX * dimY; - fixedStride = dimX; - break; - - case Plane::YZ: - planeDim1 = dimY; - planeDim2 = dimZ; - fixedDim = dimX; - stride1 = dimX; - stride2 = dimX * dimY; - fixedStride = 1; - break; - } - - for(int64 fixedIdx = 0; fixedIdx < fixedDim; ++fixedIdx) // Process each slice - { - if(shouldCancel) - { - return; - } - messageHandler(IFilter::Message::Type::Info, fmt::format("Slice {}", fixedIdx)); - - std::vector checked(planeDim1 * planeDim2, false); - std::vector sample(planeDim1 * planeDim2, false); - std::vector currentVList; - int64 biggestBlock = 0; - - // Identify the largest contiguous set of good voxels in the slice - for(int64 p2 = 0; p2 < planeDim2; ++p2) - { - for(int64 p1 = 0; p1 < planeDim1; ++p1) - { - int64 planeIndex = p2 * planeDim1 + p1; - int64 globalIndex = fixedIdx * fixedStride + p2 * stride2 + p1 * stride1; - - if(!checked[planeIndex] && goodVoxels.getValue(globalIndex)) - { - currentVList.push_back(planeIndex); - int64 count = 0; - - while(count < currentVList.size()) - { - int64 localIdx = currentVList[count]; - int64 localP1 = localIdx % planeDim1; - int64 localP2 = localIdx / planeDim1; - - for(int j = 0; j < 4; ++j) - { - int64 dp1[4] = {0, 0, -1, 1}; - int64 dp2[4] = {-1, 1, 0, 0}; - - int64 neighborP1 = localP1 + dp1[j]; - int64 neighborP2 = localP2 + dp2[j]; - - if(neighborP1 >= 0 && neighborP1 < planeDim1 && neighborP2 >= 0 && neighborP2 < planeDim2) - { - int64 neighborIdx = neighborP2 * planeDim1 + neighborP1; - int64 globalNeighborIdx = fixedIdx * fixedStride + neighborP2 * stride2 + neighborP1 * stride1; - - if(!checked[neighborIdx] && goodVoxels.getValue(globalNeighborIdx)) - { - currentVList.push_back(neighborIdx); - checked[neighborIdx] = true; - } - } - } - count++; - } - - if(static_cast(currentVList.size()) > biggestBlock) - { - biggestBlock = currentVList.size(); - sample.assign(planeDim1 * planeDim2, false); - for(int64 idx : currentVList) - { - sample[idx] = true; - } - } - currentVList.clear(); - } - } - } - if(shouldCancel) - { - return; - } - - for(int64 p2 = 0; p2 < planeDim2; ++p2) - { - for(int64 p1 = 0; p1 < planeDim1; ++p1) - { - int64 planeIndex = p2 * planeDim1 + p1; - int64 globalIndex = fixedIdx * fixedStride + p2 * stride2 + p1 * stride1; - - if(!sample[planeIndex]) - { - goodVoxels.setValue(globalIndex, false); - } - } - } - if(shouldCancel) - { - return; - } - if(fillHoles) - { - for(int64 p2 = 0; p2 < planeDim2; ++p2) - { - for(int64 p1 = 0; p1 < planeDim1; ++p1) - { - int64 planeIndex = p2 * planeDim1 + p1; - int64 globalIndex = fixedIdx * fixedStride + p2 * stride2 + p1 * stride1; - - if(!checked[planeIndex] && !goodVoxels.getValue(globalIndex)) - { - currentVList.push_back(planeIndex); - int64 count = 0; - bool touchesBoundary = false; - - while(count < currentVList.size()) - { - int64 localIdx = currentVList[count]; - int64 localP1 = localIdx % planeDim1; - int64 localP2 = localIdx / planeDim1; - - if(localP1 == 0 || localP1 == planeDim1 - 1 || localP2 == 0 || localP2 == planeDim2 - 1) - { - touchesBoundary = true; - } - - for(int j = 0; j < 4; ++j) - { - int64 dp1[4] = {0, 0, -1, 1}; - int64 dp2[4] = {-1, 1, 0, 0}; - - int64 neighborP1 = localP1 + dp1[j]; - int64 neighborP2 = localP2 + dp2[j]; - - if(neighborP1 >= 0 && neighborP1 < planeDim1 && neighborP2 >= 0 && neighborP2 < planeDim2) - { - int64 neighborIdx = neighborP2 * planeDim1 + neighborP1; - int64 globalNeighborIdx = fixedIdx * fixedStride + neighborP2 * stride2 + neighborP1 * stride1; - - if(!checked[neighborIdx] && !goodVoxels.getValue(globalNeighborIdx)) - { - currentVList.push_back(neighborIdx); - checked[neighborIdx] = true; - } - } - } - count++; - } - - if(!touchesBoundary) - { - for(int64 idx : currentVList) - { - goodVoxels.setValue(fixedIdx * fixedStride + idx, true); - } - } - currentVList.clear(); - } - } - } - } - } - } -}; -} // namespace - // ----------------------------------------------------------------------------- IdentifySample::IdentifySample(DataStructure& dataStructure, const IFilter::MessageHandler& mesgHandler, const std::atomic_bool& shouldCancel, IdentifySampleInputValues* inputValues) : m_DataStructure(dataStructure) @@ -401,17 +24,6 @@ IdentifySample::~IdentifySample() noexcept = default; Result<> IdentifySample::operator()() { auto* inputData = m_DataStructure.getDataAs(m_InputValues->MaskArrayPath); - const auto* imageGeom = m_DataStructure.getDataAs(m_InputValues->InputImageGeometryPath); - - if(m_InputValues->SliceBySlice) - { - ExecuteDataFunction(IdentifySampleSliceBySliceFunctor{}, inputData->getDataType(), imageGeom, inputData, m_InputValues->FillHoles, - static_cast(m_InputValues->SliceBySlicePlaneIndex), m_MessageHandler, m_ShouldCancel); - } - else - { - ExecuteDataFunction(IdentifySampleFunctor{}, inputData->getDataType(), imageGeom, inputData, m_InputValues->FillHoles, m_MessageHandler, m_ShouldCancel); - } - return {}; + return DispatchAlgorithm({inputData}, m_DataStructure, m_MessageHandler, m_ShouldCancel, m_InputValues); } diff --git a/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/Algorithms/IdentifySampleBFS.cpp b/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/Algorithms/IdentifySampleBFS.cpp new file mode 100644 index 0000000000..22cdd8a293 --- /dev/null +++ b/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/Algorithms/IdentifySampleBFS.cpp @@ -0,0 +1,213 @@ +#include "IdentifySampleBFS.hpp" + +#include "IdentifySample.hpp" +#include "IdentifySampleCommon.hpp" + +#include "simplnx/DataStructure/DataArray.hpp" +#include "simplnx/DataStructure/Geometry/ImageGeom.hpp" +#include "simplnx/Utilities/FilterUtilities.hpp" +#include "simplnx/Utilities/NeighborUtilities.hpp" + +using namespace nx::core; + +namespace +{ +// BFS flood-fill algorithm for identifying the largest connected component. +// Uses std::vector (1 bit per voxel) for minimal memory overhead. +// Fast for in-core data where random access is O(1), but causes chunk +// thrashing in OOC mode due to BFS visiting neighbors across chunk boundaries. +struct IdentifySampleBFSFunctor +{ + template + void operator()(const ImageGeom* imageGeom, IDataArray* goodVoxelsPtr, bool fillHoles, const IFilter::MessageHandler& messageHandler, const std::atomic_bool& shouldCancel) + { + auto& goodVoxels = goodVoxelsPtr->template getIDataStoreRefAs>(); + + const auto totalPoints = static_cast(goodVoxelsPtr->getNumberOfTuples()); + + SizeVec3 udims = imageGeom->getDimensions(); + + std::array dims = { + static_cast(udims[0]), + static_cast(udims[1]), + static_cast(udims[2]), + }; + + int64 neighborPoint = 0; + std::array neighborVoxelIndexOffsets = initializeFaceNeighborOffsets(dims); + std::array faceNeighborInternalIdx = initializeFaceNeighborInternalIdx(); + + std::vector currentVList; + std::vector checked(totalPoints, false); + std::vector sample(totalPoints, false); + int64 biggestBlock = 0; + + // Find the largest contiguous set of good voxels using BFS flood-fill + float threshold = 0.0f; + for(int64 voxelIndex = 0; voxelIndex < totalPoints; voxelIndex++) + { + if(shouldCancel) + { + return; + } + const float percentIncrement = static_cast(voxelIndex) / static_cast(totalPoints) * 100.0f; + if(percentIncrement > threshold) + { + messageHandler(IFilter::Message::Type::Info, fmt::format("Completed: {}", percentIncrement)); + threshold = threshold + 5.0f; + if(threshold < percentIncrement) + { + threshold = percentIncrement; + } + } + + if(!checked[voxelIndex] && goodVoxels.getValue(voxelIndex)) + { + currentVList.push_back(voxelIndex); + usize count = 0; + while(count < currentVList.size()) + { + int64 index = currentVList[count]; + int64 xIdx = index % dims[0]; + int64 yIdx = (index / dims[0]) % dims[1]; + int64 zIdx = index / (dims[0] * dims[1]); + std::array isValidFaceNeighbor = computeValidFaceNeighbors(xIdx, yIdx, zIdx, dims); + for(const auto& faceIndex : faceNeighborInternalIdx) + { + if(!isValidFaceNeighbor[faceIndex]) + { + continue; + } + neighborPoint = index + neighborVoxelIndexOffsets[faceIndex]; + + if(!checked[neighborPoint] && goodVoxels.getValue(neighborPoint)) + { + currentVList.push_back(neighborPoint); + checked[neighborPoint] = true; + } + } + count++; + } + if(static_cast(currentVList.size()) >= biggestBlock) + { + biggestBlock = currentVList.size(); + sample.assign(totalPoints, false); + for(int64 j = 0; j < biggestBlock; j++) + { + sample[currentVList[j]] = true; + } + } + currentVList.clear(); + } + } + for(int64 i = 0; i < totalPoints; i++) + { + if(!sample[i] && goodVoxels.getValue(i)) + { + goodVoxels.setValue(i, false); + } + } + sample.clear(); + checked.assign(totalPoints, false); + + // Fill holes: flip bad voxels that are fully enclosed by the sample + threshold = 0.0F; + if(fillHoles) + { + messageHandler(IFilter::Message::Type::Info, fmt::format("Filling holes in sample...")); + + bool touchesBoundary = false; + for(int64 voxelIndex = 0; voxelIndex < totalPoints; voxelIndex++) + { + if(shouldCancel) + { + return; + } + const float percentIncrement = static_cast(voxelIndex) / static_cast(totalPoints) * 100.0f; + if(percentIncrement > threshold) + { + threshold = threshold + 5.0f; + if(threshold < percentIncrement) + { + threshold = percentIncrement; + } + } + + if(!checked[voxelIndex] && !goodVoxels.getValue(voxelIndex)) + { + currentVList.push_back(voxelIndex); + usize count = 0; + touchesBoundary = false; + while(count < currentVList.size()) + { + int64 index = currentVList[count]; + int64 xIdx = index % dims[0]; + int64 yIdx = (index / dims[0]) % dims[1]; + int64 zIdx = index / (dims[0] * dims[1]); + if(xIdx == 0 || xIdx == (dims[0] - 1) || yIdx == 0 || yIdx == (dims[1] - 1) || zIdx == 0 || zIdx == (dims[2] - 1)) + { + touchesBoundary = true; + } + std::array isValidFaceNeighbor = computeValidFaceNeighbors(xIdx, yIdx, zIdx, dims); + for(const auto& faceIndex : faceNeighborInternalIdx) + { + if(!isValidFaceNeighbor[faceIndex]) + { + continue; + } + neighborPoint = index + neighborVoxelIndexOffsets[faceIndex]; + + if(!checked[neighborPoint] && !goodVoxels.getValue(neighborPoint)) + { + currentVList.push_back(neighborPoint); + checked[neighborPoint] = true; + } + } + count++; + } + if(!touchesBoundary) + { + for(int64 j : currentVList) + { + goodVoxels.setValue(j, true); + } + } + currentVList.clear(); + } + } + } + checked.clear(); + } +}; +} // namespace + +// ----------------------------------------------------------------------------- +IdentifySampleBFS::IdentifySampleBFS(DataStructure& dataStructure, const IFilter::MessageHandler& mesgHandler, const std::atomic_bool& shouldCancel, const IdentifySampleInputValues* inputValues) +: m_DataStructure(dataStructure) +, m_InputValues(inputValues) +, m_ShouldCancel(shouldCancel) +, m_MessageHandler(mesgHandler) +{ +} + +// ----------------------------------------------------------------------------- +IdentifySampleBFS::~IdentifySampleBFS() noexcept = default; + +// ----------------------------------------------------------------------------- +Result<> IdentifySampleBFS::operator()() +{ + auto* inputData = m_DataStructure.getDataAs(m_InputValues->MaskArrayPath); + const auto* imageGeom = m_DataStructure.getDataAs(m_InputValues->InputImageGeometryPath); + + if(m_InputValues->SliceBySlice) + { + ExecuteDataFunction(IdentifySampleSliceBySliceFunctor{}, inputData->getDataType(), imageGeom, inputData, m_InputValues->FillHoles, + static_cast(m_InputValues->SliceBySlicePlaneIndex), m_MessageHandler, m_ShouldCancel); + } + else + { + ExecuteDataFunction(IdentifySampleBFSFunctor{}, inputData->getDataType(), imageGeom, inputData, m_InputValues->FillHoles, m_MessageHandler, m_ShouldCancel); + } + + return {}; +} diff --git a/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/Algorithms/IdentifySampleBFS.hpp b/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/Algorithms/IdentifySampleBFS.hpp new file mode 100644 index 0000000000..e506bcb158 --- /dev/null +++ b/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/Algorithms/IdentifySampleBFS.hpp @@ -0,0 +1,57 @@ +#pragma once + +#include "SimplnxCore/SimplnxCore_export.hpp" + +#include "simplnx/DataStructure/DataPath.hpp" +#include "simplnx/DataStructure/DataStructure.hpp" +#include "simplnx/Filter/IFilter.hpp" + +namespace nx::core +{ + +struct IdentifySampleInputValues; + +/** + * @class IdentifySampleBFS + * @brief BFS flood-fill algorithm for identifying the largest sample region. + * + * This is the in-core-optimized implementation. It uses BFS (breadth-first search) + * with std::vector for tracking visited voxels, which is memory-efficient + * (1 bit per voxel) and fast when data is in contiguous memory. However, the random + * access pattern of BFS causes severe chunk thrashing in out-of-core mode. + * + * @see IdentifySampleCCL for the out-of-core-optimized alternative. + * @see AlgorithmDispatch.hpp for the dispatch mechanism that selects between them. + */ +class SIMPLNXCORE_EXPORT IdentifySampleBFS +{ +public: + /** + * @brief Constructs the BFS sample identification algorithm with the required context. + * @param dataStructure The data structure containing the arrays to process. + * @param mesgHandler Handler for progress and informational messages. + * @param shouldCancel Cancellation flag checked during execution. + * @param inputValues Filter parameter values controlling identification behavior. + */ + IdentifySampleBFS(DataStructure& dataStructure, const IFilter::MessageHandler& mesgHandler, const std::atomic_bool& shouldCancel, const IdentifySampleInputValues* inputValues); + ~IdentifySampleBFS() noexcept; + + IdentifySampleBFS(const IdentifySampleBFS&) = delete; + IdentifySampleBFS(IdentifySampleBFS&&) noexcept = delete; + IdentifySampleBFS& operator=(const IdentifySampleBFS&) = delete; + IdentifySampleBFS& operator=(IdentifySampleBFS&&) noexcept = delete; + + /** + * @brief Executes the BFS flood-fill algorithm to identify the largest sample region. + * @return Result indicating success or an error with a descriptive message. + */ + Result<> operator()(); + +private: + DataStructure& m_DataStructure; + const IdentifySampleInputValues* m_InputValues = nullptr; + const std::atomic_bool& m_ShouldCancel; + const IFilter::MessageHandler& m_MessageHandler; +}; + +} // namespace nx::core diff --git a/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/Algorithms/IdentifySampleCCL.cpp b/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/Algorithms/IdentifySampleCCL.cpp new file mode 100644 index 0000000000..d693be70c7 --- /dev/null +++ b/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/Algorithms/IdentifySampleCCL.cpp @@ -0,0 +1,368 @@ +#include "IdentifySampleCCL.hpp" + +#include "IdentifySample.hpp" +#include "IdentifySampleCommon.hpp" + +#include "simplnx/DataStructure/DataArray.hpp" +#include "simplnx/DataStructure/Geometry/ImageGeom.hpp" +#include "simplnx/Utilities/FilterUtilities.hpp" + +using namespace nx::core; + +namespace +{ +// Helper: Run forward CCL on a boolean condition using a 2-slice rolling buffer. +// Returns the VectorUnionFind, rootSizes, and the next label. +// The condition lambda takes (goodVoxels store, index) and returns true for voxels to label. +struct CCLResult +{ + VectorUnionFind unionFind; + std::vector rootSizes; + int64 nextLabel = 1; + int64 largestRoot = -1; + uint64 largestSize = 0; +}; + +template +CCLResult runForwardCCL(AbstractDataStore& store, int64 dimX, int64 dimY, int64 dimZ, ConditionFn condition, const std::atomic_bool& shouldCancel) +{ + CCLResult result; + const usize sliceSize = static_cast(dimX * dimY); + + // Rolling 2-slice buffer: only current + previous Z-slice labels + std::vector labelBuffer(2 * sliceSize, 0); + // Size tracking per label for finding largest component without a rescan + std::vector labelSizes; + labelSizes.push_back(0); // index 0 unused + + const uint64 numChunks = store.getNumberOfChunks(); + // Track last cleared Z-slice to avoid re-clearing when a Z-slice spans multiple chunks + int64 lastClearedZ = -1; + + for(uint64 chunkIdx = 0; chunkIdx < numChunks; chunkIdx++) + { + if(shouldCancel) + { + return result; + } + store.loadChunk(chunkIdx); + const auto lower = store.getChunkLowerBounds(chunkIdx); + const auto upper = store.getChunkUpperBounds(chunkIdx); + + for(usize z = lower[0]; z <= upper[0]; z++) + { + // Clear current slice in rolling buffer only when entering a NEW z value. + // A single Z-slice may span multiple chunks (e.g., chunk shape 1x3x25 with dimY=5), + // so we must not re-clear data written by a previous chunk for the same z. + const usize curOff = (z % 2) * sliceSize; + if(static_cast(z) != lastClearedZ) + { + std::fill(labelBuffer.begin() + curOff, labelBuffer.begin() + curOff + sliceSize, 0); + lastClearedZ = static_cast(z); + } + const usize prevOff = ((z + 1) % 2) * sliceSize; + + for(usize y = lower[1]; y <= upper[1]; y++) + { + for(usize x = lower[2]; x <= upper[2]; x++) + { + const usize index = z * sliceSize + y * static_cast(dimX) + x; + + if(!condition(store, index)) + { + continue; + } + + const usize inSlice = y * static_cast(dimX) + x; + int64 nbrA = 0, nbrB = 0, nbrC = 0; + + if(x > 0) + { + nbrA = labelBuffer[curOff + inSlice - 1]; + } + if(y > 0) + { + nbrB = labelBuffer[curOff + inSlice - static_cast(dimX)]; + } + if(z > 0) + { + nbrC = labelBuffer[prevOff + inSlice]; + } + + int64 minLabel = 0; + if(nbrA > 0) + { + minLabel = nbrA; + } + if(nbrB > 0 && (minLabel == 0 || nbrB < minLabel)) + { + minLabel = nbrB; + } + if(nbrC > 0 && (minLabel == 0 || nbrC < minLabel)) + { + minLabel = nbrC; + } + + int64 assignedLabel; + if(minLabel == 0) + { + assignedLabel = result.nextLabel++; + result.unionFind.makeSet(assignedLabel); + labelSizes.resize(result.nextLabel, 0); + } + else + { + assignedLabel = minLabel; + if(nbrA > 0 && nbrA != assignedLabel) + { + result.unionFind.unite(assignedLabel, nbrA); + } + if(nbrB > 0 && nbrB != assignedLabel) + { + result.unionFind.unite(assignedLabel, nbrB); + } + if(nbrC > 0 && nbrC != assignedLabel) + { + result.unionFind.unite(assignedLabel, nbrC); + } + } + + labelBuffer[curOff + inSlice] = assignedLabel; + labelSizes[assignedLabel]++; + } + } + } + } + + // Flatten union-find and accumulate sizes to roots + result.rootSizes.resize(result.nextLabel, 0); + for(int64 lbl = 1; lbl < result.nextLabel; lbl++) + { + int64 root = result.unionFind.find(lbl); + result.rootSizes[root] += labelSizes[lbl]; + } + + // Find largest root + for(int64 r = 1; r < result.nextLabel; r++) + { + if(result.rootSizes[r] >= result.largestSize) + { + result.largestSize = result.rootSizes[r]; + result.largestRoot = r; + } + } + + return result; +} + +// Helper: Re-derive labels for each voxel using a second forward CCL pass +// with a rolling buffer, then apply an action lambda for each labeled voxel. +// This avoids storing labels for the entire volume. +template +void replayForwardCCL(AbstractDataStore& store, int64 dimX, int64 dimY, int64 dimZ, VectorUnionFind& unionFind, ConditionFn condition, ActionFn action, const std::atomic_bool& shouldCancel) +{ + const usize sliceSize = static_cast(dimX * dimY); + std::vector labelBuffer(2 * sliceSize, 0); + int64 nextLabel = 1; + + const uint64 numChunks = store.getNumberOfChunks(); + int64 lastClearedZ = -1; + + for(uint64 chunkIdx = 0; chunkIdx < numChunks; chunkIdx++) + { + if(shouldCancel) + { + return; + } + store.loadChunk(chunkIdx); + const auto lower = store.getChunkLowerBounds(chunkIdx); + const auto upper = store.getChunkUpperBounds(chunkIdx); + + for(usize z = lower[0]; z <= upper[0]; z++) + { + const usize curOff = (z % 2) * sliceSize; + if(static_cast(z) != lastClearedZ) + { + std::fill(labelBuffer.begin() + curOff, labelBuffer.begin() + curOff + sliceSize, 0); + lastClearedZ = static_cast(z); + } + const usize prevOff = ((z + 1) % 2) * sliceSize; + + for(usize y = lower[1]; y <= upper[1]; y++) + { + for(usize x = lower[2]; x <= upper[2]; x++) + { + const usize index = z * sliceSize + y * static_cast(dimX) + x; + + if(!condition(store, index)) + { + continue; + } + + const usize inSlice = y * static_cast(dimX) + x; + int64 nbrA = 0, nbrB = 0, nbrC = 0; + + if(x > 0) + { + nbrA = labelBuffer[curOff + inSlice - 1]; + } + if(y > 0) + { + nbrB = labelBuffer[curOff + inSlice - static_cast(dimX)]; + } + if(z > 0) + { + nbrC = labelBuffer[prevOff + inSlice]; + } + + int64 minLabel = 0; + if(nbrA > 0) + { + minLabel = nbrA; + } + if(nbrB > 0 && (minLabel == 0 || nbrB < minLabel)) + { + minLabel = nbrB; + } + if(nbrC > 0 && (minLabel == 0 || nbrC < minLabel)) + { + minLabel = nbrC; + } + + int64 assignedLabel; + if(minLabel == 0) + { + assignedLabel = nextLabel++; + } + else + { + assignedLabel = minLabel; + } + + labelBuffer[curOff + inSlice] = assignedLabel; + + // Apply the action with the re-derived label + int64 root = unionFind.find(assignedLabel); + action(store, index, root, x, y, z); + } + } + } + } +} + +// Chunk-sequential scanline CCL implementation for 3D volumes. +// Processes data in chunk order to avoid random chunk access in OOC mode. +// Uses a 2-slice rolling buffer (O(slice) memory) instead of O(volume). +struct IdentifySampleCCLFunctor +{ + template + void operator()(const ImageGeom* imageGeom, IDataArray* goodVoxelsPtr, bool fillHoles, const IFilter::MessageHandler& messageHandler, const std::atomic_bool& shouldCancel) + { + auto& goodVoxels = goodVoxelsPtr->template getIDataStoreRefAs>(); + + SizeVec3 udims = imageGeom->getDimensions(); + const int64 dimX = static_cast(udims[0]); + const int64 dimY = static_cast(udims[1]); + const int64 dimZ = static_cast(udims[2]); + + const uint64 numChunks = goodVoxels.getNumberOfChunks(); + + // Phase 1: Forward CCL on good voxels using rolling buffer + messageHandler(IFilter::Message::Type::Info, "Identifying sample regions..."); + auto goodCondition = [](const AbstractDataStore& s, usize idx) -> bool { return static_cast(s[idx]); }; + auto cclResult = runForwardCCL(goodVoxels, dimX, dimY, dimZ, goodCondition, shouldCancel); + + if(shouldCancel || cclResult.largestRoot < 0) + { + return; + } + + // Phase 2: Mask out non-sample voxels by replaying CCL + // Re-derive labels using a second forward pass with rolling buffer, + // then set non-largest-component voxels to false. + messageHandler(IFilter::Message::Type::Info, "Masking non-sample voxels..."); + const int64 largestRoot = cclResult.largestRoot; + replayForwardCCL( + goodVoxels, dimX, dimY, dimZ, cclResult.unionFind, goodCondition, + [&largestRoot](AbstractDataStore& s, usize idx, int64 root, usize /*x*/, usize /*y*/, usize /*z*/) { + if(root != largestRoot) + { + s.setValue(idx, static_cast(false)); + } + }, + shouldCancel); + goodVoxels.flush(); + + // Phase 3: Hole-fill CCL on bad voxels (if fillHoles is true) + if(fillHoles) + { + messageHandler(IFilter::Message::Type::Info, "Filling holes in sample..."); + + // Forward CCL on non-good voxels (holes) + auto holeCondition = [](const AbstractDataStore& s, usize idx) -> bool { return !static_cast(s[idx]); }; + auto holeCCL = runForwardCCL(goodVoxels, dimX, dimY, dimZ, holeCondition, shouldCancel); + + if(shouldCancel) + { + return; + } + + // Determine which hole roots touch the domain boundary + // Replay CCL to check boundary status without storing full labels + std::vector boundaryRoots(holeCCL.nextLabel, false); + replayForwardCCL( + goodVoxels, dimX, dimY, dimZ, holeCCL.unionFind, holeCondition, + [&boundaryRoots, dimX, dimY, dimZ](AbstractDataStore& /*s*/, usize /*idx*/, int64 root, usize x, usize y, usize z) { + if(x == 0 || x == static_cast(dimX - 1) || y == 0 || y == static_cast(dimY - 1) || z == 0 || z == static_cast(dimZ - 1)) + { + boundaryRoots[root] = true; + } + }, + shouldCancel); + + // Phase 4: Fill interior holes by replaying CCL once more + replayForwardCCL( + goodVoxels, dimX, dimY, dimZ, holeCCL.unionFind, holeCondition, + [&boundaryRoots](AbstractDataStore& s, usize idx, int64 root, usize /*x*/, usize /*y*/, usize /*z*/) { + if(!boundaryRoots[root]) + { + s.setValue(idx, static_cast(true)); + } + }, + shouldCancel); + goodVoxels.flush(); + } + } +}; +} // namespace + +// ----------------------------------------------------------------------------- +IdentifySampleCCL::IdentifySampleCCL(DataStructure& dataStructure, const IFilter::MessageHandler& mesgHandler, const std::atomic_bool& shouldCancel, const IdentifySampleInputValues* inputValues) +: m_DataStructure(dataStructure) +, m_InputValues(inputValues) +, m_ShouldCancel(shouldCancel) +, m_MessageHandler(mesgHandler) +{ +} + +// ----------------------------------------------------------------------------- +IdentifySampleCCL::~IdentifySampleCCL() noexcept = default; + +// ----------------------------------------------------------------------------- +Result<> IdentifySampleCCL::operator()() +{ + auto* inputData = m_DataStructure.getDataAs(m_InputValues->MaskArrayPath); + const auto* imageGeom = m_DataStructure.getDataAs(m_InputValues->InputImageGeometryPath); + + if(m_InputValues->SliceBySlice) + { + ExecuteDataFunction(IdentifySampleSliceBySliceFunctor{}, inputData->getDataType(), imageGeom, inputData, m_InputValues->FillHoles, + static_cast(m_InputValues->SliceBySlicePlaneIndex), m_MessageHandler, m_ShouldCancel); + } + else + { + ExecuteDataFunction(IdentifySampleCCLFunctor{}, inputData->getDataType(), imageGeom, inputData, m_InputValues->FillHoles, m_MessageHandler, m_ShouldCancel); + } + + return {}; +} diff --git a/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/Algorithms/IdentifySampleCCL.hpp b/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/Algorithms/IdentifySampleCCL.hpp new file mode 100644 index 0000000000..cba07dface --- /dev/null +++ b/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/Algorithms/IdentifySampleCCL.hpp @@ -0,0 +1,61 @@ +#pragma once + +#include "SimplnxCore/SimplnxCore_export.hpp" + +#include "simplnx/DataStructure/DataPath.hpp" +#include "simplnx/DataStructure/DataStructure.hpp" +#include "simplnx/Filter/IFilter.hpp" + +namespace nx::core +{ + +struct IdentifySampleInputValues; + +/** + * @class IdentifySampleCCL + * @brief Chunk-sequential CCL algorithm for identifying the largest sample region. + * + * This is the out-of-core-optimized implementation. It uses scanline Connected + * Component Labeling (CCL) with a union-find structure, processing data in chunk + * order to minimize disk I/O. The algorithm only accesses backward neighbors + * (-X, -Y, -Z) during labeling, ensuring sequential chunk access. + * + * Trade-off: Uses a std::vector label array (8 bytes per voxel) which is + * more memory than the BFS approach (1 bit per voxel), but avoids the random + * access pattern that causes chunk thrashing in OOC mode. + * + * @see IdentifySampleBFS for the in-core-optimized alternative. + * @see AlgorithmDispatch.hpp for the dispatch mechanism that selects between them. + */ +class SIMPLNXCORE_EXPORT IdentifySampleCCL +{ +public: + /** + * @brief Constructs the CCL sample identification algorithm with the required context. + * @param dataStructure The data structure containing the arrays to process. + * @param mesgHandler Handler for progress and informational messages. + * @param shouldCancel Cancellation flag checked during execution. + * @param inputValues Filter parameter values controlling identification behavior. + */ + IdentifySampleCCL(DataStructure& dataStructure, const IFilter::MessageHandler& mesgHandler, const std::atomic_bool& shouldCancel, const IdentifySampleInputValues* inputValues); + ~IdentifySampleCCL() noexcept; + + IdentifySampleCCL(const IdentifySampleCCL&) = delete; + IdentifySampleCCL(IdentifySampleCCL&&) noexcept = delete; + IdentifySampleCCL& operator=(const IdentifySampleCCL&) = delete; + IdentifySampleCCL& operator=(IdentifySampleCCL&&) noexcept = delete; + + /** + * @brief Executes the CCL-based algorithm to identify the largest sample region. + * @return Result indicating success or an error with a descriptive message. + */ + Result<> operator()(); + +private: + DataStructure& m_DataStructure; + const IdentifySampleInputValues* m_InputValues = nullptr; + const std::atomic_bool& m_ShouldCancel; + const IFilter::MessageHandler& m_MessageHandler; +}; + +} // namespace nx::core diff --git a/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/Algorithms/IdentifySampleCommon.hpp b/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/Algorithms/IdentifySampleCommon.hpp new file mode 100644 index 0000000000..73a6268a38 --- /dev/null +++ b/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/Algorithms/IdentifySampleCommon.hpp @@ -0,0 +1,322 @@ +#pragma once + +#include "simplnx/DataStructure/DataArray.hpp" +#include "simplnx/DataStructure/Geometry/ImageGeom.hpp" +#include "simplnx/Filter/IFilter.hpp" +#include "simplnx/Utilities/FilterUtilities.hpp" + +#include +#include + +namespace nx::core +{ + +/** + * @class VectorUnionFind + * @brief Vector-based union-find for dense label sets (labels 1..N). + * + * Uses flat vectors instead of hash maps for O(1) access. Suitable for + * connected component labeling where labels are assigned sequentially. + */ +class VectorUnionFind +{ +public: + VectorUnionFind() = default; + + /** + * @brief Pre-allocates internal storage for the expected number of labels. + * @param capacity Maximum expected label value. + */ + void reserve(usize capacity) + { + m_Parent.reserve(capacity + 1); + m_Rank.reserve(capacity + 1); + } + + /** + * @brief Creates a new singleton set for label x if it does not already exist. + * @param x Label to initialize. + */ + void makeSet(int64 x) + { + if(static_cast(x) >= m_Parent.size()) + { + m_Parent.resize(x + 1, 0); + m_Rank.resize(x + 1, 0); + } + if(m_Parent[x] == 0) + { + m_Parent[x] = x; + } + } + + /** + * @brief Finds the root label with path-halving compression. + * @param x Label to find the root for. + * @return Root label of the equivalence class. + */ + int64 find(int64 x) + { + while(m_Parent[x] != x) + { + m_Parent[x] = m_Parent[m_Parent[x]]; // path halving + x = m_Parent[x]; + } + return x; + } + + /** + * @brief Merges the equivalence classes of two labels using union-by-rank. + * @param a First label. + * @param b Second label. + */ + void unite(int64 a, int64 b) + { + a = find(a); + b = find(b); + if(a == b) + { + return; + } + if(m_Rank[a] < m_Rank[b]) + { + std::swap(a, b); + } + m_Parent[b] = a; + if(m_Rank[a] == m_Rank[b]) + { + m_Rank[a]++; + } + } + +private: + std::vector m_Parent; + std::vector m_Rank; +}; + +/** + * @struct IdentifySampleSliceBySliceFunctor + * @brief BFS-based implementation for slice-by-slice mode. + * + * Slices are 2D and small relative to the full volume, so OOC chunk + * thrashing is not a concern. This functor is used by both the in-core + * and OOC algorithm classes when slice-by-slice mode is enabled. + */ +struct IdentifySampleSliceBySliceFunctor +{ + /** + * @brief Enumerates the three orthogonal slice planes. + */ + enum class Plane + { + XY, + XZ, + YZ + }; + + static constexpr int64 k_Dp1[4] = {0, 0, -1, 1}; + static constexpr int64 k_Dp2[4] = {-1, 1, 0, 0}; + + /** + * @brief Performs BFS-based sample identification on each 2D slice of the given plane. + * @param imageGeom The image geometry providing dimensions. + * @param goodVoxelsPtr The mask array marking sample vs. non-sample voxels. + * @param fillHoles Whether to fill interior holes in each slice. + * @param plane Which orthogonal plane to slice along. + * @param messageHandler Handler for progress messages. + * @param shouldCancel Cancellation flag checked between slices. + */ + template + void operator()(const ImageGeom* imageGeom, IDataArray* goodVoxelsPtr, bool fillHoles, Plane plane, const IFilter::MessageHandler& messageHandler, const std::atomic_bool& shouldCancel) + { + auto& goodVoxels = goodVoxelsPtr->template getIDataStoreRefAs>(); + + SizeVec3 uDims = imageGeom->getDimensions(); + const int64 dimX = static_cast(uDims[0]); + const int64 dimY = static_cast(uDims[1]); + const int64 dimZ = static_cast(uDims[2]); + + int64 planeDim1, planeDim2, fixedDim; + int64 stride1, stride2, fixedStride; + + switch(plane) + { + case Plane::XY: + planeDim1 = dimX; + planeDim2 = dimY; + fixedDim = dimZ; + stride1 = 1; + stride2 = dimX; + fixedStride = dimX * dimY; + break; + + case Plane::XZ: + planeDim1 = dimX; + planeDim2 = dimZ; + fixedDim = dimY; + stride1 = 1; + stride2 = dimX * dimY; + fixedStride = dimX; + break; + + case Plane::YZ: + planeDim1 = dimY; + planeDim2 = dimZ; + fixedDim = dimX; + stride1 = dimX; + stride2 = dimX * dimY; + fixedStride = 1; + break; + } + + for(int64 fixedIdx = 0; fixedIdx < fixedDim; ++fixedIdx) + { + if(shouldCancel) + { + return; + } + messageHandler(IFilter::Message::Type::Info, fmt::format("Slice {}", fixedIdx)); + + std::vector checked(planeDim1 * planeDim2, false); + std::vector sample(planeDim1 * planeDim2, false); + std::vector currentVList; + int64 biggestBlock = 0; + + for(int64 p2 = 0; p2 < planeDim2; ++p2) + { + for(int64 p1 = 0; p1 < planeDim1; ++p1) + { + int64 planeIndex = p2 * planeDim1 + p1; + int64 globalIndex = fixedIdx * fixedStride + p2 * stride2 + p1 * stride1; + + if(!checked[planeIndex] && goodVoxels.getValue(globalIndex)) + { + currentVList.push_back(planeIndex); + int64 count = 0; + + while(count < currentVList.size()) + { + int64 localIdx = currentVList[count]; + int64 localP1 = localIdx % planeDim1; + int64 localP2 = localIdx / planeDim1; + + for(int j = 0; j < 4; ++j) + { + int64 neighborP1 = localP1 + k_Dp1[j]; + int64 neighborP2 = localP2 + k_Dp2[j]; + + if(neighborP1 >= 0 && neighborP1 < planeDim1 && neighborP2 >= 0 && neighborP2 < planeDim2) + { + int64 neighborIdx = neighborP2 * planeDim1 + neighborP1; + int64 globalNeighborIdx = fixedIdx * fixedStride + neighborP2 * stride2 + neighborP1 * stride1; + + if(!checked[neighborIdx] && goodVoxels.getValue(globalNeighborIdx)) + { + currentVList.push_back(neighborIdx); + checked[neighborIdx] = true; + } + } + } + count++; + } + + if(static_cast(currentVList.size()) > biggestBlock) + { + biggestBlock = currentVList.size(); + sample.assign(planeDim1 * planeDim2, false); + for(int64 idx : currentVList) + { + sample[idx] = true; + } + } + currentVList.clear(); + } + } + } + if(shouldCancel) + { + return; + } + + for(int64 p2 = 0; p2 < planeDim2; ++p2) + { + for(int64 p1 = 0; p1 < planeDim1; ++p1) + { + int64 planeIndex = p2 * planeDim1 + p1; + int64 globalIndex = fixedIdx * fixedStride + p2 * stride2 + p1 * stride1; + + if(!sample[planeIndex]) + { + goodVoxels.setValue(globalIndex, false); + } + } + } + if(shouldCancel) + { + return; + } + if(fillHoles) + { + for(int64 p2 = 0; p2 < planeDim2; ++p2) + { + for(int64 p1 = 0; p1 < planeDim1; ++p1) + { + int64 planeIndex = p2 * planeDim1 + p1; + int64 globalIndex = fixedIdx * fixedStride + p2 * stride2 + p1 * stride1; + + if(!checked[planeIndex] && !goodVoxels.getValue(globalIndex)) + { + currentVList.push_back(planeIndex); + int64 count = 0; + bool touchesBoundary = false; + + while(count < currentVList.size()) + { + int64 localIdx = currentVList[count]; + int64 localP1 = localIdx % planeDim1; + int64 localP2 = localIdx / planeDim1; + + if(localP1 == 0 || localP1 == planeDim1 - 1 || localP2 == 0 || localP2 == planeDim2 - 1) + { + touchesBoundary = true; + } + + for(int j = 0; j < 4; ++j) + { + int64 neighborP1 = localP1 + k_Dp1[j]; + int64 neighborP2 = localP2 + k_Dp2[j]; + + if(neighborP1 >= 0 && neighborP1 < planeDim1 && neighborP2 >= 0 && neighborP2 < planeDim2) + { + int64 neighborIdx = neighborP2 * planeDim1 + neighborP1; + int64 globalNeighborIdx = fixedIdx * fixedStride + neighborP2 * stride2 + neighborP1 * stride1; + + if(!checked[neighborIdx] && !goodVoxels.getValue(globalNeighborIdx)) + { + currentVList.push_back(neighborIdx); + checked[neighborIdx] = true; + } + } + } + count++; + } + + if(!touchesBoundary) + { + for(int64 idx : currentVList) + { + int64 fillP1 = idx % planeDim1; + int64 fillP2 = idx / planeDim1; + goodVoxels.setValue(fixedIdx * fixedStride + fillP2 * stride2 + fillP1 * stride1, true); + } + } + currentVList.clear(); + } + } + } + } + } + } +}; + +} // namespace nx::core diff --git a/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/Algorithms/ScalarSegmentFeatures.cpp b/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/Algorithms/ScalarSegmentFeatures.cpp index 51805d1765..1278978c4e 100644 --- a/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/Algorithms/ScalarSegmentFeatures.cpp +++ b/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/Algorithms/ScalarSegmentFeatures.cpp @@ -5,6 +5,7 @@ #include "simplnx/DataStructure/DataStore.hpp" #include "simplnx/DataStructure/Geometry/IGridGeometry.hpp" #include "simplnx/Filter/Actions/CreateArrayAction.hpp" +#include "simplnx/Utilities/AlgorithmDispatch.hpp" using namespace nx::core; @@ -54,6 +55,15 @@ class TSpecificCompareFunctorBool : public SegmentFeatures::CompareFunctor return false; } + bool compare(int64 index, int64 neighIndex) override + { + if(index >= m_Length || neighIndex >= m_Length) + { + return false; + } + return (*m_Data)[neighIndex] == (*m_Data)[index]; + } + private: int64 m_Length = 0; // Length of the Data Array AbstractDataStore* m_FeatureIdsArray = nullptr; // The Feature Ids @@ -109,6 +119,20 @@ class TSpecificCompareFunctor : public SegmentFeatures::CompareFunctor return false; } + bool compare(int64 index, int64 neighIndex) override + { + if(index >= m_Length || neighIndex >= m_Length) + { + return false; + } + + if(m_Data[index] >= m_Data[neighIndex]) + { + return (m_Data[index] - m_Data[neighIndex]) <= m_Tolerance; + } + return (m_Data[neighIndex] - m_Data[index]) <= m_Tolerance; + } + private: int64 m_Length = 0; // Length of the Data Array T m_Tolerance = static_cast(0); // The tolerance of the comparison @@ -209,8 +233,16 @@ Result<> ScalarSegmentFeatures::operator()() m_CompareFunctor = std::make_shared(); // The default CompareFunctor which ALWAYS returns false for the comparison } - // Run the segmentation algorithm - execute(gridGeom); + // Dispatch between DFS (in-core) and CCL (OOC) algorithms + if(IsOutOfCore(*m_FeatureIdsArray) || ForceOocAlgorithm()) + { + auto& featureIdsStore = m_FeatureIdsArray->getDataStoreRef(); + executeCCL(gridGeom, featureIdsStore); + } + else + { + execute(gridGeom); + } // Sanity check the result. if(this->m_FoundFeatures < 1) { @@ -283,3 +315,24 @@ bool ScalarSegmentFeatures::determineGrouping(int64 referencepoint, int64 neighb return false; } + +// ----------------------------------------------------------------------------- +bool ScalarSegmentFeatures::isValidVoxel(int64 point) const +{ + if(m_InputValues->UseMask && !m_GoodVoxels->isTrue(point)) + { + return false; + } + return true; +} + +// ----------------------------------------------------------------------------- +bool ScalarSegmentFeatures::areNeighborsSimilar(int64 point1, int64 point2) const +{ + // Both voxels must be valid + if(!isValidVoxel(point2)) + { + return false; + } + return m_CompareFunctor->compare(point1, point2); +} diff --git a/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/Algorithms/ScalarSegmentFeatures.hpp b/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/Algorithms/ScalarSegmentFeatures.hpp index 6a1393eeae..0ee2ad20a0 100644 --- a/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/Algorithms/ScalarSegmentFeatures.hpp +++ b/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/Algorithms/ScalarSegmentFeatures.hpp @@ -51,26 +51,23 @@ class SIMPLNXCORE_EXPORT ScalarSegmentFeatures : public SegmentFeatures Result<> operator()(); protected: + int64_t getSeed(int32 gnum, int64 nextSeed) const override; + bool determineGrouping(int64 referencePoint, int64 neighborPoint, int32 gnum) const override; + /** - * @brief - * @param data - * @param args - * @param gnum - * @param nextSeed - * @return int64 + * @brief Checks whether a voxel can participate in scalar segmentation based on the mask. + * @param point Linear voxel index. + * @return true if the voxel passes the mask check (or no mask is used). */ - int64_t getSeed(int32 gnum, int64 nextSeed) const override; + bool isValidVoxel(int64 point) const override; /** - * @brief - * @param data - * @param args - * @param referencePoint - * @param neighborPoint - * @param gnum - * @return bool + * @brief Determines whether two neighboring voxels belong to the same scalar segment. + * @param point1 First voxel index. + * @param point2 Second (neighbor) voxel index. + * @return true if both voxels are valid and their scalar values are within tolerance. */ - bool determineGrouping(int64 referencePoint, int64 neighborPoint, int32 gnum) const override; + bool areNeighborsSimilar(int64 point1, int64 point2) const override; private: const ScalarSegmentFeaturesInputValues* m_InputValues = nullptr; diff --git a/src/Plugins/SimplnxCore/test/FillBadDataTest.cpp b/src/Plugins/SimplnxCore/test/FillBadDataTest.cpp index fc47364d82..c747b02345 100644 --- a/src/Plugins/SimplnxCore/test/FillBadDataTest.cpp +++ b/src/Plugins/SimplnxCore/test/FillBadDataTest.cpp @@ -1,9 +1,12 @@ #include +#include "simplnx/DataStructure/AttributeMatrix.hpp" +#include "simplnx/DataStructure/Geometry/ImageGeom.hpp" #include "simplnx/Parameters/MultiArraySelectionParameter.hpp" #include "simplnx/Pipeline/AbstractPipelineNode.hpp" #include "simplnx/Pipeline/Pipeline.hpp" #include "simplnx/UnitTest/UnitTestCommon.hpp" +#include "simplnx/Utilities/AlgorithmDispatch.hpp" #include "SimplnxCore/Filters/FillBadDataFilter.hpp" #include "SimplnxCore/Filters/ReadDREAM3DFilter.hpp" @@ -20,6 +23,10 @@ TEST_CASE("SimplnxCore::FillBadData_SmallIN100", "[Core][FillBadDataFilter]") { // Load the Simplnx Application instance and load the plugins UnitTest::LoadPlugins(); + bool forceOocAlgo = GENERATE(false, true); + const nx::core::ForceOocAlgorithmGuard guard(forceOocAlgo); + // FillBadData SmallIN100: 117x201x189, EulerAngles (float32, 3-comp) => 201*189*3*4 = 455,868 bytes/slice + const UnitTest::PreferencesSentinel prefsSentinel("Zarr", 455868, true); const nx::core::UnitTest::TestFileSentinel testDataSentinel(nx::core::unit_test::k_TestFilesDir, "6_5_fill_bad_data.tar.gz", "6_5_fill_bad_data"); // Read Exemplar DREAM3D File Filter @@ -66,6 +73,8 @@ TEST_CASE("SimplnxCore::FillBadData_SmallIN100", "[Core][FillBadDataFilter]") TEST_CASE("SimplnxCore::FillBadData::Test01_SingleSmallDefect", "[Core][FillBadDataFilter]") { UnitTest::LoadPlugins(); + bool forceOocAlgo = GENERATE(false, true); + const nx::core::ForceOocAlgorithmGuard guard(forceOocAlgo); // Configure out-of-core settings (automatically restored on scope exit) const UnitTest::PreferencesSentinel prefsSentinel("Zarr", 100, true); // 100 bytes - force very small arrays to OOC @@ -104,6 +113,8 @@ TEST_CASE("SimplnxCore::FillBadData::Test01_SingleSmallDefect", "[Core][FillBadD TEST_CASE("SimplnxCore::FillBadData::Test02_SingleLargeDefect", "[Core][FillBadDataFilter]") { UnitTest::LoadPlugins(); + bool forceOocAlgo = GENERATE(false, true); + const nx::core::ForceOocAlgorithmGuard guard(forceOocAlgo); // Configure out-of-core settings (automatically restored on scope exit) const UnitTest::PreferencesSentinel prefsSentinel("Zarr", 100, true); @@ -143,6 +154,8 @@ TEST_CASE("SimplnxCore::FillBadData::Test02_SingleLargeDefect", "[Core][FillBadD TEST_CASE("SimplnxCore::FillBadData::Test03_ThresholdBoundary", "[Core][FillBadDataFilter]") { UnitTest::LoadPlugins(); + bool forceOocAlgo = GENERATE(false, true); + const nx::core::ForceOocAlgorithmGuard guard(forceOocAlgo); // Configure out-of-core settings (automatically restored on scope exit) const UnitTest::PreferencesSentinel prefsSentinel("Zarr", 100, true); @@ -177,6 +190,8 @@ TEST_CASE("SimplnxCore::FillBadData::Test03_ThresholdBoundary", "[Core][FillBadD TEST_CASE("SimplnxCore::FillBadData::Test04_MultipleSmallDefects", "[Core][FillBadDataFilter]") { UnitTest::LoadPlugins(); + bool forceOocAlgo = GENERATE(false, true); + const nx::core::ForceOocAlgorithmGuard guard(forceOocAlgo); // Configure out-of-core settings (automatically restored on scope exit) const UnitTest::PreferencesSentinel prefsSentinel("Zarr", 500, true); // Slightly larger for 10x10x10 @@ -211,6 +226,8 @@ TEST_CASE("SimplnxCore::FillBadData::Test04_MultipleSmallDefects", "[Core][FillB TEST_CASE("SimplnxCore::FillBadData::Test05_MixedSmallAndLarge", "[Core][FillBadDataFilter]") { UnitTest::LoadPlugins(); + bool forceOocAlgo = GENERATE(false, true); + const nx::core::ForceOocAlgorithmGuard guard(forceOocAlgo); // Configure out-of-core settings (automatically restored on scope exit) const UnitTest::PreferencesSentinel prefsSentinel("Zarr", 500, true); @@ -245,6 +262,8 @@ TEST_CASE("SimplnxCore::FillBadData::Test05_MixedSmallAndLarge", "[Core][FillBad TEST_CASE("SimplnxCore::FillBadData::Test06_SingleVoxelDefects", "[Core][FillBadDataFilter]") { UnitTest::LoadPlugins(); + bool forceOocAlgo = GENERATE(false, true); + const nx::core::ForceOocAlgorithmGuard guard(forceOocAlgo); // Configure out-of-core settings (automatically restored on scope exit) const UnitTest::PreferencesSentinel prefsSentinel("Zarr", 100, true); @@ -279,6 +298,8 @@ TEST_CASE("SimplnxCore::FillBadData::Test06_SingleVoxelDefects", "[Core][FillBad TEST_CASE("SimplnxCore::FillBadData::Test07_DefectsAtBoundaries", "[Core][FillBadDataFilter]") { UnitTest::LoadPlugins(); + bool forceOocAlgo = GENERATE(false, true); + const nx::core::ForceOocAlgorithmGuard guard(forceOocAlgo); // Configure out-of-core settings (automatically restored on scope exit) const UnitTest::PreferencesSentinel prefsSentinel("Zarr", 100, true); @@ -313,6 +334,8 @@ TEST_CASE("SimplnxCore::FillBadData::Test07_DefectsAtBoundaries", "[Core][FillBa TEST_CASE("SimplnxCore::FillBadData::Test11_NeighborTieBreaking", "[Core][FillBadDataFilter]") { UnitTest::LoadPlugins(); + bool forceOocAlgo = GENERATE(false, true); + const nx::core::ForceOocAlgorithmGuard guard(forceOocAlgo); // Configure out-of-core settings (automatically restored on scope exit) const UnitTest::PreferencesSentinel prefsSentinel("Zarr", 50, true); // Very small for 3x3x3 @@ -352,6 +375,8 @@ TEST_CASE("SimplnxCore::FillBadData::Test11_NeighborTieBreaking", "[Core][FillBa TEST_CASE("SimplnxCore::FillBadData::Test13_StoreAsNewPhase", "[Core][FillBadDataFilter]") { UnitTest::LoadPlugins(); + bool forceOocAlgo = GENERATE(false, true); + const nx::core::ForceOocAlgorithmGuard guard(forceOocAlgo); // Configure out-of-core settings (automatically restored on scope exit) const UnitTest::PreferencesSentinel prefsSentinel("Zarr", 100, true); @@ -387,3 +412,86 @@ TEST_CASE("SimplnxCore::FillBadData::Test13_StoreAsNewPhase", "[Core][FillBadDat UnitTest::CheckArraysInheritTupleDims(dataStructure); } + +TEST_CASE("SimplnxCore::FillBadData: Benchmark 200x200x200", "[Core][FillBadDataFilter][Benchmark]") +{ + UnitTest::LoadPlugins(); + // 200x200x200, FeatureIds int32 1-comp => 200*200*4 = 160,000 bytes/slice + const UnitTest::PreferencesSentinel prefsSentinel("Zarr", 160000, true); + + constexpr usize k_DimX = 200; + constexpr usize k_DimY = 200; + constexpr usize k_DimZ = 200; + constexpr usize k_TotalVoxels = k_DimX * k_DimY * k_DimZ; + const ShapeType cellTupleShape = {k_DimZ, k_DimY, k_DimX}; + const auto benchmarkFile = fs::path(fmt::format("{}/fill_bad_data_benchmark.dream3d", unit_test::k_BinaryTestOutputDir)); + + // Stage 1: Build data programmatically and write to .dream3d + { + DataStructure buildDS; + auto* imageGeom = ImageGeom::Create(buildDS, "DataContainer"); + imageGeom->setDimensions({k_DimX, k_DimY, k_DimZ}); + imageGeom->setSpacing({1.0f, 1.0f, 1.0f}); + imageGeom->setOrigin({0.0f, 0.0f, 0.0f}); + + auto* cellAM = AttributeMatrix::Create(buildDS, "CellData", cellTupleShape, imageGeom->getId()); + imageGeom->setCellData(*cellAM); + + // Create FeatureIds array (int32, 1-component) - grid of features with scattered bad voxels + auto* featureIdsArray = UnitTest::CreateTestDataArray(buildDS, "FeatureIds", cellTupleShape, {1}, cellAM->getId()); + auto& featureIdsStore = featureIdsArray->getDataStoreRef(); + + // Create Phases array (int32, 1-component) - all phase 1 + auto* phasesArray = UnitTest::CreateTestDataArray(buildDS, "Phases", cellTupleShape, {1}, cellAM->getId()); + auto& phasesStore = phasesArray->getDataStoreRef(); + + // Fill: divide into 25-voxel blocks, each block = one feature (1-based). + // Scatter ~10% bad voxels (FeatureId=0) using a deterministic pattern. + constexpr usize k_BlockSize = 25; + constexpr usize k_BlocksPerDim = k_DimX / k_BlockSize; // 8 + for(usize z = 0; z < k_DimZ; z++) + { + for(usize y = 0; y < k_DimY; y++) + { + for(usize x = 0; x < k_DimX; x++) + { + const usize idx = z * k_DimX * k_DimY + y * k_DimX + x; + phasesStore[idx] = 1; + + usize bx = x / k_BlockSize; + usize by = y / k_BlockSize; + usize bz = z / k_BlockSize; + int32 blockFeatureId = static_cast(bz * k_BlocksPerDim * k_BlocksPerDim + by * k_BlocksPerDim + bx + 1); + + // Scatter bad voxels: ~10% of voxels become bad (FeatureId=0) + bool isBad = ((x * 7 + y * 13 + z * 29) % 10 == 0); + featureIdsStore[idx] = isBad ? 0 : blockFeatureId; + } + } + } + + UnitTest::WriteTestDataStructure(buildDS, benchmarkFile); + } + + // Stage 2: Reload (arrays become ZarrStore in OOC) and run filter + DataStructure dataStructure = UnitTest::LoadDataStructure(benchmarkFile); + + { + FillBadDataFilter filter; + Arguments args; + + args.insertOrAssign(FillBadDataFilter::k_MinAllowedDefectSize_Key, std::make_any(50)); + args.insertOrAssign(FillBadDataFilter::k_StoreAsNewPhase_Key, std::make_any(false)); + args.insertOrAssign(FillBadDataFilter::k_CellFeatureIdsArrayPath_Key, std::make_any(DataPath({"DataContainer", "CellData", "FeatureIds"}))); + args.insertOrAssign(FillBadDataFilter::k_CellPhasesArrayPath_Key, std::make_any(DataPath({"DataContainer", "CellData", "Phases"}))); + args.insertOrAssign(FillBadDataFilter::k_IgnoredDataArrayPaths_Key, std::make_any(MultiArraySelectionParameter::ValueType{})); + args.insertOrAssign(FillBadDataFilter::k_SelectedImageGeometryPath_Key, std::make_any(DataPath({"DataContainer"}))); + + auto preflightResult = filter.preflight(dataStructure, args); + SIMPLNX_RESULT_REQUIRE_VALID(preflightResult.outputActions); + auto executeResult = filter.execute(dataStructure, args, nullptr, IFilter::MessageHandler{[](const IFilter::Message& message) { fmt::print("{}\n", message.message); }}); + SIMPLNX_RESULT_REQUIRE_VALID(executeResult.result); + } + + fs::remove(benchmarkFile); +} diff --git a/src/Plugins/SimplnxCore/test/IdentifySampleTest.cpp b/src/Plugins/SimplnxCore/test/IdentifySampleTest.cpp index 7871d98fad..d593df17bc 100644 --- a/src/Plugins/SimplnxCore/test/IdentifySampleTest.cpp +++ b/src/Plugins/SimplnxCore/test/IdentifySampleTest.cpp @@ -2,13 +2,17 @@ #include "SimplnxCore/Filters/IdentifySampleFilter.hpp" #include "SimplnxCore/SimplnxCore_test_dirs.hpp" +#include "simplnx/DataStructure/AttributeMatrix.hpp" #include "simplnx/DataStructure/Geometry/ImageGeom.hpp" #include "simplnx/DataStructure/IDataArray.hpp" #include "simplnx/Parameters/ChoicesParameter.hpp" #include "simplnx/UnitTest/UnitTestCommon.hpp" +#include "simplnx/Utilities/AlgorithmDispatch.hpp" #include +#include + using namespace nx::core; using namespace nx::core::UnitTest; @@ -19,6 +23,10 @@ const DataPath k_ExemplarArrayPath = Constants::k_DataContainerPath.createChildP TEST_CASE("SimplnxCore::IdentifySampleFilter", "[SimplnxCore][IdentifySampleFilter]") { UnitTest::LoadPlugins(); + bool forceOocAlgo = GENERATE(false, true); + const nx::core::ForceOocAlgorithmGuard guard(forceOocAlgo); + // 25x25x25 dataset, Mask (uint8, 1-comp) => 25*25*1 = 625 bytes/slice + const UnitTest::PreferencesSentinel prefsSentinel("Zarr", 625, true); const nx::core::UnitTest::TestFileSentinel testDataSentinel(nx::core::unit_test::k_TestFilesDir, "identify_sample.tar.gz", "identify_sample", true, true); using TestArgType = std::tuple; @@ -74,3 +82,104 @@ TEST_CASE("SimplnxCore::IdentifySampleFilter", "[SimplnxCore][IdentifySampleFilt } } } + +TEST_CASE("SimplnxCore::IdentifySampleFilter: Benchmark 200x200x200", "[SimplnxCore][IdentifySampleFilter][Benchmark]") +{ + UnitTest::LoadPlugins(); + // 200*200 * 1 byte = 40000 bytes per Z-slice for uint8 mask + const UnitTest::PreferencesSentinel prefsSentinel("Zarr", 40000, true); + + constexpr usize k_DimX = 200; + constexpr usize k_DimY = 200; + constexpr usize k_DimZ = 200; + constexpr usize k_TotalVoxels = k_DimX * k_DimY * k_DimZ; + const ShapeType cellTupleShape = {k_DimZ, k_DimY, k_DimX}; + const auto benchmarkFile = fs::path(fmt::format("{}/identify_sample_benchmark.dream3d", unit_test::k_BinaryTestOutputDir)); + + // Stage 1: Build data programmatically and write to .dream3d + { + DataStructure buildDS; + auto* imageGeom = ImageGeom::Create(buildDS, "DataContainer"); + imageGeom->setDimensions({k_DimX, k_DimY, k_DimZ}); + imageGeom->setSpacing({1.0f, 1.0f, 1.0f}); + imageGeom->setOrigin({0.0f, 0.0f, 0.0f}); + + auto* cellAM = AttributeMatrix::Create(buildDS, "Cell Data", cellTupleShape, imageGeom->getId()); + imageGeom->setCellData(*cellAM); + + // Create mask array: a sphere of "good" voxels with interior holes and exterior noise + auto* maskArray = CreateTestDataArray(buildDS, "Mask", cellTupleShape, {1}, cellAM->getId()); + auto& maskStore = maskArray->getDataStoreRef(); + + const float cx = k_DimX / 2.0f; + const float cy = k_DimY / 2.0f; + const float cz = k_DimZ / 2.0f; + const float radius = 80.0f; + + for(usize z = 0; z < k_DimZ; z++) + { + for(usize y = 0; y < k_DimY; y++) + { + for(usize x = 0; x < k_DimX; x++) + { + const usize idx = z * k_DimX * k_DimY + y * k_DimX + x; + const float dx = static_cast(x) - cx; + const float dy = static_cast(y) - cy; + const float dz = static_cast(z) - cz; + const float dist = std::sqrt(dx * dx + dy * dy + dz * dz); + bool good = dist < radius; + + // Create interior holes (small sphere cavities) + if(good) + { + const float h1 = std::sqrt((static_cast(x) - 120.0f) * (static_cast(x) - 120.0f) + (static_cast(y) - 120.0f) * (static_cast(y) - 120.0f) + + (static_cast(z) - 120.0f) * (static_cast(z) - 120.0f)); + if(h1 < 10.0f) + { + good = false; + } + const float h2 = std::sqrt((static_cast(x) - 80.0f) * (static_cast(x) - 80.0f) + (static_cast(y) - 80.0f) * (static_cast(y) - 80.0f) + + (static_cast(z) - 80.0f) * (static_cast(z) - 80.0f)); + if(h2 < 8.0f) + { + good = false; + } + } + + // Add some isolated small clusters outside the main sphere + if(!good && dist < radius + 5.0f && dist > radius) + { + if((x + y + z) % 7 == 0) + { + good = true; + } + } + + maskStore[idx] = good ? 1 : 0; + } + } + } + + UnitTest::WriteTestDataStructure(buildDS, benchmarkFile); + } + + // Stage 2: Reload (arrays become ZarrStore in OOC) and run filter + DataStructure dataStructure = UnitTest::LoadDataStructure(benchmarkFile); + + { + IdentifySampleFilter filter; + Arguments args; + args.insert(IdentifySampleFilter::k_SelectedImageGeometryPath_Key, std::make_any(DataPath({"DataContainer"}))); + args.insert(IdentifySampleFilter::k_MaskArrayPath_Key, std::make_any(DataPath({"DataContainer", "Cell Data", "Mask"}))); + args.insert(IdentifySampleFilter::k_FillHoles_Key, std::make_any(true)); + args.insert(IdentifySampleFilter::k_SliceBySlice_Key, std::make_any(false)); + args.insert(IdentifySampleFilter::k_SliceBySlicePlane_Key, std::make_any(0)); + + auto preflightResult = filter.preflight(dataStructure, args); + SIMPLNX_RESULT_REQUIRE_VALID(preflightResult.outputActions); + auto executeResult = filter.execute(dataStructure, args); + SIMPLNX_RESULT_REQUIRE_VALID(executeResult.result); + } + + fs::remove(benchmarkFile); +} diff --git a/src/Plugins/SimplnxCore/test/ScalarSegmentFeaturesTest.cpp b/src/Plugins/SimplnxCore/test/ScalarSegmentFeaturesTest.cpp index 00cb47718f..5b68ce436b 100644 --- a/src/Plugins/SimplnxCore/test/ScalarSegmentFeaturesTest.cpp +++ b/src/Plugins/SimplnxCore/test/ScalarSegmentFeaturesTest.cpp @@ -1,11 +1,14 @@ #include "SimplnxCore/Filters/ScalarSegmentFeaturesFilter.hpp" #include "SimplnxCore/SimplnxCore_test_dirs.hpp" +#include "simplnx/DataStructure/AttributeMatrix.hpp" +#include "simplnx/DataStructure/Geometry/ImageGeom.hpp" #include "simplnx/DataStructure/IO/HDF5/DataStructureWriter.hpp" #include "simplnx/Parameters/ArrayCreationParameter.hpp" #include "simplnx/Parameters/BoolParameter.hpp" #include "simplnx/Parameters/ChoicesParameter.hpp" #include "simplnx/UnitTest/UnitTestCommon.hpp" +#include "simplnx/Utilities/AlgorithmDispatch.hpp" #include "simplnx/Utilities/DataArrayUtilities.hpp" #include "simplnx/Utilities/Parsing/DREAM3D/Dream3dIO.hpp" #include "simplnx/Utilities/Parsing/HDF5/IO/FileIO.hpp" @@ -34,6 +37,12 @@ const std::string k_ExemplaryCombinationAllConnectedFeatureIdsName = "Exemplary TEST_CASE("SimplnxCore::ScalarSegmentFeatures", "[SimplnxCore][ScalarSegmentFeatures]") { + UnitTest::LoadPlugins(); + bool forceOocAlgo = GENERATE(false, true); + const nx::core::ForceOocAlgorithmGuard guard(forceOocAlgo); + // SmallIN100: 100x100x100, largest 1-comp int32/float32 array => 100*100*4 = 40,000 bytes/slice + const UnitTest::PreferencesSentinel prefsSentinel("Zarr", 40000, true); + const nx::core::UnitTest::TestFileSentinel testDataSentinel(nx::core::unit_test::k_TestFilesDir, "6_5_test_data_1_v2.tar.gz", "6_5_test_data_1_v2"); // Read the Small IN100 Data set @@ -99,6 +108,12 @@ TEST_CASE("SimplnxCore::ScalarSegmentFeatures", "[SimplnxCore][ScalarSegmentFeat TEST_CASE("SimplnxCore::ScalarSegmentFeatures: Neighbor Scheme", "[Reconstruction][ScalarSegmentFeatures]") { + UnitTest::LoadPlugins(); + bool forceOocAlgo = GENERATE(false, true); + const nx::core::ForceOocAlgorithmGuard guard(forceOocAlgo); + // Neighbor scheme test: 8x5x10, largest int32 array => 5*10*4 = 200 bytes/slice + const UnitTest::PreferencesSentinel prefsSentinel("Zarr", 200, true); + /** * We are going to use Catch2's GENERATE macro to create variations of parameter values. * EVERYTHING after the GENERATE macro will be run for each of the generated sets of values @@ -171,3 +186,76 @@ TEST_CASE("SimplnxCore::ScalarSegmentFeatures: Neighbor Scheme", "[Reconstructio } } } + +TEST_CASE("SimplnxCore::ScalarSegmentFeatures: Benchmark 200x200x200", "[SimplnxCore][ScalarSegmentFeatures][Benchmark]") +{ + UnitTest::LoadPlugins(); + // 200x200x200, largest array is int32 1-comp => 200*200*4 = 160,000 bytes/slice + const UnitTest::PreferencesSentinel prefsSentinel("Zarr", 160000, true); + + constexpr usize k_DimX = 200; + constexpr usize k_DimY = 200; + constexpr usize k_DimZ = 200; + const ShapeType cellTupleShape = {k_DimZ, k_DimY, k_DimX}; + const auto benchmarkFile = fs::path(fmt::format("{}/scalar_segment_features_benchmark.dream3d", unit_test::k_BinaryTestOutputDir)); + + // Stage 1: Build data programmatically and write to .dream3d + { + DataStructure buildDS; + auto* imageGeom = ImageGeom::Create(buildDS, "DataContainer"); + imageGeom->setDimensions({k_DimX, k_DimY, k_DimZ}); + imageGeom->setSpacing({1.0f, 1.0f, 1.0f}); + imageGeom->setOrigin({0.0f, 0.0f, 0.0f}); + + auto* cellAM = AttributeMatrix::Create(buildDS, "Cell Data", cellTupleShape, imageGeom->getId()); + imageGeom->setCellData(*cellAM); + + // Create scalar input array: blocks of 25 voxels with distinct integer values + // Creates a grid of ~512 distinct regions (8x8x8 blocks) + auto* scalarArray = CreateTestDataArray(buildDS, "ScalarData", cellTupleShape, {1}, cellAM->getId()); + auto& scalarStore = scalarArray->getDataStoreRef(); + + constexpr usize k_BlockSize = 25; + for(usize z = 0; z < k_DimZ; z++) + { + for(usize y = 0; y < k_DimY; y++) + { + for(usize x = 0; x < k_DimX; x++) + { + const usize idx = z * k_DimX * k_DimY + y * k_DimX + x; + const usize bx = x / k_BlockSize; + const usize by = y / k_BlockSize; + const usize bz = z / k_BlockSize; + // Each block gets a unique scalar value; tolerance=0 means only identical values merge + scalarStore[idx] = static_cast(bz * 64 + by * 8 + bx); + } + } + } + + UnitTest::WriteTestDataStructure(buildDS, benchmarkFile); + } + + // Stage 2: Reload (arrays become ZarrStore in OOC) and run filter + DataStructure dataStructure = UnitTest::LoadDataStructure(benchmarkFile); + + { + ScalarSegmentFeaturesFilter filter; + Arguments args; + args.insertOrAssign(ScalarSegmentFeaturesFilter::k_GridGeomPath_Key, std::make_any(DataPath({"DataContainer"}))); + args.insertOrAssign(ScalarSegmentFeaturesFilter::k_InputArrayPathKey, std::make_any(DataPath({"DataContainer", "Cell Data", "ScalarData"}))); + args.insertOrAssign(ScalarSegmentFeaturesFilter::k_ScalarToleranceKey, std::make_any(0)); + args.insertOrAssign(ScalarSegmentFeaturesFilter::k_UseMask_Key, std::make_any(false)); + args.insertOrAssign(ScalarSegmentFeaturesFilter::k_MaskArrayPath_Key, std::make_any(DataPath{})); + args.insertOrAssign(ScalarSegmentFeaturesFilter::k_FeatureIdsName_Key, std::make_any("FeatureIds")); + args.insertOrAssign(ScalarSegmentFeaturesFilter::k_CellFeatureName_Key, std::make_any("CellFeatureData")); + args.insertOrAssign(ScalarSegmentFeaturesFilter::k_ActiveArrayName_Key, std::make_any("Active")); + args.insertOrAssign(ScalarSegmentFeaturesFilter::k_RandomizeFeatures_Key, std::make_any(false)); + + auto preflightResult = filter.preflight(dataStructure, args); + SIMPLNX_RESULT_REQUIRE_VALID(preflightResult.outputActions); + auto executeResult = filter.execute(dataStructure, args); + SIMPLNX_RESULT_REQUIRE_VALID(executeResult.result); + } + + fs::remove(benchmarkFile); +} diff --git a/src/simplnx/Utilities/SegmentFeatures.cpp b/src/simplnx/Utilities/SegmentFeatures.cpp index 037b7175b0..568db177c1 100644 --- a/src/simplnx/Utilities/SegmentFeatures.cpp +++ b/src/simplnx/Utilities/SegmentFeatures.cpp @@ -1,8 +1,10 @@ #include "SegmentFeatures.hpp" +#include "simplnx/DataStructure/AbstractDataStore.hpp" #include "simplnx/DataStructure/Geometry/IGridGeometry.hpp" #include "simplnx/Utilities/ClusteringUtilities.hpp" #include "simplnx/Utilities/MessageHelper.hpp" +#include "simplnx/Utilities/UnionFind.hpp" #include @@ -221,16 +223,354 @@ Result<> SegmentFeatures::execute(IGridGeometry* gridGeom) return {}; } +// ============================================================================= +// Chunk-Sequential Connected Component Labeling (CCL) Algorithm +// ============================================================================= +// +// Replaces the DFS flood-fill with a three-phase scanline algorithm optimized +// for out-of-core performance. +// +// Phase 1: Forward CCL pass - assign provisional labels using backward neighbors. +// Uses an in-memory buffer for labels to avoid cross-chunk reads from +// OOC storage (backward neighbors may be in evicted chunks). +// Phase 2: Resolution - flatten Union-Find and build contiguous renumbering. +// Operates entirely in-memory on the provisional labels buffer. +// Phase 3: Relabeling - write final contiguous feature IDs to the data store +// in chunk-sequential order for optimal OOC write performance. +// ============================================================================= +Result<> SegmentFeatures::executeCCL(IGridGeometry* gridGeom, AbstractDataStore& featureIdsStore) +{ + ThrottledMessenger throttledMessenger = m_MessageHelper.createThrottledMessenger(); + + const SizeVec3 udims = gridGeom->getDimensions(); + // getDimensions() returns [X, Y, Z] + const int64 dimX = static_cast(udims[0]); + const int64 dimY = static_cast(udims[1]); + const int64 dimZ = static_cast(udims[2]); + const usize totalVoxels = static_cast(dimX) * static_cast(dimY) * static_cast(dimZ); + + const int64 sliceStride = dimX * dimY; + + const bool useFaceOnly = (m_NeighborScheme == NeighborScheme::Face); + + UnionFind unionFind; + int32 nextLabel = 1; // Provisional labels start at 1 + + // Rolling 2-slice buffer for backward neighbor label lookups. + // Backward neighbors in CCL are always in the current Z-slice or the + // previous Z-slice, so 2 slices is sufficient. This uses O(slice) memory + // instead of O(volume), enabling processing of datasets larger than RAM. + // Buffer layout: slice (iz % 2) occupies [sliceOffset .. sliceOffset + sliceStride) + const usize sliceSize = static_cast(sliceStride); + std::vector labelBuffer(2 * sliceSize, 0); + + // ========================================================================= + // Phase 1: Forward CCL - assign provisional labels using backward neighbors + // ========================================================================= + m_MessageHelper.sendMessage("Phase 1/2: Forward CCL pass..."); + + for(int64 iz = 0; iz < dimZ; iz++) + { + if(m_ShouldCancel) + { + return {}; + } + + // Clear the current slice's portion of the rolling buffer + const usize currentSliceOffset = static_cast(iz % 2) * sliceSize; + std::fill(labelBuffer.begin() + currentSliceOffset, labelBuffer.begin() + currentSliceOffset + sliceSize, 0); + + for(int64 iy = 0; iy < dimY; iy++) + { + for(int64 ix = 0; ix < dimX; ix++) + { + const int64 index = iz * sliceStride + iy * dimX + ix; + const usize bufIdx = currentSliceOffset + static_cast(iy * dimX + ix); + + // Skip voxels that are not valid + if(!isValidVoxel(index)) + { + continue; + } + + // Check backward neighbors for existing labels + // "Backward" means already processed in Z-Y-X scanline order + // Read neighbor labels from the rolling buffer (direct memory access) + int32 assignedLabel = 0; + const usize prevSliceOffset = static_cast((iz + 1) % 2) * sliceSize; + + if(useFaceOnly) + { + // Face connectivity: 3 backward neighbors (-X, -Y, -Z) + // Check -X neighbor (same Z-slice, same buffer region) + if(ix > 0) + { + const int64 neighIdx = index - 1; + int32 neighLabel = labelBuffer[bufIdx - 1]; + if(neighLabel > 0 && areNeighborsSimilar(index, neighIdx)) + { + if(assignedLabel == 0) + { + assignedLabel = neighLabel; + } + else if(assignedLabel != neighLabel) + { + unionFind.unite(assignedLabel, neighLabel); + } + } + } + // Check -Y neighbor (same Z-slice, same buffer region) + if(iy > 0) + { + const int64 neighIdx = index - dimX; + int32 neighLabel = labelBuffer[currentSliceOffset + static_cast((iy - 1) * dimX + ix)]; + if(neighLabel > 0 && areNeighborsSimilar(index, neighIdx)) + { + if(assignedLabel == 0) + { + assignedLabel = neighLabel; + } + else if(assignedLabel != neighLabel) + { + unionFind.unite(assignedLabel, neighLabel); + } + } + } + // Check -Z neighbor (previous Z-slice, other buffer region) + if(iz > 0) + { + const int64 neighIdx = index - sliceStride; + int32 neighLabel = labelBuffer[prevSliceOffset + static_cast(iy * dimX + ix)]; + if(neighLabel > 0 && areNeighborsSimilar(index, neighIdx)) + { + if(assignedLabel == 0) + { + assignedLabel = neighLabel; + } + else if(assignedLabel != neighLabel) + { + unionFind.unite(assignedLabel, neighLabel); + } + } + } + } + else + { + // FaceEdgeVertex connectivity: 13 backward neighbors + for(int64 dz = -1; dz <= 0; ++dz) + { + const int64 nz = iz + dz; + if(nz < 0 || nz >= dimZ) + { + continue; + } + + const usize neighSliceOffset = (dz < 0) ? prevSliceOffset : currentSliceOffset; + + const int64 dyStart = -1; + const int64 dyEnd = (dz < 0) ? 1 : 0; + + for(int64 dy = dyStart; dy <= dyEnd; ++dy) + { + const int64 ny = iy + dy; + if(ny < 0 || ny >= dimY) + { + continue; + } + + int64 dxStart; + int64 dxEnd; + if(dz < 0) + { + dxStart = -1; + dxEnd = 1; + } + else if(dy < 0) + { + dxStart = -1; + dxEnd = 1; + } + else + { + dxStart = -1; + dxEnd = -1; + } + + for(int64 dx = dxStart; dx <= dxEnd; ++dx) + { + const int64 nx = ix + dx; + if(nx < 0 || nx >= dimX) + { + continue; + } + if(dx == 0 && dy == 0 && dz == 0) + { + continue; + } + + const int64 neighIdx = nz * sliceStride + ny * dimX + nx; + int32 neighLabel = labelBuffer[neighSliceOffset + static_cast(ny * dimX + nx)]; + if(neighLabel > 0 && areNeighborsSimilar(index, neighIdx)) + { + if(assignedLabel == 0) + { + assignedLabel = neighLabel; + } + else if(assignedLabel != neighLabel) + { + unionFind.unite(assignedLabel, neighLabel); + } + } + } + } + } + } + + // If no matching backward neighbor, assign new provisional label + if(assignedLabel == 0) + { + assignedLabel = nextLabel++; + unionFind.find(assignedLabel); // Initialize in union-find + } + + // Write label to both rolling buffer (for neighbor reads) and featureIds store + labelBuffer[bufIdx] = assignedLabel; + featureIdsStore[index] = assignedLabel; + } + } + + // Send progress per Z-slice + float percentComplete = static_cast(iz + 1) / static_cast(dimZ) * 100.0f; + throttledMessenger.sendThrottledMessage([percentComplete]() { return fmt::format("Phase 1/2: {:.1f}% complete", percentComplete); }); + } + + featureIdsStore.flush(); + + if(m_ShouldCancel) + { + return {}; + } + + // ========================================================================= + // Phase 2: Resolution - build direct provisional-label-to-final-ID lookup + // ========================================================================= + m_MessageHelper.sendMessage("Phase 2/2: Resolving labels and writing final feature IDs..."); + + unionFind.flatten(); + + // Build a direct lookup table: provisionalLabel -> finalFeatureId + // Read provisional labels from the featureIds store (written during Phase 1). + // Linear scan ensures feature IDs are assigned in the order that seeds + // are first encountered (matching DFS seed-discovery order). + std::vector labelToFinal(static_cast(nextLabel), 0); + int32 finalFeatureCount = 0; + + const uint64 numChunks = featureIdsStore.getNumberOfChunks(); + + // First pass: discover label-to-final mapping by reading provisional labels + for(uint64 chunkIdx = 0; chunkIdx < numChunks; chunkIdx++) + { + if(m_ShouldCancel) + { + return {}; + } + + featureIdsStore.loadChunk(chunkIdx); + const auto chunkLowerBounds = featureIdsStore.getChunkLowerBounds(chunkIdx); + const auto chunkUpperBounds = featureIdsStore.getChunkUpperBounds(chunkIdx); + + for(usize z = chunkLowerBounds[0]; z <= chunkUpperBounds[0]; z++) + { + for(usize y = chunkLowerBounds[1]; y <= chunkUpperBounds[1]; y++) + { + for(usize x = chunkLowerBounds[2]; x <= chunkUpperBounds[2]; x++) + { + const usize index = z * static_cast(sliceStride) + y * static_cast(dimX) + x; + int32 label = featureIdsStore[index]; + if(label > 0 && labelToFinal[label] == 0) + { + int32 root = static_cast(unionFind.find(label)); + if(labelToFinal[root] == 0) + { + finalFeatureCount++; + labelToFinal[root] = finalFeatureCount; + } + labelToFinal[label] = labelToFinal[root]; + } + } + } + } + } + + if(m_ShouldCancel) + { + return {}; + } + + // Second pass: write final feature IDs to the data store in chunk-sequential order + for(uint64 chunkIdx = 0; chunkIdx < numChunks; chunkIdx++) + { + if(m_ShouldCancel) + { + return {}; + } + + featureIdsStore.loadChunk(chunkIdx); + const auto chunkLowerBounds = featureIdsStore.getChunkLowerBounds(chunkIdx); + const auto chunkUpperBounds = featureIdsStore.getChunkUpperBounds(chunkIdx); + + for(usize z = chunkLowerBounds[0]; z <= chunkUpperBounds[0]; z++) + { + for(usize y = chunkLowerBounds[1]; y <= chunkUpperBounds[1]; y++) + { + for(usize x = chunkLowerBounds[2]; x <= chunkUpperBounds[2]; x++) + { + const usize index = z * static_cast(sliceStride) + y * static_cast(dimX) + x; + int32 provLabel = featureIdsStore[index]; + if(provLabel > 0) + { + featureIdsStore[index] = labelToFinal[provLabel]; + } + } + } + } + + // Send progress + float percentComplete = static_cast(chunkIdx + 1) / static_cast(numChunks) * 100.0f; + throttledMessenger.sendThrottledMessage([percentComplete]() { return fmt::format("Phase 2/2: {:.1f}% chunks relabeled", percentComplete); }); + } + + featureIdsStore.flush(); + + m_FoundFeatures = finalFeatureCount; + m_MessageHelper.sendMessage(fmt::format("Total Features Found: {}", m_FoundFeatures)); + return {}; +} + +// ----------------------------------------------------------------------------- int64 SegmentFeatures::getSeed(int32 gnum, int64 nextSeed) const { return -1; } +// ----------------------------------------------------------------------------- bool SegmentFeatures::determineGrouping(int64 referencePoint, int64 neighborPoint, int32 gnum) const { return false; } +// ----------------------------------------------------------------------------- +bool SegmentFeatures::isValidVoxel(int64 point) const +{ + return true; +} + +// ----------------------------------------------------------------------------- +bool SegmentFeatures::areNeighborsSimilar(int64 point1, int64 point2) const +{ + return false; +} + // ----------------------------------------------------------------------------- SegmentFeatures::SeedGenerator SegmentFeatures::initializeStaticVoxelSeedGenerator() const { diff --git a/src/simplnx/Utilities/SegmentFeatures.hpp b/src/simplnx/Utilities/SegmentFeatures.hpp index 1c015485e2..3e0920305f 100644 --- a/src/simplnx/Utilities/SegmentFeatures.hpp +++ b/src/simplnx/Utilities/SegmentFeatures.hpp @@ -17,6 +17,8 @@ namespace nx::core { class IGridGeometry; +template +class AbstractDataStore; namespace segment_features { @@ -51,16 +53,25 @@ class SIMPLNX_EXPORT SegmentFeatures }; /** - * @brief execute + * @brief Original DFS-based segmentation (in-core optimized). * @param gridGeom * @return */ Result<> execute(IGridGeometry* gridGeom); + /** + * @brief Chunk-sequential CCL-based segmentation optimized for out-of-core. + * + * Subclasses must override isValidVoxel() and areNeighborsSimilar() to use this code path. + * + * @param gridGeom The grid geometry providing dimensions and neighbor offsets. + * @param featureIdsStore The data store to write assigned feature IDs into. + * @return Result indicating success or an error with a descriptive message. + */ + Result<> executeCCL(IGridGeometry* gridGeom, AbstractDataStore& featureIdsStore); + /** * @brief Returns the seed for the specified values. - * @param data - * @param args * @param gnum * @param nextSeed * @return int64 @@ -69,8 +80,6 @@ class SIMPLNX_EXPORT SegmentFeatures /** * @brief Determines the grouping for the specified values. - * @param data - * @param args * @param referencePoint * @param neighborPoint * @param gnum @@ -82,7 +91,6 @@ class SIMPLNX_EXPORT SegmentFeatures * @brief * @param featureIds * @param totalFeatures - * @param distribution */ void randomizeFeatureIds(Int32Array* featureIds, uint64 totalFeatures); @@ -106,8 +114,37 @@ class SIMPLNX_EXPORT SegmentFeatures { return false; } + + /** + * @brief Pure data comparison without featureId assignment. + * Used by the CCL algorithm which handles label assignment separately. + * @param index First voxel index + * @param neighIndex Second voxel index + * @return true if the two voxels should be in the same feature + */ + virtual bool compare(int64 index, int64 neighIndex) + { + return false; + } }; + /** + * @brief Can this voxel be a feature member? (mask + phase check, NO featureId check) + * Default returns true (all voxels are valid). + * @param point Linear voxel index + * @return true if this voxel can participate in segmentation + */ + virtual bool isValidVoxel(int64 point) const; + + /** + * @brief Should these two adjacent voxels be in the same feature? (data comparison only) + * Default returns false (no voxels are similar). + * @param point1 First voxel index + * @param point2 Second voxel index + * @return true if the two voxels should be grouped together + */ + virtual bool areNeighborsSimilar(int64 point1, int64 point2) const; + protected: DataStructure& m_DataStructure; bool m_IsPeriodic = false; diff --git a/src/simplnx/Utilities/UnionFind.hpp b/src/simplnx/Utilities/UnionFind.hpp new file mode 100644 index 0000000000..6fe6247748 --- /dev/null +++ b/src/simplnx/Utilities/UnionFind.hpp @@ -0,0 +1,188 @@ +#pragma once + +#include "simplnx/simplnx_export.hpp" + +#include "simplnx/Common/Types.hpp" + +#include +#include + +namespace nx::core +{ + +/** + * @class UnionFind + * @brief Vector-based Union-Find (Disjoint Set) data structure for tracking + * connected component equivalences during chunk-sequential processing. + * + * Uses union-by-rank and path-halving compression for near-O(1) amortized + * find() and unite() operations. Internal storage uses contiguous vectors + * indexed by label for cache-friendly access (no hash map overhead). + * + * Key features: + * - Labels are contiguous integers starting from 1 (0 is unused/invalid) + * - Grows dynamically as new labels are encountered + * - Path halving in find() for near-O(1) amortized lookups + * - Union-by-rank for balanced merges + * - Accumulates sizes at each label during construction + * - Single-pass flatten() for full path compression and size accumulation + */ +class SIMPLNX_EXPORT UnionFind +{ +public: + UnionFind() + { + // Index 0 is unused (labels start at 1). Initialize with a small capacity. + constexpr usize k_InitialCapacity = 64; + m_Parent.resize(k_InitialCapacity); + m_Rank.resize(k_InitialCapacity, 0); + m_Size.resize(k_InitialCapacity, 0); + // Initialize all entries as self-parents + for(usize i = 0; i < k_InitialCapacity; i++) + { + m_Parent[i] = static_cast(i); + } + } + + ~UnionFind() = default; + + UnionFind(const UnionFind&) = delete; + UnionFind(UnionFind&&) noexcept = default; + UnionFind& operator=(const UnionFind&) = delete; + UnionFind& operator=(UnionFind&&) noexcept = default; + + /** + * @brief Find the root label with path-halving compression. + * Each node on the path is redirected to its grandparent, giving + * near-O(1) amortized performance. + * @param x Label to find + * @return Root label + */ + int64 find(int64 x) + { + ensureCapacity(x); + + // Path halving: point each node to its grandparent while walking + while(m_Parent[x] != x) + { + m_Parent[x] = m_Parent[m_Parent[x]]; + x = m_Parent[x]; + } + return x; + } + + /** + * @brief Unite two labels into the same equivalence class using union-by-rank. + * @param a First label + * @param b Second label + */ + void unite(int64 a, int64 b) + { + int64 rootA = find(a); + int64 rootB = find(b); + + if(rootA == rootB) + { + return; + } + + if(m_Rank[rootA] < m_Rank[rootB]) + { + m_Parent[rootA] = rootB; + } + else if(m_Rank[rootA] > m_Rank[rootB]) + { + m_Parent[rootB] = rootA; + } + else + { + m_Parent[rootB] = rootA; + m_Rank[rootA]++; + } + } + + /** + * @brief Add to the size count for a label. + * Sizes are accumulated at each label, not the root. They are + * accumulated to roots during flatten(). + * @param label Label to update + * @param count Number of voxels to add + */ + void addSize(int64 label, uint64 count) + { + ensureCapacity(label); + m_Size[label] += count; + } + + /** + * @brief Get the total size of a label's equivalence class. + * Should only be called after flatten() for accurate totals. + * @param label Label to query + * @return Total number of voxels in the equivalence class + */ + uint64 getSize(int64 label) + { + int64 root = find(label); + return m_Size[root]; + } + + /** + * @brief Flatten the union-find structure with full path compression + * and accumulate all sizes to root labels. + * + * After flatten(): + * - Every label points directly to its root + * - All sizes are accumulated at root labels + * - Subsequent find() calls are O(1) (single lookup) + */ + void flatten() + { + const usize count = m_Parent.size(); + + // Full path compression: point every label directly to its root + for(usize i = 1; i < count; i++) + { + m_Parent[i] = find(static_cast(i)); + } + + // Accumulate sizes to roots + std::vector rootSizes(count, 0); + for(usize i = 1; i < count; i++) + { + rootSizes[m_Parent[i]] += m_Size[i]; + } + m_Size = std::move(rootSizes); + } + +private: + /** + * @brief Ensure the internal vectors can hold index x. + * Grows by doubling to amortize allocation cost. + */ + void ensureCapacity(int64 x) + { + auto idx = static_cast(x); + if(idx < m_Parent.size()) + { + return; + } + + usize newSize = std::max(idx + 1, m_Parent.size() * 2); + usize oldSize = m_Parent.size(); + m_Parent.resize(newSize); + m_Rank.resize(newSize, 0); + m_Size.resize(newSize, 0); + + // Initialize new entries as self-parents + for(usize i = oldSize; i < newSize; i++) + { + m_Parent[i] = static_cast(i); + } + } + + std::vector m_Parent; + std::vector m_Rank; + std::vector m_Size; +}; + +} // namespace nx::core