diff --git a/pyproject.toml b/pyproject.toml
index 17be78f..b3e3a5c 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -3,80 +3,69 @@ requires = ["hatchling"]
 build-backend = "hatchling.build"
 
 [tool.hatch.build.targets.wheel]
-packages = ["src/DD_tools"]
+packages = ["src/TreeOfLife_toolbox"]
 
 [project]
-name = "DD_tools"
+name = "TreeOfLife_toolbox"
 dynamic = ["version"]
 authors = [
     { name = "Andrey Kopanev", email = "kopanev.1@osu.edu" },
     { name = "Elizabeth G. Campolongo", email = "e.campolongo479@gmail.com" },
     { name = "Matthew J. Thompson", email = "thompson.m.j@outlook.com" },
 ]
-description = "A tool for downloading files from a list of URLs in parallel."
+description = "A tool for processing datasets downloaded with the distributed-downloader package."
 readme = "README.md"
-requires-python = ">=3.8"
+requires-python = ">=3.10, <3.12"
 classifiers = [
+    "Development Status :: 4 - Beta",
     "Programming Language :: Python :: 3",
+    "Programming Language :: Python :: 3.10",
+    "Programming Language :: Python :: 3.11",
     "License :: OSI Approved :: MIT License",
     "Operating System :: OS Independent",
 ]
 dependencies = [
     "attrs",
     "brotli",
-    "certifi",
-    "charset-normalizer",
     "cramjam",
     "cython",
-    "exceptiongroup",
     "fsspec",
-    "hatchling",
-    "idna",
     "inflate64",
-    "iniconfig",
-    "mpi4py < 4",
+    "mpi4py",
     "multivolumefile",
-    "numpy",
     "opencv-python",
-    "packaging",
     "pandas",
     "pathspec",
     "pillow",
-    "pip",
-    "pluggy",
     "psutil",
-    "py4j",
     "pyarrow",
     "pybcj",
     "pycryptodomex",
     "pyppmd",
     "pyspark",
-    "pytest",
-    "python-dateutil",
     "python-dotenv",
-    "pytz",
     "pyyaml",
     "pyzstd",
     "requests",
     "setuptools",
-    "six",
     "texttable",
-    "tomli",
     "trove-classifiers",
     "typing-extensions",
-    "tzdata",
-    "urllib3",
     "wheel"
 ]
 
 [project.optional-dependencies]
-dev = ["pytest"]
+dev = [
+    "pytest",
+    "ruff"
+]
 
 keywords = [
     "parallel",
     "distributed",
-    "download",
     "url",
+    "mpi-applications",
+    "dataset-generation",
 ]
 
 [project.urls]
@@ -84,5 +73,8 @@ Homepage = "https://github.com/Imageomics/distributed-downloader"
 Repository = "https://github.com/Imageomics/distributed-downloader.git"
 "Bug Tracker" = "https://github.com/Imageomics/distributed-downloader/issues"
 
+[project.scripts]
+tree_of_life_toolbox = "TreeOfLife_toolbox.main.main:main"
+
 [tool.hatch.version]
-path = "src/DD_tools/main/__about__.py"
+path = "src/TreeOfLife_toolbox/main/__about__.py"
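The new `[project.scripts]` table maps a `tree_of_life_toolbox` console command to `TreeOfLife_toolbox.main.main:main`. A minimal sketch for checking that wiring after `pip install .` — not part of the patch, relying only on the stdlib `importlib.metadata` selectable entry-points API available on the new Python 3.10 floor:

```python
from importlib.metadata import entry_points

# Select console scripts by group (selectable entry points, Python 3.10+).
scripts = entry_points(group="console_scripts")
matches = [ep for ep in scripts if ep.name == "tree_of_life_toolbox"]

if matches:
    print(matches[0].value)   # expected: TreeOfLife_toolbox.main.main:main
    main = matches[0].load()  # imports the module and returns the callable
else:
    print("tree_of_life_toolbox is not installed in this environment")
```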
--output="${logs_dir}/tool_scheduler.log" \ - python "${REPO_ROOT}/src/distributed_downloader/tools/scheduler.py" "${tool_name}" + python "${TOOLBOX_PATH}/main/scheduler.py" "${tool_name}" diff --git a/scripts/tools_verifier.slurm b/scripts/tools_verifier.slurm index 98ca024..6a3b75e 100644 --- a/scripts/tools_verifier.slurm +++ b/scripts/tools_verifier.slurm @@ -19,7 +19,6 @@ module load miniconda3/23.3.1-py310 source "${REPO_ROOT}/.venv/bin/activate" export PYARROW_IGNORE_TIMEZONE=1 export I_MPI_JOB_RESPECT_PROCESS_PLACEMENT=0 -export PYTHONPATH=${PYTHONPATH}:"${REPO_ROOT}/src":"${REPO_ROOT}/distributed-downloader" srun \ --mpi=pmi2 \ @@ -28,4 +27,4 @@ srun \ --cpus-per-task=1 \ --mem=0 \ --output="${logs_dir}/tool_verifier.log" \ - python "${REPO_ROOT}/src/distributed_downloader/tools/verification.py" "${tool_name}" + python "${TOOLBOX_PATH}/main/verification.py" "${tool_name}" diff --git a/scripts/tools_worker.slurm b/scripts/tools_worker.slurm index 2ee2662..4856e62 100644 --- a/scripts/tools_worker.slurm +++ b/scripts/tools_worker.slurm @@ -19,7 +19,6 @@ module load miniconda3/23.3.1-py310 source "${REPO_ROOT}/.venv/bin/activate" export PYARROW_IGNORE_TIMEZONE=1 export I_MPI_JOB_RESPECT_PROCESS_PLACEMENT=0 -export PYTHONPATH=${PYTHONPATH}:"${REPO_ROOT}/src":"${REPO_ROOT}/distributed-downloader" srun \ --mpi=pmi2 \ @@ -28,4 +27,4 @@ srun \ --cpus-per-task="$TOOLS_CPU_PER_WORKER" \ --mem=0 \ --output="${logs_dir}/tool_worker-%2t.log" \ - python "${REPO_ROOT}/src/distributed_downloader/tools/runner.py" "${tool_name}" + python "${TOOLBOX_PATH}/main/runner.py" "${tool_name}" diff --git a/src/DD_tools/__init__.py b/src/DD_tools/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/src/TreeOfLife_toolbox/__init__.py b/src/TreeOfLife_toolbox/__init__.py new file mode 100644 index 0000000..6d5b1f3 --- /dev/null +++ b/src/TreeOfLife_toolbox/__init__.py @@ -0,0 +1 @@ +from TreeOfLife_toolbox import lila_extra_noaa_processing diff --git a/src/TreeOfLife_toolbox/lila_extra_noaa_processing/README.md b/src/TreeOfLife_toolbox/lila_extra_noaa_processing/README.md new file mode 100644 index 0000000..15aa4d1 --- /dev/null +++ b/src/TreeOfLife_toolbox/lila_extra_noaa_processing/README.md @@ -0,0 +1,53 @@ +# LILA Extra NOAA Processing + +## Overview + +This tool processes the LILA NOAA (National Oceanic and Atmospheric Administration) dataset and converts it into the +standardized `TreeOfLife-toolbox` format compatible with the distributed-downloader ecosystem. The tool performs the +following key operations: + +1. **Filtering**: Loads the NOAA dataset, standardizes column names, generates UUIDs, and partitions the data +2. **Scheduling**: Creates a processing schedule to distribute work across compute resources +3. **Processing**: Loads and crops images according to bounding box coordinates, computes hash values, and saves + processed data in parquet format + +The tool was **specifically** developed to convert LILA NOAA dataset into `distributed-downloader` format. It is not +going to work on anything else. + +## Configuration Requirements + +### Required Fields in Config + +- `og_images_root`: Path to the root directory of the NOAA images (absolute path) + +## Assumptions/Pre-conditions + +- The NOAA images are available in the `og_images_root` directory. 
diff --git a/src/DD_tools/__init__.py b/src/DD_tools/__init__.py
deleted file mode 100644
index e69de29..0000000
diff --git a/src/TreeOfLife_toolbox/__init__.py b/src/TreeOfLife_toolbox/__init__.py
new file mode 100644
index 0000000..6d5b1f3
--- /dev/null
+++ b/src/TreeOfLife_toolbox/__init__.py
@@ -0,0 +1 @@
+from TreeOfLife_toolbox import lila_extra_noaa_processing
diff --git a/src/TreeOfLife_toolbox/lila_extra_noaa_processing/README.md b/src/TreeOfLife_toolbox/lila_extra_noaa_processing/README.md
new file mode 100644
index 0000000..15aa4d1
--- /dev/null
+++ b/src/TreeOfLife_toolbox/lila_extra_noaa_processing/README.md
@@ -0,0 +1,53 @@
+# LILA Extra NOAA Processing
+
+## Overview
+
+This tool processes the LILA NOAA (National Oceanic and Atmospheric Administration) dataset and converts it into the
+standardized `TreeOfLife-toolbox` format compatible with the distributed-downloader ecosystem. The tool performs the
+following key operations:
+
+1. **Filtering**: Loads the NOAA dataset, standardizes column names, generates UUIDs, and partitions the data
+2. **Scheduling**: Creates a processing schedule to distribute work across compute resources
+3. **Processing**: Loads and crops images according to bounding box coordinates, computes hash values, and saves
+   processed data in parquet format
+
+The tool was developed **specifically** to convert the LILA NOAA dataset into the `distributed-downloader` format;
+it will not work on any other dataset.
+
+## Configuration Requirements
+
+### Required Fields in Config
+
+- `og_images_root`: Absolute path to the root directory of the NOAA images
+
+## Assumptions/Pre-conditions
+
+- The NOAA images are available in the `og_images_root` directory.
+- The input CSV file contains the following columns:
+    - `detection_id`: Unique identifier for each detection
+    - `detection_type`: Life stage of the detected organism
+    - `rgb_image_path`: Relative path to the image from the root directory
+    - `rgb_left`, `rgb_right`, `rgb_top`, `rgb_bottom`: Bounding box coordinates for cropping
+- The paths in `rgb_image_path` are relative to the `og_images_root` directory.
+
+## Post-conditions
+
+After successful execution, the following is guaranteed:
+
+1. The processed data is available in the configured output directory with the structure:
+   ```
+   {images_folder}/server_name=noaa/partition_id={id}/successes.parquet
+   ```
+
+2. Each parquet file contains:
+   - `uuid`: Unique identifier for each entry
+   - `source_id`: Original detection ID
+   - `identifier`: Full path to the original image
+   - `is_license_full`: Set to False (NOAA data does not include license information)
+   - `original_size`: Dimensions of the original image
+   - `resized_size`: Dimensions of the cropped image
+   - `hashsum_original`: MD5 hash of the original image
+   - `hashsum_resized`: MD5 hash of the cropped image
+   - `image`: Binary data of the cropped image
+
+3. The verification tables confirm the completion of processing for all partitions.
diff --git a/src/TreeOfLife_toolbox/lila_extra_noaa_processing/__init__.py b/src/TreeOfLife_toolbox/lila_extra_noaa_processing/__init__.py
new file mode 100644
index 0000000..eff7455
--- /dev/null
+++ b/src/TreeOfLife_toolbox/lila_extra_noaa_processing/__init__.py
@@ -0,0 +1,5 @@
+from .classes import (
+    LilaExtraNoaaScheduleCreation,
+    LilaExtraNoaaFilter,
+    LilaExtraNoaaRunner,
+)
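The `image` column described in the post-conditions holds raw `ndarray` bytes (`cropped.tobytes()` in `classes.py` below), so consumers need `resized_size` plus an assumed `uint8`, 3-channel BGR layout (OpenCV's default) to rebuild pixels. A hedged reader sketch — the file path is illustrative and the dtype/channel assumptions are not stated in the README:

```python
import numpy as np
import pandas as pd

# Hypothetical path following the documented output layout.
df = pd.read_parquet("images/server_name=noaa/partition_id=0/successes.parquet")

row = df.iloc[0]
height, width = row["resized_size"]  # (height, width) of the cropped image
# Rebuild the BGR array from the raw bytes stored in the `image` column.
crop = np.frombuffer(row["image"], dtype=np.uint8).reshape(height, width, 3)
print(row["uuid"], crop.shape, row["hashsum_resized"])
```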
diff --git a/src/TreeOfLife_toolbox/lila_extra_noaa_processing/classes.py b/src/TreeOfLife_toolbox/lila_extra_noaa_processing/classes.py
new file mode 100644
index 0000000..506d9e1
--- /dev/null
+++ b/src/TreeOfLife_toolbox/lila_extra_noaa_processing/classes.py
@@ -0,0 +1,293 @@
+import hashlib
+import os
+import uuid
+from typing import List
+
+import cv2
+import pandas as pd
+import pyspark.sql.functions as F
+from pyspark.sql import SparkSession, Window
+from pyspark.sql.functions import udf
+from pyspark.sql.types import StringType
+
+from TreeOfLife_toolbox.main.config import Config
+from TreeOfLife_toolbox.main.filters import FilterRegister, SparkFilterToolBase
+from TreeOfLife_toolbox.main.runners import MPIRunnerTool, RunnerRegister
+from TreeOfLife_toolbox.main.schedulers import DefaultScheduler, SchedulerRegister
+from TreeOfLife_toolbox.main.utils import load_dataframe
+
+
+@udf(returnType=StringType())
+def get_uuid():
+    """
+    Generate a random UUID string for uniquely identifying each image entry.
+
+    Returns:
+        str: A string representation of a random UUID.
+    """
+    return str(uuid.uuid4())
+
+
+@FilterRegister("lila_extra_noaa_processing")
+class LilaExtraNoaaFilter(SparkFilterToolBase):
+    """
+    Filter to process the LILA NOAA dataset and prepare it for further processing.
+
+    This filter loads data from the input CSV file, standardizes column names,
+    generates unique identifiers, and splits the data into batches for parallel processing.
+
+    Attributes:
+        filter_name (str): Name of the filter, used as identifier in the toolbox.
+        og_images_root (str): Root path to the original NOAA images, read from config.
+    """
+
+    def __init__(self, cfg: Config, spark: SparkSession = None):
+        """
+        Initialize the LilaExtraNoaaFilter with configuration and Spark session.
+
+        Args:
+            cfg (Config): Configuration object containing parameters for the filter.
+            spark (SparkSession, optional): Existing SparkSession to use. If None, a new one will be created.
+        """
+        super().__init__(cfg, spark)
+        self.filter_name: str = "lila_extra_noaa_processing"
+        self.og_images_root = self.config["og_images_root"]
+
+    def run(self):
+        """
+        Execute the filtering process on the LILA NOAA dataset.
+
+        This method:
+        1. Loads the input dataset and renames columns to match the standard format
+        2. Prepends the image root path to the identifier
+        3. Sets server name to 'noaa'
+        4. Generates unique UUIDs for each row
+        5. Splits the dataset into batches (partitions)
+        6. Saves the filtered data for downstream processing
+        """
+        # Load the multimedia dataframe and standardize column names
+        multimedia_df = (
+            load_dataframe(self.spark, self.config["path_to_input"])
+            .repartition(20)
+            .withColumnsRenamed(
+                {
+                    "detection_id": "source_id",
+                    "detection_type": "life_stage",
+                    "rgb_left": "left",
+                    "rgb_right": "right",
+                    "rgb_top": "top",
+                    "rgb_bottom": "bottom",
+                }
+            )
+        )
+
+        # Construct full image paths
+        multimedia_df_prep = multimedia_df.withColumn(
+            "identifier", F.concat(F.lit(self.og_images_root), F.col("rgb_image_path"))
+        )
+
+        # Set server name and generate UUID for each row
+        multimedia_df_prep = multimedia_df_prep.withColumn("server_name", F.lit("noaa"))
+        multimedia_df_prep = multimedia_df_prep.withColumn("uuid", get_uuid())
+
+        columns = multimedia_df_prep.columns
+
+        self.logger.info("Starting batching")
+
+        # Group by server name and calculate batch counts
+        servers_grouped = (
+            multimedia_df_prep.select("server_name")
+            .groupBy("server_name")
+            .count()
+            .withColumn(
+                "batch_count",
+                F.floor(
+                    F.col("count") / self.config["downloader_parameters"]["batch_size"]
+                ),
+            )
+        )
+
+        # Partition the dataset
+        window_part = Window.partitionBy("server_name").orderBy("server_name")
+        master_df_filtered = (
+            multimedia_df_prep.withColumn(
+                "row_number", F.row_number().over(window_part)
+            )
+            .join(servers_grouped, ["server_name"])
+            .withColumn("partition_id", F.col("row_number") % F.col("batch_count"))
+            .withColumn(
+                "partition_id",
+                (
+                    F.when(F.col("partition_id").isNull(), 0).otherwise(
+                        F.col("partition_id")
+                    )
+                ),
+            )
+            .select(*columns, "partition_id")
+        )
+
+        self.logger.info("Writing to parquet")
+
+        # Write partitioned data as parquet files
+        (
+            master_df_filtered.repartition("server_name", "partition_id")
+            .write.partitionBy("server_name", "partition_id")
+            .mode("overwrite")
+            .format("parquet")
+            .save(self.urls_path)
+        )
+
+        # Prepare the filter table with selected columns
+        filtered_df = master_df_filtered.select(
+            "uuid",
+            "source_id",
+            "identifier",
+            "left",
+            "right",
+            "top",
+            "bottom",
+            "server_name",
+            "partition_id",
+        )
+
+        # Save filter table for scheduler
+        self.save_filter(filtered_df)
+
+        self.logger.info("Finished batching")
+        self.logger.info(f"Total rows in filter table: {master_df_filtered.count()}")
+
+
+@SchedulerRegister("lila_extra_noaa_processing")
+class LilaExtraNoaaScheduleCreation(DefaultScheduler):
+    """
+    Scheduler for the LILA NOAA processing pipeline.
+
+    This scheduler leverages the default scheduling mechanism with a specific filter name.
+    It creates a schedule for distributing the workload across available workers.
+    """
+
+    def __init__(self, cfg: Config):
+        """
+        Initialize the scheduler with configuration.
+
+        Args:
+            cfg (Config): Configuration object containing parameters for scheduling.
+        """
+        super().__init__(cfg)
+
+        self.filter_name: str = "lila_extra_noaa_processing"
+
+
+@RunnerRegister("lila_extra_noaa_processing")
+class LilaExtraNoaaRunner(MPIRunnerTool):
+    """
+    MPI-based runner for processing LILA NOAA images.
+
+    This runner processes the images according to the schedule created by the scheduler,
+    cropping images based on bounding box coordinates and saving them as parquet files.
+
+    Attributes:
+        filter_name (str): Name of the filter, matching the one used in the registry.
+        data_scheme (List[str]): Column names for data processing.
+        verification_scheme (List[str]): Column names for schedule verification.
+        total_time (int): Maximum processing time in seconds before timeout.
+    """
+
+    def __init__(self, cfg: Config):
+        """
+        Initialize the LILA NOAA runner.
+
+        Args:
+            cfg (Config): Configuration object containing parameters for the runner.
+        """
+        super().__init__(cfg)
+
+        self.filter_name: str = "lila_extra_noaa_processing"
+        self.data_scheme: List[str] = [
+            "uuid",
+            "source_id",
+            "identifier",
+            "left",
+            "right",
+            "top",
+            "bottom",
+            "server_name",
+            "partition_id",
+        ]
+        self.verification_scheme: List[str] = ["server_name", "partition_id"]
+        self.total_time = 1000
+
+    def apply_filter(
+        self, filtering_df: pd.DataFrame, server_name: str, partition_id: str
+    ) -> int:
+        """
+        Process a batch of images from the LILA NOAA dataset.
+
+        For each image in the batch:
+        1. Loads the original image from the provided path
+        2. Crops the image according to the bounding box coordinates
+        3. Computes hash values for both original and cropped images
+        4. Saves the processed data as a parquet file
+
+        Args:
+            filtering_df (pd.DataFrame): DataFrame containing images to process
+            server_name (str): Name of the server (always 'noaa' for this dataset)
+            partition_id (str): ID of the partition being processed
+
+        Returns:
+            int: Number of successfully processed images
+
+        Raises:
+            TimeoutError: If processing exceeds the allocated time
+        """
+        self.is_enough_time()
+
+        # Create output directory for this partition
+        parquet_folder_path = os.path.join(
+            self.downloaded_images_path,
+            f"server_name={server_name}",
+            f"partition_id={partition_id}",
+        )
+        os.makedirs(parquet_folder_path, exist_ok=True)
+        parquet_path = os.path.join(parquet_folder_path, "successes.parquet")
+
+        images = []
+        for _, row in filtering_df.iterrows():
+            # Initialize an entry for the current image
+            new_entry = {
+                "uuid": row["uuid"],
+                "source_id": row["source_id"],
+                "identifier": row["identifier"],
+                "is_license_full": False,
+                "license": None,
+                "source": None,
+                "title": None,
+                "original_size": "",
+                "resized_size": "",
+                "hashsum_original": "",
+                "hashsum_resized": "",
+                "image": "",
+            }
+
+            # Load and crop the image according to bounding box coordinates
+            image = cv2.imread(row["identifier"])
+            cropped = image[row["bottom"]: row["top"], row["left"]: row["right"]]
+            cropped_binary = cropped.tobytes()
+
+            # Set additional metadata for the image
+            new_entry["original_size"] = image.shape[:2]
+            new_entry["resized_size"] = cropped.shape[:2]
+            new_entry["hashsum_original"] = hashlib.md5(image.tobytes()).hexdigest()
+            new_entry["hashsum_resized"] = hashlib.md5(cropped_binary).hexdigest()
+            new_entry["image"] = cropped_binary
+
+            images.append(new_entry)
+
+        # Create DataFrame from processed images
+        filtered_parquet = pd.DataFrame(images)
+
+        self.is_enough_time()
+
+        # Save processed images as parquet file
+        filtered_parquet.to_parquet(
+            parquet_path, index=False, compression="zstd", compression_level=3
+        )
+
+        return len(filtered_parquet)
diff --git a/src/DD_tools/main/__about__.py b/src/TreeOfLife_toolbox/main/__about__.py
similarity index 100%
rename from src/DD_tools/main/__about__.py
rename to src/TreeOfLife_toolbox/main/__about__.py
diff --git a/src/DD_tools/main/checkpoint.py b/src/TreeOfLife_toolbox/main/checkpoint.py
similarity index 100%
rename from src/DD_tools/main/checkpoint.py
rename to src/TreeOfLife_toolbox/main/checkpoint.py
diff --git a/src/DD_tools/main/config.py b/src/TreeOfLife_toolbox/main/config.py
similarity index 100%
rename from src/DD_tools/main/config.py
rename to src/TreeOfLife_toolbox/main/config.py
diff --git a/src/DD_tools/main/config_templates/tools.yaml b/src/TreeOfLife_toolbox/main/config_templates/tools.yaml
similarity index 100%
rename from src/DD_tools/main/config_templates/tools.yaml
rename to src/TreeOfLife_toolbox/main/config_templates/tools.yaml
diff --git a/src/DD_tools/main/filter.py b/src/TreeOfLife_toolbox/main/filter.py
similarity index 85%
rename from src/DD_tools/main/filter.py
rename to src/TreeOfLife_toolbox/main/filter.py
index 080e1a2..ed526c5 100644
--- a/src/DD_tools/main/filter.py
+++ b/src/TreeOfLife_toolbox/main/filter.py
@@ -1,10 +1,10 @@
 import argparse
 import os
 
-from DD_tools.main.checkpoint import Checkpoint
-from DD_tools.main.config import Config
-from DD_tools.main.registry import ToolsRegistryBase
-from DD_tools.main.utils import init_logger
+from TreeOfLife_toolbox.main.checkpoint import Checkpoint
+from TreeOfLife_toolbox.main.config import Config
+from TreeOfLife_toolbox.main.registry import ToolsRegistryBase
+from TreeOfLife_toolbox.main.utils import init_logger
 
 if __name__ == "__main__":
     config_path = os.environ.get("CONFIG_PATH")
diff --git a/src/DD_tools/main/filters.py b/src/TreeOfLife_toolbox/main/filters.py
similarity index 93%
rename from src/DD_tools/main/filters.py
rename to src/TreeOfLife_toolbox/main/filters.py
index 11c9426..385f18e 100644
--- a/src/DD_tools/main/filters.py
+++ b/src/TreeOfLife_toolbox/main/filters.py
@@ -7,10 +7,10 @@
 from pyspark.sql import SparkSession
 from pyspark.sql.types import StructType
 
-from DD_tools.main.config import Config
-from DD_tools.main.registry import ToolsBase
-from DD_tools.main.registry import ToolsRegistryBase
-from DD_tools.main.utils import SuccessEntry
+from TreeOfLife_toolbox.main.config import Config
+from TreeOfLife_toolbox.main.registry import ToolsBase
+from TreeOfLife_toolbox.main.registry import ToolsRegistryBase
+from TreeOfLife_toolbox.main.utils import SuccessEntry
 
 FilterRegister = partial(ToolsRegistryBase.register, "filter")
diff --git a/src/DD_tools/main/main.py b/src/TreeOfLife_toolbox/main/main.py
similarity index 96%
rename from src/DD_tools/main/main.py
rename to src/TreeOfLife_toolbox/main/main.py
index b3d5732..5272354 100644
--- a/src/DD_tools/main/main.py
+++ b/src/TreeOfLife_toolbox/main/main.py
@@ -1,15 +1,16 @@
 import argparse
 import os
 from logging import Logger
+from pathlib import Path
 from typing import Dict, List, Optional, TextIO, Tuple
 
 import pandas as pd
 from attr import Factory, define, field
 
-from DD_tools.main.checkpoint import Checkpoint
-from DD_tools.main.config import Config
-from DD_tools.main.registry import ToolsRegistryBase
-from DD_tools.main.utils import (
+from TreeOfLife_toolbox.main.checkpoint import Checkpoint
+from TreeOfLife_toolbox.main.config import Config
+from TreeOfLife_toolbox.main.registry import ToolsRegistryBase
+from TreeOfLife_toolbox.main.utils import (
     init_logger,
     ensure_created,
     truncate_paths,
@@ -78,6 +79,7 @@ def __attrs_post_init__(self):
     def __init_environment(self) -> None:
         os.environ["CONFIG_PATH"] = self.config.config_path
+        os.environ["TOOLBOX_PATH"] = str(Path(__file__).parent.parent.resolve())
 
         os.environ["ACCOUNT"] = self.config["account"]
         os.environ["PATH_TO_INPUT"] = self.config["path_to_input"]
diff --git a/src/DD_tools/main/registry.py b/src/TreeOfLife_toolbox/main/registry.py
similarity index 94%
rename from src/DD_tools/main/registry.py
rename to src/TreeOfLife_toolbox/main/registry.py
index 12774dd..03cf9d6 100644
--- a/src/DD_tools/main/registry.py
+++ b/src/TreeOfLife_toolbox/main/registry.py
@@ -1,7 +1,7 @@
 from typing import Dict, Type, Optional
 
-from DD_tools.main.config import Config
-from DD_tools.main.utils import init_logger
+from TreeOfLife_toolbox.main.config import Config
+from TreeOfLife_toolbox.main.utils import init_logger
 
 
 class ToolsRegistryBase(type):
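`FilterRegister = partial(ToolsRegistryBase.register, "filter")` in `filters.py` (and its runner/scheduler counterparts below) implies a registry keyed by stage and tool name. `ToolsRegistryBase` itself is only renamed in this diff, so its internals are not visible here; the following is a hedged reconstruction of the pattern, not the actual implementation:

```python
from functools import partial

class ToolsRegistry:
    """Hypothetical stand-in for ToolsRegistryBase's registration mechanics."""

    TOOLS: dict[str, dict[str, type]] = {}

    @classmethod
    def register(cls, stage: str, name: str):
        def decorator(tool_cls: type) -> type:
            # File one class per (stage, tool-name) pair, e.g. ("filter", "lila_...").
            cls.TOOLS.setdefault(stage, {})[name] = tool_cls
            return tool_cls
        return decorator

# The same partial trick the toolbox uses to specialize one decorator per stage.
FilterRegister = partial(ToolsRegistry.register, "filter")

@FilterRegister("lila_extra_noaa_processing")
class DemoFilter:
    pass

print(ToolsRegistry.TOOLS["filter"]["lila_extra_noaa_processing"])  # DemoFilter
```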
diff --git a/src/DD_tools/main/runner.py b/src/TreeOfLife_toolbox/main/runner.py
similarity index 84%
rename from src/DD_tools/main/runner.py
rename to src/TreeOfLife_toolbox/main/runner.py
index 214237e..77dcefa 100644
--- a/src/DD_tools/main/runner.py
+++ b/src/TreeOfLife_toolbox/main/runner.py
@@ -1,10 +1,10 @@
 import argparse
 import os
 
-from DD_tools.main.checkpoint import Checkpoint
-from DD_tools.main.config import Config
-from DD_tools.main.registry import ToolsRegistryBase
-from DD_tools.main.utils import init_logger
+from TreeOfLife_toolbox.main.checkpoint import Checkpoint
+from TreeOfLife_toolbox.main.config import Config
+from TreeOfLife_toolbox.main.registry import ToolsRegistryBase
+from TreeOfLife_toolbox.main.utils import init_logger
 
 if __name__ == "__main__":
     config_path = os.environ.get("CONFIG_PATH")
diff --git a/src/DD_tools/main/runners.py b/src/TreeOfLife_toolbox/main/runners.py
similarity index 98%
rename from src/DD_tools/main/runners.py
rename to src/TreeOfLife_toolbox/main/runners.py
index cd875d3..bfb5d5e 100644
--- a/src/DD_tools/main/runners.py
+++ b/src/TreeOfLife_toolbox/main/runners.py
@@ -6,8 +6,8 @@
 
 import pandas as pd
 
-from DD_tools.main.config import Config
-from DD_tools.main.registry import ToolsBase, ToolsRegistryBase
+from TreeOfLife_toolbox.main.config import Config
+from TreeOfLife_toolbox.main.registry import ToolsBase, ToolsRegistryBase
 
 RunnerRegister = partial(ToolsRegistryBase.register, "runner")
diff --git a/src/DD_tools/main/scheduler.py b/src/TreeOfLife_toolbox/main/scheduler.py
similarity index 87%
rename from src/DD_tools/main/scheduler.py
rename to src/TreeOfLife_toolbox/main/scheduler.py
index 707b656..d686ae6 100644
--- a/src/DD_tools/main/scheduler.py
+++ b/src/TreeOfLife_toolbox/main/scheduler.py
@@ -1,10 +1,10 @@
 import argparse
 import os
 
-from DD_tools.main.checkpoint import Checkpoint
-from DD_tools.main.config import Config
-from DD_tools.main.registry import ToolsRegistryBase
-from DD_tools.main.utils import init_logger
+from TreeOfLife_toolbox.main.checkpoint import Checkpoint
+from TreeOfLife_toolbox.main.config import Config
+from TreeOfLife_toolbox.main.registry import ToolsRegistryBase
+from TreeOfLife_toolbox.main.utils import init_logger
 
 if __name__ == "__main__":
     config_path = os.environ.get("CONFIG_PATH")
diff --git a/src/DD_tools/main/schedulers.py b/src/TreeOfLife_toolbox/main/schedulers.py
similarity index 90%
rename from src/DD_tools/main/schedulers.py
rename to src/TreeOfLife_toolbox/main/schedulers.py
index ed70a9c..6b2c6e2 100644
--- a/src/DD_tools/main/schedulers.py
+++ b/src/TreeOfLife_toolbox/main/schedulers.py
@@ -5,8 +5,8 @@
 
 import pandas as pd
 
-from DD_tools.main.config import Config
-from DD_tools.main.registry import ToolsBase, ToolsRegistryBase
+from TreeOfLife_toolbox.main.config import Config
+from TreeOfLife_toolbox.main.registry import ToolsBase, ToolsRegistryBase
 
 SchedulerRegister = partial(ToolsRegistryBase.register, "scheduler")
diff --git a/src/DD_tools/main/utils.py b/src/TreeOfLife_toolbox/main/utils.py
similarity index 100%
rename from src/DD_tools/main/utils.py
rename to src/TreeOfLife_toolbox/main/utils.py
diff --git a/src/DD_tools/main/verification.py b/src/TreeOfLife_toolbox/main/verification.py
similarity index 85%
rename from src/DD_tools/main/verification.py
rename to src/TreeOfLife_toolbox/main/verification.py
index 742bb86..31d2561 100644
--- a/src/DD_tools/main/verification.py
+++ b/src/TreeOfLife_toolbox/main/verification.py
@@ -3,11 +3,11 @@
 
 import pandas as pd
 
-from DD_tools.main.checkpoint import Checkpoint
-from DD_tools.main.config import Config
-from DD_tools.main.registry import ToolsRegistryBase
-from DD_tools.main.runners import MPIRunnerTool
-from DD_tools.main.utils import init_logger
+from TreeOfLife_toolbox.main.checkpoint import Checkpoint
+from TreeOfLife_toolbox.main.config import Config
+from TreeOfLife_toolbox.main.registry import ToolsRegistryBase
+from TreeOfLife_toolbox.main.runners import MPIRunnerTool
+from TreeOfLife_toolbox.main.utils import init_logger
 
 if __name__ == "__main__":
     config_path = os.environ.get("CONFIG_PATH")
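Taken together, `lila_extra_noaa_processing` doubles as the extension recipe for this package: one module registers a filter, a scheduler, and a runner under a shared tool name and re-exports them from its `__init__.py`. A skeleton for a hypothetical new tool following the same shape (names are placeholders, bodies elided):

```python
from TreeOfLife_toolbox.main.config import Config
from TreeOfLife_toolbox.main.filters import FilterRegister, SparkFilterToolBase
from TreeOfLife_toolbox.main.runners import MPIRunnerTool, RunnerRegister
from TreeOfLife_toolbox.main.schedulers import DefaultScheduler, SchedulerRegister

TOOL_NAME = "my_new_tool"  # placeholder


@FilterRegister(TOOL_NAME)
class MyNewToolFilter(SparkFilterToolBase):
    def __init__(self, cfg: Config, spark=None):
        super().__init__(cfg, spark)
        self.filter_name: str = TOOL_NAME

    def run(self):
        ...  # build and save the filter table, as LilaExtraNoaaFilter.run() does


@SchedulerRegister(TOOL_NAME)
class MyNewToolScheduler(DefaultScheduler):
    def __init__(self, cfg: Config):
        super().__init__(cfg)
        self.filter_name: str = TOOL_NAME


@RunnerRegister(TOOL_NAME)
class MyNewToolRunner(MPIRunnerTool):
    def __init__(self, cfg: Config):
        super().__init__(cfg)
        self.filter_name: str = TOOL_NAME
        self.verification_scheme = ["server_name", "partition_id"]

    def apply_filter(self, filtering_df, server_name, partition_id) -> int:
        ...  # process one partition and return the number of rows handled
```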