From 78ca86deea5333a9fbe62eb2cce7e3b523d5e6d0 Mon Sep 17 00:00:00 2001 From: ascibisz Date: Wed, 22 Oct 2025 13:32:55 -0700 Subject: [PATCH 01/28] add upload script --- cellpack/bin/upload_to_client.py | 51 ++++++++++++++++++++++++++++++++ 1 file changed, 51 insertions(+) create mode 100644 cellpack/bin/upload_to_client.py diff --git a/cellpack/bin/upload_to_client.py b/cellpack/bin/upload_to_client.py new file mode 100644 index 00000000..93d94f3a --- /dev/null +++ b/cellpack/bin/upload_to_client.py @@ -0,0 +1,51 @@ +import json +import fire + +from cellpack.autopack.FirebaseHandler import FirebaseHandler +from cellpack.autopack.DBRecipeHandler import DBUploader +from cellpack.autopack.loaders.config_loader import ConfigLoader +from cellpack.autopack.loaders.recipe_loader import RecipeLoader +from cellpack.bin.upload import get_recipe_metadata + +def upload_to_client( + recipe: str, + config: str, + fields: str, + name: str +): + """ + Uploads recipe, config, and editable fields, read from specified + JSON files, to the database for client access + """ + db_handler = FirebaseHandler() + recipe_id = "" + config_id = "" + editable_fields_ids = [] + if FirebaseHandler._initialized: + db_handler = DBUploader(db_handler) + if recipe: + recipe_loader = RecipeLoader(recipe) + recipe_full_data = recipe_loader._read(resolve_inheritance=False) + recipe_meta_data = get_recipe_metadata(recipe_loader) + recipe_id = db_handler.upload_recipe(recipe_meta_data, recipe_full_data) + if config: + config_data = ConfigLoader(config).config + config_id = db_handler.upload_config(config_data, config) + if fields: + editable_fields_data = json.load(open(fields, "r")) + for field in editable_fields_data.get("editable_fields", []): + id, _ = db_handler.upload_data("editable_fields", field) + editable_fields_ids.append(id) + recipe_metadata = { + "name": name, + "recipe": recipe_id, + "config": config_id, + "editable_fields": editable_fields_ids, + } + db_handler.upload_data("client_recipes", 
recipe_metadata) + +def main(): + fire.Fire(upload_to_client) + +if __name__ == "__main__": + main() From 78b9b0a756443d7aa07d359ba63225fdfdb999a6 Mon Sep 17 00:00:00 2001 From: ascibisz Date: Wed, 22 Oct 2025 13:40:00 -0700 Subject: [PATCH 02/28] add example data and more documentation --- cellpack/bin/upload_to_client.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/cellpack/bin/upload_to_client.py b/cellpack/bin/upload_to_client.py index 93d94f3a..9536defc 100644 --- a/cellpack/bin/upload_to_client.py +++ b/cellpack/bin/upload_to_client.py @@ -16,6 +16,15 @@ def upload_to_client( """ Uploads recipe, config, and editable fields, read from specified JSON files, to the database for client access + + :param recipe: string argument + path to local recipe file to upload to firebase + :param config: string argument + path to local config file to upload to firebase + :param fields: string argument + path to local editable fields file to upload to firebase + :param name: string argument + display name for recipe in client selection menu """ db_handler = FirebaseHandler() recipe_id = "" From a9b056bc0919bfacd7bfb3c3cb29f4c912fa996e Mon Sep 17 00:00:00 2001 From: ascibisz Date: Wed, 22 Oct 2025 13:42:16 -0700 Subject: [PATCH 03/28] point to correct collection --- cellpack/bin/upload_to_client.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/cellpack/bin/upload_to_client.py b/cellpack/bin/upload_to_client.py index 9536defc..f778aa53 100644 --- a/cellpack/bin/upload_to_client.py +++ b/cellpack/bin/upload_to_client.py @@ -51,7 +51,9 @@ def upload_to_client( "config": config_id, "editable_fields": editable_fields_ids, } - db_handler.upload_data("client_recipes", recipe_metadata) + + # Upload the combined recipe metadata to example_packings collection for client + db_handler.upload_data("example_packings", recipe_metadata) def main(): fire.Fire(upload_to_client) From f5f7a6910a91aea4089ff861264dea6127c24c4c Mon Sep 17 00:00:00 2001 From: 
ascibisz Date: Wed, 22 Oct 2025 14:29:03 -0700 Subject: [PATCH 04/28] have server accept recipe as json object in body of request --- cellpack/autopack/__init__.py | 8 +++++++- cellpack/autopack/loaders/recipe_loader.py | 5 +++-- cellpack/bin/pack.py | 4 ++-- docker/server.py | 14 +++++++++----- 4 files changed, 21 insertions(+), 10 deletions(-) diff --git a/cellpack/autopack/__init__.py b/cellpack/autopack/__init__.py index 2219525b..4bfd996e 100755 --- a/cellpack/autopack/__init__.py +++ b/cellpack/autopack/__init__.py @@ -411,8 +411,14 @@ def read_text_file(filename, destination="", cache="collisionTrees", force=None) def load_file( - filename, destination="", cache="geometries", force=None, use_docker=False + filename, destination="", cache="geometries", force=None, use_docker=False, recipe_obj=None ): + if recipe_obj is not None: + composition = DBRecipeLoader.remove_empty( + recipe_obj.get("composition", {}) + ) + recipe_obj["composition"] = composition + return recipe_obj, None, False if is_remote_path(filename): database_name, file_path = convert_db_shortname_to_url(filename) if database_name == DATABASE_IDS.GITHUB: diff --git a/cellpack/autopack/loaders/recipe_loader.py b/cellpack/autopack/loaders/recipe_loader.py index 84cd78ac..4b2930c8 100644 --- a/cellpack/autopack/loaders/recipe_loader.py +++ b/cellpack/autopack/loaders/recipe_loader.py @@ -30,7 +30,7 @@ class RecipeLoader(object): # TODO: add all default values here default_values = default_recipe_values.copy() - def __init__(self, input_file_path, save_converted_recipe=False, use_docker=False): + def __init__(self, input_file_path, save_converted_recipe=False, use_docker=False, recipe_obj=None): _, file_extension = os.path.splitext(input_file_path) self.current_version = CURRENT_VERSION self.file_path = input_file_path @@ -38,6 +38,7 @@ def __init__(self, input_file_path, save_converted_recipe=False, use_docker=Fals self.ingredient_list = [] self.compartment_list = [] self.save_converted_recipe = 
save_converted_recipe + self.recipe_obj = recipe_obj # set CURRENT_RECIPE_PATH appropriately for remote(firebase) vs local recipes if autopack.is_remote_path(self.file_path): @@ -169,7 +170,7 @@ def _migrate_version(self, old_recipe): def _read(self, resolve_inheritance=True, use_docker=False): new_values, database_name, is_unnested_firebase = autopack.load_file( - self.file_path, cache="recipes", use_docker=use_docker + self.file_path, cache="recipes", use_docker=use_docker, recipe_obj=self.recipe_obj ) if database_name == "firebase": if is_unnested_firebase: diff --git a/cellpack/bin/pack.py b/cellpack/bin/pack.py index 9db53937..fdbfede0 100644 --- a/cellpack/bin/pack.py +++ b/cellpack/bin/pack.py @@ -25,7 +25,7 @@ def pack( - recipe, config_path=None, analysis_config_path=None, docker=False, validate=True + recipe, config_path=None, analysis_config_path=None, docker=False, validate=True, recipe_str=None ): """ Initializes an autopack packing from the command line @@ -40,7 +40,7 @@ def pack( packing_config_data = ConfigLoader(config_path, docker).config recipe_loader = RecipeLoader( - recipe, packing_config_data["save_converted_recipe"], docker + recipe, packing_config_data["save_converted_recipe"], docker, recipe_str ) recipe_data = recipe_loader.recipe_data analysis_config_data = {} diff --git a/docker/server.py b/docker/server.py index 581e0151..6e625314 100644 --- a/docker/server.py +++ b/docker/server.py @@ -12,11 +12,11 @@ class CellpackServer: def __init__(self): self.packing_tasks = set() - async def run_packing(self, recipe, config, job_id): + async def run_packing(self, recipe, config, job_id, body=None): os.environ["AWS_BATCH_JOB_ID"] = job_id self.update_job_status(job_id, "RUNNING") try: - pack(recipe=recipe, config_path=config, docker=True) + pack(recipe=recipe, config_path=config, docker=True, recipe_str=body) except Exception as e: self.update_job_status(job_id, "FAILED", error_message=str(e)) @@ -37,8 +37,12 @@ async def health_check(self, 
request: web.Request) -> web.Response: return web.Response() async def pack_handler(self, request: web.Request) -> web.Response: - recipe = request.rel_url.query.get("recipe") - if recipe is None: + recipe = request.rel_url.query.get("recipe") or "" + if request.can_read_body: + body = await request.json() + else: + body = None + if not recipe and not body: raise web.HTTPBadRequest( "Pack requests must include recipe as a query param" ) @@ -46,7 +50,7 @@ async def pack_handler(self, request: web.Request) -> web.Response: job_id = str(uuid.uuid4()) # Initiate packing task to run in background - packing_task = asyncio.create_task(self.run_packing(recipe, config, job_id)) + packing_task = asyncio.create_task(self.run_packing(recipe, config, job_id, body)) # Keep track of task references to prevent them from being garbage # collected, then discard after task completion From f87915ac4c8ee4137333ee8ce1d721214446ad23 Mon Sep 17 00:00:00 2001 From: ascibisz Date: Wed, 22 Oct 2025 14:29:49 -0700 Subject: [PATCH 05/28] update documentation --- docker/Dockerfile.ecs | 3 ++- docs/DOCKER.md | 4 ++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/docker/Dockerfile.ecs b/docker/Dockerfile.ecs index 7a303c02..2016c222 100644 --- a/docker/Dockerfile.ecs +++ b/docker/Dockerfile.ecs @@ -4,7 +4,8 @@ WORKDIR /cellpack COPY . /cellpack RUN python -m pip install --upgrade pip --root-user-action=ignore -RUN python -m pip install . -r requirements/linux/requirements.txt --root-user-action=ignore +RUN python -m pip install . +RUN python -m pip install aiohttp mdutils EXPOSE 80 diff --git a/docs/DOCKER.md b/docs/DOCKER.md index a1214a33..48e4b6dd 100644 --- a/docs/DOCKER.md +++ b/docs/DOCKER.md @@ -13,6 +13,6 @@ ## AWS ECS Docker Image 1. Build image, running `docker build -f docker/Dockerfile.ecs -t [CONTAINER-NAME] .` 2. Run packings in the container, running: `docker run -v ~/.aws:/root/.aws -p 80:80 [CONTAINER-NAME]` -3. 
Try hitting the test endpoint on the server, by navigating to `http://0.0.0.0:8443/hello` in your browser. -4. Try running a packing on the server, by hitting the `http://0.0.0.0:80/pack?recipe=firebase:recipes/one_sphere_v_1.0.0` in your browser. +3. Try hitting the test endpoint on the server, by navigating to `http://0.0.0.0:80/hello` in your browser. +4. Try running a packing on the server, by hitting the `http://0.0.0.0:80/start-packing?recipe=firebase:recipes/one_sphere_v_1.0.0` in your browser. 5. Verify that the packing result path was uploaded to the firebase results table, with the job id specified in the response from the request in step 4.The result simularium file can be found at the s3 path specified there. \ No newline at end of file From 1f2d2e345523914d135379c826bdc00e51fbe6c0 Mon Sep 17 00:00:00 2001 From: ascibisz Date: Wed, 22 Oct 2025 16:41:35 -0700 Subject: [PATCH 06/28] remove accidential dockerfile changes --- docker/Dockerfile.ecs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/docker/Dockerfile.ecs b/docker/Dockerfile.ecs index 2016c222..5ed89b7b 100644 --- a/docker/Dockerfile.ecs +++ b/docker/Dockerfile.ecs @@ -4,8 +4,7 @@ WORKDIR /cellpack COPY . /cellpack RUN python -m pip install --upgrade pip --root-user-action=ignore -RUN python -m pip install . -RUN python -m pip install aiohttp mdutils +RUN python -m pip install . 
--root-user-action=ignore EXPOSE 80 From bd8ec42d33b005ec06316de1a386478eb4cd24f7 Mon Sep 17 00:00:00 2001 From: ascibisz Date: Thu, 23 Oct 2025 13:42:23 -0700 Subject: [PATCH 07/28] rename param json_recipe --- cellpack/autopack/__init__.py | 10 +++++----- cellpack/autopack/loaders/recipe_loader.py | 6 +++--- cellpack/bin/data-manifest.json | 10 ++++++++++ cellpack/bin/pack.py | 4 ++-- docker/server.py | 2 +- 5 files changed, 21 insertions(+), 11 deletions(-) create mode 100644 cellpack/bin/data-manifest.json diff --git a/cellpack/autopack/__init__.py b/cellpack/autopack/__init__.py index 4bfd996e..65d32078 100755 --- a/cellpack/autopack/__init__.py +++ b/cellpack/autopack/__init__.py @@ -411,14 +411,14 @@ def read_text_file(filename, destination="", cache="collisionTrees", force=None) def load_file( - filename, destination="", cache="geometries", force=None, use_docker=False, recipe_obj=None + filename, destination="", cache="geometries", force=None, use_docker=False, json_recipe=None ): - if recipe_obj is not None: + if json_recipe is not None: composition = DBRecipeLoader.remove_empty( - recipe_obj.get("composition", {}) + json_recipe.get("composition", {}) ) - recipe_obj["composition"] = composition - return recipe_obj, None, False + json_recipe["composition"] = composition + return json_recipe, None, False if is_remote_path(filename): database_name, file_path = convert_db_shortname_to_url(filename) if database_name == DATABASE_IDS.GITHUB: diff --git a/cellpack/autopack/loaders/recipe_loader.py b/cellpack/autopack/loaders/recipe_loader.py index 4b2930c8..23044443 100644 --- a/cellpack/autopack/loaders/recipe_loader.py +++ b/cellpack/autopack/loaders/recipe_loader.py @@ -30,7 +30,7 @@ class RecipeLoader(object): # TODO: add all default values here default_values = default_recipe_values.copy() - def __init__(self, input_file_path, save_converted_recipe=False, use_docker=False, recipe_obj=None): + def __init__(self, input_file_path, save_converted_recipe=False, 
use_docker=False, json_recipe=None): _, file_extension = os.path.splitext(input_file_path) self.current_version = CURRENT_VERSION self.file_path = input_file_path @@ -38,7 +38,7 @@ def __init__(self, input_file_path, save_converted_recipe=False, use_docker=Fals self.ingredient_list = [] self.compartment_list = [] self.save_converted_recipe = save_converted_recipe - self.recipe_obj = recipe_obj + self.json_recipe = json_recipe # set CURRENT_RECIPE_PATH appropriately for remote(firebase) vs local recipes if autopack.is_remote_path(self.file_path): @@ -170,7 +170,7 @@ def _migrate_version(self, old_recipe): def _read(self, resolve_inheritance=True, use_docker=False): new_values, database_name, is_unnested_firebase = autopack.load_file( - self.file_path, cache="recipes", use_docker=use_docker, recipe_obj=self.recipe_obj + self.file_path, cache="recipes", use_docker=use_docker, json_recipe=self.json_recipe ) if database_name == "firebase": if is_unnested_firebase: diff --git a/cellpack/bin/data-manifest.json b/cellpack/bin/data-manifest.json new file mode 100644 index 00000000..0a5e1f2d --- /dev/null +++ b/cellpack/bin/data-manifest.json @@ -0,0 +1,10 @@ +{ + "assay-dev": { + "data_source": "https://s3-us-west-2.amazonaws.com/file-download-service.allencell.org/assay-dev_2018-10-03.csv?versionId=XVdmE.6g1kk77c7jYA2Ge54eehTjY_AP", + "static_files": [] + }, + "test": { + "data_source": "https://cellpack-demo.s3.us-west-2.amazonaws.com/alli-test/test-manifest.csv", + "static_files": [] + } +} \ No newline at end of file diff --git a/cellpack/bin/pack.py b/cellpack/bin/pack.py index fdbfede0..dadf0f31 100644 --- a/cellpack/bin/pack.py +++ b/cellpack/bin/pack.py @@ -25,7 +25,7 @@ def pack( - recipe, config_path=None, analysis_config_path=None, docker=False, validate=True, recipe_str=None + recipe, config_path=None, analysis_config_path=None, docker=False, validate=True, json_recipe=None ): """ Initializes an autopack packing from the command line @@ -40,7 +40,7 @@ def pack( 
packing_config_data = ConfigLoader(config_path, docker).config recipe_loader = RecipeLoader( - recipe, packing_config_data["save_converted_recipe"], docker, recipe_str + recipe, packing_config_data["save_converted_recipe"], docker, json_recipe ) recipe_data = recipe_loader.recipe_data analysis_config_data = {} diff --git a/docker/server.py b/docker/server.py index 6e625314..ce6da7ee 100644 --- a/docker/server.py +++ b/docker/server.py @@ -16,7 +16,7 @@ async def run_packing(self, recipe, config, job_id, body=None): os.environ["AWS_BATCH_JOB_ID"] = job_id self.update_job_status(job_id, "RUNNING") try: - pack(recipe=recipe, config_path=config, docker=True, recipe_str=body) + pack(recipe=recipe, config_path=config, docker=True, json_recipe=body) except Exception as e: self.update_job_status(job_id, "FAILED", error_message=str(e)) From 358158eaa5aae8641d8c4c14574a03dbe1a3cb65 Mon Sep 17 00:00:00 2001 From: ascibisz Date: Fri, 9 Jan 2026 11:42:35 -0800 Subject: [PATCH 08/28] remove file that shouldn't be in this PR --- cellpack/bin/upload_to_client.py | 62 -------------------------------- 1 file changed, 62 deletions(-) delete mode 100644 cellpack/bin/upload_to_client.py diff --git a/cellpack/bin/upload_to_client.py b/cellpack/bin/upload_to_client.py deleted file mode 100644 index f778aa53..00000000 --- a/cellpack/bin/upload_to_client.py +++ /dev/null @@ -1,62 +0,0 @@ -import json -import fire - -from cellpack.autopack.FirebaseHandler import FirebaseHandler -from cellpack.autopack.DBRecipeHandler import DBUploader -from cellpack.autopack.loaders.config_loader import ConfigLoader -from cellpack.autopack.loaders.recipe_loader import RecipeLoader -from cellpack.bin.upload import get_recipe_metadata - -def upload_to_client( - recipe: str, - config: str, - fields: str, - name: str -): - """ - Uploads recipe, config, and editable fields, read from specified - JSON files, to the database for client access - - :param recipe: string argument - path to local recipe file to upload 
to firebase - :param config: string argument - path to local config file to upload to firebase - :param fields: string argument - path to local editable fields file to upload to firebase - :param name: string argument - display name for recipe in client selection menu - """ - db_handler = FirebaseHandler() - recipe_id = "" - config_id = "" - editable_fields_ids = [] - if FirebaseHandler._initialized: - db_handler = DBUploader(db_handler) - if recipe: - recipe_loader = RecipeLoader(recipe) - recipe_full_data = recipe_loader._read(resolve_inheritance=False) - recipe_meta_data = get_recipe_metadata(recipe_loader) - recipe_id = db_handler.upload_recipe(recipe_meta_data, recipe_full_data) - if config: - config_data = ConfigLoader(config).config - config_id = db_handler.upload_config(config_data, config) - if fields: - editable_fields_data = json.load(open(fields, "r")) - for field in editable_fields_data.get("editable_fields", []): - id, _ = db_handler.upload_data("editable_fields", field) - editable_fields_ids.append(id) - recipe_metadata = { - "name": name, - "recipe": recipe_id, - "config": config_id, - "editable_fields": editable_fields_ids, - } - - # Upload the combined recipe metadata to example_packings collection for client - db_handler.upload_data("example_packings", recipe_metadata) - -def main(): - fire.Fire(upload_to_client) - -if __name__ == "__main__": - main() From f0beaa1fc28cfb0242bfc428e8d7879716f3d7bd Mon Sep 17 00:00:00 2001 From: ascibisz Date: Fri, 9 Jan 2026 11:43:04 -0800 Subject: [PATCH 09/28] remove accidential file --- cellpack/bin/data-manifest.json | 10 ---------- 1 file changed, 10 deletions(-) delete mode 100644 cellpack/bin/data-manifest.json diff --git a/cellpack/bin/data-manifest.json b/cellpack/bin/data-manifest.json deleted file mode 100644 index 0a5e1f2d..00000000 --- a/cellpack/bin/data-manifest.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "assay-dev": { - "data_source": 
"https://s3-us-west-2.amazonaws.com/file-download-service.allencell.org/assay-dev_2018-10-03.csv?versionId=XVdmE.6g1kk77c7jYA2Ge54eehTjY_AP", - "static_files": [] - }, - "test": { - "data_source": "https://cellpack-demo.s3.us-west-2.amazonaws.com/alli-test/test-manifest.csv", - "static_files": [] - } -} \ No newline at end of file From a54ffa1d8393f89d2afb2926864bd1381e7870f2 Mon Sep 17 00:00:00 2001 From: ascibisz Date: Fri, 9 Jan 2026 11:52:19 -0800 Subject: [PATCH 10/28] lint fixes --- cellpack/autopack/__init__.py | 11 +++++++---- cellpack/autopack/loaders/recipe_loader.py | 13 +++++++++++-- cellpack/bin/pack.py | 7 ++++++- 3 files changed, 24 insertions(+), 7 deletions(-) diff --git a/cellpack/autopack/__init__.py b/cellpack/autopack/__init__.py index 65d32078..ce06494d 100755 --- a/cellpack/autopack/__init__.py +++ b/cellpack/autopack/__init__.py @@ -411,12 +411,15 @@ def read_text_file(filename, destination="", cache="collisionTrees", force=None) def load_file( - filename, destination="", cache="geometries", force=None, use_docker=False, json_recipe=None + filename, + destination="", + cache="geometries", + force=None, + use_docker=False, + json_recipe=None, ): if json_recipe is not None: - composition = DBRecipeLoader.remove_empty( - json_recipe.get("composition", {}) - ) + composition = DBRecipeLoader.remove_empty(json_recipe.get("composition", {})) json_recipe["composition"] = composition return json_recipe, None, False if is_remote_path(filename): diff --git a/cellpack/autopack/loaders/recipe_loader.py b/cellpack/autopack/loaders/recipe_loader.py index 23044443..cf87a10c 100644 --- a/cellpack/autopack/loaders/recipe_loader.py +++ b/cellpack/autopack/loaders/recipe_loader.py @@ -30,7 +30,13 @@ class RecipeLoader(object): # TODO: add all default values here default_values = default_recipe_values.copy() - def __init__(self, input_file_path, save_converted_recipe=False, use_docker=False, json_recipe=None): + def __init__( + self, + input_file_path, + 
save_converted_recipe=False, + use_docker=False, + json_recipe=None, + ): _, file_extension = os.path.splitext(input_file_path) self.current_version = CURRENT_VERSION self.file_path = input_file_path @@ -170,7 +176,10 @@ def _migrate_version(self, old_recipe): def _read(self, resolve_inheritance=True, use_docker=False): new_values, database_name, is_unnested_firebase = autopack.load_file( - self.file_path, cache="recipes", use_docker=use_docker, json_recipe=self.json_recipe + self.file_path, + cache="recipes", + use_docker=use_docker, + json_recipe=self.json_recipe, ) if database_name == "firebase": if is_unnested_firebase: diff --git a/cellpack/bin/pack.py b/cellpack/bin/pack.py index dadf0f31..99186c9d 100644 --- a/cellpack/bin/pack.py +++ b/cellpack/bin/pack.py @@ -25,7 +25,12 @@ def pack( - recipe, config_path=None, analysis_config_path=None, docker=False, validate=True, json_recipe=None + recipe, + config_path=None, + analysis_config_path=None, + docker=False, + validate=True, + json_recipe=None, ): """ Initializes an autopack packing from the command line From 3d01db317181e78eb0b9c928b6959657d4b9b778 Mon Sep 17 00:00:00 2001 From: ascibisz Date: Wed, 21 Jan 2026 14:52:55 -0800 Subject: [PATCH 11/28] refactor to try to improve clarity of json recipe vs file path --- cellpack/autopack/__init__.py | 9 ++--- cellpack/autopack/loaders/recipe_loader.py | 17 +++++--- cellpack/bin/pack.py | 45 ++++++++++++++++++++-- docker/server.py | 7 +++- 4 files changed, 62 insertions(+), 16 deletions(-) diff --git a/cellpack/autopack/__init__.py b/cellpack/autopack/__init__.py index ce06494d..4520d388 100755 --- a/cellpack/autopack/__init__.py +++ b/cellpack/autopack/__init__.py @@ -409,6 +409,10 @@ def read_text_file(filename, destination="", cache="collisionTrees", force=None) f.close() return sphere_data +def load_json_recipe(json_recipe): + composition = DBRecipeLoader.remove_empty(json_recipe.get("composition", {})) + json_recipe["composition"] = composition + return 
json_recipe def load_file( filename, @@ -416,12 +420,7 @@ def load_file( cache="geometries", force=None, use_docker=False, - json_recipe=None, ): - if json_recipe is not None: - composition = DBRecipeLoader.remove_empty(json_recipe.get("composition", {})) - json_recipe["composition"] = composition - return json_recipe, None, False if is_remote_path(filename): database_name, file_path = convert_db_shortname_to_url(filename) if database_name == DATABASE_IDS.GITHUB: diff --git a/cellpack/autopack/loaders/recipe_loader.py b/cellpack/autopack/loaders/recipe_loader.py index cf87a10c..df9e26ac 100644 --- a/cellpack/autopack/loaders/recipe_loader.py +++ b/cellpack/autopack/loaders/recipe_loader.py @@ -175,12 +175,17 @@ def _migrate_version(self, old_recipe): ) def _read(self, resolve_inheritance=True, use_docker=False): - new_values, database_name, is_unnested_firebase = autopack.load_file( - self.file_path, - cache="recipes", - use_docker=use_docker, - json_recipe=self.json_recipe, - ) + database_name = None + is_unnested_firebase = False + if self.json_recipe is not None: + new_values = autopack.load_json_recipe(self.json_recipe) + else: + new_values, database_name, is_unnested_firebase = autopack.load_file( + self.file_path, + cache="recipes", + use_docker=use_docker, + ) + if database_name == "firebase": if is_unnested_firebase: objects = new_values.get("objects", {}) diff --git a/cellpack/bin/pack.py b/cellpack/bin/pack.py index 99186c9d..39003eda 100644 --- a/cellpack/bin/pack.py +++ b/cellpack/bin/pack.py @@ -30,7 +30,6 @@ def pack( analysis_config_path=None, docker=False, validate=True, - json_recipe=None, ): """ Initializes an autopack packing from the command line @@ -42,12 +41,52 @@ def pack( :return: void """ - packing_config_data = ConfigLoader(config_path, docker).config + config_loader = ConfigLoader(config_path, docker) recipe_loader = RecipeLoader( - recipe, packing_config_data["save_converted_recipe"], docker, json_recipe + recipe, 
config_loader.config["save_converted_recipe"], docker ) + return run_packing( + recipe_loader, + config_loader, + analysis_config_path, + docker, + validate + ) + +def pack_from_json( + json_recipe, + config_path=None, + analysis_config_path=None, + docker=False, + validate=True, +): + """ + Initializes an autopack packing from the command line + :param json: JSON object representing the recipe + :param config_path: string argument, path to packing config file + :param analysis_config_path: string argument, path to analysis config file + :param docker: boolean argument, are we using docker + :param validate: boolean argument, validate recipe before packing + + :return: void + """ + config_loader = ConfigLoader(config_path, docker) + + recipe_loader = RecipeLoader( + "", config_loader.config["save_converted_recipe"], docker, json_recipe + ) + return run_packing( + recipe_loader, + config_loader, + analysis_config_path, + docker, + validate + ) + +def run_packing(recipe_loader, config_loader, analysis_config_path, docker, validate): recipe_data = recipe_loader.recipe_data + packing_config_data = config_loader.config analysis_config_data = {} if analysis_config_path is not None: analysis_config_data = AnalysisConfigLoader(analysis_config_path).config diff --git a/docker/server.py b/docker/server.py index ce6da7ee..6bbf338c 100644 --- a/docker/server.py +++ b/docker/server.py @@ -4,7 +4,7 @@ import uuid from cellpack.autopack.DBRecipeHandler import DBUploader from cellpack.autopack.interface_objects.database_ids import DATABASE_IDS -from cellpack.bin.pack import pack +from cellpack.bin.pack import pack, pack_from_json SERVER_PORT = 80 @@ -16,7 +16,10 @@ async def run_packing(self, recipe, config, job_id, body=None): os.environ["AWS_BATCH_JOB_ID"] = job_id self.update_job_status(job_id, "RUNNING") try: - pack(recipe=recipe, config_path=config, docker=True, json_recipe=body) + if body is None: + pack(recipe=recipe, config_path=config, docker=True) + else: + 
pack_from_json(json_recipe=body, config_path=config, docker=True) except Exception as e: self.update_job_status(job_id, "FAILED", error_message=str(e)) From 529e15b40e93ba0c60c526acf16646ed2eb04d61 Mon Sep 17 00:00:00 2001 From: ascibisz Date: Wed, 21 Jan 2026 14:56:38 -0800 Subject: [PATCH 12/28] lint fixes --- cellpack/autopack/__init__.py | 2 ++ cellpack/bin/pack.py | 2 ++ 2 files changed, 4 insertions(+) diff --git a/cellpack/autopack/__init__.py b/cellpack/autopack/__init__.py index 4520d388..f04bcb43 100755 --- a/cellpack/autopack/__init__.py +++ b/cellpack/autopack/__init__.py @@ -409,11 +409,13 @@ def read_text_file(filename, destination="", cache="collisionTrees", force=None) f.close() return sphere_data + def load_json_recipe(json_recipe): composition = DBRecipeLoader.remove_empty(json_recipe.get("composition", {})) json_recipe["composition"] = composition return json_recipe + def load_file( filename, destination="", diff --git a/cellpack/bin/pack.py b/cellpack/bin/pack.py index 39003eda..80ef6784 100644 --- a/cellpack/bin/pack.py +++ b/cellpack/bin/pack.py @@ -54,6 +54,7 @@ def pack( validate ) + def pack_from_json( json_recipe, config_path=None, @@ -84,6 +85,7 @@ def pack_from_json( validate ) + def run_packing(recipe_loader, config_loader, analysis_config_path, docker, validate): recipe_data = recipe_loader.recipe_data packing_config_data = config_loader.config From 63514c90cd54610a6ec759423c062cc52fd2ab37 Mon Sep 17 00:00:00 2001 From: ascibisz Date: Wed, 21 Jan 2026 14:57:23 -0800 Subject: [PATCH 13/28] lint fix --- cellpack/bin/pack.py | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/cellpack/bin/pack.py b/cellpack/bin/pack.py index 80ef6784..b8446f33 100644 --- a/cellpack/bin/pack.py +++ b/cellpack/bin/pack.py @@ -47,11 +47,7 @@ def pack( recipe, config_loader.config["save_converted_recipe"], docker ) return run_packing( - recipe_loader, - config_loader, - analysis_config_path, - docker, - validate + recipe_loader, 
config_loader, analysis_config_path, docker, validate ) @@ -78,11 +74,7 @@ def pack_from_json( "", config_loader.config["save_converted_recipe"], docker, json_recipe ) return run_packing( - recipe_loader, - config_loader, - analysis_config_path, - docker, - validate + recipe_loader, config_loader, analysis_config_path, docker, validate ) From b2440cd3f905caf29704a68db5edb2571f8e871d Mon Sep 17 00:00:00 2001 From: ascibisz Date: Wed, 21 Jan 2026 15:00:38 -0800 Subject: [PATCH 14/28] minimize changeset --- cellpack/autopack/__init__.py | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/cellpack/autopack/__init__.py b/cellpack/autopack/__init__.py index f04bcb43..e6701b52 100755 --- a/cellpack/autopack/__init__.py +++ b/cellpack/autopack/__init__.py @@ -416,13 +416,7 @@ def load_json_recipe(json_recipe): return json_recipe -def load_file( - filename, - destination="", - cache="geometries", - force=None, - use_docker=False, -): +def load_file(filename, destination="", cache="geometries", force=None, use_docker=False): if is_remote_path(filename): database_name, file_path = convert_db_shortname_to_url(filename) if database_name == DATABASE_IDS.GITHUB: From 470e3a18ae7e4860eac53f0ff6637a920ad2b16b Mon Sep 17 00:00:00 2001 From: ascibisz Date: Wed, 21 Jan 2026 15:02:35 -0800 Subject: [PATCH 15/28] minimize changeset --- cellpack/autopack/__init__.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/cellpack/autopack/__init__.py b/cellpack/autopack/__init__.py index e6701b52..dbc90c58 100755 --- a/cellpack/autopack/__init__.py +++ b/cellpack/autopack/__init__.py @@ -416,7 +416,9 @@ def load_json_recipe(json_recipe): return json_recipe -def load_file(filename, destination="", cache="geometries", force=None, use_docker=False): +def load_file( + filename, destination="", cache="geometries", force=None, use_docker=False +): if is_remote_path(filename): database_name, file_path = convert_db_shortname_to_url(filename) if database_name == 
DATABASE_IDS.GITHUB: From 8a3489895a71298652056ab27dd73c391aaa0198 Mon Sep 17 00:00:00 2001 From: ascibisz Date: Wed, 21 Jan 2026 15:56:18 -0800 Subject: [PATCH 16/28] simplify changeset --- cellpack/autopack/__init__.py | 6 --- cellpack/autopack/loaders/recipe_loader.py | 15 +++++-- cellpack/bin/pack.py | 47 ++++------------------ docker/server.py | 8 ++-- 4 files changed, 23 insertions(+), 53 deletions(-) diff --git a/cellpack/autopack/__init__.py b/cellpack/autopack/__init__.py index dbc90c58..2219525b 100755 --- a/cellpack/autopack/__init__.py +++ b/cellpack/autopack/__init__.py @@ -410,12 +410,6 @@ def read_text_file(filename, destination="", cache="collisionTrees", force=None) return sphere_data -def load_json_recipe(json_recipe): - composition = DBRecipeLoader.remove_empty(json_recipe.get("composition", {})) - json_recipe["composition"] = composition - return json_recipe - - def load_file( filename, destination="", cache="geometries", force=None, use_docker=False ): diff --git a/cellpack/autopack/loaders/recipe_loader.py b/cellpack/autopack/loaders/recipe_loader.py index df9e26ac..4b89c816 100644 --- a/cellpack/autopack/loaders/recipe_loader.py +++ b/cellpack/autopack/loaders/recipe_loader.py @@ -56,6 +56,15 @@ def __init__( self.recipe_data = self._read(use_docker=use_docker) + @classmethod + def from_json(cls, json_recipe, save_converted_recipe=False, use_docker=False): + return cls( + input_file_path="", + save_converted_recipe=save_converted_recipe, + use_docker=use_docker, + json_recipe=json_recipe, + ) + @staticmethod def _resolve_object(key, objects): current_object = objects[key] @@ -177,9 +186,9 @@ def _migrate_version(self, old_recipe): def _read(self, resolve_inheritance=True, use_docker=False): database_name = None is_unnested_firebase = False - if self.json_recipe is not None: - new_values = autopack.load_json_recipe(self.json_recipe) - else: + new_values = self.json_recipe + if new_values is None: + # Read recipe from filepath new_values, 
database_name, is_unnested_firebase = autopack.load_file( self.file_path, cache="recipes", diff --git a/cellpack/bin/pack.py b/cellpack/bin/pack.py index b8446f33..6c603cbb 100644 --- a/cellpack/bin/pack.py +++ b/cellpack/bin/pack.py @@ -33,7 +33,7 @@ def pack( ): """ Initializes an autopack packing from the command line - :param recipe: string argument, path to recipe + :param recipe: string argument, path to recipe file, or a dictionary representing a recipe :param config_path: string argument, path to packing config file :param analysis_config_path: string argument, path to analysis config file :param docker: boolean argument, are we using docker @@ -41,45 +41,14 @@ def pack( :return: void """ - config_loader = ConfigLoader(config_path, docker) - - recipe_loader = RecipeLoader( - recipe, config_loader.config["save_converted_recipe"], docker - ) - return run_packing( - recipe_loader, config_loader, analysis_config_path, docker, validate - ) - - -def pack_from_json( - json_recipe, - config_path=None, - analysis_config_path=None, - docker=False, - validate=True, -): - """ - Initializes an autopack packing from the command line - :param json: JSON object representing the recipe - :param config_path: string argument, path to packing config file - :param analysis_config_path: string argument, path to analysis config file - :param docker: boolean argument, are we using docker - :param validate: boolean argument, validate recipe before packing - - :return: void - """ - config_loader = ConfigLoader(config_path, docker) - - recipe_loader = RecipeLoader( - "", config_loader.config["save_converted_recipe"], docker, json_recipe - ) - return run_packing( - recipe_loader, config_loader, analysis_config_path, docker, validate - ) - - -def run_packing(recipe_loader, config_loader, analysis_config_path, docker, validate): + if isinstance(recipe, dict): + # Load recipe from JSON dictionary + recipe_loader = RecipeLoader.from_json(recipe, use_docker=docker) + else: + # Load recipe 
from file path + recipe_loader = RecipeLoader(recipe, use_docker=docker) recipe_data = recipe_loader.recipe_data + config_loader = ConfigLoader(config_path, docker) packing_config_data = config_loader.config analysis_config_data = {} if analysis_config_path is not None: diff --git a/docker/server.py b/docker/server.py index 6bbf338c..257e66ec 100644 --- a/docker/server.py +++ b/docker/server.py @@ -4,7 +4,7 @@ import uuid from cellpack.autopack.DBRecipeHandler import DBUploader from cellpack.autopack.interface_objects.database_ids import DATABASE_IDS -from cellpack.bin.pack import pack, pack_from_json +from cellpack.bin.pack import pack SERVER_PORT = 80 @@ -16,10 +16,8 @@ async def run_packing(self, recipe, config, job_id, body=None): os.environ["AWS_BATCH_JOB_ID"] = job_id self.update_job_status(job_id, "RUNNING") try: - if body is None: - pack(recipe=recipe, config_path=config, docker=True) - else: - pack_from_json(json_recipe=body, config_path=config, docker=True) + # Pack JSON recipe in body if provided, otherwise use recipe path + pack(recipe=(body if body else recipe), config_path=config, docker=True) except Exception as e: self.update_job_status(job_id, "FAILED", error_message=str(e)) From 45d438ae8788b6d999ae546fe4556742f30cc05f Mon Sep 17 00:00:00 2001 From: ascibisz Date: Wed, 21 Jan 2026 16:03:12 -0800 Subject: [PATCH 17/28] code cleanup --- cellpack/autopack/loaders/recipe_loader.py | 4 +--- cellpack/bin/pack.py | 10 +++------- docker/server.py | 4 ++-- 3 files changed, 6 insertions(+), 12 deletions(-) diff --git a/cellpack/autopack/loaders/recipe_loader.py b/cellpack/autopack/loaders/recipe_loader.py index 4b89c816..e40b469e 100644 --- a/cellpack/autopack/loaders/recipe_loader.py +++ b/cellpack/autopack/loaders/recipe_loader.py @@ -190,9 +190,7 @@ def _read(self, resolve_inheritance=True, use_docker=False): if new_values is None: # Read recipe from filepath new_values, database_name, is_unnested_firebase = autopack.load_file( - self.file_path, - 
cache="recipes", - use_docker=use_docker, + self.file_path, cache="recipes", use_docker=use_docker, ) if database_name == "firebase": diff --git a/cellpack/bin/pack.py b/cellpack/bin/pack.py index 6c603cbb..23a69a57 100644 --- a/cellpack/bin/pack.py +++ b/cellpack/bin/pack.py @@ -25,11 +25,7 @@ def pack( - recipe, - config_path=None, - analysis_config_path=None, - docker=False, - validate=True, + recipe, config_path=None, analysis_config_path=None, docker=False, validate=True ): """ Initializes an autopack packing from the command line @@ -41,6 +37,8 @@ def pack( :return: void """ + packing_config_data = ConfigLoader(config_path, docker).config + if isinstance(recipe, dict): # Load recipe from JSON dictionary recipe_loader = RecipeLoader.from_json(recipe, use_docker=docker) @@ -48,8 +46,6 @@ def pack( # Load recipe from file path recipe_loader = RecipeLoader(recipe, use_docker=docker) recipe_data = recipe_loader.recipe_data - config_loader = ConfigLoader(config_path, docker) - packing_config_data = config_loader.config analysis_config_data = {} if analysis_config_path is not None: analysis_config_data = AnalysisConfigLoader(analysis_config_path).config diff --git a/docker/server.py b/docker/server.py index 257e66ec..9b1ce105 100644 --- a/docker/server.py +++ b/docker/server.py @@ -12,7 +12,7 @@ class CellpackServer: def __init__(self): self.packing_tasks = set() - async def run_packing(self, recipe, config, job_id, body=None): + async def run_packing(self, job_id, recipe=None, config=None, body=None): os.environ["AWS_BATCH_JOB_ID"] = job_id self.update_job_status(job_id, "RUNNING") try: @@ -51,7 +51,7 @@ async def pack_handler(self, request: web.Request) -> web.Response: job_id = str(uuid.uuid4()) # Initiate packing task to run in background - packing_task = asyncio.create_task(self.run_packing(recipe, config, job_id, body)) + packing_task = asyncio.create_task(self.run_packing(job_id, recipe, config, body)) # Keep track of task references to prevent them from 
being garbage # collected, then discard after task completion From c8fe120ef4b9ecd392a8f8030307d07fee176f1f Mon Sep 17 00:00:00 2001 From: ascibisz Date: Wed, 21 Jan 2026 16:05:28 -0800 Subject: [PATCH 18/28] minimize changeset --- cellpack/bin/pack.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/cellpack/bin/pack.py b/cellpack/bin/pack.py index 23a69a57..83b22264 100644 --- a/cellpack/bin/pack.py +++ b/cellpack/bin/pack.py @@ -44,7 +44,9 @@ def pack( recipe_loader = RecipeLoader.from_json(recipe, use_docker=docker) else: # Load recipe from file path - recipe_loader = RecipeLoader(recipe, use_docker=docker) + recipe_loader = RecipeLoader( + recipe, packing_config_data["save_converted_recipe"], docker + ) recipe_data = recipe_loader.recipe_data analysis_config_data = {} if analysis_config_path is not None: From ecc645d4ca013c56e0ca6e8f82fbb6c4540f5c37 Mon Sep 17 00:00:00 2001 From: ascibisz Date: Wed, 21 Jan 2026 16:07:30 -0800 Subject: [PATCH 19/28] remove trailing comma --- cellpack/autopack/loaders/recipe_loader.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cellpack/autopack/loaders/recipe_loader.py b/cellpack/autopack/loaders/recipe_loader.py index e40b469e..bbdb662a 100644 --- a/cellpack/autopack/loaders/recipe_loader.py +++ b/cellpack/autopack/loaders/recipe_loader.py @@ -190,7 +190,7 @@ def _read(self, resolve_inheritance=True, use_docker=False): if new_values is None: # Read recipe from filepath new_values, database_name, is_unnested_firebase = autopack.load_file( - self.file_path, cache="recipes", use_docker=use_docker, + self.file_path, cache="recipes", use_docker=use_docker ) if database_name == "firebase": From 17ba17c284349e249c1c7892d6b6405880cbd8fe Mon Sep 17 00:00:00 2001 From: Ruge Li <91452427+rugeli@users.noreply.github.com> Date: Thu, 29 Jan 2026 13:11:58 -0800 Subject: [PATCH 20/28] Feature/firebase lookup (#445) * remove os fetch for job_id * use dedup_hash instead of job id * proposal: get hash 
from recipe loader * renaming and add TODOs * format * rename param to hash * remove unused validate param and doc strings in pack * simplify get_ dedup_hash * refactor job_status update * cleanup * fix upload_job_status to handle awshandler * pass dedup_pash to env for fetching across files * add tests * format1 * format test --- cellpack/autopack/DBRecipeHandler.py | 87 +++++++++---------- .../upy/simularium/simularium_helper.py | 12 ++- cellpack/autopack/writers/__init__.py | 5 +- cellpack/bin/pack.py | 44 +++++----- cellpack/tests/test_db_uploader.py | 53 +++++++++++ docker/server.py | 55 +++++++----- 6 files changed, 160 insertions(+), 96 deletions(-) diff --git a/cellpack/autopack/DBRecipeHandler.py b/cellpack/autopack/DBRecipeHandler.py index 8b4e8578..00fc8bb7 100644 --- a/cellpack/autopack/DBRecipeHandler.py +++ b/cellpack/autopack/DBRecipeHandler.py @@ -529,7 +529,7 @@ def upload_config(self, config_data, source_path): self.db.update_doc("configs", id, config_data) return id - def upload_result_metadata(self, file_name, url, job_id=None): + def upload_result_metadata(self, file_name, url, dedup_hash=None): """ Upload the metadata of the result file to the database. 
""" @@ -543,28 +543,40 @@ def upload_result_metadata(self, file_name, url, job_id=None): "user": username, "timestamp": timestamp, "url": url, - "batch_job_id": job_id, + "dedup_hash": dedup_hash, }, ) - if job_id: - self.upload_job_status(job_id, "DONE", result_path=url) + if dedup_hash: + self.upload_job_status(dedup_hash, "DONE", result_path=url) - def upload_job_status(self, job_id, status, result_path=None, error_message=None): + def upload_job_status( + self, + dedup_hash, + status, + result_path=None, + error_message=None, + outputs_directory=None, + ): """ - Update status for a given job ID + Update status for a given dedup_hash """ if self.db: - timestamp = self.db.create_timestamp() - self.db.update_or_create( - "job_status", - job_id, - { - "timestamp": timestamp, - "status": str(status), - "result_path": result_path, - "error_message": error_message, - }, - ) + db_handler = self.db + # If db is AWSHandler, switch to firebase handler for job status updates + if hasattr(self.db, "s3_client"): + handler = DATABASE_IDS.handlers().get(DATABASE_IDS.FIREBASE) + db_handler = handler(default_db="staging") + timestamp = db_handler.create_timestamp() + data = { + "timestamp": timestamp, + "status": str(status), + "error_message": error_message, + } + if result_path: + data["result_path"] = result_path + if outputs_directory: + data["outputs_directory"] = outputs_directory + db_handler.update_or_create("job_status", dedup_hash, data) def save_recipe_and_config_to_output(self, output_folder, config_data, recipe_data): output_path = Path(output_folder) @@ -583,7 +595,7 @@ def upload_packing_results_workflow( self, source_folder, recipe_name, - job_id, + dedup_hash, config_data, recipe_data, ): @@ -591,7 +603,7 @@ def upload_packing_results_workflow( Complete packing results upload workflow including folder preparation and s3 upload """ try: - if job_id: + if dedup_hash: source_path = Path(source_folder) if not source_path.exists(): @@ -601,7 +613,7 @@ def 
upload_packing_results_workflow( # prepare unique S3 upload folder parent_folder = source_path.parent - unique_folder_name = f"{source_path.name}_run_{job_id}" + unique_folder_name = f"{source_path.name}_run_{dedup_hash}" s3_upload_folder = parent_folder / unique_folder_name logging.debug(f"outputs will be copied to: {s3_upload_folder}") @@ -618,7 +630,7 @@ def upload_packing_results_workflow( upload_result = self.upload_outputs_to_s3( output_folder=s3_upload_folder, recipe_name=recipe_name, - job_id=job_id, + dedup_hash=dedup_hash, ) # clean up temporary folder after upload @@ -628,9 +640,11 @@ def upload_packing_results_workflow( f"Cleaned up temporary upload folder: {s3_upload_folder}" ) - # update outputs directory in firebase - self.update_outputs_directory( - job_id, upload_result.get("outputs_directory") + # update outputs directory in job status + self.upload_job_status( + dedup_hash, + "DONE", + outputs_directory=upload_result.get("outputs_directory"), ) return upload_result @@ -639,7 +653,7 @@ def upload_packing_results_workflow( logging.error(e) return {"success": False, "error": e} - def upload_outputs_to_s3(self, output_folder, recipe_name, job_id): + def upload_outputs_to_s3(self, output_folder, recipe_name, dedup_hash): """ Upload packing outputs to S3 bucket """ @@ -647,7 +661,7 @@ def upload_outputs_to_s3(self, output_folder, recipe_name, job_id): bucket_name = self.db.bucket_name region_name = self.db.region_name sub_folder_name = self.db.sub_folder_name - s3_prefix = f"{sub_folder_name}/{recipe_name}/{job_id}" + s3_prefix = f"{sub_folder_name}/{recipe_name}/{dedup_hash}" try: upload_result = self.db.upload_directory( @@ -671,7 +685,7 @@ def upload_outputs_to_s3(self, output_folder, recipe_name, job_id): return { "success": True, - "run_id": job_id, + "dedup_hash": dedup_hash, "s3_bucket": bucket_name, "s3_prefix": s3_prefix, "public_url_base": f"{base_url}/{s3_prefix}/", @@ -685,25 +699,6 @@ def upload_outputs_to_s3(self, output_folder, 
recipe_name, job_id): logging.error(e) return {"success": False, "error": e} - def update_outputs_directory(self, job_id, outputs_directory): - if not self.db or self.db.s3_client: - # switch to firebase handler to update job status - handler = DATABASE_IDS.handlers().get("firebase") - initialized_db = handler(default_db="staging") - if job_id: - timestamp = initialized_db.create_timestamp() - initialized_db.update_or_create( - "job_status", - job_id, - { - "timestamp": timestamp, - "outputs_directory": outputs_directory, - }, - ) - logging.debug( - f"Updated outputs s3 location {outputs_directory} for job ID: {job_id}" - ) - class DBRecipeLoader(object): """ diff --git a/cellpack/autopack/upy/simularium/simularium_helper.py b/cellpack/autopack/upy/simularium/simularium_helper.py index 86af0874..4f934e0e 100644 --- a/cellpack/autopack/upy/simularium/simularium_helper.py +++ b/cellpack/autopack/upy/simularium/simularium_helper.py @@ -1385,16 +1385,14 @@ def raycast(self, **kw): def raycast_test(self, obj, start, end, length, **kw): return - def post_and_open_file(self, file_name, open_results_in_browser): + def post_and_open_file(self, file_name, open_results_in_browser, dedup_hash=None): simularium_file = Path(f"{file_name}.simularium") - url = None - job_id = os.environ.get("AWS_BATCH_JOB_ID", None) file_name, url = simulariumHelper.store_result_file( - simularium_file, storage="aws", batch_job_id=job_id + simularium_file, storage="aws", batch_job_id=dedup_hash ) if file_name and url: simulariumHelper.store_metadata( - file_name, url, db="firebase", job_id=job_id + file_name, url, db="firebase", dedup_hash=dedup_hash ) if open_results_in_browser: simulariumHelper.open_in_simularium(url) @@ -1428,7 +1426,7 @@ def store_result_file( return file_name, url @staticmethod - def store_metadata(file_name, url, db=None, job_id=None): + def store_metadata(file_name, url, db=None, dedup_hash=None): if db == "firebase": handler = DATABASE_IDS.handlers().get(db) initialized_db 
= handler( @@ -1436,7 +1434,7 @@ def store_metadata(file_name, url, db=None, job_id=None): ) # default to staging for metadata uploads if initialized_db._initialized: db_uploader = DBUploader(initialized_db) - db_uploader.upload_result_metadata(file_name, url, job_id) + db_uploader.upload_result_metadata(file_name, url, dedup_hash) else: db_maintainer = DBMaintenance(initialized_db) logging.warning( diff --git a/cellpack/autopack/writers/__init__.py b/cellpack/autopack/writers/__init__.py index 6ca931af..0b09e03a 100644 --- a/cellpack/autopack/writers/__init__.py +++ b/cellpack/autopack/writers/__init__.py @@ -197,8 +197,11 @@ def save_as_simularium(self, env, seed_to_results_map): number_of_packings = env.config_data.get("number_of_packings", 1) open_results_in_browser = env.config_data.get("open_results_in_browser", False) upload_results = env.config_data.get("upload_results", False) + dedup_hash = getattr(env, "dedup_hash", None) if (number_of_packings == 1 or is_aggregate) and upload_results: - autopack.helper.post_and_open_file(file_name, open_results_in_browser) + autopack.helper.post_and_open_file( + file_name, open_results_in_browser, dedup_hash + ) def save_Mixed_asJson( self, diff --git a/cellpack/bin/pack.py b/cellpack/bin/pack.py index 83b22264..27c4d018 100644 --- a/cellpack/bin/pack.py +++ b/cellpack/bin/pack.py @@ -1,6 +1,5 @@ import logging import logging.config -import os import time from pathlib import Path @@ -25,7 +24,11 @@ def pack( - recipe, config_path=None, analysis_config_path=None, docker=False, validate=True + recipe, + config_path=None, + analysis_config_path=None, + docker=False, + hash=None, ): """ Initializes an autopack packing from the command line @@ -33,7 +36,7 @@ def pack( :param config_path: string argument, path to packing config file :param analysis_config_path: string argument, path to analysis config file :param docker: boolean argument, are we using docker - :param validate: boolean argument, validate recipe before packing 
+ :param hash: string argument, dedup hash identifier for tracking/caching results :return: void """ @@ -57,6 +60,7 @@ def pack( autopack.helper = helper env = Environment(config=packing_config_data, recipe=recipe_data) env.helper = helper + env.dedup_hash = hash log.info("Packing recipe: %s", recipe_data["name"]) log.info("Outputs will be saved to %s", env.out_folder) @@ -83,24 +87,22 @@ def pack( env.buildGrid(rebuild=True) env.pack_grid(verbose=0, usePP=False) - if docker: - job_id = os.environ.get("AWS_BATCH_JOB_ID", None) - if job_id: - handler = DATABASE_IDS.handlers().get(DATABASE_IDS.AWS) - # temporarily using demo bucket before permissions are granted - initialized_handler = handler( - bucket_name="cellpack-demo", - sub_folder_name="runs", - region_name="us-west-2", - ) - uploader = DBUploader(db_handler=initialized_handler) - uploader.upload_packing_results_workflow( - source_folder=env.out_folder, - recipe_name=recipe_data["name"], - job_id=job_id, - config_data=packing_config_data, - recipe_data=recipe_loader.serializable_recipe_data, - ) + if docker and hash: + handler = DATABASE_IDS.handlers().get(DATABASE_IDS.AWS) + # temporarily using demo bucket before permissions are granted + initialized_handler = handler( + bucket_name="cellpack-demo", + sub_folder_name="runs", + region_name="us-west-2", + ) + uploader = DBUploader(db_handler=initialized_handler) + uploader.upload_packing_results_workflow( + source_folder=env.out_folder, + recipe_name=recipe_data["name"], + dedup_hash=hash, + config_data=packing_config_data, + recipe_data=recipe_loader.serializable_recipe_data, + ) def main(): diff --git a/cellpack/tests/test_db_uploader.py b/cellpack/tests/test_db_uploader.py index 0c91cbd5..414f6b9c 100644 --- a/cellpack/tests/test_db_uploader.py +++ b/cellpack/tests/test_db_uploader.py @@ -175,3 +175,56 @@ def test_upload_recipe(): "A": "firebase:composition/test_id", } assert recipe_doc.objects_to_path_map == {"sphere_25": "firebase:objects/test_id"} + + 
+def test_upload_job_status_with_firebase_handler(): + mock_firebase_db = MagicMock() + mock_firebase_db.create_timestamp.return_value = "test_timestamp" + # firebaseHandler does not have s3_client attribute + del mock_firebase_db.s3_client + + uploader = DBUploader(mock_firebase_db) + uploader.upload_job_status("test_hash", "RUNNING") + + mock_firebase_db.create_timestamp.assert_called_once() + mock_firebase_db.update_or_create.assert_called_once_with( + "job_status", + "test_hash", + { + "timestamp": "test_timestamp", + "status": "RUNNING", + "error_message": None, + }, + ) + + +def test_upload_job_status_with_aws_handler(): + mock_aws_db = MagicMock() + mock_aws_db.s3_client = MagicMock() # AWSHandler has s3_client + + mock_firebase_handler = MagicMock() + mock_firebase_handler.create_timestamp.return_value = "firebase_timestamp" + + with patch( + "cellpack.autopack.DBRecipeHandler.DATABASE_IDS.handlers" + ) as mock_handlers: + mock_handlers.return_value.get.return_value = ( + lambda default_db: mock_firebase_handler + ) + + uploader = DBUploader(mock_aws_db) + uploader.upload_job_status("test_hash", "DONE", result_path="test_path") + + mock_firebase_handler.create_timestamp.assert_called_once() + mock_firebase_handler.update_or_create.assert_called_once_with( + "job_status", + "test_hash", + { + "timestamp": "firebase_timestamp", + "status": "DONE", + "error_message": None, + "result_path": "test_path", + }, + ) + # AWS handler should not be called for timestamp + mock_aws_db.create_timestamp.assert_not_called() diff --git a/docker/server.py b/docker/server.py index 9b1ce105..74bb20f3 100644 --- a/docker/server.py +++ b/docker/server.py @@ -1,8 +1,6 @@ import asyncio from aiohttp import web -import os -import uuid -from cellpack.autopack.DBRecipeHandler import DBUploader +from cellpack.autopack.DBRecipeHandler import DataDoc, DBUploader from cellpack.autopack.interface_objects.database_ids import DATABASE_IDS from cellpack.bin.pack import pack @@ -12,29 +10,40 
@@ class CellpackServer: def __init__(self): self.packing_tasks = set() - async def run_packing(self, job_id, recipe=None, config=None, body=None): - os.environ["AWS_BATCH_JOB_ID"] = job_id - self.update_job_status(job_id, "RUNNING") + def _get_firebase_handler(self, database_name="firebase"): + handler = DATABASE_IDS.handlers().get(database_name) + initialized_db = handler(default_db="staging") + if initialized_db._initialized: + return initialized_db + return None + + def job_exists(self, dedup_hash): + db = self._get_firebase_handler() + if not db: + return False + + job_status, _ = db.get_doc_by_id("job_status", dedup_hash) + return job_status is not None + + async def run_packing(self, dedup_hash, recipe=None, config=None, body=None): + self.update_job_status(dedup_hash, "RUNNING") try: # Pack JSON recipe in body if provided, otherwise use recipe path - pack(recipe=(body if body else recipe), config_path=config, docker=True) + pack(recipe=(body if body else recipe), config_path=config, docker=True, hash=dedup_hash) except Exception as e: - self.update_job_status(job_id, "FAILED", error_message=str(e)) + self.update_job_status(dedup_hash, "FAILED", error_message=str(e)) - def update_job_status(self, job_id, status, result_path=None, error_message=None): - handler = DATABASE_IDS.handlers().get("firebase") - initialized_db = handler( - default_db="staging" - ) - if initialized_db._initialized: - db_uploader = DBUploader(initialized_db) - db_uploader.upload_job_status(job_id, status, result_path, error_message) + def update_job_status(self, dedup_hash, status, result_path=None, error_message=None): + db = self._get_firebase_handler() + if db: + db_uploader = DBUploader(db) + db_uploader.upload_job_status(dedup_hash, status, result_path, error_message) async def hello_world(self, request: web.Request) -> web.Response: return web.Response(text="Hello from the cellPACK server") async def health_check(self, request: web.Request) -> web.Response: - # healthcheck 
endpoint needed for AWS load balancer + # health check endpoint needed for AWS load balancer return web.Response() async def pack_handler(self, request: web.Request) -> web.Response: @@ -48,10 +57,14 @@ async def pack_handler(self, request: web.Request) -> web.Response: "Pack requests must include recipe as a query param" ) config = request.rel_url.query.get("config") - job_id = str(uuid.uuid4()) + + dedup_hash = DataDoc.generate_hash(body) + + if self.job_exists(dedup_hash): + return web.json_response({"jobId": dedup_hash}) # Initiate packing task to run in background - packing_task = asyncio.create_task(self.run_packing(job_id, recipe, config, body)) + packing_task = asyncio.create_task(self.run_packing(dedup_hash, recipe, config, body)) # Keep track of task references to prevent them from being garbage # collected, then discard after task completion @@ -60,7 +73,7 @@ async def pack_handler(self, request: web.Request) -> web.Response: # return job id immediately, rather than wait for task to complete, # to avoid timeout issues with API gateway - return web.json_response({"jobId": job_id}) + return web.json_response({"jobId": dedup_hash}) async def init_app() -> web.Application: @@ -75,4 +88,4 @@ async def init_app() -> web.Application: ) return app -web.run_app(init_app(), host="0.0.0.0", port=SERVER_PORT) \ No newline at end of file +web.run_app(init_app(), host="0.0.0.0", port=SERVER_PORT) From 79e77e83c3749d3d8f51640a3497dac2e39e634b Mon Sep 17 00:00:00 2001 From: Alli <111383930+ascibisz@users.noreply.github.com> Date: Wed, 4 Feb 2026 08:47:47 -0800 Subject: [PATCH 21/28] Only upload simularium file once (#446) * proposal: get hash from recipe loader * simplify get_ dedup_hash * only post simularium results file once for server job runs * update code for rebase * code cleanup --------- Co-authored-by: Ruge Li --- cellpack/autopack/DBRecipeHandler.py | 12 +++--- .../upy/simularium/simularium_helper.py | 42 ++++++++----------- 2 files changed, 24 insertions(+), 
30 deletions(-) diff --git a/cellpack/autopack/DBRecipeHandler.py b/cellpack/autopack/DBRecipeHandler.py index 00fc8bb7..3eb691d5 100644 --- a/cellpack/autopack/DBRecipeHandler.py +++ b/cellpack/autopack/DBRecipeHandler.py @@ -529,7 +529,7 @@ def upload_config(self, config_data, source_path): self.db.update_doc("configs", id, config_data) return id - def upload_result_metadata(self, file_name, url, dedup_hash=None): + def upload_result_metadata(self, file_name, url): """ Upload the metadata of the result file to the database. """ @@ -543,11 +543,8 @@ def upload_result_metadata(self, file_name, url, dedup_hash=None): "user": username, "timestamp": timestamp, "url": url, - "dedup_hash": dedup_hash, }, ) - if dedup_hash: - self.upload_job_status(dedup_hash, "DONE", result_path=url) def upload_job_status( self, @@ -644,6 +641,7 @@ def upload_packing_results_workflow( self.upload_job_status( dedup_hash, "DONE", + result_path=upload_result.get("simularium_url"), outputs_directory=upload_result.get("outputs_directory"), ) @@ -675,8 +673,11 @@ def upload_outputs_to_s3(self, output_folder, recipe_name, dedup_hash): f"{base_url}/{file_info['s3_key']}" for file_info in upload_result["uploaded_files"] ] + simularium_url = None + for url in public_urls: + if url.endswith(".simularium"): + simularium_url = url outputs_directory = f"https://us-west-2.console.aws.amazon.com/s3/buckets/{bucket_name}/{s3_prefix}/" - logging.info( f"Successfully uploaded {upload_result['total_files']} files to {outputs_directory}" ) @@ -694,6 +695,7 @@ def upload_outputs_to_s3(self, output_folder, recipe_name, dedup_hash): "total_size": upload_result["total_size"], "urls": public_urls, "outputs_directory": outputs_directory, + "simularium_url": simularium_url, } except Exception as e: logging.error(e) diff --git a/cellpack/autopack/upy/simularium/simularium_helper.py b/cellpack/autopack/upy/simularium/simularium_helper.py index 4f934e0e..08179d85 100644 --- 
a/cellpack/autopack/upy/simularium/simularium_helper.py +++ b/cellpack/autopack/upy/simularium/simularium_helper.py @@ -1387,36 +1387,28 @@ def raycast_test(self, obj, start, end, length, **kw): def post_and_open_file(self, file_name, open_results_in_browser, dedup_hash=None): simularium_file = Path(f"{file_name}.simularium") - file_name, url = simulariumHelper.store_result_file( - simularium_file, storage="aws", batch_job_id=dedup_hash - ) - if file_name and url: - simulariumHelper.store_metadata( - file_name, url, db="firebase", dedup_hash=dedup_hash + if dedup_hash is None: + file_name, url = simulariumHelper.store_result_file( + simularium_file, storage="aws" ) - if open_results_in_browser: - simulariumHelper.open_in_simularium(url) + if file_name and url: + simulariumHelper.store_metadata( + file_name, url, db="firebase" + ) + if open_results_in_browser: + simulariumHelper.open_in_simularium(url) @staticmethod def store_result_file( - file_path, storage=None, batch_job_id=None, sub_folder="simularium" + file_path, storage=None, sub_folder="simularium" ): if storage == "aws": handler = DATABASE_IDS.handlers().get(storage) - # if batch_job_id is not None, then we are in a batch job and should use the temp bucket - # TODO: use cellpack-results bucket for batch jobs once we have the correct permissions - if batch_job_id: - initialized_handler = handler( - bucket_name="cellpack-demo", - sub_folder_name=sub_folder, - region_name="us-west-2", - ) - else: - initialized_handler = handler( - bucket_name="cellpack-results", - sub_folder_name=sub_folder, - region_name="us-west-2", - ) + initialized_handler = handler( + bucket_name="cellpack-results", + sub_folder_name=sub_folder, + region_name="us-west-2", + ) file_name, url = initialized_handler.save_file_and_get_url(file_path) if not file_name or not url: db_maintainer = DBMaintenance(initialized_handler) @@ -1426,7 +1418,7 @@ def store_result_file( return file_name, url @staticmethod - def store_metadata(file_name, 
url, db=None, dedup_hash=None): + def store_metadata(file_name, url, db=None): if db == "firebase": handler = DATABASE_IDS.handlers().get(db) initialized_db = handler( @@ -1434,7 +1426,7 @@ def store_metadata(file_name, url, db=None, dedup_hash=None): ) # default to staging for metadata uploads if initialized_db._initialized: db_uploader = DBUploader(initialized_db) - db_uploader.upload_result_metadata(file_name, url, dedup_hash) + db_uploader.upload_result_metadata(file_name, url) else: db_maintainer = DBMaintenance(initialized_db) logging.warning( From 653285e946842613d2976c3a4956536a1b04eb67 Mon Sep 17 00:00:00 2001 From: Ruge Li <91452427+rugeli@users.noreply.github.com> Date: Mon, 9 Feb 2026 10:43:42 -0800 Subject: [PATCH 22/28] Maint/firebase collection cleanup (#448) * remove local metadata writes for auto-pop feature * remove cleanup firebase workflow * remove cleanup firebase code * 1. make doc url a constant 2.remove unused param --- .github/workflows/cleanup-firebase.yml | 22 ------ cellpack/autopack/DBRecipeHandler.py | 70 +------------------ .../interface_objects/default_values.py | 1 - .../upy/simularium/simularium_helper.py | 45 +++--------- cellpack/bin/cleanup_tasks.py | 20 ------ cellpack/bin/upload.py | 7 +- 6 files changed, 14 insertions(+), 151 deletions(-) delete mode 100644 .github/workflows/cleanup-firebase.yml delete mode 100644 cellpack/bin/cleanup_tasks.py diff --git a/.github/workflows/cleanup-firebase.yml b/.github/workflows/cleanup-firebase.yml deleted file mode 100644 index ec939d5b..00000000 --- a/.github/workflows/cleanup-firebase.yml +++ /dev/null @@ -1,22 +0,0 @@ -name: Cleanup Firebase Metadata - -on: - schedule: - - cron: "24 18 * * 1" # Runs at 18:24 UTC every Monday - -jobs: - cleanup: - runs-on: ${{ matrix.os }} - strategy: - matrix: - python-version: [3.11] - os: [ubuntu-latest, windows-latest, macOS-latest] - steps: - - uses: actions/checkout@v4.2.2 - - uses: ./.github/actions/dependencies - - name: Cleanup Firebase 
Metadata - env: - FIREBASE_TOKEN: ${{ secrets.FIREBASE_TOKEN }} - FIREBASE_EMAIL: ${{ secrets.FIREBASE_EMAIL }} - run: | - uv run python cellpack/bin/cleanup_tasks.py diff --git a/cellpack/autopack/DBRecipeHandler.py b/cellpack/autopack/DBRecipeHandler.py index 3eb691d5..8e3aec7f 100644 --- a/cellpack/autopack/DBRecipeHandler.py +++ b/cellpack/autopack/DBRecipeHandler.py @@ -1,7 +1,6 @@ import copy import logging import shutil -from datetime import datetime, timezone from enum import Enum from pathlib import Path @@ -10,7 +9,6 @@ import hashlib import json -import requests from cellpack.autopack.utils import deep_merge @@ -321,36 +319,6 @@ def __init__(self, settings): self.settings = settings -class ResultDoc: - def __init__(self, db): - self.db = db - - def handle_expired_results(self): - """ - Check if the results in the database are expired and delete them if the linked object expired. - """ - current_utc = datetime.now(timezone.utc) - results = self.db.get_all_docs("results") - if results: - for result in results: - result_data = self.db.doc_to_dict(result) - result_age = current_utc - result_data["timestamp"] - if result_age.days > 180 and not self.validate_existence( - result_data["url"] - ): - self.db.delete_doc("results", self.db.doc_id(result)) - logging.info("Results cleanup complete.") - else: - logging.info("No results found in the database.") - - def validate_existence(self, url): - """ - Validate the existence of an S3 object by checking if the URL is accessible. - Returns True if the URL is accessible. - """ - return requests.head(url).status_code == requests.codes.ok - - class DBUploader(object): """ Handles the uploading of data to the database. @@ -529,23 +497,6 @@ def upload_config(self, config_data, source_path): self.db.update_doc("configs", id, config_data) return id - def upload_result_metadata(self, file_name, url): - """ - Upload the metadata of the result file to the database. 
- """ - if self.db: - username = self.db.get_username() - timestamp = self.db.create_timestamp() - self.db.update_or_create( - "results", - file_name, - { - "user": username, - "timestamp": timestamp, - "url": url, - }, - ) - def upload_job_status( self, dedup_hash, @@ -887,23 +838,4 @@ def compile_db_recipe_data(db_recipe_data, obj_dict, grad_dict, comp_dict): return recipe_data -class DBMaintenance(object): - """ - Handles the maintenance of the database. - """ - - def __init__(self, db_handler): - self.db = db_handler - self.result_doc = ResultDoc(self.db) - - def cleanup_results(self): - """ - Check if the results in the database are expired and delete them if the linked object expired. - """ - self.result_doc.handle_expired_results() - - def readme_url(self): - """ - Return the URL to the README file for the database setup section. - """ - return "https://github.com/mesoscope/cellpack?tab=readme-ov-file#introduction-to-remote-databases" +DB_SETUP_README_URL = "https://github.com/mesoscope/cellpack?tab=readme-ov-file#introduction-to-remote-databases" diff --git a/cellpack/autopack/interface_objects/default_values.py b/cellpack/autopack/interface_objects/default_values.py index bbdbc452..969eaa6a 100644 --- a/cellpack/autopack/interface_objects/default_values.py +++ b/cellpack/autopack/interface_objects/default_values.py @@ -9,7 +9,6 @@ "objects", "gradients", "recipes", - "results", "configs", "recipes_edited", ] diff --git a/cellpack/autopack/upy/simularium/simularium_helper.py b/cellpack/autopack/upy/simularium/simularium_helper.py index 08179d85..87c616e1 100644 --- a/cellpack/autopack/upy/simularium/simularium_helper.py +++ b/cellpack/autopack/upy/simularium/simularium_helper.py @@ -22,7 +22,7 @@ from simulariumio.cellpack import HAND_TYPE, CellpackConverter from simulariumio.constants import DISPLAY_TYPE, VIZ_TYPE -from cellpack.autopack.DBRecipeHandler import DBMaintenance, DBUploader +from cellpack.autopack.DBRecipeHandler import DB_SETUP_README_URL from 
cellpack.autopack.interface_objects.database_ids import DATABASE_IDS from cellpack.autopack.upy import hostHelper from cellpack.autopack.upy.simularium.plots import PlotData @@ -1388,20 +1388,12 @@ def raycast_test(self, obj, start, end, length, **kw): def post_and_open_file(self, file_name, open_results_in_browser, dedup_hash=None): simularium_file = Path(f"{file_name}.simularium") if dedup_hash is None: - file_name, url = simulariumHelper.store_result_file( - simularium_file, storage="aws" - ) - if file_name and url: - simulariumHelper.store_metadata( - file_name, url, db="firebase" - ) - if open_results_in_browser: - simulariumHelper.open_in_simularium(url) + url = simulariumHelper.store_result_file(simularium_file, storage="aws") + if url and open_results_in_browser: + simulariumHelper.open_in_simularium(url) @staticmethod - def store_result_file( - file_path, storage=None, sub_folder="simularium" - ): + def store_result_file(file_path, storage=None, sub_folder="simularium"): if storage == "aws": handler = DATABASE_IDS.handlers().get(storage) initialized_handler = handler( @@ -1409,30 +1401,13 @@ def store_result_file( sub_folder_name=sub_folder, region_name="us-west-2", ) - file_name, url = initialized_handler.save_file_and_get_url(file_path) - if not file_name or not url: - db_maintainer = DBMaintenance(initialized_handler) - logging.warning( - f"Skipping browser opening, upload credentials not configured. 
For setup instructions see: {db_maintainer.readme_url()}" - ) - return file_name, url - - @staticmethod - def store_metadata(file_name, url, db=None): - if db == "firebase": - handler = DATABASE_IDS.handlers().get(db) - initialized_db = handler( - default_db="staging" - ) # default to staging for metadata uploads - if initialized_db._initialized: - db_uploader = DBUploader(initialized_db) - db_uploader.upload_result_metadata(file_name, url) - else: - db_maintainer = DBMaintenance(initialized_db) + _, url = initialized_handler.save_file_and_get_url(file_path) + if not url: logging.warning( - f"Firebase credentials not found. For setup instructions see: {db_maintainer.readme_url()}. Or try cellPACK web interface: https://cellpack.allencell.org (no setup required)" + f"Skipping browser opening, upload credentials not configured. For setup instructions see: {DB_SETUP_README_URL}" ) - return + return url + return None @staticmethod def open_in_simularium(aws_url): diff --git a/cellpack/bin/cleanup_tasks.py b/cellpack/bin/cleanup_tasks.py deleted file mode 100644 index 08217aa0..00000000 --- a/cellpack/bin/cleanup_tasks.py +++ /dev/null @@ -1,20 +0,0 @@ -from cellpack.autopack.DBRecipeHandler import DBMaintenance -from cellpack.autopack.interface_objects.database_ids import DATABASE_IDS - - -def run_cleanup(db_id=DATABASE_IDS.FIREBASE): - """ - Performs cleanup operations on expired database entries. 
- This function is executed as part of a scheduled task defined in .github/workflows/cleanup-firebase.yml - - Args: - db_id(str): The database id to use - """ - handler = DATABASE_IDS.handlers().get(db_id) - initialized_db = handler(default_db="staging") - db_maintainer = DBMaintenance(initialized_db) - db_maintainer.cleanup_results() - - -if __name__ == "__main__": - run_cleanup() diff --git a/cellpack/bin/upload.py b/cellpack/bin/upload.py index b038b4e4..59fc2104 100644 --- a/cellpack/bin/upload.py +++ b/cellpack/bin/upload.py @@ -3,7 +3,7 @@ import json from cellpack.autopack.FirebaseHandler import FirebaseHandler -from cellpack.autopack.DBRecipeHandler import DBUploader, DBMaintenance +from cellpack.autopack.DBRecipeHandler import DBUploader, DB_SETUP_README_URL from cellpack.autopack.upy.simularium.simularium_helper import simulariumHelper from cellpack.autopack.interface_objects.database_ids import DATABASE_IDS from cellpack.autopack.loaders.config_loader import ConfigLoader @@ -90,7 +90,7 @@ def upload( id, _ = db_handler.upload_data("editable_fields", field) editable_fields_ids.append(id) if output_file: - _, result_url = simulariumHelper.store_result_file( + result_url = simulariumHelper.store_result_file( output_file, storage="aws", sub_folder="client" ) if studio: @@ -105,9 +105,8 @@ def upload( db_handler.upload_data("example_packings", recipe_metadata) else: - db_maintainer = DBMaintenance(db_handler) sys.exit( - f"The selected database is not initialized. Please set up Firebase credentials to upload recipes. Refer to the instructions at {db_maintainer.readme_url()} " + f"The selected database is not initialized. Please set up Firebase credentials to upload recipes. 
Refer to the instructions at {DB_SETUP_README_URL} " ) From 84d13c41abf5b2e506f24a8c937f709f48f7aa9f Mon Sep 17 00:00:00 2001 From: Ruge Li <91452427+rugeli@users.noreply.github.com> Date: Mon, 23 Feb 2026 10:31:13 -0800 Subject: [PATCH 23/28] handle both recipe_path and json body requests (#449) --- docker/server.py | 30 ++++++++++++++++++------------ 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/docker/server.py b/docker/server.py index 74bb20f3..7b0a209d 100644 --- a/docker/server.py +++ b/docker/server.py @@ -1,4 +1,5 @@ import asyncio +import uuid from aiohttp import web from cellpack.autopack.DBRecipeHandler import DataDoc, DBUploader from cellpack.autopack.interface_objects.database_ids import DATABASE_IDS @@ -25,19 +26,19 @@ def job_exists(self, dedup_hash): job_status, _ = db.get_doc_by_id("job_status", dedup_hash) return job_status is not None - async def run_packing(self, dedup_hash, recipe=None, config=None, body=None): - self.update_job_status(dedup_hash, "RUNNING") + async def run_packing(self, job_id, recipe=None, config=None, body=None): + self.update_job_status(job_id, "RUNNING") try: # Pack JSON recipe in body if provided, otherwise use recipe path - pack(recipe=(body if body else recipe), config_path=config, docker=True, hash=dedup_hash) + pack(recipe=(body if body else recipe), config_path=config, docker=True, hash=job_id) except Exception as e: - self.update_job_status(dedup_hash, "FAILED", error_message=str(e)) + self.update_job_status(job_id, "FAILED", error_message=str(e)) - def update_job_status(self, dedup_hash, status, result_path=None, error_message=None): + def update_job_status(self, job_id, status, result_path=None, error_message=None): db = self._get_firebase_handler() if db: db_uploader = DBUploader(db) - db_uploader.upload_job_status(dedup_hash, status, result_path, error_message) + db_uploader.upload_job_status(job_id, status, result_path, error_message) async def hello_world(self, request: web.Request) -> 
web.Response: return web.Response(text="Hello from the cellPACK server") @@ -58,13 +59,18 @@ async def pack_handler(self, request: web.Request) -> web.Response: ) config = request.rel_url.query.get("config") - dedup_hash = DataDoc.generate_hash(body) - - if self.job_exists(dedup_hash): - return web.json_response({"jobId": dedup_hash}) + if body: + dedup_hash = DataDoc.generate_hash(body) + if self.job_exists(dedup_hash): + return web.json_response({"jobId": dedup_hash}) + job_id = dedup_hash + else: + job_id = str(uuid.uuid4()) # Initiate packing task to run in background - packing_task = asyncio.create_task(self.run_packing(dedup_hash, recipe, config, body)) + packing_task = asyncio.create_task( + self.run_packing(job_id, recipe, config, body) + ) # Keep track of task references to prevent them from being garbage # collected, then discard after task completion @@ -73,7 +79,7 @@ async def pack_handler(self, request: web.Request) -> web.Response: # return job id immediately, rather than wait for task to complete, # to avoid timeout issues with API gateway - return web.json_response({"jobId": dedup_hash}) + return web.json_response({"jobId": job_id}) async def init_app() -> web.Application: From d5d359fb3c46ae6b4d5f6ab1c5dcf26375cad67f Mon Sep 17 00:00:00 2001 From: Ruge Li Date: Thu, 5 Mar 2026 14:50:29 -0800 Subject: [PATCH 24/28] first draft diagram --- server_workflow_diagram.md | 102 +++++++++++++++++++++++++++++++++++++ 1 file changed, 102 insertions(+) create mode 100644 server_workflow_diagram.md diff --git a/server_workflow_diagram.md b/server_workflow_diagram.md new file mode 100644 index 00000000..50b80411 --- /dev/null +++ b/server_workflow_diagram.md @@ -0,0 +1,102 @@ +# CellPACK Server Job Workflow Changes + +## Summary of Changes + +The main changes in this PR allow the server to accept recipe JSON directly in the request body, in addition to the existing recipe file path approach. This enables better deduplication and caching of packing jobs. 
+ +## BEFORE: Original Server Workflow +```mermaid +graph TD + A[Client Request] --> B[POST /start-packing] + B --> C{Check recipe param} + C -->|Missing| D[Return 400 Error] + C -->|Present| E[Generate UUID for job_id] + E --> F[Create Background Task] + F --> G[Return job_id immediately] + F --> H[run_packing with recipe path] + H --> I[pack with docker enabled] + I --> J[Load recipe from file path] + J --> K[Execute packing] + K --> L{Packing succeeds?} + L -->|Success| M[S3: Upload outputs to S3] + L -->|Failure| N[Firebase: Update job status to FAILED] + + style A fill:#e1f5fe + style G fill:#c8e6c9 + style M fill:#fff3e0 + style N fill:#ffcdd2 +``` + +## AFTER: Enhanced Server Workflow with JSON Recipe Support +```mermaid +graph TD + A[Client Request] --> B[POST /start-packing] + B --> C{Check inputs} + C -->|No recipe param + No body| D[Return 400 Error] + C -->|Has recipe param + config| E[Generate UUID for job_id] + C -->|Has JSON body + config| F[Generate hash from JSON] + F --> G{Job exists?} + G -->|Yes| H[Return existing job_id] + G -->|No| I[Use hash as job_id] + E --> J[Create Background Task] + I --> J + J --> K[Return job_id immediately] + J --> L[pack with docker enabled and hash] + L --> M{Input type?} + M -->|Recipe path| N[Load recipe from file path] + M -->|JSON body| O[Load recipe from JSON dict] + N --> P[Execute packing in pack function] + O --> P + P --> Q{Packing succeeds?} + Q -->|Success| R[S3: Upload outputs to S3] + Q -->|Failure| S[Firebase: Update job status to FAILED] + + style A fill:#e1f5fe + style K fill:#c8e6c9 + style R fill:#fff3e0 + style S fill:#ffcdd2 + style G fill:#ffeb3b + style H fill:#4caf50 +``` + +## Key Server Improvements + +### 1. **Deduplication & Caching** +- **BEFORE**: Each request generated a unique UUID, no deduplication possible +- **AFTER**: JSON recipes generate deterministic hash, enabling job deduplication + +### 2. 
**Input Flexibility** +- **BEFORE**: Only recipe file paths supported via query parameter +- **AFTER**: Supports both recipe file paths AND direct JSON recipe objects in request body, plus optional config parameter + +### 3. **Job Tracking** +- **BEFORE**: Generated UUID for each job without deduplication +- **AFTER**: Uses deterministic hash for JSON recipes, enabling job reuse + +### 4. **Smart Job Management** +- **BEFORE**: Every request creates new job regardless of content +- **AFTER**: Identical recipe JSON returns existing job ID if already processed + +## Technical Implementation + +### New Server Components: +1. **`DataDoc.generate_hash()`** - Creates deterministic hash from recipe JSON +2. **`job_exists()`** - Checks if job already completed in Firebase +3. **Enhanced request handling** - Reads JSON from request body +4. **Smart job ID generation** - Uses hash for JSON recipes, UUID for file paths + +### Note: Known Issues +- Error message still says "recipe as a query param" but should mention body JSON is also accepted + +### Request Flow Changes: +1. **Input validation** now checks both query params and request body +2. **Hash-based deduplication** for JSON recipes +3. **Backward compatibility** maintained for file-based recipes +4. **Consistent job tracking** with hash parameter + +## Benefits + +1. **Reduced Server Load**: Identical recipes don't reprocess +2. **Faster Client Response**: Instant return for duplicate JSON requests +3. **Better Resource Utilization**: No redundant compute for same recipes +4. 
**Improved API Design**: JSON recipes easier for programmatic access \ No newline at end of file From d68769950006b0b43f477c0d24323f19a407fdf7 Mon Sep 17 00:00:00 2001 From: ascibisz Date: Fri, 6 Mar 2026 11:58:48 -0800 Subject: [PATCH 25/28] slight wording changes for improved clarity --- server_workflow_diagram.md | 31 +++++++++++++++---------------- 1 file changed, 15 insertions(+), 16 deletions(-) diff --git a/server_workflow_diagram.md b/server_workflow_diagram.md index 50b80411..2d4cfee7 100644 --- a/server_workflow_diagram.md +++ b/server_workflow_diagram.md @@ -8,17 +8,16 @@ The main changes in this PR allow the server to accept recipe JSON directly in t ```mermaid graph TD A[Client Request] --> B[POST /start-packing] - B --> C{Check recipe param} + B --> C{Check for recipe URL param} C -->|Missing| D[Return 400 Error] C -->|Present| E[Generate UUID for job_id] E --> F[Create Background Task] F --> G[Return job_id immediately] - F --> H[run_packing with recipe path] - H --> I[pack with docker enabled] - I --> J[Load recipe from file path] + F --> I[Initiate packing] + I --> J[Load recipe from firebase, using file path from URL param] J --> K[Execute packing] K --> L{Packing succeeds?} - L -->|Success| M[S3: Upload outputs to S3] + L -->|Success| M[S3: Upload outputs to S3, Firebase: Update job status to SUCCEEDED] L -->|Failure| N[Firebase: Update job status to FAILED] style A fill:#e1f5fe @@ -32,23 +31,23 @@ graph TD graph TD A[Client Request] --> B[POST /start-packing] B --> C{Check inputs} - C -->|No recipe param + No body| D[Return 400 Error] - C -->|Has recipe param + config| E[Generate UUID for job_id] - C -->|Has JSON body + config| F[Generate hash from JSON] - F --> G{Job exists?} - G -->|Yes| H[Return existing job_id] + C -->|No recipe - no URL param and no request body| D[Return 400 Error] + C -->|Has recipe path URL param| E[Generate UUID for job_id] + C -->|Has recipe JSON in request body| F[Generate hash from JSON] + F --> G{Packing result 
exists in firebase for this hash?} + G -->|Yes| H[Return existing hash as job_id] G -->|No| I[Use hash as job_id] E --> J[Create Background Task] I --> J J --> K[Return job_id immediately] - J --> L[pack with docker enabled and hash] + J --> L[Initiate packing] L --> M{Input type?} - M -->|Recipe path| N[Load recipe from file path] - M -->|JSON body| O[Load recipe from JSON dict] - N --> P[Execute packing in pack function] + M -->|Recipe path| N[Load recipe from firebase, using file path from URL param] + M -->|JSON body| O[Load recipe from JSON dict, from request body] + N --> P[Execute packing] O --> P P --> Q{Packing succeeds?} - Q -->|Success| R[S3: Upload outputs to S3] + Q -->|Success| R[S3: Upload outputs to S3, Firebase: Update job status to SUCCEEDED] Q -->|Failure| S[Firebase: Update job status to FAILED] style A fill:#e1f5fe @@ -56,7 +55,7 @@ graph TD style R fill:#fff3e0 style S fill:#ffcdd2 style G fill:#ffeb3b - style H fill:#4caf50 + style H fill:#c8e6c9 ``` ## Key Server Improvements From fc7896ccf19f468321bbbc18a135492cae816c04 Mon Sep 17 00:00:00 2001 From: ascibisz Date: Fri, 6 Mar 2026 12:03:42 -0800 Subject: [PATCH 26/28] add break lines --- server_workflow_diagram.md | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/server_workflow_diagram.md b/server_workflow_diagram.md index 2d4cfee7..7254e672 100644 --- a/server_workflow_diagram.md +++ b/server_workflow_diagram.md @@ -14,10 +14,10 @@ graph TD E --> F[Create Background Task] F --> G[Return job_id immediately] F --> I[Initiate packing] - I --> J[Load recipe from firebase, using file path from URL param] + I --> J[Load recipe from firebase
using file path from URL param] J --> K[Execute packing] K --> L{Packing succeeds?} - L -->|Success| M[S3: Upload outputs to S3, Firebase: Update job status to SUCCEEDED] + L -->|Success| M[S3: Upload outputs to S3
Firebase: Update job status to SUCCEEDED] L -->|Failure| N[Firebase: Update job status to FAILED] style A fill:#e1f5fe @@ -35,19 +35,19 @@ graph TD C -->|Has recipe path URL param| E[Generate UUID for job_id] C -->|Has recipe JSON in request body| F[Generate hash from JSON] F --> G{Packing result exists in firebase for this hash?} - G -->|Yes| H[Return existing hash as job_id] + G -->|Yes| H[Return existing hash
as job_id] G -->|No| I[Use hash as job_id] E --> J[Create Background Task] I --> J J --> K[Return job_id immediately] J --> L[Initiate packing] L --> M{Input type?} - M -->|Recipe path| N[Load recipe from firebase, using file path from URL param] - M -->|JSON body| O[Load recipe from JSON dict, from request body] + M -->|Recipe path| N[Load recipe from firebase
using file path from URL param] + M -->|JSON body| O[Load recipe from JSON dict
from request body] N --> P[Execute packing] O --> P P --> Q{Packing succeeds?} - Q -->|Success| R[S3: Upload outputs to S3, Firebase: Update job status to SUCCEEDED] + Q -->|Success| R[S3: Upload outputs to S3
Firebase: Update job status to SUCCEEDED] Q -->|Failure| S[Firebase: Update job status to FAILED] style A fill:#e1f5fe From 5eb8431552cc743c3a1714e3cc4f0c7a0093e81f Mon Sep 17 00:00:00 2001 From: ascibisz Date: Wed, 11 Mar 2026 13:35:06 -0700 Subject: [PATCH 27/28] make sure all words show up using breaklines --- server_workflow_diagram.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/server_workflow_diagram.md b/server_workflow_diagram.md index 7254e672..8489e1c9 100644 --- a/server_workflow_diagram.md +++ b/server_workflow_diagram.md @@ -14,7 +14,7 @@ graph TD E --> F[Create Background Task] F --> G[Return job_id immediately] F --> I[Initiate packing] - I --> J[Load recipe from firebase
using file path from URL param] + I --> J[Load recipe from firebase
using file path from
URL param] J --> K[Execute packing] K --> L{Packing succeeds?} L -->|Success| M[S3: Upload outputs to S3
Firebase: Update job status to SUCCEEDED] @@ -31,10 +31,10 @@ graph TD graph TD A[Client Request] --> B[POST /start-packing] B --> C{Check inputs} - C -->|No recipe - no URL param and no request body| D[Return 400 Error] + C -->|No recipe - no URL param
and no request body| D[Return 400 Error] C -->|Has recipe path URL param| E[Generate UUID for job_id] C -->|Has recipe JSON in request body| F[Generate hash from JSON] - F --> G{Packing result exists in firebase for this hash?} + F --> G{Packing result exists
in firebase for this hash?} G -->|Yes| H[Return existing hash
as job_id] G -->|No| I[Use hash as job_id] E --> J[Create Background Task] @@ -42,7 +42,7 @@ graph TD J --> K[Return job_id immediately] J --> L[Initiate packing] L --> M{Input type?} - M -->|Recipe path| N[Load recipe from firebase
using file path from URL param] + M -->|Recipe path| N[Load recipe from firebase
using file path from
URL param] M -->|JSON body| O[Load recipe from JSON dict
from request body] N --> P[Execute packing] O --> P From a69adfc5af2dcb495f9e52344e428091261de2d8 Mon Sep 17 00:00:00 2001 From: ascibisz Date: Wed, 11 Mar 2026 13:43:30 -0700 Subject: [PATCH 28/28] Slight improvements to wording --- server_workflow_diagram.md | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/server_workflow_diagram.md b/server_workflow_diagram.md index 8489e1c9..d950a0bf 100644 --- a/server_workflow_diagram.md +++ b/server_workflow_diagram.md @@ -64,17 +64,21 @@ graph TD - **BEFORE**: Each request generated a unique UUID, no deduplication possible - **AFTER**: JSON recipes generate deterministic hash, enabling job deduplication -### 2. **Input Flexibility** +### 2. **Input Flexibility & Backwards Compatibility** - **BEFORE**: Only recipe file paths supported via query parameter - **AFTER**: Supports both recipe file paths AND direct JSON recipe objects in request body, plus optional config parameter -### 3. **Job Tracking** -- **BEFORE**: Generated UUID for each job without deduplication -- **AFTER**: Uses deterministic hash for JSON recipes, enabling job reuse +### 3. **Smart Job Management** +- **BEFORE**: Generated UUID for each job without deduplication, every request creates new job regardless of content +- **AFTER**: Uses deterministic hash for JSON recipes, enabling job reuse for identical recipes -### 4. **Smart Job Management** -- **BEFORE**: Every request creates new job regardless of content -- **AFTER**: Identical recipe JSON returns existing job ID if already processed +### 4. 
**Firebase Request Reduction**
+- **BEFORE**: Every edited recipe was uploaded to firebase by the client and downloaded from firebase by the server
+- **AFTER**: Edited recipes are passed in the body of the packing request, so no firebase uploads or downloads occur
+
+### 5. **Unified Results Upload**
+- **BEFORE**: Simularium result file was uploaded to S3 twice per job, once on its own and once as part of the full output files upload
+- **AFTER**: Only upload Simularium result file once by keeping track of its path when we upload all output files
 
 ## Technical Implementation
 
@@ -84,9 +88,6 @@ graph TD
 3. **Enhanced request handling** - Reads JSON from request body
 4. **Smart job ID generation** - Uses hash for JSON recipes, UUID for file paths
 
-### Note: Known Issues
-- Error message still says "recipe as a query param" but should mention body JSON is also accepted
-
 ### Request Flow Changes:
 1. **Input validation** now checks both query params and request body
 2. **Hash-based deduplication** for JSON recipes
 3. **Backward compatibility** maintained for file-based recipes
 4. **Consistent job tracking** with hash parameter
 
 ## Benefits
 
 1. **Reduced Server Load**: Identical recipes don't reprocess
 2. **Faster Client Response**: Instant return for duplicate JSON requests
 3. **Better Resource Utilization**: No redundant compute for same recipes
-4. **Improved API Design**: JSON recipes easier for programmatic access
\ No newline at end of file
+4. **Improved API Design**: JSON recipes easier for programmatic access
+5. **Reduced Firebase Usage**: Passing recipe directly instead of uploading to firebase
\ No newline at end of file