Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
78ca86d
add upload script
ascibisz Oct 22, 2025
78b9b0a
add example data and more documentation
ascibisz Oct 22, 2025
a9b056b
point to correct collection
ascibisz Oct 22, 2025
f5f7a69
have server accept recipe as json object in body of request
ascibisz Oct 22, 2025
f87915a
update documentation
ascibisz Oct 22, 2025
1f2d2e3
remove accidental dockerfile changes
ascibisz Oct 22, 2025
bd8ec42
rename param json_recipe
ascibisz Oct 23, 2025
358158e
remove file that shouldn't be in this PR
ascibisz Jan 9, 2026
f0beaa1
remove accidental file
ascibisz Jan 9, 2026
a54ffa1
lint fixes
ascibisz Jan 9, 2026
3d01db3
refactor to try to improve clarity of json recipe vs file path
ascibisz Jan 21, 2026
529e15b
lint fixes
ascibisz Jan 21, 2026
63514c9
lint fix
ascibisz Jan 21, 2026
b2440cd
minimize changeset
ascibisz Jan 21, 2026
470e3a1
minimize changeset
ascibisz Jan 21, 2026
8a34898
simplify changeset
ascibisz Jan 21, 2026
45d438a
code cleanup
ascibisz Jan 22, 2026
c8fe120
minimize changeset
ascibisz Jan 22, 2026
ecc645d
remove trailing comma
ascibisz Jan 22, 2026
17ba17c
Feature/firebase lookup (#445)
rugeli Jan 29, 2026
79e77e8
Only upload simularium file once (#446)
ascibisz Feb 4, 2026
653285e
Maint/firebase collection cleanup (#448)
rugeli Feb 9, 2026
84d13c4
handle both recipe_path and json body requests (#449)
rugeli Feb 23, 2026
d5d359f
first draft diagram
rugeli Mar 5, 2026
d687699
slight wording changes for improved clarity
ascibisz Mar 6, 2026
fc7896c
add break lines
ascibisz Mar 6, 2026
5eb8431
make sure all words show up using breaklines
ascibisz Mar 11, 2026
a69adfc
Slight improvements to wording
ascibisz Mar 11, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 0 additions & 22 deletions .github/workflows/cleanup-firebase.yml

This file was deleted.

159 changes: 44 additions & 115 deletions cellpack/autopack/DBRecipeHandler.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
import copy
import logging
import shutil
from datetime import datetime, timezone
from enum import Enum
from pathlib import Path

Expand All @@ -10,7 +9,6 @@

import hashlib
import json
import requests

from cellpack.autopack.utils import deep_merge

Expand Down Expand Up @@ -321,36 +319,6 @@ def __init__(self, settings):
self.settings = settings


class ResultDoc:
    """
    Maintenance helper for the "results" collection of a database handler.
    """

    def __init__(self, db):
        # db: a handler exposing get_all_docs / doc_to_dict / delete_doc /
        # doc_id (same duck-typed contract used elsewhere in this module)
        self.db = db

    def handle_expired_results(self):
        """
        Check if the results in the database are expired and delete them
        if the linked object expired.

        A result is deleted only when it is older than 180 days AND its
        linked S3 object can no longer be reached.
        """
        current_utc = datetime.now(timezone.utc)
        results = self.db.get_all_docs("results")
        if not results:
            logging.info("No results found in the database.")
            return
        for result in results:
            result_data = self.db.doc_to_dict(result)
            # NOTE(review): assumes the stored timestamp is timezone-aware;
            # subtracting a naive datetime from current_utc would raise —
            # confirm against the writer (upload_result_metadata).
            result_age = current_utc - result_data["timestamp"]
            if result_age.days > 180 and not self.validate_existence(
                result_data["url"]
            ):
                self.db.delete_doc("results", self.db.doc_id(result))
        logging.info("Results cleanup complete.")

    def validate_existence(self, url):
        """
        Validate the existence of an S3 object by checking if the URL is
        accessible. Returns True if the URL is accessible.

        On network errors the object is conservatively reported as existing,
        so the cleanup loop never deletes metadata it could not verify and
        never crashes on a transient outage.
        """
        try:
            # timeout prevents the cleanup job from hanging on a dead endpoint
            return requests.head(url, timeout=10).status_code == requests.codes.ok
        except requests.RequestException:
            return True


class DBUploader(object):
"""
Handles the uploading of data to the database.
Expand Down Expand Up @@ -529,42 +497,34 @@ def upload_config(self, config_data, source_path):
self.db.update_doc("configs", id, config_data)
return id

def upload_result_metadata(self, file_name, url, job_id=None):
    """
    Upload the metadata of the result file to the database.

    Parameters
    ----------
    file_name : str
        Document id under which the metadata is stored in "results".
    url : str
        Public URL of the uploaded result file.
    job_id : optional
        Batch job identifier; when given, the job's status is also
        marked DONE with `url` as its result path.
    """
    # No-op when no database handler was configured.
    if self.db:
        username = self.db.get_username()
        timestamp = self.db.create_timestamp()
        # update_or_create keys on file_name, so re-uploading the same
        # file overwrites its previous metadata rather than duplicating it
        self.db.update_or_create(
            "results",
            file_name,
            {
                "user": username,
                "timestamp": timestamp,
                "url": url,
                "batch_job_id": job_id,
            },
        )
        if job_id:
            self.upload_job_status(job_id, "DONE", result_path=url)

def upload_job_status(self, job_id, status, result_path=None, error_message=None):
def upload_job_status(
self,
dedup_hash,
status,
result_path=None,
error_message=None,
outputs_directory=None,
):
"""
Update status for a given job ID
Update status for a given dedup_hash
"""
if self.db:
timestamp = self.db.create_timestamp()
self.db.update_or_create(
"job_status",
job_id,
{
"timestamp": timestamp,
"status": str(status),
"result_path": result_path,
"error_message": error_message,
},
)
db_handler = self.db
# If db is AWSHandler, switch to firebase handler for job status updates
if hasattr(self.db, "s3_client"):
handler = DATABASE_IDS.handlers().get(DATABASE_IDS.FIREBASE)
db_handler = handler(default_db="staging")
timestamp = db_handler.create_timestamp()
data = {
"timestamp": timestamp,
"status": str(status),
"error_message": error_message,
}
if result_path:
data["result_path"] = result_path
if outputs_directory:
data["outputs_directory"] = outputs_directory
db_handler.update_or_create("job_status", dedup_hash, data)

def save_recipe_and_config_to_output(self, output_folder, config_data, recipe_data):
output_path = Path(output_folder)
Expand All @@ -583,15 +543,15 @@ def upload_packing_results_workflow(
self,
source_folder,
recipe_name,
job_id,
dedup_hash,
config_data,
recipe_data,
):
"""
Complete packing results upload workflow including folder preparation and s3 upload
"""
try:
if job_id:
if dedup_hash:

source_path = Path(source_folder)
if not source_path.exists():
Expand All @@ -601,7 +561,7 @@ def upload_packing_results_workflow(

# prepare unique S3 upload folder
parent_folder = source_path.parent
unique_folder_name = f"{source_path.name}_run_{job_id}"
unique_folder_name = f"{source_path.name}_run_{dedup_hash}"
s3_upload_folder = parent_folder / unique_folder_name

logging.debug(f"outputs will be copied to: {s3_upload_folder}")
Expand All @@ -618,7 +578,7 @@ def upload_packing_results_workflow(
upload_result = self.upload_outputs_to_s3(
output_folder=s3_upload_folder,
recipe_name=recipe_name,
job_id=job_id,
dedup_hash=dedup_hash,
)

# clean up temporary folder after upload
Expand All @@ -628,9 +588,12 @@ def upload_packing_results_workflow(
f"Cleaned up temporary upload folder: {s3_upload_folder}"
)

# update outputs directory in firebase
self.update_outputs_directory(
job_id, upload_result.get("outputs_directory")
# update outputs directory in job status
self.upload_job_status(
dedup_hash,
"DONE",
result_path=upload_result.get("simularium_url"),
outputs_directory=upload_result.get("outputs_directory"),
)

return upload_result
Expand All @@ -639,15 +602,15 @@ def upload_packing_results_workflow(
logging.error(e)
return {"success": False, "error": e}

def upload_outputs_to_s3(self, output_folder, recipe_name, job_id):
def upload_outputs_to_s3(self, output_folder, recipe_name, dedup_hash):
"""
Upload packing outputs to S3 bucket
"""

bucket_name = self.db.bucket_name
region_name = self.db.region_name
sub_folder_name = self.db.sub_folder_name
s3_prefix = f"{sub_folder_name}/{recipe_name}/{job_id}"
s3_prefix = f"{sub_folder_name}/{recipe_name}/{dedup_hash}"

try:
upload_result = self.db.upload_directory(
Expand All @@ -661,8 +624,11 @@ def upload_outputs_to_s3(self, output_folder, recipe_name, job_id):
f"{base_url}/{file_info['s3_key']}"
for file_info in upload_result["uploaded_files"]
]
simularium_url = None
for url in public_urls:
if url.endswith(".simularium"):
simularium_url = url
outputs_directory = f"https://us-west-2.console.aws.amazon.com/s3/buckets/{bucket_name}/{s3_prefix}/"

logging.info(
f"Successfully uploaded {upload_result['total_files']} files to {outputs_directory}"
)
Expand All @@ -671,7 +637,7 @@ def upload_outputs_to_s3(self, output_folder, recipe_name, job_id):

return {
"success": True,
"run_id": job_id,
"dedup_hash": dedup_hash,
"s3_bucket": bucket_name,
"s3_prefix": s3_prefix,
"public_url_base": f"{base_url}/{s3_prefix}/",
Expand All @@ -680,30 +646,12 @@ def upload_outputs_to_s3(self, output_folder, recipe_name, job_id):
"total_size": upload_result["total_size"],
"urls": public_urls,
"outputs_directory": outputs_directory,
"simularium_url": simularium_url,
}
except Exception as e:
logging.error(e)
return {"success": False, "error": e}

def update_outputs_directory(self, job_id, outputs_directory):
    """
    Record the S3 outputs directory for a job in the "job_status" collection.

    Job status lives in Firebase, so when the active handler is an AWS/S3
    handler (or there is no handler at all) a Firebase handler is created
    for this one write.
    """
    # getattr with a default avoids an AttributeError on handlers that have
    # no `s3_client` attribute (e.g. the Firebase handler).
    if not self.db or getattr(self.db, "s3_client", None):
        # switch to firebase handler to update job status
        handler = DATABASE_IDS.handlers().get(DATABASE_IDS.FIREBASE)
        initialized_db = handler(default_db="staging")
        if job_id:
            timestamp = initialized_db.create_timestamp()
            initialized_db.update_or_create(
                "job_status",
                job_id,
                {
                    "timestamp": timestamp,
                    "outputs_directory": outputs_directory,
                },
            )
            logging.debug(
                f"Updated outputs s3 location {outputs_directory} for job ID: {job_id}"
            )


class DBRecipeLoader(object):
"""
Expand Down Expand Up @@ -890,23 +838,4 @@ def compile_db_recipe_data(db_recipe_data, obj_dict, grad_dict, comp_dict):
return recipe_data


class DBMaintenance(object):
    """
    Handles the maintenance of the database.
    """

    def __init__(self, db_handler):
        self.db = db_handler
        self.result_doc = ResultDoc(self.db)

    def cleanup_results(self):
        """
        Delete expired result entries whose linked objects no longer exist,
        delegating the scan to the wrapped ResultDoc.
        """
        self.result_doc.handle_expired_results()

    def readme_url(self):
        """
        Return the URL of the remote-database setup section of the README.
        """
        return (
            "https://github.com/mesoscope/cellpack"
            "?tab=readme-ov-file#introduction-to-remote-databases"
        )
DB_SETUP_README_URL = "https://github.com/mesoscope/cellpack?tab=readme-ov-file#introduction-to-remote-databases"
1 change: 0 additions & 1 deletion cellpack/autopack/interface_objects/default_values.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@
"objects",
"gradients",
"recipes",
"results",
"configs",
"recipes_edited",
]
30 changes: 26 additions & 4 deletions cellpack/autopack/loaders/recipe_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,14 +30,21 @@ class RecipeLoader(object):
# TODO: add all default values here
default_values = default_recipe_values.copy()

def __init__(self, input_file_path, save_converted_recipe=False, use_docker=False):
def __init__(
self,
input_file_path,
save_converted_recipe=False,
use_docker=False,
json_recipe=None,
):
_, file_extension = os.path.splitext(input_file_path)
self.current_version = CURRENT_VERSION
self.file_path = input_file_path
self.file_extension = file_extension
self.ingredient_list = []
self.compartment_list = []
self.save_converted_recipe = save_converted_recipe
self.json_recipe = json_recipe

# set CURRENT_RECIPE_PATH appropriately for remote(firebase) vs local recipes
if autopack.is_remote_path(self.file_path):
Expand All @@ -49,6 +56,15 @@ def __init__(self, input_file_path, save_converted_recipe=False, use_docker=Fals

self.recipe_data = self._read(use_docker=use_docker)

@classmethod
def from_json(cls, json_recipe, save_converted_recipe=False, use_docker=False):
    """
    Alternate constructor: build a RecipeLoader from an in-memory JSON
    recipe object instead of a file path.

    Parameters
    ----------
    json_recipe : dict
        The recipe data, as would otherwise be loaded from a file.
    save_converted_recipe : bool
        Forwarded to the standard constructor.
    use_docker : bool
        Forwarded to the standard constructor.
    """
    # An empty input_file_path makes _read() use json_recipe directly
    # instead of calling autopack.load_file.
    return cls(
        input_file_path="",
        save_converted_recipe=save_converted_recipe,
        use_docker=use_docker,
        json_recipe=json_recipe,
    )

@staticmethod
def _resolve_object(key, objects):
current_object = objects[key]
Expand Down Expand Up @@ -168,9 +184,15 @@ def _migrate_version(self, old_recipe):
)

def _read(self, resolve_inheritance=True, use_docker=False):
new_values, database_name, is_unnested_firebase = autopack.load_file(
self.file_path, cache="recipes", use_docker=use_docker
)
database_name = None
is_unnested_firebase = False
new_values = self.json_recipe
if new_values is None:
# Read recipe from filepath
new_values, database_name, is_unnested_firebase = autopack.load_file(
self.file_path, cache="recipes", use_docker=use_docker
)

if database_name == "firebase":
if is_unnested_firebase:
objects = new_values.get("objects", {})
Expand Down
Loading
Loading