From 87f4227c081bce914ffb0fcb97f31e1ad48f4cc8 Mon Sep 17 00:00:00 2001
From: Graham Hukill
Date: Fri, 23 Jan 2026 09:56:12 -0500
Subject: [PATCH 1/4] Build CPU and GPU Docker images

Why these changes are being introduced:

We have decided to move to AWS Batch for running this application in a deployed context. AWS Batch is a container-based service and will use the ECR images this repository builds. Put succinctly, we have a CPU pipeline and a GPU pipeline in AWS Batch, and each pipeline needs a slightly different Docker ECR image. We also need to support local development, and the CPU build works equally well for that purpose.

How this addresses that need:

Two Dockerfiles are created: `Dockerfile-cpu` and `Dockerfile-gpu`. They are very similar and may be refactored to extend a common Dockerfile in the future, but they are understandable as-is and successfully build Docker images that work in CPU or GPU enabled contexts.

Other parts of the application have been updated to support the dual Dockerfiles:

- `Makefile`: dedicated Docker build and push commands
- `pyproject.toml`: build-specific dependencies, utilized in the build-specific Dockerfiles
- `.github` workflows: utilize the new `Makefile` commands
- etc.

This PR is a combination of two spike branches, tested and confirmed to build Docker images that target CPU or GPU contexts. There is opportunity to DRY up some of the new code, but we are opting to keep things verbose and explicit until a refactor is warranted.

Side effects of this change:
* This repository now builds two Docker images, with dedicated build commands for CPU and GPU builds and commands that perform both.

Relevant ticket(s):
* https://mitlibraries.atlassian.net/browse/USE-342
---
 .aws-architecture                          |   5 +-
 .github/workflows/prod-deploy.yml          |  56 -------
 .pre-commit-config.yaml                    |   2 +-
 Dockerfile => Dockerfile-cpu               |   6 +-
 Dockerfile-gpu                             |  37 +++++
 Makefile                                   | 130 ++++++++++------
 README.md                                  |  25 ++-
 docs/adrs/01-parallel-builds-both-archs.md |  19 +++
 pyproject.toml                             |  14 +-
 uv.lock                                    | 171 +++++++++------------
 10 files changed, 250 insertions(+), 215 deletions(-)
 delete mode 100644 .github/workflows/prod-deploy.yml
 rename Dockerfile => Dockerfile-cpu (83%)
 create mode 100644 Dockerfile-gpu
 create mode 100644 docs/adrs/01-parallel-builds-both-archs.md

diff --git a/.aws-architecture b/.aws-architecture
index ed3f99e..1ba2103 100644
--- a/.aws-architecture
+++ b/.aws-architecture
@@ -1 +1,4 @@
-linux/arm64
\ No newline at end of file
+{
+  "gpu": "linux/amd64",
+  "cpu": "linux/arm64"
+}
\ No newline at end of file

diff --git a/.github/workflows/prod-deploy.yml b/.github/workflows/prod-deploy.yml
deleted file mode 100644
index 276c882..0000000
--- a/.github/workflows/prod-deploy.yml
+++ /dev/null
@@ -1,56 +0,0 @@
-### This is the Terraform-generated prod-promote.yml workflow for the ###
-### timdex-embeddings-prod repository. ###
-### If this is a Lambda repo, uncomment the FUNCTION line at the end of ###
-### the document.
### - -name: Prod Container Promote -on: - workflow_dispatch: - release: - types: [published] - -permissions: - id-token: write - contents: read - -jobs: - prep: - name: Prep for Promote - runs-on: ubuntu-latest - outputs: - cpuarch: ${{ steps.setarch.outputs.cpuarch }} - steps: - - name: Checkout - uses: actions/checkout@v5 - - - name: Set CPU Architecture - id: setarch - run: | - echo "### :abacus: Architecture Selection" >> $GITHUB_STEP_SUMMARY - if [[ -f .aws-architecture ]]; then - ARCH=$(cat .aws-architecture) - echo "\`$ARCH\` was read from \`.aws-architecture\` and passed to the deploy job." >> $GITHUB_STEP_SUMMARY - else - ARCH="linux/amd64" - echo "No \`.aws-architecture\` file, so default \`$ARCH\` was passed to the deploy job." >> $GITHUB_STEP_SUMMARY - fi - if [[ "$ARCH" != "linux/arm64" && "$ARCH" != "linux/amd64" ]]; then - echo "$ARCH is INVALID architecture!" - echo "$ARCH is INVALID architecture!" >> $GITHUB_STEP_SUMMARY - exit 1 - fi - echo "cpuarch=$ARCH" >> $GITHUB_OUTPUT - - deploy: - needs: prep - name: Deploy - uses: mitlibraries/.github/.github/workflows/ecr-multi-arch-promote-prod.yml@main - secrets: inherit - with: - AWS_REGION: "us-east-1" - GHA_ROLE_STAGE: timdex-embeddings-gha-stage - GHA_ROLE_PROD: timdex-embeddings-gha-prod - ECR_STAGE: "timdex-embeddings-stage" - ECR_PROD: "timdex-embeddings-prod" - CPU_ARCH: ${{ needs.prep.outputs.cpuarch }} - \ No newline at end of file diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index d9f03fa..4d13446 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -24,6 +24,6 @@ repos: types: ["python"] - id: pip-audit name: pip-audit - entry: uv run pip-audit + entry: uv run pip-audit --ignore-vuln CVE-2025-2953 --ignore-vuln CVE-2025-3730 language: system pass_filenames: false \ No newline at end of file diff --git a/Dockerfile b/Dockerfile-cpu similarity index 83% rename from Dockerfile rename to Dockerfile-cpu index ad7f440..21abf6e 100644 --- a/Dockerfile +++ b/Dockerfile-cpu @@ -1,4 +1,4 @@ -FROM python:3.12-slim +FROM public.ecr.aws/deep-learning-containers/pytorch-inference-arm64:2.6.0-cpu-py312-ubuntu22.04-ec2 RUN apt-get update && \ apt-get install -y --no-install-recommends git ca-certificates && \ @@ -10,10 +10,10 @@ ENV UV_SYSTEM_PYTHON=1 WORKDIR /app # Copy project metadata -COPY pyproject.toml uv.lock* ./ +COPY pyproject.toml ./ # Install package into system python -RUN uv pip install --system . +RUN uv pip install --group dlc_arm64_cpu --system . # Copy CLI application COPY embeddings ./embeddings diff --git a/Dockerfile-gpu b/Dockerfile-gpu new file mode 100644 index 0000000..0b2f51f --- /dev/null +++ b/Dockerfile-gpu @@ -0,0 +1,37 @@ +FROM public.ecr.aws/deep-learning-containers/pytorch-inference:2.6.0-gpu-py312-cu124-ubuntu22.04-ec2 + +RUN apt-get update && \ + apt-get install -y --no-install-recommends git ca-certificates && \ + rm -rf /var/lib/apt/lists/* + +COPY --from=ghcr.io/astral-sh/uv:latest /uv /bin/uv +ENV UV_SYSTEM_PYTHON=1 + +WORKDIR /app + +# Copy project metadata +COPY pyproject.toml ./ + +# Install package into system python +RUN uv pip install --group dlc_amd64_gpu --system . + +# Copy CLI application +COPY embeddings ./embeddings + +# Copy fixtures +COPY tests/fixtures /fixtures + +# Set environment variables +# NOTE: The env vars "TE_MODEL_URI" and "TE_MODEL_PATH" are set here to support +# the downloading of the model during image build, but also persist in the container +# and serve to set the default model. 
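+# For example, because these defaults persist, they can be overridden per
+# container at runtime (a sketch; `<image-tag>` is a placeholder for a built tag):
+#   docker run -e TE_TORCH_DEVICE=cpu <image-tag> --verbose download-model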
+ENV PYTHONPATH=/app +ENV HF_HUB_DISABLE_PROGRESS_BARS=true +ENV TE_MODEL_URI=opensearch-project/opensearch-neural-sparse-encoding-doc-v3-gte +ENV TE_MODEL_PATH=/model +ENV TE_TORCH_DEVICE=cuda + +# Download the model and include in the Docker image +RUN embeddings --verbose download-model + +ENTRYPOINT ["embeddings"] diff --git a/Makefile b/Makefile index e69332d..dcaeb1b 100644 --- a/Makefile +++ b/Makefile @@ -1,13 +1,11 @@ SHELL=/bin/bash DATETIME:=$(shell date -u +%Y%m%dT%H%M%SZ) -CPU_ARCH ?= $(shell cat .aws-architecture 2>/dev/null || echo "linux/amd64") ### This is the Terraform-generated header for timdex-embeddings-dev. If ### ### this is a Lambda repo, uncomment the FUNCTION line below ### ### and review the other commented lines in the document. ### ECR_NAME_DEV := timdex-embeddings-dev ECR_URL_DEV := 222053980223.dkr.ecr.us-east-1.amazonaws.com/timdex-embeddings-dev -CPU_ARCH ?= $(shell cat .aws-architecture 2>/dev/null || echo "linux/amd64") ### End of Terraform-generated header ### help: # Preview Makefile commands @@ -22,7 +20,7 @@ help: # Preview Makefile commands ############################################## install: .venv .git/hooks/pre-commit # Install Python dependencies and create virtual environment if not exists - uv sync --dev + uv sync --group dev --group local .venv: # Creates virtual environment if not found @echo "Creating virtual environment at .venv..." @@ -36,7 +34,7 @@ venv: .venv # Create the Python virtual environment update: # Update Python dependencies uv lock --upgrade - uv sync --dev + uv sync --group dev --group local ###################### # Unit test commands @@ -65,7 +63,7 @@ ruff: # Run 'ruff' linter and print a preview of errors uv run ruff check . safety: # Check for security vulnerabilities - uv run pip-audit + uv run pip-audit --ignore-vuln CVE-2025-2953 --ignore-vuln CVE-2025-3730 lint-apply: black-apply ruff-apply # Apply changes with 'black' and resolve 'fixable errors' with 'ruff' @@ -77,57 +75,89 @@ ruff-apply: # Resolve 'fixable errors' with 'ruff' #################################### -# Docker +# Developer Build and Deploy Commands for Dev environment in AWS #################################### -docker-build: # Build local image for testing - docker build --platform $(CPU_ARCH) -t timdex-embeddings:latest . 
-
-docker-shell: # Shell into local container for testing
-	docker run -it --entrypoint='bash' timdex-embeddings:latest
-
-docker-run: # Run main entrypoint + command without arguments
-	docker run timdex-embeddings:latest
-
-
-### Terraform-generated Developer Deploy Commands for Dev environment ###
-check-arch:
-	@ARCH_FILE=".aws-architecture"; \
-	if [[ "$(CPU_ARCH)" != "linux/amd64" && "$(CPU_ARCH)" != "linux/arm64" ]]; then \
-		echo "Invalid CPU_ARCH: $(CPU_ARCH)"; exit 1; \
-	fi; \
-	if [[ -f $$ARCH_FILE ]]; then \
-		echo "latest-$(shell echo $(CPU_ARCH) | cut -d'/' -f2)" > .arch_tag; \
-	else \
-		echo "latest" > .arch_tag; \
-	fi
-dist-dev: check-arch ## Build docker container (intended for developer-based manual build)
-	@ARCH_TAG=$$(cat .arch_tag); \
-	docker buildx inspect $(ECR_NAME_DEV) >/dev/null 2>&1 || docker buildx create --name $(ECR_NAME_DEV) --use; \
+
+# Capture the SHA of the latest local developer commit on the feature branch
+GIT_SHA := $(shell git describe --always)
+
+# For validation testing of the .aws-architecture file
+VALID_ARCH := linux/amd64 linux/arm64
+
+# Extract/set the architecture for GPU builds and non-GPU builds from the
+# .aws-architecture file, defaulting to "linux/amd64" if the key does not
+# exist in the file.
+GPU_ARCH := $(shell jq -r '.gpu // "linux/amd64"' .aws-architecture 2>/dev/null)
+GPU_TAG := $(shell echo $(GPU_ARCH) | cut -d'/' -f2)-gpu
+CPU_ARCH := $(shell jq -r '.cpu // "linux/amd64"' .aws-architecture 2>/dev/null)
+CPU_TAG := $(shell echo $(CPU_ARCH) | cut -d'/' -f2)-cpu
+
+validate-arch: ## Ensure the values parsed from the .aws-architecture file are valid
+	@if [ ! -f .aws-architecture ]; then \
+		echo "WARN: .aws-architecture not found. Using defaults gpu=linux/amd64, cpu=linux/amd64"; \
+	fi
+	@for value in $(GPU_ARCH) $(CPU_ARCH); do \
+		case " $(VALID_ARCH) " in \
+			*" $$value "*) ;; \
+			*) echo "ERROR: Invalid architecture: $$value" >&2; exit 1;; \
+		esac; \
+	done
+	@echo "Validation passed: gpu=$(GPU_ARCH), cpu=$(CPU_ARCH)"
+
+ensure-builder: ## Ensure the buildx builder is ready to go
+	@echo "Prepare the Docker BuildX builder"; \
+	docker buildx inspect $(ECR_NAME_DEV) >/dev/null 2>&1 || docker buildx create --name $(ECR_NAME_DEV) --driver docker-container --use; \
 	docker buildx use $(ECR_NAME_DEV); \
-	docker buildx build --platform $(CPU_ARCH) \
+	docker buildx inspect --bootstrap >/dev/null; \
+	docker buildx prune -af --filter until=24h || true; \
+	echo "BuildX Builder Ready!"
+
+dist-dev-gpu: validate-arch ensure-builder ## Build GPU-enabled docker container (intended for developer-based manual build)
+	@echo "Build GPU-enabled container (for $(GPU_ARCH))"
+	@docker buildx build --platform $(GPU_ARCH) \
+		--file Dockerfile-gpu \
+		--progress=plain \
 		--load \
-		--tag $(ECR_URL_DEV):$$ARCH_TAG \
-		--tag $(ECR_URL_DEV):make-$$ARCH_TAG \
-		--tag $(ECR_URL_DEV):make-$(shell git describe --always) \
-		--tag $(ECR_NAME_DEV):$$ARCH_TAG \
+		--tag $(ECR_URL_DEV):latest-$(GPU_TAG) \
+		--tag $(ECR_URL_DEV):make-$(GIT_SHA)-$(GPU_TAG) \
+		--tag $(ECR_NAME_DEV):latest-$(GPU_TAG) \
 		.
+	@echo "Build for GPU-enabled container is done!"
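+
+# Example developer flow (a sketch; assumes AWS credentials and a local Docker
+# installation are configured, and uses the targets defined in this Makefile):
+#   make dist-dev-gpu        # build the GPU image for $(GPU_ARCH)
+#   make dist-dev-cpu        # build the CPU image for $(CPU_ARCH)
+#   make publish-dev-all     # build, tag, and push both images to Dev ECR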
+
+dist-dev-cpu: validate-arch ensure-builder ## Build non-GPU docker container (intended for developer-based manual build)
+	@echo "Build CPU container (for $(CPU_ARCH))"
+	@docker buildx build --platform $(CPU_ARCH) \
+		--file Dockerfile-cpu \
+		--progress=plain \
+		--load \
+		--tag $(ECR_URL_DEV):latest-$(CPU_TAG) \
+		--tag $(ECR_URL_DEV):make-$(GIT_SHA)-$(CPU_TAG) \
+		--tag $(ECR_NAME_DEV):latest-$(CPU_TAG) \
+		.
+	@echo "Build for CPU container is done!"
+
+dist-dev-all: dist-dev-gpu dist-dev-cpu ## Runs both the GPU and the CPU builds
+
+publish-dev-gpu: dist-dev-gpu ## Build, tag and push GPU-enabled container (intended for developer-based manual publish)
+	@aws ecr get-login-password --region us-east-1 | docker login --username AWS --password-stdin $(ECR_URL_DEV); \
+	docker push $(ECR_URL_DEV):latest-$(GPU_TAG); \
+	docker push $(ECR_URL_DEV):make-$(GIT_SHA)-$(GPU_TAG)
+	@echo "Cleaning up dangling Docker images..."; \
+	docker image prune -f --filter "dangling=true"
+
+publish-dev-cpu: dist-dev-cpu ## Build, tag and push non-GPU container (intended for developer-based manual publish)
+	@aws ecr get-login-password --region us-east-1 | docker login --username AWS --password-stdin $(ECR_URL_DEV); \
+	docker push $(ECR_URL_DEV):latest-$(CPU_TAG); \
+	docker push $(ECR_URL_DEV):make-$(GIT_SHA)-$(CPU_TAG)
+	@echo "Cleaning up dangling Docker images..."; \
+	docker image prune -f --filter "dangling=true"

-publish-dev: dist-dev ## Build, tag and push (intended for developer-based manual publish)
-	@ARCH_TAG=$$(cat .arch_tag); \
-	aws ecr get-login-password --region us-east-1 | docker login --username AWS --password-stdin $(ECR_URL_DEV); \
-	docker push $(ECR_URL_DEV):$$ARCH_TAG; \
-	docker push $(ECR_URL_DEV):make-$$ARCH_TAG; \
-	docker push $(ECR_URL_DEV):make-$(shell git describe --always); \
-	echo "Cleaning up dangling Docker images..."; \
-	docker image prune -f --filter "dangling=true"
+publish-dev-all: publish-dev-gpu publish-dev-cpu ## Publish both images to AWS

 docker-clean: ## Clean up Docker detritus
-	@ARCH_TAG=$$(cat .arch_tag); \
 	echo "Cleaning up Docker leftovers (containers, images, builders)"; \
-	docker rmi -f $(ECR_URL_DEV):$$ARCH_TAG; \
-	docker rmi -f $(ECR_URL_DEV):make-$$ARCH_TAG; \
-	docker rmi -f $(ECR_URL_DEV):make-$(shell git describe --always) || true; \
-	docker rmi -f $(ECR_NAME_DEV):$$ARCH_TAG || true; \
-	docker buildx rm $(ECR_NAME_DEV) || true
-	@rm -rf .arch_tag
+	docker rmi -f $(ECR_URL_DEV):latest-$(GPU_TAG) $(ECR_URL_DEV):latest-$(CPU_TAG) || true; \
+	docker rmi -f $(ECR_URL_DEV):make-$(GIT_SHA)-$(GPU_TAG) $(ECR_URL_DEV):make-$(GIT_SHA)-$(CPU_TAG) || true; \
+	docker rmi -f $(ECR_NAME_DEV):latest-$(GPU_TAG) $(ECR_NAME_DEV):latest-$(CPU_TAG) || true; \
+	docker buildx rm $(ECR_NAME_DEV) || true; \
+	docker buildx prune -af || true
diff --git a/README.md b/README.md
index 170933d..68cf74d 100644
--- a/README.md
+++ b/README.md
@@ -9,7 +9,26 @@ A CLI application for creating embeddings for TIMDEX.
- To update dependencies: `make update`
- To run unit tests: `make test`
- To lint the repo: `make lint`
-- To run the app: `my-app --help` (Note the hyphen `-` vs underscore `_` that matches the `project.scripts` in `pyproject.toml`)
+- To run the app: `embeddings --help`
+  - see below for more details about running the CLI with `.env` files and arguments
+
+### Building Docker Images
+
+This project is unusual in that we have **two distinct** Dockerfiles for building:
+
+- `Dockerfile-cpu`
+  - targets an `arm64` architecture for CPU-only inference
+  - targets AWS Fargate ECS environment
+  - also a good fit for running locally on `arm64` machines
+- `Dockerfile-gpu`
+  - targets an `amd64` architecture for CPU or GPU inference
+  - targets AWS EC2 compute environment
+
+Note the Docker image build commands in the `Makefile`, which allow building a CPU image, a GPU image, or both.
+
+Also note that due to the size of the AWS Deep Learning Container (DLC) base images, these images can be quite large (~4 GB for CPU, ~16 GB for GPU). For successful builds locally, you may need to increase the "Disk usage limit" in your local Docker environment; builds were observed to fail with a 50 GB limit and succeed at 96 GB.
+
+See the following ADR for more background: [01-parallel-builds-both-archs.md](docs/adrs/01-parallel-builds-both-archs.md).

## Environment Variables

@@ -26,13 +45,15 @@ WORKSPACE=### Set to `dev` for local development, this will be set to `stage` an
TE_MODEL_URI=# HuggingFace model URI
TE_MODEL_PATH=# Path where the model will be downloaded to and loaded from
HF_HUB_DISABLE_PROGRESS_BARS=#boolean to use progress bars for HuggingFace model downloads; defaults to 'true' in deployed contexts
-# inference performance tuning
+
TE_TORCH_DEVICE=# defaults to 'cpu', but can be set to 'mps' for Apple Silicon, or theoretically 'cuda' for GPUs
TE_BATCH_SIZE=# batch size for each inference worker, defaults to 32
TE_NUM_WORKERS=# number of parallel model inference workers, defaults to 1
TE_CHUNK_SIZE=# number of batches each parallel worker grabs; no effect if TE_NUM_WORKERS=1
OMP_NUM_THREADS=# torch env var that sets thread usage during inference, default is not setting and using torch defaults
MKL_NUM_THREADS=# torch env var that sets thread usage during inference, default is not setting and using torch defaults
+
+EMBEDDING_BATCH_SIZE=# controls the batch size sent to the model for embedding generation; the primary memory management knob; defaults to 100
```

## Configuring an Embedding Model

diff --git a/docs/adrs/01-parallel-builds-both-archs.md b/docs/adrs/01-parallel-builds-both-archs.md
new file mode 100644
index 0000000..a544870
--- /dev/null
+++ b/docs/adrs/01-parallel-builds-both-archs.md
@@ -0,0 +1,19 @@
+# 1. Parallel Builds for both ARM64 and AMD64
+
+## Status
+
+Proposed
+
+## Context
+
+This application was originally conceived as an ECS Fargate Task, but times have changed. It is now very likely that this app will be run through AWS Batch, sometimes using GPU-enabled host servers and sometimes running as a simple Fargate task.
+
+In AWS Batch, GPU-enabled host machines only run on AMD64 hardware (with NVIDIA GPUs attached). ARM64 (e.g., Graviton) is **not** supported for GPU-enabled EC2 instances. See [Batch EC2 Configuration](https://docs.aws.amazon.com/batch/latest/APIReference/API_Ec2Configuration.html) and [ECS-Optimized AMIs](https://docs.aws.amazon.com/AmazonECS/latest/developerguide/ecs-optimized_AMI.html#gpuami).
For Fargate execution in AWS Batch, ARM64 hardware is more efficient and less expensive.
+
+Our current CD workflows are built to target one CPU architecture or the other (`amd64` or `arm64`), but not both. This will be the first use case for an application that needs separate, parallel builds for both AMD64 and ARM64. We could build just one multi-architecture image, but that would make for a very large image. Additionally, there will likely be different Dockerfiles for each CPU architecture based on how the container should be built for GPU-enabled jobs versus jobs with no GPU.
+
+At this time, it looks like this is the only repository that will need this special treatment for builds, so instead of trying to build a shared workflow in our `.github` repository, we will update the local workflows to do all the work. If, in the future, we need to do something similar for another repository, we can decide to move this into a shared CD workflow. For now, it's okay to keep this as a one-off workflow here.
+
+## Decision
+
+Rebuild the dev, stage, and prod GHA workflows to build both AMD64 and ARM64 containers and push both container images to the ECR Repository in AWS. Additionally, update the `Makefile` commands for `dev` to make it easy to build one or both container images for a developer-based push to Dev1.
diff --git a/pyproject.toml b/pyproject.toml
index 7607b13..a4e3bb1 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -4,7 +4,7 @@
 [project]
 name = "timdex-embeddings"
-version = "1.0.0"
+version = "2.0.0"
 requires-python = ">=3.12"

 dependencies = [
@@ -15,7 +15,6 @@ dependencies = [
     "sentry-sdk>=2.34.1",
     "smart-open[s3]>=7.4.4",
     "timdex-dataset-api",
-    "torch>=2.9.0",
 ]

 [dependency-groups]
@@ -30,6 +29,17 @@ dev = [
     "ruff>=0.12.8",
 ]

+# dependency group for local virtual environment development work
+local = [
+    "torch>=2.6,<2.7",
+]
+
+# dependency group for arm64 / CPU AWS Deep Learning Container (DLC) base image
+dlc_arm64_cpu = []
+
+# dependency group for amd64 / GPU AWS Deep Learning Container (DLC) base image
+dlc_amd64_gpu = []
+
 [tool.black]
 line-length = 90

diff --git a/uv.lock b/uv.lock
index 97324f6..a2dc152 100644
--- a/uv.lock
+++ b/uv.lock
@@ -63,30 +63,30 @@ wheels = [

 [[package]]
 name = "boto3"
-version = "1.42.30"
+version = "1.42.33"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
     { name = "botocore" },
     { name = "jmespath" },
     { name = "s3transfer" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/42/79/2dac8b7cb075cfa43908ee9af3f8ee06880d84b86013854c5cca8945afac/boto3-1.42.30.tar.gz", hash = "sha256:ba9cd2f7819637d15bfbeb63af4c567fcc8a7dcd7b93dd12734ec58601169538", size = 112809, upload-time = "2026-01-16T20:37:23.636Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/d4/c7/695a39a862140dd40637a3dc0020f4f645bb78c47f0d9195db76ed7e1da2/boto3-1.42.33.tar.gz", hash = "sha256:5da0d35dd82451d4520af63f8fcc722537597d7c790035e8b3a8fc53f032be3a", size = 112844, upload-time = "2026-01-22T20:29:15.817Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/52/b3/2c0d828c9f668292e277ca5232e6160dd5b4b660a3f076f20dd5378baa1e/boto3-1.42.30-py3-none-any.whl", hash = "sha256:d7e548bea65e0ae2c465c77de937bc686b591aee6a352d5a19a16bc751e591c1", size = 140573, upload-time = "2026-01-16T20:37:22.089Z" },
+    { url = "https://files.pythonhosted.org/packages/be/93/80aa0c9c5931e72252cbf46162f5b438f040f618bb941aa85bb591c62bc9/boto3-1.42.33-py3-none-any.whl", hash = 
"sha256:81db4a1ef08b3a69b2c5a879e7bd26ee43ca3fd5202cd320a2aaa4f5dd11182c", size = 140574, upload-time = "2026-01-22T20:29:13.531Z" }, ] [[package]] name = "botocore" -version = "1.42.30" +version = "1.42.33" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "jmespath" }, { name = "python-dateutil" }, { name = "urllib3" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/44/38/23862628a0eb044c8b8b3d7a9ad1920b3bfd6bce6d746d5a871e8382c7e4/botocore-1.42.30.tar.gz", hash = "sha256:9bf1662b8273d5cc3828a49f71ca85abf4e021011c1f0a71f41a2ea5769a5116", size = 14891439, upload-time = "2026-01-16T20:37:13.77Z" } +sdist = { url = "https://files.pythonhosted.org/packages/8f/ea/7bfe0902a228b4aa73106e704188189ab0e16e0a0e9598fa2b126ebfe759/botocore-1.42.33.tar.gz", hash = "sha256:ecf48db73605a592b6c7f8f29e517d9eb6cf0c7e004a1fdbd9c192afc7b42b03", size = 14903415, upload-time = "2026-01-22T20:29:04.293Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/3d/8d/6d7b016383b1f74dd93611b1c5078bbaddaca901553ab886dcda87cae365/botocore-1.42.30-py3-none-any.whl", hash = "sha256:97070a438cac92430bb7b65f8ebd7075224f4a289719da4ee293d22d1e98db02", size = 14566340, upload-time = "2026-01-16T20:37:10.94Z" }, + { url = "https://files.pythonhosted.org/packages/2f/58/da9a094c8c2499a19c57f4aedca2d5fb2c88bfb9e2931d87af41309c4521/botocore-1.42.33-py3-none-any.whl", hash = "sha256:156a1ead55c38709730c543eb8085c36098b7baf272fedc67cc4a543ae4b4cf6", size = 14575729, upload-time = "2026-01-22T20:29:00.759Z" }, ] [[package]] @@ -583,11 +583,11 @@ wheels = [ [[package]] name = "jmespath" -version = "1.0.1" +version = "1.1.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/00/2a/e867e8531cf3e36b41201936b7fa7ba7b5702dbef42922193f05c8976cd6/jmespath-1.0.1.tar.gz", hash = "sha256:90261b206d6defd58fdd5e85f478bf633a2901798906be2ad389150c5c60edbe", size = 25843, upload-time = "2022-06-17T18:00:12.224Z" } +sdist = { url = "https://files.pythonhosted.org/packages/d3/59/322338183ecda247fb5d1763a6cbe46eff7222eaeebafd9fa65d4bf5cb11/jmespath-1.1.0.tar.gz", hash = "sha256:472c87d80f36026ae83c6ddd0f1d05d4e510134ed462851fd5f754c8c3cbb88d", size = 27377, upload-time = "2026-01-22T16:35:26.279Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/31/b4/b9b800c45527aadd64d5b442f9b932b00648617eb5d63d2c7a6587b7cafc/jmespath-1.0.1-py3-none-any.whl", hash = "sha256:02e2e4cc71b5bcab88332eebf907519190dd9e6e82107fa7f83b1003a6252980", size = 20256, upload-time = "2022-06-17T18:00:10.251Z" }, + { url = "https://files.pythonhosted.org/packages/14/2f/967ba146e6d58cf6a652da73885f52fc68001525b4197effc174321d70b4/jmespath-1.1.0-py3-none-any.whl", hash = "sha256:a5663118de4908c91729bea0acadca56526eb2698e83de10cd116ae0f4e97c64", size = 20419, upload-time = "2026-01-22T16:35:24.919Z" }, ] [[package]] @@ -947,77 +947,69 @@ wheels = [ [[package]] name = "nvidia-cublas-cu12" -version = "12.8.4.1" +version = "12.4.5.8" source = { registry = "https://pypi.org/simple" } wheels = [ - { url = "https://files.pythonhosted.org/packages/dc/61/e24b560ab2e2eaeb3c839129175fb330dfcfc29e5203196e5541a4c44682/nvidia_cublas_cu12-12.8.4.1-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:8ac4e771d5a348c551b2a426eda6193c19aa630236b418086020df5ba9667142", size = 594346921, upload-time = "2025-03-07T01:44:31.254Z" }, + { url = 
"https://files.pythonhosted.org/packages/ae/71/1c91302526c45ab494c23f61c7a84aa568b8c1f9d196efa5993957faf906/nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl", hash = "sha256:2fc8da60df463fdefa81e323eef2e36489e1c94335b5358bcb38360adf75ac9b", size = 363438805, upload-time = "2024-04-03T20:57:06.025Z" }, ] [[package]] name = "nvidia-cuda-cupti-cu12" -version = "12.8.90" +version = "12.4.127" source = { registry = "https://pypi.org/simple" } wheels = [ - { url = "https://files.pythonhosted.org/packages/f8/02/2adcaa145158bf1a8295d83591d22e4103dbfd821bcaf6f3f53151ca4ffa/nvidia_cuda_cupti_cu12-12.8.90-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:ea0cb07ebda26bb9b29ba82cda34849e73c166c18162d3913575b0c9db9a6182", size = 10248621, upload-time = "2025-03-07T01:40:21.213Z" }, + { url = "https://files.pythonhosted.org/packages/67/42/f4f60238e8194a3106d06a058d494b18e006c10bb2b915655bd9f6ea4cb1/nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl", hash = "sha256:9dec60f5ac126f7bb551c055072b69d85392b13311fcc1bcda2202d172df30fb", size = 13813957, upload-time = "2024-04-03T20:55:01.564Z" }, ] [[package]] name = "nvidia-cuda-nvrtc-cu12" -version = "12.8.93" +version = "12.4.127" source = { registry = "https://pypi.org/simple" } wheels = [ - { url = "https://files.pythonhosted.org/packages/05/6b/32f747947df2da6994e999492ab306a903659555dddc0fbdeb9d71f75e52/nvidia_cuda_nvrtc_cu12-12.8.93-py3-none-manylinux2010_x86_64.manylinux_2_12_x86_64.whl", hash = "sha256:a7756528852ef889772a84c6cd89d41dfa74667e24cca16bb31f8f061e3e9994", size = 88040029, upload-time = "2025-03-07T01:42:13.562Z" }, + { url = "https://files.pythonhosted.org/packages/2c/14/91ae57cd4db3f9ef7aa99f4019cfa8d54cb4caa7e00975df6467e9725a9f/nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl", hash = "sha256:a178759ebb095827bd30ef56598ec182b85547f1508941a3d560eb7ea1fbf338", size = 24640306, upload-time = "2024-04-03T20:56:01.463Z" }, ] [[package]] name = "nvidia-cuda-runtime-cu12" -version = "12.8.90" +version = "12.4.127" source = { registry = "https://pypi.org/simple" } wheels = [ - { url = "https://files.pythonhosted.org/packages/0d/9b/a997b638fcd068ad6e4d53b8551a7d30fe8b404d6f1804abf1df69838932/nvidia_cuda_runtime_cu12-12.8.90-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:adade8dcbd0edf427b7204d480d6066d33902cab2a4707dcfc48a2d0fd44ab90", size = 954765, upload-time = "2025-03-07T01:40:01.615Z" }, + { url = "https://files.pythonhosted.org/packages/ea/27/1795d86fe88ef397885f2e580ac37628ed058a92ed2c39dc8eac3adf0619/nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl", hash = "sha256:64403288fa2136ee8e467cdc9c9427e0434110899d07c779f25b5c068934faa5", size = 883737, upload-time = "2024-04-03T20:54:51.355Z" }, ] [[package]] name = "nvidia-cudnn-cu12" -version = "9.10.2.21" +version = "9.1.0.70" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "nvidia-cublas-cu12" }, ] wheels = [ - { url = "https://files.pythonhosted.org/packages/ba/51/e123d997aa098c61d029f76663dedbfb9bc8dcf8c60cbd6adbe42f76d049/nvidia_cudnn_cu12-9.10.2.21-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:949452be657fa16687d0930933f032835951ef0892b37d2d53824d1a84dc97a8", size = 706758467, upload-time = "2025-06-06T21:54:08.597Z" }, + { url = "https://files.pythonhosted.org/packages/9f/fd/713452cd72343f682b1c7b9321e23829f00b842ceaedcda96e742ea0b0b3/nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl", hash = 
"sha256:165764f44ef8c61fcdfdfdbe769d687e06374059fbb388b6c89ecb0e28793a6f", size = 664752741, upload-time = "2024-04-22T15:24:15.253Z" }, ] [[package]] name = "nvidia-cufft-cu12" -version = "11.3.3.83" +version = "11.2.1.3" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "nvidia-nvjitlink-cu12" }, ] wheels = [ - { url = "https://files.pythonhosted.org/packages/1f/13/ee4e00f30e676b66ae65b4f08cb5bcbb8392c03f54f2d5413ea99a5d1c80/nvidia_cufft_cu12-11.3.3.83-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:4d2dd21ec0b88cf61b62e6b43564355e5222e4a3fb394cac0db101f2dd0d4f74", size = 193118695, upload-time = "2025-03-07T01:45:27.821Z" }, -] - -[[package]] -name = "nvidia-cufile-cu12" -version = "1.13.1.3" -source = { registry = "https://pypi.org/simple" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/bb/fe/1bcba1dfbfb8d01be8d93f07bfc502c93fa23afa6fd5ab3fc7c1df71038a/nvidia_cufile_cu12-1.13.1.3-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:1d069003be650e131b21c932ec3d8969c1715379251f8d23a1860554b1cb24fc", size = 1197834, upload-time = "2025-03-07T01:45:50.723Z" }, + { url = "https://files.pythonhosted.org/packages/27/94/3266821f65b92b3138631e9c8e7fe1fb513804ac934485a8d05776e1dd43/nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl", hash = "sha256:f083fc24912aa410be21fa16d157fed2055dab1cc4b6934a0e03cba69eb242b9", size = 211459117, upload-time = "2024-04-03T20:57:40.402Z" }, ] [[package]] name = "nvidia-curand-cu12" -version = "10.3.9.90" +version = "10.3.5.147" source = { registry = "https://pypi.org/simple" } wheels = [ - { url = "https://files.pythonhosted.org/packages/fb/aa/6584b56dc84ebe9cf93226a5cde4d99080c8e90ab40f0c27bda7a0f29aa1/nvidia_curand_cu12-10.3.9.90-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:b32331d4f4df5d6eefa0554c565b626c7216f87a06a4f56fab27c3b68a830ec9", size = 63619976, upload-time = "2025-03-07T01:46:23.323Z" }, + { url = "https://files.pythonhosted.org/packages/8a/6d/44ad094874c6f1b9c654f8ed939590bdc408349f137f9b98a3a23ccec411/nvidia_curand_cu12-10.3.5.147-py3-none-manylinux2014_x86_64.whl", hash = "sha256:a88f583d4e0bb643c49743469964103aa59f7f708d862c3ddb0fc07f851e3b8b", size = 56305206, upload-time = "2024-04-03T20:58:08.722Z" }, ] [[package]] name = "nvidia-cusolver-cu12" -version = "11.7.3.90" +version = "11.6.1.9" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "nvidia-cublas-cu12" }, @@ -1025,58 +1017,50 @@ dependencies = [ { name = "nvidia-nvjitlink-cu12" }, ] wheels = [ - { url = "https://files.pythonhosted.org/packages/85/48/9a13d2975803e8cf2777d5ed57b87a0b6ca2cc795f9a4f59796a910bfb80/nvidia_cusolver_cu12-11.7.3.90-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:4376c11ad263152bd50ea295c05370360776f8c3427b30991df774f9fb26c450", size = 267506905, upload-time = "2025-03-07T01:47:16.273Z" }, + { url = "https://files.pythonhosted.org/packages/3a/e1/5b9089a4b2a4790dfdea8b3a006052cfecff58139d5a4e34cb1a51df8d6f/nvidia_cusolver_cu12-11.6.1.9-py3-none-manylinux2014_x86_64.whl", hash = "sha256:19e33fa442bcfd085b3086c4ebf7e8debc07cfe01e11513cc6d332fd918ac260", size = 127936057, upload-time = "2024-04-03T20:58:28.735Z" }, ] [[package]] name = "nvidia-cusparse-cu12" -version = "12.5.8.93" +version = "12.3.1.170" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "nvidia-nvjitlink-cu12" }, ] wheels = [ - { url = 
"https://files.pythonhosted.org/packages/c2/f5/e1854cb2f2bcd4280c44736c93550cc300ff4b8c95ebe370d0aa7d2b473d/nvidia_cusparse_cu12-12.5.8.93-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:1ec05d76bbbd8b61b06a80e1eaf8cf4959c3d4ce8e711b65ebd0443bb0ebb13b", size = 288216466, upload-time = "2025-03-07T01:48:13.779Z" }, + { url = "https://files.pythonhosted.org/packages/db/f7/97a9ea26ed4bbbfc2d470994b8b4f338ef663be97b8f677519ac195e113d/nvidia_cusparse_cu12-12.3.1.170-py3-none-manylinux2014_x86_64.whl", hash = "sha256:ea4f11a2904e2a8dc4b1833cc1b5181cde564edd0d5cd33e3c168eff2d1863f1", size = 207454763, upload-time = "2024-04-03T20:58:59.995Z" }, ] [[package]] name = "nvidia-cusparselt-cu12" -version = "0.7.1" +version = "0.6.2" source = { registry = "https://pypi.org/simple" } wheels = [ - { url = "https://files.pythonhosted.org/packages/56/79/12978b96bd44274fe38b5dde5cfb660b1d114f70a65ef962bcbbed99b549/nvidia_cusparselt_cu12-0.7.1-py3-none-manylinux2014_x86_64.whl", hash = "sha256:f1bb701d6b930d5a7cea44c19ceb973311500847f81b634d802b7b539dc55623", size = 287193691, upload-time = "2025-02-26T00:15:44.104Z" }, + { url = "https://files.pythonhosted.org/packages/78/a8/bcbb63b53a4b1234feeafb65544ee55495e1bb37ec31b999b963cbccfd1d/nvidia_cusparselt_cu12-0.6.2-py3-none-manylinux2014_x86_64.whl", hash = "sha256:df2c24502fd76ebafe7457dbc4716b2fec071aabaed4fb7691a201cde03704d9", size = 150057751, upload-time = "2024-07-23T02:35:53.074Z" }, ] [[package]] name = "nvidia-nccl-cu12" -version = "2.27.5" +version = "2.21.5" source = { registry = "https://pypi.org/simple" } wheels = [ - { url = "https://files.pythonhosted.org/packages/6e/89/f7a07dc961b60645dbbf42e80f2bc85ade7feb9a491b11a1e973aa00071f/nvidia_nccl_cu12-2.27.5-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:ad730cf15cb5d25fe849c6e6ca9eb5b76db16a80f13f425ac68d8e2e55624457", size = 322348229, upload-time = "2025-06-26T04:11:28.385Z" }, + { url = "https://files.pythonhosted.org/packages/df/99/12cd266d6233f47d00daf3a72739872bdc10267d0383508b0b9c84a18bb6/nvidia_nccl_cu12-2.21.5-py3-none-manylinux2014_x86_64.whl", hash = "sha256:8579076d30a8c24988834445f8d633c697d42397e92ffc3f63fa26766d25e0a0", size = 188654414, upload-time = "2024-04-03T15:32:57.427Z" }, ] [[package]] name = "nvidia-nvjitlink-cu12" -version = "12.8.93" +version = "12.4.127" source = { registry = "https://pypi.org/simple" } wheels = [ - { url = "https://files.pythonhosted.org/packages/f6/74/86a07f1d0f42998ca31312f998bd3b9a7eff7f52378f4f270c8679c77fb9/nvidia_nvjitlink_cu12-12.8.93-py3-none-manylinux2010_x86_64.manylinux_2_12_x86_64.whl", hash = "sha256:81ff63371a7ebd6e6451970684f916be2eab07321b73c9d244dc2b4da7f73b88", size = 39254836, upload-time = "2025-03-07T01:49:55.661Z" }, -] - -[[package]] -name = "nvidia-nvshmem-cu12" -version = "3.3.20" -source = { registry = "https://pypi.org/simple" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/3b/6c/99acb2f9eb85c29fc6f3a7ac4dccfd992e22666dd08a642b303311326a97/nvidia_nvshmem_cu12-3.3.20-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:d00f26d3f9b2e3c3065be895e3059d6479ea5c638a3f38c9fec49b1b9dd7c1e5", size = 124657145, upload-time = "2025-08-04T20:25:19.995Z" }, + { url = "https://files.pythonhosted.org/packages/ff/ff/847841bacfbefc97a00036e0fce5a0f086b640756dc38caea5e1bb002655/nvidia_nvjitlink_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl", hash = "sha256:06b3b9b25bf3f8af351d664978ca26a16d2c5127dbd53c0497e28d1fb9611d57", size = 21066810, upload-time = 
"2024-04-03T20:59:46.957Z" }, ] [[package]] name = "nvidia-nvtx-cu12" -version = "12.8.90" +version = "12.4.127" source = { registry = "https://pypi.org/simple" } wheels = [ - { url = "https://files.pythonhosted.org/packages/a2/eb/86626c1bbc2edb86323022371c39aa48df6fd8b0a1647bc274577f72e90b/nvidia_nvtx_cu12-12.8.90-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:5b17e2001cc0d751a5bc2c6ec6d26ad95913324a4adb86788c944f8ce9ba441f", size = 89954, upload-time = "2025-03-07T01:42:44.131Z" }, + { url = "https://files.pythonhosted.org/packages/87/20/199b8713428322a2f22b722c62b8cc278cc53dffa9705d744484b5035ee9/nvidia_nvtx_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl", hash = "sha256:781e950d9b9f60d8241ccea575b32f5105a5baf4c2351cab5256a24869f12a1a", size = 99144, upload-time = "2024-04-03T20:56:12.406Z" }, ] [[package]] @@ -1090,11 +1074,11 @@ wheels = [ [[package]] name = "packaging" -version = "25.0" +version = "26.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/a1/d4/1fc4078c65507b51b96ca8f8c3ba19e6a61c8253c72794544580a7b6c24d/packaging-25.0.tar.gz", hash = "sha256:d443872c98d677bf60f6a1f2f8c1cb748e8fe762d2bf9d3148b5599295b0fc4f", size = 165727, upload-time = "2025-04-19T11:48:59.673Z" } +sdist = { url = "https://files.pythonhosted.org/packages/65/ee/299d360cdc32edc7d2cf530f3accf79c4fca01e96ffc950d8a52213bd8e4/packaging-26.0.tar.gz", hash = "sha256:00243ae351a257117b6a241061796684b084ed1c516a08c48a3f7e147a9d80b4", size = 143416, upload-time = "2026-01-21T20:50:39.064Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/20/12/38679034af332785aac8774540895e234f4d07f7545804097de4b666afd8/packaging-25.0-py3-none-any.whl", hash = "sha256:29572ef2b1f17581046b3a2227d5c611fb25ec70ca1ba8554b24b0e69331a484", size = 66469, upload-time = "2025-04-19T11:48:57.875Z" }, + { url = "https://files.pythonhosted.org/packages/b7/b9/c538f279a4e237a006a2c98387d081e9eb060d203d8ed34467cc0f0b9b53/packaging-26.0-py3-none-any.whl", hash = "sha256:b36f1fef9334a5588b4166f8bcd26a14e521f2b55e6b9de3aaa80d3ff7a37529", size = 74366, upload-time = "2026-01-21T20:50:37.788Z" }, ] [[package]] @@ -1359,11 +1343,11 @@ wheels = [ [[package]] name = "pyparsing" -version = "3.3.1" +version = "3.3.2" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/33/c1/1d9de9aeaa1b89b0186e5fe23294ff6517fce1bc69149185577cd31016b2/pyparsing-3.3.1.tar.gz", hash = "sha256:47fad0f17ac1e2cad3de3b458570fbc9b03560aa029ed5e16ee5554da9a2251c", size = 1550512, upload-time = "2025-12-23T03:14:04.391Z" } +sdist = { url = "https://files.pythonhosted.org/packages/f3/91/9c6ee907786a473bf81c5f53cf703ba0957b23ab84c264080fb5a450416f/pyparsing-3.3.2.tar.gz", hash = "sha256:c777f4d763f140633dcb6d8a3eda953bf7a214dc4eff598413c070bcdc117cbc", size = 6851574, upload-time = "2026-01-21T03:57:59.36Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/8b/40/2614036cdd416452f5bf98ec037f38a1afb17f327cb8e6b652d4729e0af8/pyparsing-3.3.1-py3-none-any.whl", hash = "sha256:023b5e7e5520ad96642e2c6db4cb683d3970bd640cdf7115049a6e9c3682df82", size = 121793, upload-time = "2025-12-23T03:14:02.103Z" }, + { url = "https://files.pythonhosted.org/packages/10/bd/c038d7cc38edc1aa5bf91ab8068b63d4308c66c4c8bb3cbba7dfbc049f9c/pyparsing-3.3.2-py3-none-any.whl", hash = "sha256:850ba148bd908d7e2411587e247a1e4f0327839c40e2e5e6d05a007ecc69911d", size = 122781, upload-time = "2026-01-21T03:57:55.912Z" }, ] [[package]] @@ -1792,11 
+1776,11 @@ wheels = [ [[package]] name = "setuptools" -version = "80.9.0" +version = "80.10.1" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/18/5d/3bf57dcd21979b887f014ea83c24ae194cfcd12b9e0fda66b957c69d1fca/setuptools-80.9.0.tar.gz", hash = "sha256:f36b47402ecde768dbfafc46e8e4207b4360c654f1f3bb84475f0a28628fb19c", size = 1319958, upload-time = "2025-05-27T00:56:51.443Z" } +sdist = { url = "https://files.pythonhosted.org/packages/86/ff/f75651350db3cf2ef767371307eb163f3cc1ac03e16fdf3ac347607f7edb/setuptools-80.10.1.tar.gz", hash = "sha256:bf2e513eb8144c3298a3bd28ab1a5edb739131ec5c22e045ff93cd7f5319703a", size = 1229650, upload-time = "2026-01-21T09:42:03.061Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/a3/dc/17031897dae0efacfea57dfd3a82fdd2a2aeb58e0ff71b77b87e44edc772/setuptools-80.9.0-py3-none-any.whl", hash = "sha256:062d34222ad13e0cc312a4c02d73f059e86a4acbfbdea8f8f76b28c99f306922", size = 1201486, upload-time = "2025-05-27T00:56:49.664Z" }, + { url = "https://files.pythonhosted.org/packages/e0/76/f963c61683a39084aa575f98089253e1e852a4417cb8a3a8a422923a5246/setuptools-80.10.1-py3-none-any.whl", hash = "sha256:fc30c51cbcb8199a219c12cc9c281b5925a4978d212f84229c909636d9f6984e", size = 1099859, upload-time = "2026-01-21T09:42:00.688Z" }, ] [[package]] @@ -1879,14 +1863,14 @@ wheels = [ [[package]] name = "sympy" -version = "1.14.0" +version = "1.13.1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "mpmath" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/83/d3/803453b36afefb7c2bb238361cd4ae6125a569b4db67cd9e79846ba2d68c/sympy-1.14.0.tar.gz", hash = "sha256:d3d3fe8df1e5a0b42f0e7bdf50541697dbe7d23746e894990c030e2b05e72517", size = 7793921, upload-time = "2025-04-27T18:05:01.611Z" } +sdist = { url = "https://files.pythonhosted.org/packages/ca/99/5a5b6f19ff9f083671ddf7b9632028436167cd3d33e11015754e41b249a4/sympy-1.13.1.tar.gz", hash = "sha256:9cebf7e04ff162015ce31c9c6c9144daa34a93bd082f54fd8f12deca4f47515f", size = 7533040, upload-time = "2024-07-19T09:26:51.238Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/a2/09/77d55d46fd61b4a135c444fc97158ef34a095e5681d0a6c10b75bf356191/sympy-1.14.0-py3-none-any.whl", hash = "sha256:e091cc3e99d2141a0ba2847328f5479b05d94a6635cb96148ccb3f34671bd8f5", size = 6299353, upload-time = "2025-04-27T18:04:59.103Z" }, + { url = "https://files.pythonhosted.org/packages/b2/fe/81695a1aa331a842b582453b605175f419fe8540355886031328089d840a/sympy-1.13.1-py3-none-any.whl", hash = "sha256:db36cdc64bf61b9b24578b6f7bab1ecdd2452cf008f34faa33776680c26d66f8", size = 6189177, upload-time = "2024-07-19T09:26:48.863Z" }, ] [[package]] @@ -1900,8 +1884,8 @@ wheels = [ [[package]] name = "timdex-dataset-api" -version = "3.10.0" -source = { git = "https://github.com/MITLibraries/timdex-dataset-api#03521efa08691d3a4b3e21e7258caec37882fa64" } +version = "3.11.0" +source = { git = "https://github.com/MITLibraries/timdex-dataset-api#4cf98a4cb135596652130959644dd9c7b16f115b" } dependencies = [ { name = "attrs" }, { name = "boto3" }, @@ -1914,7 +1898,7 @@ dependencies = [ [[package]] name = "timdex-embeddings" -version = "1.0.0" +version = "2.0.0" source = { editable = "." 
} dependencies = [ { name = "click" }, @@ -1924,7 +1908,6 @@ dependencies = [ { name = "sentry-sdk" }, { name = "smart-open", extra = ["s3"] }, { name = "timdex-dataset-api" }, - { name = "torch" }, ] [package.dev-dependencies] @@ -1938,6 +1921,9 @@ dev = [ { name = "pytest" }, { name = "ruff" }, ] +local = [ + { name = "torch" }, +] [package.metadata] requires-dist = [ @@ -1948,7 +1934,6 @@ requires-dist = [ { name = "sentry-sdk", specifier = ">=2.34.1" }, { name = "smart-open", extras = ["s3"], specifier = ">=7.4.4" }, { name = "timdex-dataset-api", git = "https://github.com/MITLibraries/timdex-dataset-api" }, - { name = "torch", specifier = ">=2.9.0" }, ] [package.metadata.requires-dev] @@ -1962,6 +1947,9 @@ dev = [ { name = "pytest", specifier = ">=8.4.1" }, { name = "ruff", specifier = ">=0.12.8" }, ] +dlc-amd64-gpu = [] +dlc-arm64-cpu = [] +local = [{ name = "torch", specifier = ">=2.6,<2.7" }] [[package]] name = "tokenizers" @@ -2045,7 +2033,7 @@ wheels = [ [[package]] name = "torch" -version = "2.9.1" +version = "2.6.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "filelock" }, @@ -2058,14 +2046,12 @@ dependencies = [ { name = "nvidia-cuda-runtime-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, { name = "nvidia-cudnn-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, { name = "nvidia-cufft-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "nvidia-cufile-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, { name = "nvidia-curand-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, { name = "nvidia-cusolver-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, { name = "nvidia-cusparse-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, { name = "nvidia-cusparselt-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, { name = "nvidia-nccl-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, { name = "nvidia-nvjitlink-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "nvidia-nvshmem-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, { name = "nvidia-nvtx-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, { name = "setuptools" }, { name = "sympy" }, @@ -2073,26 +2059,14 @@ dependencies = [ { name = "typing-extensions" }, ] wheels = [ - { url = "https://files.pythonhosted.org/packages/0f/27/07c645c7673e73e53ded71705045d6cb5bae94c4b021b03aa8d03eee90ab/torch-2.9.1-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:da5f6f4d7f4940a173e5572791af238cb0b9e21b1aab592bd8b26da4c99f1cd6", size = 104126592, upload-time = "2025-11-12T15:20:41.62Z" }, - { url = "https://files.pythonhosted.org/packages/19/17/e377a460603132b00760511299fceba4102bd95db1a0ee788da21298ccff/torch-2.9.1-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:27331cd902fb4322252657f3902adf1c4f6acad9dcad81d8df3ae14c7c4f07c4", size = 899742281, upload-time = "2025-11-12T15:22:17.602Z" }, - { url = "https://files.pythonhosted.org/packages/b1/1a/64f5769025db846a82567fa5b7d21dba4558a7234ee631712ee4771c436c/torch-2.9.1-cp312-cp312-win_amd64.whl", hash = "sha256:81a285002d7b8cfd3fdf1b98aa8df138d41f1a8334fd9ea37511517cedf43083", size = 110940568, upload-time = "2025-11-12T15:21:18.689Z" }, - { url = 
"https://files.pythonhosted.org/packages/6e/ab/07739fd776618e5882661d04c43f5b5586323e2f6a2d7d84aac20d8f20bd/torch-2.9.1-cp312-none-macosx_11_0_arm64.whl", hash = "sha256:c0d25d1d8e531b8343bea0ed811d5d528958f1dcbd37e7245bc686273177ad7e", size = 74479191, upload-time = "2025-11-12T15:21:25.816Z" }, - { url = "https://files.pythonhosted.org/packages/20/60/8fc5e828d050bddfab469b3fe78e5ab9a7e53dda9c3bdc6a43d17ce99e63/torch-2.9.1-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:c29455d2b910b98738131990394da3e50eea8291dfeb4b12de71ecf1fdeb21cb", size = 104135743, upload-time = "2025-11-12T15:21:34.936Z" }, - { url = "https://files.pythonhosted.org/packages/f2/b7/6d3f80e6918213babddb2a37b46dbb14c15b14c5f473e347869a51f40e1f/torch-2.9.1-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:524de44cd13931208ba2c4bde9ec7741fd4ae6bfd06409a604fc32f6520c2bc9", size = 899749493, upload-time = "2025-11-12T15:24:36.356Z" }, - { url = "https://files.pythonhosted.org/packages/a6/47/c7843d69d6de8938c1cbb1eba426b1d48ddf375f101473d3e31a5fc52b74/torch-2.9.1-cp313-cp313-win_amd64.whl", hash = "sha256:545844cc16b3f91e08ce3b40e9c2d77012dd33a48d505aed34b7740ed627a1b2", size = 110944162, upload-time = "2025-11-12T15:21:53.151Z" }, - { url = "https://files.pythonhosted.org/packages/28/0e/2a37247957e72c12151b33a01e4df651d9d155dd74d8cfcbfad15a79b44a/torch-2.9.1-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:5be4bf7496f1e3ffb1dd44b672adb1ac3f081f204c5ca81eba6442f5f634df8e", size = 74830751, upload-time = "2025-11-12T15:21:43.792Z" }, - { url = "https://files.pythonhosted.org/packages/4b/f7/7a18745edcd7b9ca2381aa03353647bca8aace91683c4975f19ac233809d/torch-2.9.1-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:30a3e170a84894f3652434b56d59a64a2c11366b0ed5776fab33c2439396bf9a", size = 104142929, upload-time = "2025-11-12T15:21:48.319Z" }, - { url = "https://files.pythonhosted.org/packages/f4/dd/f1c0d879f2863ef209e18823a988dc7a1bf40470750e3ebe927efdb9407f/torch-2.9.1-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:8301a7b431e51764629208d0edaa4f9e4c33e6df0f2f90b90e261d623df6a4e2", size = 899748978, upload-time = "2025-11-12T15:23:04.568Z" }, - { url = "https://files.pythonhosted.org/packages/1f/9f/6986b83a53b4d043e36f3f898b798ab51f7f20fdf1a9b01a2720f445043d/torch-2.9.1-cp313-cp313t-win_amd64.whl", hash = "sha256:2e1c42c0ae92bf803a4b2409fdfed85e30f9027a66887f5e7dcdbc014c7531db", size = 111176995, upload-time = "2025-11-12T15:22:01.618Z" }, - { url = "https://files.pythonhosted.org/packages/40/60/71c698b466dd01e65d0e9514b5405faae200c52a76901baf6906856f17e4/torch-2.9.1-cp313-none-macosx_11_0_arm64.whl", hash = "sha256:2c14b3da5df416cf9cb5efab83aa3056f5b8cd8620b8fde81b4987ecab730587", size = 74480347, upload-time = "2025-11-12T15:21:57.648Z" }, - { url = "https://files.pythonhosted.org/packages/48/50/c4b5112546d0d13cc9eaa1c732b823d676a9f49ae8b6f97772f795874a03/torch-2.9.1-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:1edee27a7c9897f4e0b7c14cfc2f3008c571921134522d5b9b5ec4ebbc69041a", size = 74433245, upload-time = "2025-11-12T15:22:39.027Z" }, - { url = "https://files.pythonhosted.org/packages/81/c9/2628f408f0518b3bae49c95f5af3728b6ab498c8624ab1e03a43dd53d650/torch-2.9.1-cp314-cp314-manylinux_2_28_aarch64.whl", hash = "sha256:19d144d6b3e29921f1fc70503e9f2fc572cde6a5115c0c0de2f7ca8b1483e8b6", size = 104134804, upload-time = "2025-11-12T15:22:35.222Z" }, - { url = 
"https://files.pythonhosted.org/packages/28/fc/5bc91d6d831ae41bf6e9e6da6468f25330522e92347c9156eb3f1cb95956/torch-2.9.1-cp314-cp314-manylinux_2_28_x86_64.whl", hash = "sha256:c432d04376f6d9767a9852ea0def7b47a7bbc8e7af3b16ac9cf9ce02b12851c9", size = 899747132, upload-time = "2025-11-12T15:23:36.068Z" }, - { url = "https://files.pythonhosted.org/packages/63/5d/e8d4e009e52b6b2cf1684bde2a6be157b96fb873732542fb2a9a99e85a83/torch-2.9.1-cp314-cp314-win_amd64.whl", hash = "sha256:d187566a2cdc726fc80138c3cdb260970fab1c27e99f85452721f7759bbd554d", size = 110934845, upload-time = "2025-11-12T15:22:48.367Z" }, - { url = "https://files.pythonhosted.org/packages/bd/b2/2d15a52516b2ea3f414643b8de68fa4cb220d3877ac8b1028c83dc8ca1c4/torch-2.9.1-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:cb10896a1f7fedaddbccc2017ce6ca9ecaaf990f0973bdfcf405439750118d2c", size = 74823558, upload-time = "2025-11-12T15:22:43.392Z" }, - { url = "https://files.pythonhosted.org/packages/86/5c/5b2e5d84f5b9850cd1e71af07524d8cbb74cba19379800f1f9f7c997fc70/torch-2.9.1-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:0a2bd769944991c74acf0c4ef23603b9c777fdf7637f115605a4b2d8023110c7", size = 104145788, upload-time = "2025-11-12T15:23:52.109Z" }, - { url = "https://files.pythonhosted.org/packages/a9/8c/3da60787bcf70add986c4ad485993026ac0ca74f2fc21410bc4eb1bb7695/torch-2.9.1-cp314-cp314t-manylinux_2_28_x86_64.whl", hash = "sha256:07c8a9660bc9414c39cac530ac83b1fb1b679d7155824144a40a54f4a47bfa73", size = 899735500, upload-time = "2025-11-12T15:24:08.788Z" }, - { url = "https://files.pythonhosted.org/packages/db/2b/f7818f6ec88758dfd21da46b6cd46af9d1b3433e53ddbb19ad1e0da17f9b/torch-2.9.1-cp314-cp314t-win_amd64.whl", hash = "sha256:c88d3299ddeb2b35dcc31753305612db485ab6f1823e37fb29451c8b2732b87e", size = 111163659, upload-time = "2025-11-12T15:23:20.009Z" }, + { url = "https://files.pythonhosted.org/packages/e5/35/0c52d708144c2deb595cd22819a609f78fdd699b95ff6f0ebcd456e3c7c1/torch-2.6.0-cp312-cp312-manylinux1_x86_64.whl", hash = "sha256:2bb8987f3bb1ef2675897034402373ddfc8f5ef0e156e2d8cfc47cacafdda4a9", size = 766624563, upload-time = "2025-01-29T16:23:19.084Z" }, + { url = "https://files.pythonhosted.org/packages/01/d6/455ab3fbb2c61c71c8842753b566012e1ed111e7a4c82e0e1c20d0c76b62/torch-2.6.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:b789069020c5588c70d5c2158ac0aa23fd24a028f34a8b4fcb8fcb4d7efcf5fb", size = 95607867, upload-time = "2025-01-29T16:25:55.649Z" }, + { url = "https://files.pythonhosted.org/packages/18/cf/ae99bd066571656185be0d88ee70abc58467b76f2f7c8bfeb48735a71fe6/torch-2.6.0-cp312-cp312-win_amd64.whl", hash = "sha256:7e1448426d0ba3620408218b50aa6ada88aeae34f7a239ba5431f6c8774b1239", size = 204120469, upload-time = "2025-01-29T16:24:01.821Z" }, + { url = "https://files.pythonhosted.org/packages/81/b4/605ae4173aa37fb5aa14605d100ff31f4f5d49f617928c9f486bb3aaec08/torch-2.6.0-cp312-none-macosx_11_0_arm64.whl", hash = "sha256:9a610afe216a85a8b9bc9f8365ed561535c93e804c2a317ef7fabcc5deda0989", size = 66532538, upload-time = "2025-01-29T16:24:18.976Z" }, + { url = "https://files.pythonhosted.org/packages/24/85/ead1349fc30fe5a32cadd947c91bda4a62fbfd7f8c34ee61f6398d38fb48/torch-2.6.0-cp313-cp313-manylinux1_x86_64.whl", hash = "sha256:4874a73507a300a5d089ceaff616a569e7bb7c613c56f37f63ec3ffac65259cf", size = 766626191, upload-time = "2025-01-29T16:17:26.26Z" }, + { url = 
"https://files.pythonhosted.org/packages/dd/b0/26f06f9428b250d856f6d512413e9e800b78625f63801cbba13957432036/torch-2.6.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:a0d5e1b9874c1a6c25556840ab8920569a7a4137afa8a63a32cee0bc7d89bd4b", size = 95611439, upload-time = "2025-01-29T16:21:21.061Z" }, + { url = "https://files.pythonhosted.org/packages/c2/9c/fc5224e9770c83faed3a087112d73147cd7c7bfb7557dcf9ad87e1dda163/torch-2.6.0-cp313-cp313-win_amd64.whl", hash = "sha256:510c73251bee9ba02ae1cb6c9d4ee0907b3ce6020e62784e2d7598e0cfa4d6cc", size = 204126475, upload-time = "2025-01-29T16:21:55.394Z" }, + { url = "https://files.pythonhosted.org/packages/88/8b/d60c0491ab63634763be1537ad488694d316ddc4a20eaadd639cedc53971/torch-2.6.0-cp313-none-macosx_11_0_arm64.whl", hash = "sha256:ff96f4038f8af9f7ec4231710ed4549da1bdebad95923953a25045dcf6fd87e2", size = 66536783, upload-time = "2025-01-29T16:22:08.559Z" }, ] [[package]] @@ -2139,14 +2113,11 @@ wheels = [ [[package]] name = "triton" -version = "3.5.1" +version = "3.2.0" source = { registry = "https://pypi.org/simple" } wheels = [ - { url = "https://files.pythonhosted.org/packages/f2/50/9a8358d3ef58162c0a415d173cfb45b67de60176e1024f71fbc4d24c0b6d/triton-3.5.1-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d2c6b915a03888ab931a9fd3e55ba36785e1fe70cbea0b40c6ef93b20fc85232", size = 170470207, upload-time = "2025-11-11T17:41:00.253Z" }, - { url = "https://files.pythonhosted.org/packages/27/46/8c3bbb5b0a19313f50edcaa363b599e5a1a5ac9683ead82b9b80fe497c8d/triton-3.5.1-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:f3f4346b6ebbd4fad18773f5ba839114f4826037c9f2f34e0148894cd5dd3dba", size = 170470410, upload-time = "2025-11-11T17:41:06.319Z" }, - { url = "https://files.pythonhosted.org/packages/37/92/e97fcc6b2c27cdb87ce5ee063d77f8f26f19f06916aa680464c8104ef0f6/triton-3.5.1-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0b4d2c70127fca6a23e247f9348b8adde979d2e7a20391bfbabaac6aebc7e6a8", size = 170579924, upload-time = "2025-11-11T17:41:12.455Z" }, - { url = "https://files.pythonhosted.org/packages/a4/e6/c595c35e5c50c4bc56a7bac96493dad321e9e29b953b526bbbe20f9911d0/triton-3.5.1-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d0637b1efb1db599a8e9dc960d53ab6e4637db7d4ab6630a0974705d77b14b60", size = 170480488, upload-time = "2025-11-11T17:41:18.222Z" }, - { url = "https://files.pythonhosted.org/packages/16/b5/b0d3d8b901b6a04ca38df5e24c27e53afb15b93624d7fd7d658c7cd9352a/triton-3.5.1-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:bac7f7d959ad0f48c0e97d6643a1cc0fd5786fe61cb1f83b537c6b2d54776478", size = 170582192, upload-time = "2025-11-11T17:41:23.963Z" }, + { url = "https://files.pythonhosted.org/packages/06/00/59500052cb1cf8cf5316be93598946bc451f14072c6ff256904428eaf03c/triton-3.2.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8d9b215efc1c26fa7eefb9a157915c92d52e000d2bf83e5f69704047e63f125c", size = 253159365, upload-time = "2025-01-22T19:13:24.648Z" }, + { url = "https://files.pythonhosted.org/packages/c7/30/37a3384d1e2e9320331baca41e835e90a3767303642c7a80d4510152cbcf/triton-3.2.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e5dfa23ba84541d7c0a531dfce76d8bcd19159d50a4a8b14ad01e91734a5c1b0", size = 253154278, upload-time = "2025-01-22T19:13:54.221Z" }, ] [[package]] @@ -2192,11 +2163,11 @@ wheels = [ [[package]] name = "wcwidth" -version = "0.2.14" 
+version = "0.3.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/24/30/6b0809f4510673dc723187aeaf24c7f5459922d01e2f794277a3dfb90345/wcwidth-0.2.14.tar.gz", hash = "sha256:4d478375d31bc5395a3c55c40ccdf3354688364cd61c4f6adacaa9215d0b3605", size = 102293, upload-time = "2025-09-22T16:29:53.023Z" } +sdist = { url = "https://files.pythonhosted.org/packages/38/75/2144b65e4fba12a2d9868e9a3f99db7fa0760670d064603634bef9ff1709/wcwidth-0.3.0.tar.gz", hash = "sha256:af1a2fb0b83ef4a7fc0682a4c95ca2576e14d0280bca2a9e67b7dc9f2733e123", size = 172238, upload-time = "2026-01-21T17:44:09.508Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/af/b5/123f13c975e9f27ab9c0770f514345bd406d0e8d3b7a0723af9d43f710af/wcwidth-0.2.14-py2.py3-none-any.whl", hash = "sha256:a7bb560c8aee30f9957e5f9895805edd20602f2d7f720186dfd906e82b4982e1", size = 37286, upload-time = "2025-09-22T16:29:51.641Z" }, + { url = "https://files.pythonhosted.org/packages/18/0e/a5f0257ab47492b7afb5fb60347d14ba19445e2773fc8352d4be6bd2f6f8/wcwidth-0.3.0-py3-none-any.whl", hash = "sha256:073a1acb250e4add96cfd5ef84e0036605cd6e0d0782c8c15c80e42202348458", size = 85520, upload-time = "2026-01-21T17:44:08.002Z" }, ] [[package]] From 27943c573a647695ddefdac628d6a4e1ddfecfb1 Mon Sep 17 00:00:00 2001 From: Graham Hukill Date: Fri, 23 Jan 2026 11:17:47 -0500 Subject: [PATCH 2/4] Add updated dev and stage GHA workflows --- .github/workflows/dev-build.yml | 134 +++++++++++++++++++++--------- .github/workflows/stage-build.yml | 128 +++++++++++++++++++--------- 2 files changed, 185 insertions(+), 77 deletions(-) diff --git a/.github/workflows/dev-build.yml b/.github/workflows/dev-build.yml index d9468a3..a4d78cd 100644 --- a/.github/workflows/dev-build.yml +++ b/.github/workflows/dev-build.yml @@ -1,9 +1,9 @@ -### This is the Terraform-generated dev-build.yml workflow for the ### -### timdex-embeddings-dev app repository. ### -### If this is a Lambda repo, uncomment the FUNCTION line at the end of ### -### the document. If the container requires any additional pre-build ### -### commands, uncomment and edit the PREBUILD line at the end of the ### -### document. ### +### This is a custom dev-build.yml workflow for the ### +### timdex-embeddings-dev app repository. It is customized in order to ### +### support parallel builds for amd64 and/or arm64 for AWS Batch compute ### +### environments that have GPUs and compute environments that do NOT have ### +### GPUs. 
### + name: Dev Container Build and Deploy on: @@ -13,48 +13,106 @@ on: - main paths-ignore: - '.github/**' + - 'docs/**' + - 'tests/**' permissions: id-token: write contents: read +env: + AWS_REGION: "us-east-1" + GHA_ROLE: "timdex-embeddings-gha-dev" + REPOSITORY: "timdex-embeddings-dev" + jobs: - prep: - name: Prep for Build + choose-runners: + name: Determine Runner architecture from .aws-architecture file runs-on: ubuntu-latest - outputs: - cpuarch: ${{ steps.setarch.outputs.cpuarch }} + outputs: + gpu_arch: ${{ steps.out.outputs.gpu_arch }} + cpu_arch: ${{ steps.out.outputs.cpu_arch }} + steps: + - uses: actions/checkout@v5 + + - id: out + run: | + GPU_ARCH=$(jq -r '.gpu // "linux/amd64"' .aws-architecture) + CPU_ARCH=$(jq -r '.cpu // "linux/amd64"' .aws-architecture) + echo "gpu_arch=$GPU_ARCH" >> $GITHUB_OUTPUT + echo "cpu_arch=$CPU_ARCH" >> $GITHUB_OUTPUT + + build: + name: Build ${{ matrix.variant }} (${{ matrix.arch }}) + needs: choose-runners + strategy: + fail-fast: false + matrix: + include: + - variant: gpu + arch: ${{ needs.choose-runners.outputs.gpu_arch }} + - variant: cpu + arch: ${{ needs.choose-runners.outputs.cpu_arch }} + runs-on: ${{ matrix.arch == 'linux/arm64' && 'ubuntu-24.04-arm' || 'ubuntu-latest' }} + steps: - - name: Checkout + - name: Checkout code uses: actions/checkout@v5 - - name: Set CPU Architecture - id: setarch + - name: Set Tags + id: tags run: | - echo "### :abacus: Architecture Selection" >> $GITHUB_STEP_SUMMARY - if [[ -f .aws-architecture ]]; then - ARCH=$(cat .aws-architecture) - echo "\`$ARCH\` was read from \`.aws-architecture\` and passed to the deploy job." >> $GITHUB_STEP_SUMMARY + KEY=${{ matrix.variant }} + ARCH=${{ matrix.arch }} + TAG_ARCH=$(echo "$ARCH" | cut -d'/' -f2) + if [ "$GITHUB_EVENT_NAME" = "workflow_dispatch" ]; then + TAG_SHA=$(echo $GITHUB_SHA | cut -c 1-8) + TAG_PR=$GITHUB_EVENT_NAME else - ARCH="linux/amd64" - echo "No \`.aws-architecture\` file, so default \`$ARCH\` was passed to the deploy job." >> $GITHUB_STEP_SUMMARY + TAG_SHA=$(echo ${{ github.event.pull_request.head.sha }} | cut -c 1-8) + TAG_PR="PR-${{ github.event.pull_request.number }}" fi - if [[ "$ARCH" != "linux/arm64" && "$ARCH" != "linux/amd64" ]]; then - echo "$ARCH is INVALID architecture!" - echo "$ARCH is INVALID architecture!" 
>> $GITHUB_STEP_SUMMARY - exit 1 - fi - echo "cpuarch=$ARCH" >> $GITHUB_OUTPUT - - deploy: - needs: prep - name: Dev Deploy - uses: mitlibraries/.github/.github/workflows/ecr-multi-arch-deploy-dev.yml@main - secrets: inherit - with: - AWS_REGION: "us-east-1" - GHA_ROLE: "timdex-embeddings-gha-dev" - ECR: "timdex-embeddings-dev" - CPU_ARCH: ${{ needs.prep.outputs.cpuarch }} - # FUNCTION: "" - # PREBUILD: + echo "arch=${ARCH}" >> $GITHUB_OUTPUT + echo "tag_latest=latest-${TAG_ARCH}-${KEY}" >> $GITHUB_OUTPUT + echo "tag_sha=${TAG_SHA}-${TAG_ARCH}-${KEY}" >> $GITHUB_OUTPUT + echo "tag_pr=${TAG_PR}-${TAG_ARCH}-${KEY}" >> $GITHUB_OUTPUT + + - name: Free Disk Space + run: | + sudo rm -rf /usr/share/dotnet + sudo rm -rf /usr/local/lib/android + sudo rm -rf /opt/ghc + sudo rm -rf "$AGENT_TOOLSDIRECTORY" + + - name: Configure AWS Dev credentials + uses: aws-actions/configure-aws-credentials@v5 + with: + aws-region: ${{ env.AWS_REGION }} + role-to-assume: arn:aws:iam::${{ secrets.AWS_ACCT_DEV }}:role/${{ env.GHA_ROLE }} + + - name: Login to Dev ECR + id: login-ecr + uses: aws-actions/amazon-ecr-login@v2 + + - name: Setup BuildX Builder + id: buildx + uses: docker/setup-buildx-action@v3 + with: + driver: docker-container + + - name: Build and push (${{ matrix.variant }}) + uses: docker/build-push-action@v6 + with: + builder: ${{ steps.buildx.outputs.name }} + cache-from: type=gha + cache-to: type=gha,mode=max + context: . + file: ./Dockerfile-${{ matrix.variant }} + pull: true + push: true + sbom: false + provenance: false + tags: | + ${{ steps.login-ecr.outputs.registry }}/${{ env.REPOSITORY }}:${{ steps.tags.outputs.tag_latest }} + ${{ steps.login-ecr.outputs.registry }}/${{ env.REPOSITORY }}:${{ steps.tags.outputs.tag_sha }} + ${{ steps.login-ecr.outputs.registry }}/${{ env.REPOSITORY }}:${{ steps.tags.outputs.tag_pr }} diff --git a/.github/workflows/stage-build.yml b/.github/workflows/stage-build.yml index d1cf541..53b4d91 100644 --- a/.github/workflows/stage-build.yml +++ b/.github/workflows/stage-build.yml @@ -1,9 +1,8 @@ -### This is the Terraform-generated stage-build.yml workflow for the ### -### timdex-embeddings-stage app repository. ### -### If this is a Lambda repo, uncomment the FUNCTION line at the end of ### -### the document. If the container requires any additional pre-build ### -### commands, uncomment and edit the PREBUILD line at the end of the ### -### document. ### +### This is a custom stage-build.yml workflow for the ### +### timdex-embeddings-stage app repository. It is customized in order to ### +### support parallel builds for amd64 and/or arm64 for AWS Batch compute ### +### environments that have GPUs and compute environments that do NOT have ### +### GPUs. 
### name: Stage Container Build and Deploy on: @@ -13,47 +12,98 @@ on: - main paths-ignore: - '.github/**' + - 'docs/**' + - 'tests/**' permissions: id-token: write contents: read +env: + AWS_REGION: "us-east-1" + GHA_ROLE: "timdex-embeddings-gha-stage" + REPOSITORY: "timdex-embeddings-stage" + jobs: - prep: - name: Prep for Build + choose-runners: + name: Determine Runner architecture from .aws-architecture file runs-on: ubuntu-latest - outputs: - cpuarch: ${{ steps.setarch.outputs.cpuarch }} + outputs: + gpu_arch: ${{ steps.out.outputs.gpu_arch }} + cpu_arch: ${{ steps.out.outputs.cpu_arch }} + steps: + - uses: actions/checkout@v5 + + - id: out + run: | + GPU_ARCH=$(jq -r '.gpu // "linux/amd64"' .aws-architecture) + CPU_ARCH=$(jq -r '.cpu // "linux/amd64"' .aws-architecture) + echo "gpu_arch=$GPU_ARCH" >> $GITHUB_OUTPUT + echo "cpu_arch=$CPU_ARCH" >> $GITHUB_OUTPUT + + build: + name: Build ${{ matrix.variant }} (${{ matrix.arch }}) + needs: choose-runners + strategy: + fail-fast: false + matrix: + include: + - variant: gpu + arch: ${{ needs.choose-runners.outputs.gpu_arch }} + - variant: cpu + arch: ${{ needs.choose-runners.outputs.cpu_arch }} + runs-on: ${{ matrix.arch == 'linux/arm64' && 'ubuntu-24.04-arm' || 'ubuntu-latest' }} + steps: - - name: Checkout + - name: Checkout code uses: actions/checkout@v5 - - name: Set CPU Architecture - id: setarch + - name: Set Tags + id: tags run: | - echo "### :abacus: Architecture Selection" >> $GITHUB_STEP_SUMMARY - if [[ -f .aws-architecture ]]; then - ARCH=$(cat .aws-architecture) - echo "\`$ARCH\` was read from \`.aws-architecture\` and passed to the deploy job." >> $GITHUB_STEP_SUMMARY - else - ARCH="linux/amd64" - echo "No \`.aws-architecture\` file, so default \`$ARCH\` was passed to the deploy job." >> $GITHUB_STEP_SUMMARY - fi - if [[ "$ARCH" != "linux/arm64" && "$ARCH" != "linux/amd64" ]]; then - echo "$ARCH is INVALID architecture!" - echo "$ARCH is INVALID architecture!" >> $GITHUB_STEP_SUMMARY - exit 1 - fi - echo "cpuarch=$ARCH" >> $GITHUB_OUTPUT - - deploy: - needs: prep - name: Stage Deploy - uses: mitlibraries/.github/.github/workflows/ecr-multi-arch-deploy-stage.yml@main - secrets: inherit - with: - AWS_REGION: "us-east-1" - GHA_ROLE: "timdex-embeddings-gha-stage" - ECR: "timdex-embeddings-stage" - CPU_ARCH: ${{ needs.prep.outputs.cpuarch }} - # PREBUILD: + KEY=${{ matrix.variant }} + ARCH=${{ matrix.arch }} + TAG_ARCH=$(echo "$ARCH" | cut -d'/' -f2) + TAG_SHA=$(echo $GITHUB_SHA | cut -c 1-8) + echo "arch=${ARCH}" >> $GITHUB_OUTPUT + echo "tag_latest=latest-${TAG_ARCH}-${KEY}" >> $GITHUB_OUTPUT + echo "tag_sha=${TAG_SHA}-${TAG_ARCH}-${KEY}" >> $GITHUB_OUTPUT + + - name: Free Disk Space + run: | + sudo rm -rf /usr/share/dotnet + sudo rm -rf /usr/local/lib/android + sudo rm -rf /opt/ghc + sudo rm -rf "$AGENT_TOOLSDIRECTORY" + + - name: Configure AWS Stage credentials + uses: aws-actions/configure-aws-credentials@v5 + with: + aws-region: ${{ env.AWS_REGION }} + role-to-assume: arn:aws:iam::${{ secrets.AWS_ACCT_STAGE }}:role/${{ env.GHA_ROLE }} + + - name: Login to Stage ECR + id: login-ecr + uses: aws-actions/amazon-ecr-login@v2 + + - name: Setup BuildX Builder + id: buildx + uses: docker/setup-buildx-action@v3 + with: + driver: docker-container + + - name: Build and push (${{ matrix.variant }}) + uses: docker/build-push-action@v6 + with: + builder: ${{ steps.buildx.outputs.name }} + cache-from: type=gha + cache-to: type=gha,mode=max + context: . 
+          file: ./Dockerfile-${{ matrix.variant }}
+          pull: true
+          push: true
+          sbom: false
+          provenance: false
+          tags: |
+            ${{ steps.login-ecr.outputs.registry }}/${{ env.REPOSITORY }}:${{ steps.tags.outputs.tag_latest }}
+            ${{ steps.login-ecr.outputs.registry }}/${{ env.REPOSITORY }}:${{ steps.tags.outputs.tag_sha }}

From fbe2e4209d5f47150658b5364bbf1e60ec94dc04 Mon Sep 17 00:00:00 2001
From: Graham Hukill
Date: Fri, 23 Jan 2026 14:05:45 -0500
Subject: [PATCH 3/4] Additional CD documentation

---
 README.md                                   |  4 +-
 docs/continuous-delivery-parallel-builds.md | 55 +++++++++++++++++++++
 2 files changed, 58 insertions(+), 1 deletion(-)
 create mode 100644 docs/continuous-delivery-parallel-builds.md

diff --git a/README.md b/README.md
index 68cf74d..e44f0ba 100644
--- a/README.md
+++ b/README.md
@@ -28,7 +28,9 @@ Note the Docker image build commands in the `Makefile`, allowing for building a
 
 Also note that due to the size of the AWS Deep Learning Container (DLC) base images, these images can be quite large (~4gb for CPU, ~16gb for GPU). For successful builds locally, you may need to increase the "Disk usage limit" in your local Docker environment; observed failures at 50gb, success at 96gb.
 
-See the following ADR for more background: [01-parallel-builds-both-archs.md](docs/adrs/01-parallel-builds-both-archs.md).
+See the following for more information:
+- [ADR](docs/adrs/01-parallel-builds-both-archs.md)
+- [Continuous Delivery (CD) documentation](docs/continuous-delivery-parallel-builds.md)
 
 ## Environment Variables
 
diff --git a/docs/continuous-delivery-parallel-builds.md b/docs/continuous-delivery-parallel-builds.md
new file mode 100644
index 0000000..2b4dff1
--- /dev/null
+++ b/docs/continuous-delivery-parallel-builds.md
@@ -0,0 +1,55 @@
+# The CD pipeline for multiple, parallel builds
+
+This application has a unique CD pipeline for pushing container images to AWS. All of our other applications push either an AMD64-based image or an ARM64-based image to the ECR Repository, while this one builds and pushes both.
+
+This application is destined to run in a Compute Environment in AWS Batch and, depending on the data to be processed, the application might run better in an environment with a GPU or in one without. (By "better" we mean both faster and at a lower cost. See [Addendum: Use AWS Batch as Compute for Embeddings](https://mitlibraries.atlassian.net/wiki/spaces/D/pages/4832493614/Engineering+Plan+Record+Embeddings+ETL#Addendum%3A-Use-AWS-Batch-as-Compute-for-Embeddings) for more details.)
+
+## Two Dockerfiles
+
+We separate the two builds by leveraging two different Dockerfiles:
+
+* `Dockerfile-gpu`: defines the build for GPU-enabled containers
+* `Dockerfile-cpu`: defines the build for containers that will not use GPUs
+
+At this time, AWS Batch only supports the `amd64` architecture for compute environments that leverage GPUs. Otherwise, for runs of this application that do not require GPUs, the `arm64` architecture is less expensive and more efficient.
+
+## CPU Architecture
+
+We will stick with a single `.aws-architecture` file to manage the CPU architecture choice, but we will format it as a small JSON object of key/value pairs and leverage `jq` to parse the information in our `make` commands and GitHub workflows. The file will look like:
+
+```json
+{
+  "gpu": "linux/<arch>",
+  "cpu": "linux/<arch>"
+}
+```
+
+Where `<arch>` is either `amd64` or `arm64`.
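+
+As a hedged, minimal sketch (assuming the file lives at the repository root), the parsing pattern used in the `make` commands and workflows looks like:
+
+```shell
+# Read each pipeline's architecture, falling back to linux/amd64 when a key is absent
+GPU_ARCH=$(jq -r '.gpu // "linux/amd64"' .aws-architecture)
+CPU_ARCH=$(jq -r '.cpu // "linux/amd64"' .aws-architecture)
+
+# Derive the short suffix used in image tags, e.g. "arm64" from "linux/arm64"
+TAG_ARCH=$(echo "$CPU_ARCH" | cut -d'/' -f2)
+```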
+
+## Makefile configuration
+
+There is a collection of targets in the Makefile for generating Docker images locally and pushing those local builds to the ECR Repository in the Dev1 AWS Account. The tags generated by running these targets will include the word `make` so that it is clear in the AWS Console that the image came from a developer, not from GitHub Actions.
+
+## GitHub Actions
+
+There are three GitHub Actions workflows for automated build+deploy to AWS. These do **NOT** depend on the `Makefile` targets at all. While the triggers follow our typical GitHub-flow, the actual build process is different from the rest of our application repositories.
+
+### Dev Workflow
+
+1. An initial job parses the `.aws-architecture` file and generates outputs that drive the next phase.
+1. The second phase of the workflow is a matrix strategy that kicks off two runners, one for each build. The runner is picked to match the CPU architecture of the requested build. That is, if the `gpu` key in the `.aws-architecture` file specifies `linux/amd64`, then the runner for the `gpu` container will be an `amd64`-based runner; if the `cpu` key specifies `linux/arm64`, then the runner for the `cpu` container will be an `arm64`-based runner. This way, when Docker runs, it is running on the same architecture as the container it is trying to build.
+
+### Stage Workflow
+
+The Stage workflow is the same as the Dev workflow, only the trigger is different (it runs on `push` instead of `pull_request`).
+
+### Prod Workflow
+
+Similar to our shared workflows, the Prod workflow will:
+
+1. Verify that the SHAs match between stage & prod
+1. Download the images from the Stage ECR
+1. Re-tag the images for Prod
+1. Upload the images to Prod ECR
+
+There is no need for a matrix or different runners since we aren't building anything; a minimal sketch of this promote flow appears below.

From 701b3e3969505e8e7876d9a3beae99a071918740 Mon Sep 17 00:00:00 2001
From: Graham Hukill
Date: Fri, 23 Jan 2026 14:49:18 -0500
Subject: [PATCH 4/4] Prevent GHA actions for README updates

---
 .github/workflows/dev-build.yml   | 1 +
 .github/workflows/stage-build.yml | 1 +
 2 files changed, 2 insertions(+)

diff --git a/.github/workflows/dev-build.yml b/.github/workflows/dev-build.yml
index a4d78cd..699ae06 100644
--- a/.github/workflows/dev-build.yml
+++ b/.github/workflows/dev-build.yml
@@ -15,6 +15,7 @@ on:
       - '.github/**'
       - 'docs/**'
       - 'tests/**'
+      - 'README.md'
 
 permissions:
   id-token: write
   contents: read
diff --git a/.github/workflows/stage-build.yml b/.github/workflows/stage-build.yml
index 53b4d91..643d1e8 100644
--- a/.github/workflows/stage-build.yml
+++ b/.github/workflows/stage-build.yml
@@ -14,6 +14,7 @@ on:
       - '.github/**'
       - 'docs/**'
       - 'tests/**'
+      - 'README.md'
 
 permissions:
   id-token: write
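
A minimal, hedged sketch of the Prod promote flow described in the CD documentation above. The registry variables and the image tag here are illustrative assumptions, not the exact names used by the shared promote workflow:

```shell
# Pull an arch/variant-specific image from the Stage ECR (tag is illustrative)
docker pull "$STAGE_REGISTRY/timdex-embeddings-stage:latest-amd64-gpu"

# Re-tag the same image for the Prod ECR -- no rebuild occurs
docker tag "$STAGE_REGISTRY/timdex-embeddings-stage:latest-amd64-gpu" \
  "$PROD_REGISTRY/timdex-embeddings-prod:latest-amd64-gpu"

# Push the re-tagged image to the Prod ECR
docker push "$PROD_REGISTRY/timdex-embeddings-prod:latest-amd64-gpu"
```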