Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
6fef38f
reorganize folders for gpu jobs
homework36 Jul 30, 2025
ac24e18
modify install job script and make individual dockerfile
homework36 Jul 30, 2025
ce47305
use one base block for gpu worker and separate services
homework36 Aug 5, 2025
d059eeb
add service profiles for gpu jobs
homework36 Aug 5, 2025
fb3377d
remove incorrect environment lines
homework36 Aug 5, 2025
391ff92
fix dockerfile path
homework36 Aug 5, 2025
29a305c
fix dockerfile path
homework36 Aug 5, 2025
f01e0ad
fix script copy relative path
homework36 Aug 5, 2025
0d5c22b
add entrypoint file
homework36 Aug 5, 2025
259e6c6
fix relative rodan path
homework36 Aug 11, 2025
106fa10
use no cache install
homework36 Aug 11, 2025
24d0f78
leave only one gpu job for testing
homework36 Aug 11, 2025
2e2f1f4
fix path in bg removal set up
homework36 Aug 11, 2025
2303574
remove entry point
homework36 Aug 11, 2025
95abda0
fix path in install gpu jobs
homework36 Aug 11, 2025
5263fbe
Delete .DS_Store
homework36 Aug 11, 2025
185a8d3
comment out job install in gpu script for now
homework36 Aug 11, 2025
c50d999
remove entrypoint...
homework36 Aug 11, 2025
801231e
comment out everything in install gpu jobs for now
homework36 Aug 11, 2025
7e73471
add back other jobs since bg removal works now
homework36 Aug 11, 2025
fe26ea7
leave only paco for testing
homework36 Aug 12, 2025
74cf437
fix path
homework36 Aug 12, 2025
7dfb1f9
add back sae
homework36 Aug 12, 2025
086bb79
try to clean up before build in ci to solve "no enough space left""
homework36 Aug 12, 2025
b8e142c
add back text alignment...
homework36 Aug 12, 2025
d772dfe
comment out notes
homework36 Aug 12, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 14 additions & 1 deletion .github/workflows/CI-celery-build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ concurrency:
cancel-in-progress: true

jobs:

# ------- Celery Controller -------
celery-controller-build:
runs-on: ubuntu-latest
Expand Down Expand Up @@ -45,9 +46,21 @@ jobs:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- name: Free disk space before Docker build
run: |
echo "=== Disk usage before cleanup ==="
df -h
sudo rm -rf /usr/local/lib/android || true
sudo rm -rf /opt/ghc || true
sudo rm -rf /usr/share/dotnet || true
docker system prune -af --volumes || true
sudo apt-get clean
sudo rm -rf /var/lib/apt/lists/*
echo "=== Disk usage after cleanup ==="
df -h
- uses: docker/setup-compose-action@v1
with:
version: latest
- name: Build GPU-related celery workers
run: docker compose build celery-gpu-workers
run: docker compose --profile celery-gpu-workers build

187 changes: 187 additions & 0 deletions celery/gpu-workers/gpu-background-removal/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,187 @@
# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
#
# THIS IS A GENERATED DOCKERFILE.
#
# This file was assembled from multiple pieces, whose use is documented
# throughout. Please refer to the TensorFlow dockerfiles documentation
# for more information.

ARG UBUNTU_VERSION=18.04
ARG CUDA=11.2
FROM nvidia/cuda${ARCH:+-$ARCH}:${CUDA}.2-base-ubuntu${UBUNTU_VERSION} AS base
# ARCH and CUDA are specified again because the FROM directive resets ARGs
# (but their default value is retained if set previously)
ARG ARCH
ARG CUDA
ARG CUDNN=8.1.0.77-1
ARG CUDNN_MAJOR_VERSION=8
ARG LIB_DIR_PREFIX=x86_64
ARG LIBNVINFER=7.2.2-1
ARG LIBNVINFER_MAJOR_VERSION=7

# The following two arguments are rodan-specific
ARG BRANCHES
ARG VERSION

# Needed for string substitution
SHELL ["/bin/bash", "-c"]
# Pick up some TF dependencies
#RUN apt-get update

#RUN rm -rf /etc/apt/sources.list.d/cuda.list

RUN apt-get update && apt-get install -y --no-install-recommends \
build-essential \
cuda-command-line-tools-${CUDA/./-} \
libcublas-${CUDA/./-} \
cuda-nvrtc-${CUDA/./-} \
libcufft-${CUDA/./-} \
libcurand-${CUDA/./-} \
libcusolver-${CUDA/./-} \
libcusparse-${CUDA/./-} \
curl \
libcudnn8=${CUDNN}+cuda${CUDA} \
libfreetype6-dev \
libhdf5-serial-dev \
libzmq3-dev \
pkg-config \
software-properties-common \
unzip \
wget
# added wget

# Install TensorRT if not building for PowerPC
# NOTE: libnvinfer uses cuda11.1 versions
# RUN [[ "${ARCH}" = "ppc64le" ]] || { apt-get update && \
# apt-get install -y --no-install-recommends libnvinfer${LIBNVINFER_MAJOR_VERSION}=${LIBNVINFER}+cuda11.1 \
# libnvinfer-plugin${LIBNVINFER_MAJOR_VERSION}=${LIBNVINFER}+cuda11.1 \
# && apt-get clean \
# && rm -rf /var/lib/apt/lists/*; }

# For CUDA profiling, TensorFlow requires CUPTI.
ENV LD_LIBRARY_PATH /usr/local/cuda/extras/CUPTI/lib64:/usr/local/cuda/lib64:/usr/local/cuda-11.1/lib64:$LD_LIBRARY_PATH

# Link the libcuda stub to the location where tensorflow is searching for it and reconfigure
# dynamic linker run-time bindings
RUN ln -s /usr/local/cuda/lib64/stubs/libcuda.so /usr/local/cuda/lib64/stubs/libcuda.so.1 \
&& echo "/usr/local/cuda/lib64/stubs" > /etc/ld.so.conf.d/z-cuda-stubs.conf \
&& ldconfig

# See http://bugs.python.org/issue19846
ENV LANG C.UTF-8

# This section differs from the default tensorflow2.5.1 Dockerfile, because we specifically add python 3.7;
ARG PYTHON=python3.7
ARG TF_PACKAGE=tensorflow
ARG TF_PACKAGE_VERSION=2.5.1

RUN apt-get update && apt-get install -y --no-install-recommends \
python3.7 \
python3-pip \
&& \
apt-get clean && \
rm -rf /var/lib/apt/lists/* \
&& wget https://bootstrap.pypa.io/pip/3.7/get-pip.py \
&& ${PYTHON} get-pip.py \
&& ln -sf /usr/bin/${PYTHON} /usr/local/bin/python3 \
&& ln -sf /usr/local/bin/pip /usr/local/bin/pip3 \
&& pip3 --no-cache-dir install --upgrade pip setuptools==57.0.0 \
# Some TF tools expect a "python" binary
&& ln -s $(which python3) /usr/local/bin/python \
&& python3 -m pip install --no-cache-dir ${TF_PACKAGE}${TF_PACKAGE_VERSION:+==${TF_PACKAGE_VERSION}}

# RUN ln -s $(which python3) /usr/local/bin/python

# Options:
# tensorflow
# tensorflow-gpu
# tf-nightly
# tf-nightly-gpu
# Set --build-arg TF_PACKAGE_VERSION=1.11.0rc0 to install a specific version.
# Installs the latest version by default.

# COPY bashrc /etc/bash.bashrc
# RUN chmod a+rwx /etc/bash.bashrc

# This ends the material obtained from TensorFlow's dockerfile. the remainder is rodan-docker-specific setup.

# FROM base
RUN set -e \
&& apt-get update \
&& DEBIAN_FRONTEND="noninteractive" apt-get install -yqq \
git \
# Python lxml dependencies
python3.7-dev \
python3-opencv \
libxml2-dev \
libxslt1-dev \
zlib1g-dev \
lib32ncurses5-dev \
# Psycopg2 dependencies
libpq-dev \
# For resource identification
libmagic-dev \
unzip \
# Remove when done
vim

RUN rm -rf /var/lib/apt/lists/*

# Bandaid fix for the cannot import name '_registerMatType' from 'cv2.cv2' issue
#RUN pip uninstall opencv-python-headless && pip install opencv-python-headless==4.1.2.30

# Install GPU Rodan Jobs
COPY scripts/install_gpu_jobs/install_background_removal /opt/

# Install Rodan
# Runs on both Rodan service, and Rodan-Celery
# COPY scripts/entrypoint /opt/
COPY scripts/start-celery /run/
COPY scripts/wait-for-app /run/

# Copying rodan core from build context into container
# Rodan folder MUST be uppercase, otherwise many unittests fail.
COPY backend/django /code/Rodan

# necessary for scikit-image > 0.17, or else it will try to make a cache directory
# in a place where the www-data user does not have permissions to do so
ENV SKIMAGE_DATADIR "/tmp/.skimage_cache"


RUN set -x \
# Create Folders
&& mkdir -p /code/jobs \
# Install GPU Jobs
&& chmod +x /opt/install_background_removal \
&& /opt/install_background_removal \
# Install Rodan
&& sed -i "s/lxml/#lxml/g" /code/Rodan/requirements.txt \
# && sed -i "s/pybagit==1.5.0/-e git:\/\/github.com\/deepio\/pybagit.git@a27c9e0fc3bdf99dab8bd327f3ce9ea884abd6b4#egg=pybagit/g" /code/Rodan/requirements.txt \
# Add Entrypoints
# && sed -i 's/\r//' /opt/entrypoint \
# && chmod +x /opt/entrypoint \
# Add Celery script
&& chmod +x /run/start-celery \
# Change the concurency for gpu jobs because Calvo is very expensive
&& sed -i "s/=10/=1/g" /run/start-celery \
# Script to wait for postgres and redis to be running before attempting to connect to them.
&& chmod +x /run/wait-for-app

# Install Rodan.
RUN pip3 install --no-cache-dir -r /code/Rodan/requirements.txt

RUN pip3 uninstall -y opencv-python opencv-python-headless
RUN pip3 install opencv-python-headless==4.1.2.30
Loading