diff --git a/.github/workflows/CI-celery-build.yml b/.github/workflows/CI-celery-build.yml index 6e276b1..46ce1c3 100644 --- a/.github/workflows/CI-celery-build.yml +++ b/.github/workflows/CI-celery-build.yml @@ -18,6 +18,7 @@ concurrency: cancel-in-progress: true jobs: + # ------- Celery Controller ------- celery-controller-build: runs-on: ubuntu-latest @@ -45,9 +46,21 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v3 + - name: Free disk space before Docker build + run: | + echo "=== Disk usage before cleanup ===" + df -h + sudo rm -rf /usr/local/lib/android || true + sudo rm -rf /opt/ghc || true + sudo rm -rf /usr/share/dotnet || true + docker system prune -af --volumes || true + sudo apt-get clean + sudo rm -rf /var/lib/apt/lists/* + echo "=== Disk usage after cleanup ===" + df -h - uses: docker/setup-compose-action@v1 with: version: latest - name: Build GPU-related celery workers - run: docker compose build celery-gpu-workers + run: docker compose --profile celery-gpu-workers build \ No newline at end of file diff --git a/celery/gpu-workers/gpu-background-removal/Dockerfile b/celery/gpu-workers/gpu-background-removal/Dockerfile new file mode 100644 index 0000000..fe5a3c9 --- /dev/null +++ b/celery/gpu-workers/gpu-background-removal/Dockerfile @@ -0,0 +1,187 @@ +# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ +# +# THIS IS A GENERATED DOCKERFILE. +# +# This file was assembled from multiple pieces, whose use is documented +# throughout. Please refer to the TensorFlow dockerfiles documentation +# for more information. + +ARG UBUNTU_VERSION=18.04 +ARG CUDA=11.2 +FROM nvidia/cuda${ARCH:+-$ARCH}:${CUDA}.2-base-ubuntu${UBUNTU_VERSION} AS base +# ARCH and CUDA are specified again because the FROM directive resets ARGs +# (but their default value is retained if set previously) +ARG ARCH +ARG CUDA +ARG CUDNN=8.1.0.77-1 +ARG CUDNN_MAJOR_VERSION=8 +ARG LIB_DIR_PREFIX=x86_64 +ARG LIBNVINFER=7.2.2-1 +ARG LIBNVINFER_MAJOR_VERSION=7 + +# The following two arguments are rodan-specific +ARG BRANCHES +ARG VERSION + +# Needed for string substitution +SHELL ["/bin/bash", "-c"] +# Pick up some TF dependencies +#RUN apt-get update + +#RUN rm -rf /etc/apt/sources.list.d/cuda.list + +RUN apt-get update && apt-get install -y --no-install-recommends \ + build-essential \ + cuda-command-line-tools-${CUDA/./-} \ + libcublas-${CUDA/./-} \ + cuda-nvrtc-${CUDA/./-} \ + libcufft-${CUDA/./-} \ + libcurand-${CUDA/./-} \ + libcusolver-${CUDA/./-} \ + libcusparse-${CUDA/./-} \ + curl \ + libcudnn8=${CUDNN}+cuda${CUDA} \ + libfreetype6-dev \ + libhdf5-serial-dev \ + libzmq3-dev \ + pkg-config \ + software-properties-common \ + unzip \ + wget +# added wget + +# Install TensorRT if not building for PowerPC +# NOTE: libnvinfer uses cuda11.1 versions +# RUN [[ "${ARCH}" = "ppc64le" ]] || { apt-get update && \ +# apt-get install -y --no-install-recommends libnvinfer${LIBNVINFER_MAJOR_VERSION}=${LIBNVINFER}+cuda11.1 \ +# libnvinfer-plugin${LIBNVINFER_MAJOR_VERSION}=${LIBNVINFER}+cuda11.1 \ +# && apt-get clean \ +# && rm -rf /var/lib/apt/lists/*; } + +# For CUDA profiling, TensorFlow requires CUPTI. 
+ENV LD_LIBRARY_PATH /usr/local/cuda/extras/CUPTI/lib64:/usr/local/cuda/lib64:/usr/local/cuda-11.1/lib64:$LD_LIBRARY_PATH + +# Link the libcuda stub to the location where tensorflow is searching for it and reconfigure +# dynamic linker run-time bindings +RUN ln -s /usr/local/cuda/lib64/stubs/libcuda.so /usr/local/cuda/lib64/stubs/libcuda.so.1 \ + && echo "/usr/local/cuda/lib64/stubs" > /etc/ld.so.conf.d/z-cuda-stubs.conf \ + && ldconfig + +# See http://bugs.python.org/issue19846 +ENV LANG C.UTF-8 + +# This section differs from the default tensorflow2.5.1 Dockerfile, because we specifically add python 3.7; +ARG PYTHON=python3.7 +ARG TF_PACKAGE=tensorflow +ARG TF_PACKAGE_VERSION=2.5.1 + +RUN apt-get update && apt-get install -y --no-install-recommends \ + python3.7 \ + python3-pip \ + && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* \ + && wget https://bootstrap.pypa.io/pip/3.7/get-pip.py \ + && ${PYTHON} get-pip.py \ + && ln -sf /usr/bin/${PYTHON} /usr/local/bin/python3 \ + && ln -sf /usr/local/bin/pip /usr/local/bin/pip3 \ + && pip3 --no-cache-dir install --upgrade pip setuptools==57.0.0 \ + # Some TF tools expect a "python" binary + && ln -s $(which python3) /usr/local/bin/python \ + && python3 -m pip install --no-cache-dir ${TF_PACKAGE}${TF_PACKAGE_VERSION:+==${TF_PACKAGE_VERSION}} + +# RUN ln -s $(which python3) /usr/local/bin/python + +# Options: +# tensorflow +# tensorflow-gpu +# tf-nightly +# tf-nightly-gpu +# Set --build-arg TF_PACKAGE_VERSION=1.11.0rc0 to install a specific version. +# Installs the latest version by default. + +# COPY bashrc /etc/bash.bashrc +# RUN chmod a+rwx /etc/bash.bashrc + +# This ends the material obtained from TensorFlow's dockerfile. the remainder is rodan-docker-specific setup. 
+ +# FROM base +RUN set -e \ + && apt-get update \ + && DEBIAN_FRONTEND="noninteractive" apt-get install -yqq \ + git \ + # Python lxml dependencies + python3.7-dev \ + python3-opencv \ + libxml2-dev \ + libxslt1-dev \ + zlib1g-dev \ + lib32ncurses5-dev \ + # Psycopg2 dependencies + libpq-dev \ + # For resource identification + libmagic-dev \ + unzip \ + # Remove when done + vim + +RUN rm -rf /var/lib/apt/lists/* + +# Bandaid fix for the cannot import name '_registerMatType' from 'cv2.cv2' issue +#RUN pip uninstall opencv-python-headless && pip install opencv-python-headless==4.1.2.30 + +# Install GPU Rodan Jobs +COPY scripts/install_gpu_jobs/install_background_removal /opt/ + +# Install Rodan +# Runs on both Rodan service, and Rodan-Celery +# COPY scripts/entrypoint /opt/ +COPY scripts/start-celery /run/ +COPY scripts/wait-for-app /run/ + +# Copying rodan core from build context into container +# Rodan folder MUST be uppercase, otherwise many unittests fail. +COPY backend/django /code/Rodan + +# necessary for scikit-image > 0.17, or else it will try to make a cache directory +# in a place where the www-data user does not have permissions to do so +ENV SKIMAGE_DATADIR "/tmp/.skimage_cache" + + +RUN set -x \ + # Create Folders + && mkdir -p /code/jobs \ + # Install GPU Jobs + && chmod +x /opt/install_background_removal \ + && /opt/install_background_removal \ + # Install Rodan + && sed -i "s/lxml/#lxml/g" /code/Rodan/requirements.txt \ + # && sed -i "s/pybagit==1.5.0/-e git:\/\/github.com\/deepio\/pybagit.git@a27c9e0fc3bdf99dab8bd327f3ce9ea884abd6b4#egg=pybagit/g" /code/Rodan/requirements.txt \ + # Add Entrypoints + # && sed -i 's/\r//' /opt/entrypoint \ + # && chmod +x /opt/entrypoint \ + # Add Celery script + && chmod +x /run/start-celery \ + # Change the concurency for gpu jobs because Calvo is very expensive + && sed -i "s/=10/=1/g" /run/start-celery \ + # Script to wait for postgres and redis to be running before attempting to connect to them. 
+ && chmod +x /run/wait-for-app + +# Install Rodan. +RUN pip3 install --no-cache-dir -r /code/Rodan/requirements.txt + +RUN pip3 uninstall -y opencv-python opencv-python-headless +RUN pip3 install opencv-python-headless==4.1.2.30 \ No newline at end of file diff --git a/celery/gpu-workers/config/promtail.yaml b/celery/gpu-workers/gpu-background-removal/config/promtail.yaml similarity index 100% rename from celery/gpu-workers/config/promtail.yaml rename to celery/gpu-workers/gpu-background-removal/config/promtail.yaml diff --git a/celery/gpu-workers/gpu-paco-classifier/Dockerfile b/celery/gpu-workers/gpu-paco-classifier/Dockerfile new file mode 100644 index 0000000..f97f169 --- /dev/null +++ b/celery/gpu-workers/gpu-paco-classifier/Dockerfile @@ -0,0 +1,187 @@ +# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +# +# THIS IS A GENERATED DOCKERFILE. +# +# This file was assembled from multiple pieces, whose use is documented +# throughout. Please refer to the TensorFlow dockerfiles documentation +# for more information. 
+ +ARG UBUNTU_VERSION=18.04 +ARG CUDA=11.2 +FROM nvidia/cuda${ARCH:+-$ARCH}:${CUDA}.2-base-ubuntu${UBUNTU_VERSION} AS base +# ARCH and CUDA are specified again because the FROM directive resets ARGs +# (but their default value is retained if set previously) +ARG ARCH +ARG CUDA +ARG CUDNN=8.1.0.77-1 +ARG CUDNN_MAJOR_VERSION=8 +ARG LIB_DIR_PREFIX=x86_64 +ARG LIBNVINFER=7.2.2-1 +ARG LIBNVINFER_MAJOR_VERSION=7 + +# The following two arguments are rodan-specific +ARG BRANCHES +ARG VERSION + +# Needed for string substitution +SHELL ["/bin/bash", "-c"] +# Pick up some TF dependencies +#RUN apt-get update + +#RUN rm -rf /etc/apt/sources.list.d/cuda.list + +RUN apt-get update && apt-get install -y --no-install-recommends \ + build-essential \ + cuda-command-line-tools-${CUDA/./-} \ + libcublas-${CUDA/./-} \ + cuda-nvrtc-${CUDA/./-} \ + libcufft-${CUDA/./-} \ + libcurand-${CUDA/./-} \ + libcusolver-${CUDA/./-} \ + libcusparse-${CUDA/./-} \ + curl \ + libcudnn8=${CUDNN}+cuda${CUDA} \ + libfreetype6-dev \ + libhdf5-serial-dev \ + libzmq3-dev \ + pkg-config \ + software-properties-common \ + unzip \ + wget +# added wget + +# Install TensorRT if not building for PowerPC +# NOTE: libnvinfer uses cuda11.1 versions +# RUN [[ "${ARCH}" = "ppc64le" ]] || { apt-get update && \ +# apt-get install -y --no-install-recommends libnvinfer${LIBNVINFER_MAJOR_VERSION}=${LIBNVINFER}+cuda11.1 \ +# libnvinfer-plugin${LIBNVINFER_MAJOR_VERSION}=${LIBNVINFER}+cuda11.1 \ +# && apt-get clean \ +# && rm -rf /var/lib/apt/lists/*; } + +# For CUDA profiling, TensorFlow requires CUPTI. 
+ENV LD_LIBRARY_PATH /usr/local/cuda/extras/CUPTI/lib64:/usr/local/cuda/lib64:/usr/local/cuda-11.1/lib64:$LD_LIBRARY_PATH + +# Link the libcuda stub to the location where tensorflow is searching for it and reconfigure +# dynamic linker run-time bindings +RUN ln -s /usr/local/cuda/lib64/stubs/libcuda.so /usr/local/cuda/lib64/stubs/libcuda.so.1 \ + && echo "/usr/local/cuda/lib64/stubs" > /etc/ld.so.conf.d/z-cuda-stubs.conf \ + && ldconfig + +# See http://bugs.python.org/issue19846 +ENV LANG C.UTF-8 + +# This section differs from the default tensorflow2.5.1 Dockerfile, because we specifically add python 3.7; +ARG PYTHON=python3.7 +ARG TF_PACKAGE=tensorflow +ARG TF_PACKAGE_VERSION=2.5.1 + +RUN apt-get update && apt-get install -y --no-install-recommends \ + python3.7 \ + python3-pip \ + && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* \ + && wget https://bootstrap.pypa.io/pip/3.7/get-pip.py \ + && ${PYTHON} get-pip.py \ + && ln -sf /usr/bin/${PYTHON} /usr/local/bin/python3 \ + && ln -sf /usr/local/bin/pip /usr/local/bin/pip3 \ + && pip3 --no-cache-dir install --upgrade pip setuptools==57.0.0 \ + # Some TF tools expect a "python" binary + && ln -s $(which python3) /usr/local/bin/python \ + && python3 -m pip install --no-cache-dir ${TF_PACKAGE}${TF_PACKAGE_VERSION:+==${TF_PACKAGE_VERSION}} + +# RUN ln -s $(which python3) /usr/local/bin/python + +# Options: +# tensorflow +# tensorflow-gpu +# tf-nightly +# tf-nightly-gpu +# Set --build-arg TF_PACKAGE_VERSION=1.11.0rc0 to install a specific version. +# Installs the latest version by default. + +# COPY bashrc /etc/bash.bashrc +# RUN chmod a+rwx /etc/bash.bashrc + +# This ends the material obtained from TensorFlow's dockerfile. the remainder is rodan-docker-specific setup. 
+ +# FROM base +RUN set -e \ + && apt-get update \ + && DEBIAN_FRONTEND="noninteractive" apt-get install -yqq \ + git \ + # Python lxml dependencies + python3.7-dev \ + python3-opencv \ + libxml2-dev \ + libxslt1-dev \ + zlib1g-dev \ + lib32ncurses5-dev \ + # Psycopg2 dependencies + libpq-dev \ + # For resource identification + libmagic-dev \ + unzip \ + # Remove when done + vim + +RUN rm -rf /var/lib/apt/lists/* + +# Bandaid fix for the cannot import name '_registerMatType' from 'cv2.cv2' issue +#RUN pip uninstall opencv-python-headless && pip install opencv-python-headless==4.1.2.30 + +# Install GPU Rodan Jobs +COPY scripts/install_gpu_jobs/install_paco_classifier /opt/ + +# Install Rodan +# Runs on both Rodan service, and Rodan-Celery +# COPY scripts/entrypoint /opt/ +COPY scripts/start-celery /run/ +COPY scripts/wait-for-app /run/ + +# Copying rodan core from build context into container +# Rodan folder MUST be uppercase, otherwise many unittests fail. +COPY backend/django /code/Rodan + +# necessary for scikit-image > 0.17, or else it will try to make a cache directory +# in a place where the www-data user does not have permissions to do so +ENV SKIMAGE_DATADIR "/tmp/.skimage_cache" + + +RUN set -x \ + # Create Folders + && mkdir -p /code/jobs \ + # Install GPU Jobs + && chmod +x /opt/install_paco_classifier \ + && /opt/install_paco_classifier \ + # Install Rodan + && sed -i "s/lxml/#lxml/g" /code/Rodan/requirements.txt \ + # && sed -i "s/pybagit==1.5.0/-e git:\/\/github.com\/deepio\/pybagit.git@a27c9e0fc3bdf99dab8bd327f3ce9ea884abd6b4#egg=pybagit/g" /code/Rodan/requirements.txt \ + # Add Entrypoints + # && sed -i 's/\r//' /opt/entrypoint \ + # && chmod +x /opt/entrypoint \ + # Add Celery script + && chmod +x /run/start-celery \ + # Change the concurency for gpu jobs because Calvo is very expensive + && sed -i "s/=10/=1/g" /run/start-celery \ + # Script to wait for postgres and redis to be running before attempting to connect to them. 
+ && chmod +x /run/wait-for-app + +# Install Rodan. +RUN pip3 install --no-cache-dir -r /code/Rodan/requirements.txt + +RUN pip3 uninstall -y opencv-python opencv-python-headless +RUN pip3 install opencv-python-headless==4.1.2.30 \ No newline at end of file diff --git a/celery/gpu-workers/gpu-paco-classifier/config/promtail.yaml b/celery/gpu-workers/gpu-paco-classifier/config/promtail.yaml new file mode 100644 index 0000000..6fd7300 --- /dev/null +++ b/celery/gpu-workers/gpu-paco-classifier/config/promtail.yaml @@ -0,0 +1,19 @@ +server: + http_listen_port: 9080 + grpc_listen_port: 0 + +positions: + filename: /tmp/positions.yaml + +clients: + - url: http://192.168.17.170:3100/loki/api/v1/push + +scrape_configs: +- job_name: rodan_celery + static_configs: + - targets: + - localhost + labels: + job: varlogs + host: SERVER_HOST + __path__: /code/Rodan/*.log \ No newline at end of file diff --git a/celery/gpu-workers/Dockerfile b/celery/gpu-workers/gpu-sae-binarization/Dockerfile similarity index 93% rename from celery/gpu-workers/Dockerfile rename to celery/gpu-workers/gpu-sae-binarization/Dockerfile index 1674bb2..28dd96c 100644 --- a/celery/gpu-workers/Dockerfile +++ b/celery/gpu-workers/gpu-sae-binarization/Dockerfile @@ -144,17 +144,17 @@ RUN rm -rf /var/lib/apt/lists/* #RUN pip uninstall opencv-python-headless && pip install opencv-python-headless==4.1.2.30 # Install GPU Rodan Jobs -COPY ../../scripts/install_gpu_rodan_jobs /opt/ +COPY scripts/install_gpu_jobs/install_sae_binarization /opt/ # Install Rodan # Runs on both Rodan service, and Rodan-Celery -COPY ../../scripts/entrypoint /opt/ -COPY ../../scripts/start-celery /run/ -COPY ../../scripts/wait-for-app /run/ +# COPY scripts/entrypoint /opt/ +COPY scripts/start-celery /run/ +COPY scripts/wait-for-app /run/ # Copying rodan core from build context into container # Rodan folder MUST be uppercase, otherwise many unittests fail. 
-COPY ../../backend/django /code/Rodan +COPY backend/django /code/Rodan # necessary for scikit-image > 0.17, or else it will try to make a cache directory # in a place where the www-data user does not have permissions to do so @@ -165,14 +165,14 @@ RUN set -x \ # Create Folders && mkdir -p /code/jobs \ # Install GPU Jobs - && chmod +x /opt/install_gpu_rodan_jobs \ - && /opt/install_gpu_rodan_jobs \ + && chmod +x /opt/install_sae_binarization \ + && /opt/install_sae_binarization \ # Install Rodan && sed -i "s/lxml/#lxml/g" /code/Rodan/requirements.txt \ # && sed -i "s/pybagit==1.5.0/-e git:\/\/github.com\/deepio\/pybagit.git@a27c9e0fc3bdf99dab8bd327f3ce9ea884abd6b4#egg=pybagit/g" /code/Rodan/requirements.txt \ # Add Entrypoints - && sed -i 's/\r//' /opt/entrypoint \ - && chmod +x /opt/entrypoint \ + # && sed -i 's/\r//' /opt/entrypoint \ + # && chmod +x /opt/entrypoint \ # Add Celery script && chmod +x /run/start-celery \ # Change the concurency for gpu jobs because Calvo is very expensive @@ -181,9 +181,7 @@ RUN set -x \ && chmod +x /run/wait-for-app # Install Rodan. 
-RUN pip3 install -r /code/Rodan/requirements.txt +RUN pip3 install --no-cache-dir -r /code/Rodan/requirements.txt RUN pip3 uninstall -y opencv-python opencv-python-headless RUN pip3 install opencv-python-headless==4.1.2.30 - -ENTRYPOINT ["/opt/entrypoint"] diff --git a/celery/gpu-workers/gpu-sae-binarization/config/promtail.yaml b/celery/gpu-workers/gpu-sae-binarization/config/promtail.yaml new file mode 100644 index 0000000..6fd7300 --- /dev/null +++ b/celery/gpu-workers/gpu-sae-binarization/config/promtail.yaml @@ -0,0 +1,19 @@ +server: + http_listen_port: 9080 + grpc_listen_port: 0 + +positions: + filename: /tmp/positions.yaml + +clients: + - url: http://192.168.17.170:3100/loki/api/v1/push + +scrape_configs: +- job_name: rodan_celery + static_configs: + - targets: + - localhost + labels: + job: varlogs + host: SERVER_HOST + __path__: /code/Rodan/*.log \ No newline at end of file diff --git a/celery/gpu-workers/gpu-text-alignment/Dockerfile b/celery/gpu-workers/gpu-text-alignment/Dockerfile new file mode 100644 index 0000000..1a97d57 --- /dev/null +++ b/celery/gpu-workers/gpu-text-alignment/Dockerfile @@ -0,0 +1,187 @@ +# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +# +# THIS IS A GENERATED DOCKERFILE. +# +# This file was assembled from multiple pieces, whose use is documented +# throughout. 
Please refer to the TensorFlow dockerfiles documentation +# for more information. + +ARG UBUNTU_VERSION=18.04 +ARG CUDA=11.2 +FROM nvidia/cuda${ARCH:+-$ARCH}:${CUDA}.2-base-ubuntu${UBUNTU_VERSION} AS base +# ARCH and CUDA are specified again because the FROM directive resets ARGs +# (but their default value is retained if set previously) +ARG ARCH +ARG CUDA +ARG CUDNN=8.1.0.77-1 +ARG CUDNN_MAJOR_VERSION=8 +ARG LIB_DIR_PREFIX=x86_64 +ARG LIBNVINFER=7.2.2-1 +ARG LIBNVINFER_MAJOR_VERSION=7 + +# The following two arguments are rodan-specific +ARG BRANCHES +ARG VERSION + +# Needed for string substitution +SHELL ["/bin/bash", "-c"] +# Pick up some TF dependencies +#RUN apt-get update + +#RUN rm -rf /etc/apt/sources.list.d/cuda.list + +RUN apt-get update && apt-get install -y --no-install-recommends \ + build-essential \ + cuda-command-line-tools-${CUDA/./-} \ + libcublas-${CUDA/./-} \ + cuda-nvrtc-${CUDA/./-} \ + libcufft-${CUDA/./-} \ + libcurand-${CUDA/./-} \ + libcusolver-${CUDA/./-} \ + libcusparse-${CUDA/./-} \ + curl \ + libcudnn8=${CUDNN}+cuda${CUDA} \ + libfreetype6-dev \ + libhdf5-serial-dev \ + libzmq3-dev \ + pkg-config \ + software-properties-common \ + unzip \ + wget +# added wget + +# Install TensorRT if not building for PowerPC +# NOTE: libnvinfer uses cuda11.1 versions +# RUN [[ "${ARCH}" = "ppc64le" ]] || { apt-get update && \ +# apt-get install -y --no-install-recommends libnvinfer${LIBNVINFER_MAJOR_VERSION}=${LIBNVINFER}+cuda11.1 \ +# libnvinfer-plugin${LIBNVINFER_MAJOR_VERSION}=${LIBNVINFER}+cuda11.1 \ +# && apt-get clean \ +# && rm -rf /var/lib/apt/lists/*; } + +# For CUDA profiling, TensorFlow requires CUPTI. 
+ENV LD_LIBRARY_PATH /usr/local/cuda/extras/CUPTI/lib64:/usr/local/cuda/lib64:/usr/local/cuda-11.1/lib64:$LD_LIBRARY_PATH + +# Link the libcuda stub to the location where tensorflow is searching for it and reconfigure +# dynamic linker run-time bindings +RUN ln -s /usr/local/cuda/lib64/stubs/libcuda.so /usr/local/cuda/lib64/stubs/libcuda.so.1 \ + && echo "/usr/local/cuda/lib64/stubs" > /etc/ld.so.conf.d/z-cuda-stubs.conf \ + && ldconfig + +# See http://bugs.python.org/issue19846 +ENV LANG C.UTF-8 + +# This section differs from the default tensorflow2.5.1 Dockerfile, because we specifically add python 3.7; +ARG PYTHON=python3.7 +ARG TF_PACKAGE=tensorflow +ARG TF_PACKAGE_VERSION=2.5.1 + +RUN apt-get update && apt-get install -y --no-install-recommends \ + python3.7 \ + python3-pip \ + && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* \ + && wget https://bootstrap.pypa.io/pip/3.7/get-pip.py \ + && ${PYTHON} get-pip.py \ + && ln -sf /usr/bin/${PYTHON} /usr/local/bin/python3 \ + && ln -sf /usr/local/bin/pip /usr/local/bin/pip3 \ + && pip3 --no-cache-dir install --upgrade pip setuptools==57.0.0 \ + # Some TF tools expect a "python" binary + && ln -s $(which python3) /usr/local/bin/python \ + && python3 -m pip install --no-cache-dir ${TF_PACKAGE}${TF_PACKAGE_VERSION:+==${TF_PACKAGE_VERSION}} + +# RUN ln -s $(which python3) /usr/local/bin/python + +# Options: +# tensorflow +# tensorflow-gpu +# tf-nightly +# tf-nightly-gpu +# Set --build-arg TF_PACKAGE_VERSION=1.11.0rc0 to install a specific version. +# Installs the latest version by default. + +# COPY bashrc /etc/bash.bashrc +# RUN chmod a+rwx /etc/bash.bashrc + +# This ends the material obtained from TensorFlow's dockerfile. the remainder is rodan-docker-specific setup. 
+ +# FROM base +RUN set -e \ + && apt-get update \ + && DEBIAN_FRONTEND="noninteractive" apt-get install -yqq \ + git \ + # Python lxml dependencies + python3.7-dev \ + python3-opencv \ + libxml2-dev \ + libxslt1-dev \ + zlib1g-dev \ + lib32ncurses5-dev \ + # Psycopg2 dependencies + libpq-dev \ + # For resource identification + libmagic-dev \ + unzip \ + # Remove when done + vim + +RUN rm -rf /var/lib/apt/lists/* + +# Bandaid fix for the cannot import name '_registerMatType' from 'cv2.cv2' issue +#RUN pip uninstall opencv-python-headless && pip install opencv-python-headless==4.1.2.30 + +# Install GPU Rodan Jobs +COPY scripts/install_gpu_jobs/install_text_alignment /opt/ + +# Install Rodan +# Runs on both Rodan service, and Rodan-Celery +# COPY scripts/entrypoint /opt/ +COPY scripts/start-celery /run/ +COPY scripts/wait-for-app /run/ + +# Copying rodan core from build context into container +# Rodan folder MUST be uppercase, otherwise many unittests fail. +COPY backend/django /code/Rodan + +# necessary for scikit-image > 0.17, or else it will try to make a cache directory +# in a place where the www-data user does not have permissions to do so +ENV SKIMAGE_DATADIR "/tmp/.skimage_cache" + + +RUN set -x \ + # Create Folders + && mkdir -p /code/jobs \ + # Install GPU Jobs + && chmod +x /opt/install_text_alignment \ + && /opt/install_text_alignment \ + # Install Rodan + && sed -i "s/lxml/#lxml/g" /code/Rodan/requirements.txt \ + # && sed -i "s/pybagit==1.5.0/-e git:\/\/github.com\/deepio\/pybagit.git@a27c9e0fc3bdf99dab8bd327f3ce9ea884abd6b4#egg=pybagit/g" /code/Rodan/requirements.txt \ + # Add Entrypoints + # && sed -i 's/\r//' /opt/entrypoint \ + # && chmod +x /opt/entrypoint \ + # Add Celery script + && chmod +x /run/start-celery \ + # Change the concurency for gpu jobs because Calvo is very expensive + && sed -i "s/=10/=1/g" /run/start-celery \ + # Script to wait for postgres and redis to be running before attempting to connect to them. 
+ && chmod +x /run/wait-for-app + +# Install Rodan. +RUN pip3 install --no-cache-dir -r /code/Rodan/requirements.txt + +RUN pip3 uninstall -y opencv-python opencv-python-headless +RUN pip3 install opencv-python-headless==4.1.2.30 diff --git a/celery/gpu-workers/gpu-text-alignment/config/promtail.yaml b/celery/gpu-workers/gpu-text-alignment/config/promtail.yaml new file mode 100644 index 0000000..6fd7300 --- /dev/null +++ b/celery/gpu-workers/gpu-text-alignment/config/promtail.yaml @@ -0,0 +1,19 @@ +server: + http_listen_port: 9080 + grpc_listen_port: 0 + +positions: + filename: /tmp/positions.yaml + +clients: + - url: http://192.168.17.170:3100/loki/api/v1/push + +scrape_configs: +- job_name: rodan_celery + static_configs: + - targets: + - localhost + labels: + job: varlogs + host: SERVER_HOST + __path__: /code/Rodan/*.log \ No newline at end of file diff --git a/docker-compose.yml b/docker-compose.yml index 265e5db..0827098 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -12,6 +12,31 @@ volumes: rodan-resources: +x-celery-gpu-worker-base: &celery-gpu-worker-base + command: bash -c "tail -f /dev/null" + # Anchored separately: the YAML merge key is shallow, so a service that sets its own "environment" must merge this mapping explicitly or TZ/SERVER_HOST are dropped. + environment: &celery-gpu-worker-env + TZ: America/Toronto + SERVER_HOST: localhost + depends_on: + - backend-django + - taskqueue-rabbitmq + - database-postgres + - database-redis + - celery-controller + env_file: + - ./scripts/local.env + volumes: + - "rodan-resources:/rodan/data" + - "./backend/django/code:/code/Rodan" + deploy: + resources: + reservations: + devices: + - driver: nvidia + count: all + capabilities: [gpu, compute, utility] + services: frontend-nginx: container_name: "frontend-nginx" @@ -115,36 +140,48 @@ services: - "rodan-resources:/rodan/data" - "./backend/django/code:/code/Rodan" - celery-gpu-workers: + celery-gpu-background-removal: + <<: *celery-gpu-worker-base + profiles: ["celery-gpu-workers"] build: context: . 
- dockerfile: ./celery/gpu-workers/Dockerfile - command: bash -c "tail -f /dev/null" + dockerfile: ./celery/gpu-workers/gpu-background-removal/Dockerfile environment: - TZ: America/Toronto - SERVER_HOST: localhost - # CELERY_JOB_QUEUE: GPU - depends_on: - - backend-django - - taskqueue-rabbitmq - - database-postgres - - database-redis - - celery-controller - env_file: - - ./scripts/local.env - volumes: - - "rodan-resources:/rodan/data" - - "./backend/django/code:/code/Rodan" - deploy: - resources: - reservations: - devices: - - driver: nvidia - count: all - capabilities: [gpu, compute, utility] - # See more: - # - https://docs.docker.com/compose/how-tos/gpu-support/ - # - https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/docker-specialized.html + <<: *celery-gpu-worker-env + CELERY_JOB_QUEUE: GPU + + celery-gpu-paco-classifier: + <<: *celery-gpu-worker-base + profiles: ["celery-gpu-workers"] + build: + context: . + dockerfile: ./celery/gpu-workers/gpu-paco-classifier/Dockerfile + environment: + <<: *celery-gpu-worker-env + CELERY_JOB_QUEUE: GPU + + celery-gpu-sae-binarization: + <<: *celery-gpu-worker-base + profiles: ["celery-gpu-workers"] + build: + context: . + dockerfile: ./celery/gpu-workers/gpu-sae-binarization/Dockerfile + environment: + <<: *celery-gpu-worker-env + CELERY_JOB_QUEUE: GPU + + celery-gpu-text-alignment: + <<: *celery-gpu-worker-base + profiles: ["celery-gpu-workers"] + build: + context: . 
+ dockerfile: ./celery/gpu-workers/gpu-text-alignment/Dockerfile + environment: + <<: *celery-gpu-worker-env + CELERY_JOB_QUEUE: GPU + # See more: + # - https://docs.docker.com/compose/how-tos/gpu-support/ + # - https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/docker-specialized.html # Database: PostgreSQL, Redit (messaging) database-postgres: diff --git a/scripts/install_gpu_jobs/install_background_removal b/scripts/install_gpu_jobs/install_background_removal new file mode 100644 index 0000000..86b5a07 --- /dev/null +++ b/scripts/install_gpu_jobs/install_background_removal @@ -0,0 +1,34 @@ +#!/bin/sh +set -o errexit # Exit immediately if a command exits with a non-zero status. +set -o nounset # Treat unset variables as an error when substituting. +set -o xtrace # Print commands and their arguments as they are executed. + +PIP=$(which pip3) || PIP=$(which pip) + +cd /code/Rodan/code/jobs + + +cat << EOF | python3 +new_backend = """{ + "image_dim_ordering": "tf", + "epsilon": 1e-07, + "floatx": "float32", + "backend": "tensorflow" +}""" +open("keras.json", "w").write(new_backend) +EOF + +# Install Background Removal +cd / +git clone -b v1.1.1 https://github.com/DDMAL/background_removal.git +mv background_removal .background_removal +cd .background_removal +which pip3 && $PIP install --no-cache-dir . + + + +cd /code/Rodan/code +sed -i 's/#gpu //g' /code/Rodan/code/settings.py + +# Temporary fix for OpenCV until https://github.com/DDMAL/Rodan/issues/639 is resolved. +pip install opencv-python==4.6.0.66 diff --git a/scripts/install_gpu_jobs/install_paco_classifier b/scripts/install_gpu_jobs/install_paco_classifier new file mode 100644 index 0000000..135e5a1 --- /dev/null +++ b/scripts/install_gpu_jobs/install_paco_classifier @@ -0,0 +1,32 @@ +#!/bin/sh +set -o errexit # Exit immediately if a command exits with a non-zero status. +set -o nounset # Treat unset variables as an error when substituting. +set -o xtrace # Print commands and their arguments as they are executed. 
+ +PIP=$(which pip3) || PIP=$(which pip) + +cd /code/Rodan/code/jobs + + +cat << EOF | python3 +new_backend = """{ + "image_dim_ordering": "tf", + "epsilon": 1e-07, + "floatx": "float32", + "backend": "tensorflow" +}""" +open("keras.json", "w").write(new_backend) +EOF + +# Install Paco classifier +cd / +git clone -b v2.0.4 --depth 1 https://github.com/DDMAL/Paco_classifier.git +mv Paco_classifier .Paco_classifier +cd .Paco_classifier +which pip3 && $PIP install --no-cache-dir . + +cd /code/Rodan/code +sed -i 's/#gpu //g' /code/Rodan/code/settings.py + +# Temporary fix for OpenCV until https://github.com/DDMAL/Rodan/issues/639 is resolved. +pip install opencv-python==4.6.0.66 diff --git a/scripts/install_gpu_jobs/install_sae_binarization b/scripts/install_gpu_jobs/install_sae_binarization new file mode 100644 index 0000000..c9527aa --- /dev/null +++ b/scripts/install_gpu_jobs/install_sae_binarization @@ -0,0 +1,35 @@ +#!/bin/sh +set -o errexit # Exit immediately if a command exits with a non-zero status. +set -o nounset # Treat unset variables as an error when substituting. +set -o xtrace # Print commands and their arguments as they are executed. + +PIP=$(which pip3) || PIP=$(which pip) + +cd /code/Rodan/code/jobs + + +cat << EOF | python3 +new_backend = """{ + "image_dim_ordering": "tf", + "epsilon": 1e-07, + "floatx": "float32", + "backend": "tensorflow" +}""" +open("keras.json", "w").write(new_backend) +EOF + + + +# Install SAE_binarization +cd / +git clone -b v1.1.1 https://github.com/DDMAL/SAE_binarization.git +mv SAE_binarization .SAE_binarization +cd .SAE_binarization +which pip3 && $PIP install --no-cache-dir . + + +cd /code/Rodan/code +sed -i 's/#gpu //g' /code/Rodan/code/settings.py + +# Temporary fix for OpenCV until https://github.com/DDMAL/Rodan/issues/639 is resolved. 
+pip install opencv-python==4.6.0.66 diff --git a/scripts/install_gpu_jobs/install_text_alignment b/scripts/install_gpu_jobs/install_text_alignment new file mode 100644 index 0000000..6331a32 --- /dev/null +++ b/scripts/install_gpu_jobs/install_text_alignment @@ -0,0 +1,28 @@ +#!/bin/sh +set -o errexit # Exit immediately if a command exits with a non-zero status. +set -o nounset # Treat unset variables as an error when substituting. +set -o xtrace # Print commands and their arguments as they are executed. + +PIP=$(which pip3) || PIP=$(which pip) + +cd /code/Rodan/code/jobs +# Install Text Alignment +$PIP install -r ./text_alignment/requirements.txt + +cat << EOF | python3 +new_backend = """{ + "image_dim_ordering": "tf", + "epsilon": 1e-07, + "floatx": "float32", + "backend": "tensorflow" +}""" +open("keras.json", "w").write(new_backend) +EOF + + + +cd /code/Rodan/code +sed -i 's/#gpu //g' /code/Rodan/code/settings.py + +# Temporary fix for OpenCV until https://github.com/DDMAL/Rodan/issues/639 is resolved. +pip install opencv-python==4.6.0.66 diff --git a/scripts/install_gpu_rodan_jobs b/scripts/install_gpu_rodan_jobs index 2bffbd5..275ffe3 100644 --- a/scripts/install_gpu_rodan_jobs +++ b/scripts/install_gpu_rodan_jobs @@ -5,43 +5,43 @@ set -o xtrace # Print commands and their arguments as they are executed. 
PIP=$(which pip3) || PIP=$(which pip) -cd /code/Rodan/code/jobs +# cd /code/Rodan/code/jobs # Install Text Alignment -$PIP install -r ./text_alignment/requirements.txt +# $PIP install -r ./text_alignment/requirements.txt -cat << EOF | python3 -new_backend = """{ - "image_dim_ordering": "tf", - "epsilon": 1e-07, - "floatx": "float32", - "backend": "tensorflow" -}""" -open("keras.json", "w").write(new_backend) -EOF +# cat << EOF | python3 +# new_backend = """{ +# "image_dim_ordering": "tf", +# "epsilon": 1e-07, +# "floatx": "float32", +# "backend": "tensorflow" +# }""" +# open("keras.json", "w").write(new_backend) +# EOF -# Install Background Removal -cd / -git clone -b v1.1.1 https://github.com/DDMAL/background_removal.git -mv background_removal .background_removal -cd .background_removal -which pip3 && $PIP install --no-cache-dir . +# # Install Background Removal +# cd / +# git clone -b v1.1.1 https://github.com/DDMAL/background_removal.git +# mv background_removal .background_removal +# cd .background_removal +# which pip3 && $PIP install --no-cache-dir . -# Install SAE_binarization -cd / -git clone -b v1.1.1 https://github.com/DDMAL/SAE_binarization.git -mv SAE_binarization .SAE_binarization -cd .SAE_binarization -which pip3 && $PIP install --no-cache-dir . +# # Install SAE_binarization +# cd / +# git clone -b v1.1.1 https://github.com/DDMAL/SAE_binarization.git +# mv SAE_binarization .SAE_binarization +# cd .SAE_binarization +# which pip3 && $PIP install --no-cache-dir . -# Install Paco classifier -cd / -git clone -b v2.0.4 --depth 1 https://github.com/DDMAL/Paco_classifier.git -mv Paco_classifier .Paco_classifier -cd .Paco_classifier -which pip3 && $PIP install --no-cache-dir . +# # Install Paco classifier +# cd / +# git clone -b v2.0.4 --depth 1 https://github.com/DDMAL/Paco_classifier.git +# mv Paco_classifier .Paco_classifier +# cd .Paco_classifier +# which pip3 && $PIP install --no-cache-dir . 
-cd /code/Rodan/rodan -sed -i 's/#gpu //g' /code/Rodan/rodan/settings.py +# cd /code/Rodan/code +# sed -i 's/#gpu //g' /code/Rodan/code/settings.py -# Temporary fix for OpenCV until https://github.com/DDMAL/Rodan/issues/639 is resolved. -pip install opencv-python==4.6.0.66 +# # Temporary fix for OpenCV until https://github.com/DDMAL/Rodan/issues/639 is resolved. +# pip install opencv-python==4.6.0.66