From 29f045d596893419e2e2db408e04d4c19cceaaa2 Mon Sep 17 00:00:00 2001 From: tsai Date: Thu, 22 Dec 2022 13:45:51 +0800 Subject: [PATCH 01/14] apply --- .github/workflows/build-oneflow.yml | 135 ++++++++++++++++++ .gitignore | 1 + build.sh | 34 ++--- docker/Dockerfile | 9 ++ .../build_scripts/install-runtime-packages.sh | 8 ++ 5 files changed, 164 insertions(+), 23 deletions(-) create mode 100644 .github/workflows/build-oneflow.yml diff --git a/.github/workflows/build-oneflow.yml b/.github/workflows/build-oneflow.yml new file mode 100644 index 000000000..6f9598de1 --- /dev/null +++ b/.github/workflows/build-oneflow.yml @@ -0,0 +1,135 @@ +name: Build (OneFlow) + +on: + workflow_dispatch: + inputs: + useCache: + description: Use GHA cache + type: boolean + required: false + default: true + push: + branches-ignore: + - "update-dependencies-pr" + paths: + - ".github/workflows/build.yml" + - "docker/**" + - "*.sh" + +concurrency: + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }} + cancel-in-progress: true + +env: + REGION_ID: cn-beijing + ACR_REGISTRY: registry.cn-beijing.aliyuncs.com + ACR_NAMESPACE: oneflow + DOCKER_HUB_NAMESPACE: oneflowinc + +jobs: + build_manylinux: + name: ${{ matrix.policy }}_${{ matrix.platform }}_${{ matrix.tag-suffix }} + runs-on: ubuntu-20.04 + strategy: + fail-fast: false + matrix: + tag-suffix: ["cuda11.2", "cuda10.2"] + include: + - tag-suffix: "cuda11.8" + policy: "manylinux2014" + platform: "x86_64" + CUDA_BASE_IMAGE: "nvidia/cuda:11.8.0-cudnn8-devel-centos7" + - tag-suffix: "cuda11.6" + policy: "manylinux2014" + platform: "x86_64" + CUDA_BASE_IMAGE: "nvidia/cuda:11.6.2-cudnn8-devel-centos7" + - tag-suffix: "cuda11.5" + policy: "manylinux2014" + platform: "x86_64" + CUDA_BASE_IMAGE: "nvidia/cuda:11.5.1-cudnn8-devel-centos7" + - tag-suffix: "cuda11.4" + policy: "manylinux2014" + platform: "x86_64" + CUDA_BASE_IMAGE: "nvidia/cuda:11.4.3-cudnn8-devel-centos7" + - tag-suffix: "cuda11.3" + policy: "manylinux2014" + platform: "x86_64" + CUDA_BASE_IMAGE: "nvidia/cuda:11.3.1-cudnn8-devel-centos7" + - tag-suffix: "cuda11.2" + policy: "manylinux2014" + platform: "x86_64" + CUDA_BASE_IMAGE: "nvidia/cuda:11.2.2-cudnn8-devel-centos7" + - tag-suffix: "cuda11.0" + policy: "manylinux2014" + platform: "x86_64" + CUDA_BASE_IMAGE: "nvidia/cuda:11.0.3-cudnn8-devel-centos7" + target: "manylinux" + - tag-suffix: "cuda10.2" + policy: "manylinux2014" + platform: "x86_64" + CUDA_BASE_IMAGE: "nvidia/cuda:10.2-cudnn7-devel-centos7" + - tag-suffix: "cpu" + policy: "manylinux2014" + platform: "x86_64" + CUDA_BASE_IMAGE: "" + + env: + POLICY: ${{ matrix.policy }} + PLATFORM: ${{ matrix.platform }} + COMMIT_SHA: ${{ github.sha }} + DOCKER_REPO: "${{ matrix.policy }}_${{ matrix.platform }}_${{ matrix.tag-suffix }}" + TEST_TAG: ${{ matrix.policy }}_${{ matrix.platform }}_${{ matrix.tag-suffix }}:${{ github.sha }} + CUDA_BASE_IMAGE: ${{ matrix.CUDA_BASE_IMAGE }} + + steps: + - name: Checkout + uses: actions/checkout@v3 + # 1.1 Login to ACR + - name: Login to ACR with the AccessKey pair + uses: aliyun/acr-login@v1 + with: + login-server: https://registry.${{env.REGION_ID}}.aliyuncs.com + username: "${{ secrets.ACR_USERNAME }}" + password: "${{ secrets.ACR_PASSWORD }}" + - name: Login to DockerHub + uses: docker/login-action@v1 + with: + username: ${{ secrets.DOCKERHUB_USERNAME }} + password: ${{ secrets.DOCKERHUB_TOKEN }} + + - name: Set up emulation + if: matrix.platform != 'i686' && matrix.platform != 'x86_64' + uses: docker/setup-qemu-action@v2 + with: + platforms: ${{ matrix.platform }} + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v2 + + - name: Build + run: ./build.sh + - name: Build and push + uses: docker/build-push-action@v2 + with: + push: true + tags: | + ${{ env.DOCKER_HUB_NAMESPACE }}/${{ env.DOCKER_REPO }}:latest + ${{ env.DOCKER_HUB_NAMESPACE }}/${{ env.DOCKER_REPO }}:${{ env.COMMIT_SHA }} + ${{ env.ACR_REGISTRY }}/${{ env.ACR_NAMESPACE }}/${{ env.DOCKER_REPO }}:latest + ${{ env.ACR_REGISTRY }}/${{ env.ACR_NAMESPACE }}/${{ env.DOCKER_REPO }}:${{ env.COMMIT_SHA }} + cache-from: type=registry,ref=${{ env.DOCKER_HUB_NAMESPACE }}/${{ env.DOCKER_REPO }}:latest + cache-to: type=inline + context: ./docker/ + build-args: | + POLICY + PLATFORM + BASEIMAGE + DEVTOOLSET_ROOTPATH + PREPEND_PATH + LD_LIBRARY_PATH_ARG + + all_passed: + needs: [build_manylinux] + runs-on: ubuntu-latest + steps: + - run: echo "All jobs passed" diff --git a/.gitignore b/.gitignore index a6a86cf99..dc2ec39a5 100644 --- a/.gitignore +++ b/.gitignore @@ -61,3 +61,4 @@ docker/sources # buildx cache .buildx-cache-*/ +.vscode/ diff --git a/build.sh b/build.sh index 9234d4582..029c756a8 100755 --- a/build.sh +++ b/build.sh @@ -32,7 +32,8 @@ if [ "${POLICY}" == "manylinux2014" ]; then if [ "${PLATFORM}" == "s390x" ]; then BASEIMAGE="s390x/clefos:7" else - BASEIMAGE="${MULTIARCH_PREFIX}centos:7" + DEFAULT_BASEIMAGE="${MULTIARCH_PREFIX}centos:7" + BASEIMAGE="${CUDA_BASE_IMAGE:-${DEFAULT_BASEIMAGE}}" fi DEVTOOLSET_ROOTPATH="/opt/rh/devtoolset-10/root" PREPEND_PATH="${DEVTOOLSET_ROOTPATH}/usr/bin:" @@ -84,31 +85,18 @@ fi if [ "${MANYLINUX_BUILD_FRONTEND}" == "docker" ]; then docker build ${BUILD_ARGS_COMMON} elif [ "${MANYLINUX_BUILD_FRONTEND}" == "docker-buildx" ]; then - docker buildx build \ - --load \ - --cache-from=type=local,src=$(pwd)/.buildx-cache-${POLICY}_${PLATFORM} \ - --cache-to=type=local,dest=$(pwd)/.buildx-cache-staging-${POLICY}_${PLATFORM} \ - ${BUILD_ARGS_COMMON} + env elif [ "${MANYLINUX_BUILD_FRONTEND}" == "buildkit" ]; then - buildctl build \ - --frontend=dockerfile.v0 \ - --local context=./docker/ \ - --local dockerfile=./docker/ \ - --import-cache type=local,src=$(pwd)/.buildx-cache-${POLICY}_${PLATFORM} \ - --export-cache type=local,dest=$(pwd)/.buildx-cache-staging-${POLICY}_${PLATFORM} \ - --opt build-arg:POLICY=${POLICY} --opt build-arg:PLATFORM=${PLATFORM} --opt build-arg:BASEIMAGE=${BASEIMAGE} \ - --opt "build-arg:DEVTOOLSET_ROOTPATH=${DEVTOOLSET_ROOTPATH}" --opt "build-arg:PREPEND_PATH=${PREPEND_PATH}" --opt "build-arg:LD_LIBRARY_PATH_ARG=${LD_LIBRARY_PATH_ARG}" \ - --output type=docker,name=quay.io/pypa/${POLICY}_${PLATFORM}:${COMMIT_SHA} | docker load + echo "Unsupported build frontend: buildkit" + exit 1 else echo "Unsupported build frontend: '${MANYLINUX_BUILD_FRONTEND}'" exit 1 fi -docker run --rm -v $(pwd)/tests:/tests:ro quay.io/pypa/${POLICY}_${PLATFORM}:${COMMIT_SHA} /tests/run_tests.sh - -if [ "${MANYLINUX_BUILD_FRONTEND}" != "docker" ]; then - if [ -d $(pwd)/.buildx-cache-${POLICY}_${PLATFORM} ]; then - rm -rf $(pwd)/.buildx-cache-${POLICY}_${PLATFORM} - fi - mv $(pwd)/.buildx-cache-staging-${POLICY}_${PLATFORM} $(pwd)/.buildx-cache-${POLICY}_${PLATFORM} -fi +echo "POLICY=${POLICY}" >> $GITHUB_ENV +echo "PLATFORM=${PLATFORM}" >> $GITHUB_ENV +echo "BASEIMAGE=${BASEIMAGE}" >> $GITHUB_ENV +echo "DEVTOOLSET_ROOTPATH=${DEVTOOLSET_ROOTPATH}" >> $GITHUB_ENV +echo "PREPEND_PATH=${PREPEND_PATH}" >> $GITHUB_ENV +echo "LD_LIBRARY_PATH_ARG=${LD_LIBRARY_PATH_ARG}" >> $GITHUB_ENV diff --git a/docker/Dockerfile b/docker/Dockerfile index 693ed64ea..2e7678a3c 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -171,4 +171,13 @@ RUN manylinux-entrypoint /build_scripts/finalize.sh && rm -rf /build_scripts ENV SSL_CERT_FILE=/opt/_internal/certs.pem +ARG BAZEL_URL="https://github.com/bazelbuild/bazel/releases/download/3.4.1/bazel-3.4.1-linux-x86_64" +RUN curl -L $BAZEL_URL -o /usr/local/bin/bazel \ + && chmod +x /usr/local/bin/bazel +RUN yum install -y wget nasm rdma-core-devel rsync gdb ninja-build openblas-static devtoolset-7-gcc* vim ccache htop fuse-devel +RUN yum install -y devtoolset-10-libasan-devel devtoolset-10-libubsan-devel devtoolset-10-libtsan-devel + +RUN wget https://github.com/Oneflow-Inc/llvm-project/releases/download/maybe-14.0.4/clang-tidy-14.AppImage -O /usr/local/bin/clangd && \ + chmod +x /usr/local/bin/clangd + CMD ["/bin/bash"] diff --git a/docker/build_scripts/install-runtime-packages.sh b/docker/build_scripts/install-runtime-packages.sh index fac26640f..9985a2699 100755 --- a/docker/build_scripts/install-runtime-packages.sh +++ b/docker/build_scripts/install-runtime-packages.sh @@ -34,6 +34,14 @@ source $MY_DIR/build_utils.sh # MANYLINUX_DEPS: Install development packages (except for libgcc which is provided by gcc install) if [ "${AUDITWHEEL_POLICY}" == "manylinux2014" ] || [ "${AUDITWHEEL_POLICY}" == "manylinux_2_28" ]; then MANYLINUX_DEPS="glibc-devel libstdc++-devel glib2-devel libX11-devel libXext-devel libXrender-devel mesa-libGL-devel libICE-devel libSM-devel zlib-devel expat-devel" + yum -y install yum-versionlock + yum versionlock add libcudnn8-devel + yum versionlock add libcudnn8 + yum versionlock add cuda-* + yum versionlock add libnccl + yum versionlock add libnccl-devel + yum versionlock list libcudnn8-devel + yum versionlock list libcudnn8 elif [ "${AUDITWHEEL_POLICY}" == "manylinux_2_24" ]; then MANYLINUX_DEPS="libc6-dev libstdc++-6-dev libglib2.0-dev libx11-dev libxext-dev libxrender-dev libgl1-mesa-dev libice-dev libsm-dev libz-dev libexpat1-dev" elif [ "${AUDITWHEEL_POLICY}" == "musllinux_1_1" ]; then From 0c557758d35121efb5a5c3af5eecfe5de715d40c Mon Sep 17 00:00:00 2001 From: tsai Date: Wed, 25 Oct 2023 15:50:10 +0800 Subject: [PATCH 02/14] add cu12 --- .github/workflows/build-oneflow.yml | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/.github/workflows/build-oneflow.yml b/.github/workflows/build-oneflow.yml index 6f9598de1..021b1dbcf 100644 --- a/.github/workflows/build-oneflow.yml +++ b/.github/workflows/build-oneflow.yml @@ -35,6 +35,18 @@ jobs: matrix: tag-suffix: ["cuda11.2", "cuda10.2"] include: + - tag-suffix: "cuda12.2" + policy: "manylinux2014" + platform: "x86_64" + CUDA_BASE_IMAGE: "nvidia/cuda:12.2.2-cudnn8-devel-centos7" + - tag-suffix: "cuda12.1" + policy: "manylinux2014" + platform: "x86_64" + CUDA_BASE_IMAGE: "nvidia/cuda:12.1.1-cudnn8-devel-centos7" + - tag-suffix: "cuda12.0" + policy: "manylinux2014" + platform: "x86_64" + CUDA_BASE_IMAGE: "nvidia/cuda:12.0.1-cudnn8-devel-centos7" - tag-suffix: "cuda11.8" policy: "manylinux2014" platform: "x86_64" From 4c8c8545bf549f577316abfe3bdd2459ef788dba Mon Sep 17 00:00:00 2001 From: tsai Date: Wed, 13 Dec 2023 11:03:59 +0800 Subject: [PATCH 03/14] refine --- .github/workflows/build-oneflow.yml | 36 +---------------------------- 1 file changed, 1 insertion(+), 35 deletions(-) diff --git a/.github/workflows/build-oneflow.yml b/.github/workflows/build-oneflow.yml index 021b1dbcf..ebd8f36da 100644 --- a/.github/workflows/build-oneflow.yml +++ b/.github/workflows/build-oneflow.yml @@ -17,7 +17,7 @@ on: - "*.sh" concurrency: - group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }} + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}-v2 cancel-in-progress: true env: @@ -33,7 +33,6 @@ jobs: strategy: fail-fast: false matrix: - tag-suffix: ["cuda11.2", "cuda10.2"] include: - tag-suffix: "cuda12.2" policy: "manylinux2014" @@ -43,43 +42,10 @@ jobs: policy: "manylinux2014" platform: "x86_64" CUDA_BASE_IMAGE: "nvidia/cuda:12.1.1-cudnn8-devel-centos7" - - tag-suffix: "cuda12.0" - policy: "manylinux2014" - platform: "x86_64" - CUDA_BASE_IMAGE: "nvidia/cuda:12.0.1-cudnn8-devel-centos7" - tag-suffix: "cuda11.8" policy: "manylinux2014" platform: "x86_64" CUDA_BASE_IMAGE: "nvidia/cuda:11.8.0-cudnn8-devel-centos7" - - tag-suffix: "cuda11.6" - policy: "manylinux2014" - platform: "x86_64" - CUDA_BASE_IMAGE: "nvidia/cuda:11.6.2-cudnn8-devel-centos7" - - tag-suffix: "cuda11.5" - policy: "manylinux2014" - platform: "x86_64" - CUDA_BASE_IMAGE: "nvidia/cuda:11.5.1-cudnn8-devel-centos7" - - tag-suffix: "cuda11.4" - policy: "manylinux2014" - platform: "x86_64" - CUDA_BASE_IMAGE: "nvidia/cuda:11.4.3-cudnn8-devel-centos7" - - tag-suffix: "cuda11.3" - policy: "manylinux2014" - platform: "x86_64" - CUDA_BASE_IMAGE: "nvidia/cuda:11.3.1-cudnn8-devel-centos7" - - tag-suffix: "cuda11.2" - policy: "manylinux2014" - platform: "x86_64" - CUDA_BASE_IMAGE: "nvidia/cuda:11.2.2-cudnn8-devel-centos7" - - tag-suffix: "cuda11.0" - policy: "manylinux2014" - platform: "x86_64" - CUDA_BASE_IMAGE: "nvidia/cuda:11.0.3-cudnn8-devel-centos7" - target: "manylinux" - - tag-suffix: "cuda10.2" - policy: "manylinux2014" - platform: "x86_64" - CUDA_BASE_IMAGE: "nvidia/cuda:10.2-cudnn7-devel-centos7" - tag-suffix: "cpu" policy: "manylinux2014" platform: "x86_64" From 6455f9b8154333333e6285fde3747aaac4a92929 Mon Sep 17 00:00:00 2001 From: tsai Date: Wed, 13 Dec 2023 11:05:24 +0800 Subject: [PATCH 04/14] fix --- .github/workflows/build-oneflow.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build-oneflow.yml b/.github/workflows/build-oneflow.yml index ebd8f36da..a36b8a163 100644 --- a/.github/workflows/build-oneflow.yml +++ b/.github/workflows/build-oneflow.yml @@ -12,7 +12,7 @@ on: branches-ignore: - "update-dependencies-pr" paths: - - ".github/workflows/build.yml" + - ".github/workflows/**" - "docker/**" - "*.sh" From e11e421bc320370b0fd486ff9e90cd819c05fe8b Mon Sep 17 00:00:00 2001 From: Shenghang Tsai Date: Thu, 14 Nov 2024 11:54:21 +0800 Subject: [PATCH 05/14] perl-IPC-Cmd --- docker/Dockerfile | 1 + 1 file changed, 1 insertion(+) diff --git a/docker/Dockerfile b/docker/Dockerfile index 0b6eee303..25bf6f6b1 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -173,6 +173,7 @@ RUN curl -L $BAZEL_URL -o /usr/local/bin/bazel \ && chmod +x /usr/local/bin/bazel RUN yum install -y wget nasm rdma-core-devel rsync gdb ninja-build openblas-static devtoolset-7-gcc* vim ccache htop fuse-devel RUN yum install -y devtoolset-10-libasan-devel devtoolset-10-libubsan-devel devtoolset-10-libtsan-devel +RUN yum install -y perl-IPC-Cmd RUN wget https://github.com/Oneflow-Inc/llvm-project/releases/download/maybe-14.0.4/clang-tidy-14.AppImage -O /usr/local/bin/clangd && \ chmod +x /usr/local/bin/clangd From ca19f81e7d1405ea84add255496890700713eae8 Mon Sep 17 00:00:00 2001 From: Shenghang Tsai Date: Thu, 14 Nov 2024 12:16:29 +0800 Subject: [PATCH 06/14] add --- build.sh | 9 --------- 1 file changed, 9 deletions(-) diff --git a/build.sh b/build.sh index 4e862a228..5d2079c19 100755 --- a/build.sh +++ b/build.sh @@ -88,15 +88,6 @@ elif [ "${MANYLINUX_BUILD_FRONTEND}" == "podman" ]; then podman build ${BUILD_ARGS_COMMON} elif [ "${MANYLINUX_BUILD_FRONTEND}" == "docker-buildx" ]; then env -elif [ "${MANYLINUX_BUILD_FRONTEND}" == "buildkit" ]; then - echo "Unsupported build frontend: buildkit" - exit 1 - USE_LOCAL_CACHE=1 - docker buildx build \ - --load \ - --cache-from=type=local,src=$(pwd)/.buildx-cache-${POLICY}_${PLATFORM} \ - --cache-to=type=local,dest=$(pwd)/.buildx-cache-staging-${POLICY}_${PLATFORM},mode=max \ - ${BUILD_ARGS_COMMON} else echo "Unsupported build frontend: '${MANYLINUX_BUILD_FRONTEND}'" exit 1 From f2b2f564d624b5d50390f83bc5a2c1d2393b0154 Mon Sep 17 00:00:00 2001 From: Shenghang Tsai Date: Thu, 14 Nov 2024 12:20:04 +0800 Subject: [PATCH 07/14] CUDA_BASE_IMAGE --- build.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build.sh b/build.sh index 5d2079c19..610902b01 100755 --- a/build.sh +++ b/build.sh @@ -37,7 +37,7 @@ fi # setup BASEIMAGE and its specific properties if [ "${POLICY}" == "manylinux2014" ]; then - BASEIMAGE="quay.io/pypa/manylinux2014_base:2024.11.03-3" + BASEIMAGE="${CUDA_BASE_IMAGE:-quay.io/pypa/manylinux2014_base:2024.11.03-3}" DEVTOOLSET_ROOTPATH="/opt/rh/devtoolset-10/root" PREPEND_PATH="${DEVTOOLSET_ROOTPATH}/usr/bin:" if [ "${PLATFORM}" == "i686" ]; then From f644f8e0482155dccf2b118701ae6f6c5722556c Mon Sep 17 00:00:00 2001 From: Shenghang Tsai Date: Thu, 14 Nov 2024 13:50:41 +0800 Subject: [PATCH 08/14] Update build-oneflow.yml --- .github/workflows/build-oneflow.yml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/workflows/build-oneflow.yml b/.github/workflows/build-oneflow.yml index a36b8a163..939b9d133 100644 --- a/.github/workflows/build-oneflow.yml +++ b/.github/workflows/build-oneflow.yml @@ -34,18 +34,18 @@ jobs: fail-fast: false matrix: include: - - tag-suffix: "cuda12.2" + - tag-suffix: "cuda12.6" policy: "manylinux2014" platform: "x86_64" - CUDA_BASE_IMAGE: "nvidia/cuda:12.2.2-cudnn8-devel-centos7" - - tag-suffix: "cuda12.1" + CUDA_BASE_IMAGE: "12.6.2-cudnn-devel-rockylinux8" + - tag-suffix: "cuda12.5" policy: "manylinux2014" platform: "x86_64" - CUDA_BASE_IMAGE: "nvidia/cuda:12.1.1-cudnn8-devel-centos7" - - tag-suffix: "cuda11.8" + CUDA_BASE_IMAGE: "12.5.2-cudnn-devel-rockylinux8" + - tag-suffix: "cuda12.4" policy: "manylinux2014" platform: "x86_64" - CUDA_BASE_IMAGE: "nvidia/cuda:11.8.0-cudnn8-devel-centos7" + CUDA_BASE_IMAGE: "nvidia/cuda:12.4.1-cudnn-devel-rockylinux8" - tag-suffix: "cpu" policy: "manylinux2014" platform: "x86_64" From 8d0ced2570b3fdb7c7837799b082a3892b08ee6b Mon Sep 17 00:00:00 2001 From: Shenghang Tsai Date: Thu, 14 Nov 2024 13:51:46 +0800 Subject: [PATCH 09/14] Update build-oneflow.yml --- .github/workflows/build-oneflow.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build-oneflow.yml b/.github/workflows/build-oneflow.yml index 939b9d133..97b957852 100644 --- a/.github/workflows/build-oneflow.yml +++ b/.github/workflows/build-oneflow.yml @@ -37,11 +37,11 @@ jobs: - tag-suffix: "cuda12.6" policy: "manylinux2014" platform: "x86_64" - CUDA_BASE_IMAGE: "12.6.2-cudnn-devel-rockylinux8" + CUDA_BASE_IMAGE: "nvidia/cuda:12.6.2-cudnn-devel-rockylinux8" - tag-suffix: "cuda12.5" policy: "manylinux2014" platform: "x86_64" - CUDA_BASE_IMAGE: "12.5.2-cudnn-devel-rockylinux8" + CUDA_BASE_IMAGE: "nvidia/cuda:12.5.2-cudnn-devel-rockylinux8" - tag-suffix: "cuda12.4" policy: "manylinux2014" platform: "x86_64" From 8fb5d9bda569ba1a7c21463eaeb97478108db4e4 Mon Sep 17 00:00:00 2001 From: Shenghang Tsai Date: Thu, 14 Nov 2024 13:52:45 +0800 Subject: [PATCH 10/14] Update build-oneflow.yml --- .github/workflows/build-oneflow.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build-oneflow.yml b/.github/workflows/build-oneflow.yml index 97b957852..dc404a41d 100644 --- a/.github/workflows/build-oneflow.yml +++ b/.github/workflows/build-oneflow.yml @@ -41,7 +41,7 @@ jobs: - tag-suffix: "cuda12.5" policy: "manylinux2014" platform: "x86_64" - CUDA_BASE_IMAGE: "nvidia/cuda:12.5.2-cudnn-devel-rockylinux8" + CUDA_BASE_IMAGE: "nvidia/cuda:12.5.1-cudnn-devel-rockylinux8" - tag-suffix: "cuda12.4" policy: "manylinux2014" platform: "x86_64" From 02fb1590c0c0ce69aed97d5a8d97676f643abac5 Mon Sep 17 00:00:00 2001 From: Shenghang Tsai Date: Thu, 14 Nov 2024 13:57:24 +0800 Subject: [PATCH 11/14] fix lock --- docker/build_scripts/install-runtime-packages.sh | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/docker/build_scripts/install-runtime-packages.sh b/docker/build_scripts/install-runtime-packages.sh index 8a4661dcd..4379310be 100755 --- a/docker/build_scripts/install-runtime-packages.sh +++ b/docker/build_scripts/install-runtime-packages.sh @@ -34,14 +34,11 @@ source $MY_DIR/build_utils.sh # MANYLINUX_DEPS: Install development packages (except for libgcc which is provided by gcc install) if [ "${AUDITWHEEL_POLICY}" == "manylinux2014" ] || [ "${AUDITWHEEL_POLICY}" == "manylinux_2_28" ]; then MANYLINUX_DEPS="glibc-devel libstdc++-devel glib2-devel libX11-devel libXext-devel libXrender-devel mesa-libGL-devel libICE-devel libSM-devel zlib-devel expat-devel" - yum -y install yum-versionlock - yum versionlock add libcudnn8-devel - yum versionlock add libcudnn8 + yum -y install yum-plugin-versionlock + yum versionlock add libcudnn* yum versionlock add cuda-* yum versionlock add libnccl yum versionlock add libnccl-devel - yum versionlock list libcudnn8-devel - yum versionlock list libcudnn8 elif [ "${BASE_POLICY}" == "musllinux" ]; then MANYLINUX_DEPS="musl-dev libstdc++ glib-dev libx11-dev libxext-dev libxrender-dev mesa-dev libice-dev libsm-dev zlib-dev expat-dev" else From d78473e2a5176f94ff250abc476fc40b5cc60c76 Mon Sep 17 00:00:00 2001 From: Shenghang Tsai Date: Thu, 14 Nov 2024 14:01:47 +0800 Subject: [PATCH 12/14] fix --- docker/build_scripts/fixup-mirrors.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/docker/build_scripts/fixup-mirrors.sh b/docker/build_scripts/fixup-mirrors.sh index 719fe0fb4..6eb05120c 100755 --- a/docker/build_scripts/fixup-mirrors.sh +++ b/docker/build_scripts/fixup-mirrors.sh @@ -6,6 +6,7 @@ set -exuo pipefail if [ "${AUDITWHEEL_POLICY}" == "manylinux2014" ] && [ "${AUDITWHEEL_ARCH}" != "s390x" ]; then # Centos 7 is EOL and is no longer available from the usual mirrors, so switch # to https://vault.centos.org + touch /etc/yum/pluginconf.d/fastestmirror.conf sed -i 's/enabled=1/enabled=0/g' /etc/yum/pluginconf.d/fastestmirror.conf sed -i 's/^mirrorlist/#mirrorlist/g' /etc/yum.repos.d/*.repo sed -i 's;^.*baseurl=http://mirror;baseurl=https://vault;g' /etc/yum.repos.d/*.repo From b91a6b94808e0f646f01628e6a0c1f732192e88f Mon Sep 17 00:00:00 2001 From: Shenghang Tsai Date: Thu, 14 Nov 2024 14:02:56 +0800 Subject: [PATCH 13/14] add back --- .github/workflows/build-oneflow.yml | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/.github/workflows/build-oneflow.yml b/.github/workflows/build-oneflow.yml index dc404a41d..1fda7108a 100644 --- a/.github/workflows/build-oneflow.yml +++ b/.github/workflows/build-oneflow.yml @@ -46,6 +46,18 @@ jobs: policy: "manylinux2014" platform: "x86_64" CUDA_BASE_IMAGE: "nvidia/cuda:12.4.1-cudnn-devel-rockylinux8" + - tag-suffix: "cuda12.2" + policy: "manylinux2014" + platform: "x86_64" + CUDA_BASE_IMAGE: "nvidia/cuda:12.2.2-cudnn8-devel-centos7" + - tag-suffix: "cuda12.1" + policy: "manylinux2014" + platform: "x86_64" + CUDA_BASE_IMAGE: "nvidia/cuda:12.1.1-cudnn8-devel-centos7" + - tag-suffix: "cuda11.8" + policy: "manylinux2014" + platform: "x86_64" + CUDA_BASE_IMAGE: "nvidia/cuda:11.8.0-cudnn8-devel-centos7" - tag-suffix: "cpu" policy: "manylinux2014" platform: "x86_64" From 7725348ba96e391b4660a4190e7d4e757e7cb83f Mon Sep 17 00:00:00 2001 From: Shenghang Tsai Date: Thu, 14 Nov 2024 14:07:15 +0800 Subject: [PATCH 14/14] fix --- docker/build_scripts/install-runtime-packages.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/docker/build_scripts/install-runtime-packages.sh b/docker/build_scripts/install-runtime-packages.sh index 4379310be..921a56f7e 100755 --- a/docker/build_scripts/install-runtime-packages.sh +++ b/docker/build_scripts/install-runtime-packages.sh @@ -34,6 +34,7 @@ source $MY_DIR/build_utils.sh # MANYLINUX_DEPS: Install development packages (except for libgcc which is provided by gcc install) if [ "${AUDITWHEEL_POLICY}" == "manylinux2014" ] || [ "${AUDITWHEEL_POLICY}" == "manylinux_2_28" ]; then MANYLINUX_DEPS="glibc-devel libstdc++-devel glib2-devel libX11-devel libXext-devel libXrender-devel mesa-libGL-devel libICE-devel libSM-devel zlib-devel expat-devel" + fixup-mirrors yum -y install yum-plugin-versionlock yum versionlock add libcudnn* yum versionlock add cuda-*