diff --git a/.github/workflows/build-oneflow.yml b/.github/workflows/build-oneflow.yml
new file mode 100644
index 000000000..1fda7108a
--- /dev/null
+++ b/.github/workflows/build-oneflow.yml
@@ -0,0 +1,134 @@
+# Builds the OneFlow manylinux images (one per CUDA base image, plus a CPU-only
+# variant) and pushes them to Docker Hub and Alibaba Cloud ACR.
+name: Build (OneFlow)
+
+on:
+  workflow_dispatch:
+    inputs:
+      useCache:
+        description: Use build cache
+        type: boolean
+        required: false
+        default: true
+  push:
+    branches-ignore:
+      - "update-dependencies-pr"
+    paths:
+      - ".github/workflows/**"
+      - "docker/**"
+      - "*.sh"
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}-v2
+  cancel-in-progress: true
+
+env:
+  REGION_ID: cn-beijing
+  ACR_REGISTRY: registry.cn-beijing.aliyuncs.com
+  ACR_NAMESPACE: oneflow
+  DOCKER_HUB_NAMESPACE: oneflowinc
+
+jobs:
+  build_manylinux:
+    name: ${{ matrix.policy }}_${{ matrix.platform }}_${{ matrix.tag-suffix }}
+    # The ubuntu-20.04 hosted runner image has been retired by GitHub.
+    runs-on: ubuntu-22.04
+    strategy:
+      fail-fast: false
+      matrix:
+        # An empty CUDA_BASE_IMAGE makes build.sh fall back to its default base.
+        include:
+          - tag-suffix: "cuda12.6"
+            policy: "manylinux2014"
+            platform: "x86_64"
+            CUDA_BASE_IMAGE: "nvidia/cuda:12.6.2-cudnn-devel-rockylinux8"
+          - tag-suffix: "cuda12.5"
+            policy: "manylinux2014"
+            platform: "x86_64"
+            CUDA_BASE_IMAGE: "nvidia/cuda:12.5.1-cudnn-devel-rockylinux8"
+          - tag-suffix: "cuda12.4"
+            policy: "manylinux2014"
+            platform: "x86_64"
+            CUDA_BASE_IMAGE: "nvidia/cuda:12.4.1-cudnn-devel-rockylinux8"
+          - tag-suffix: "cuda12.2"
+            policy: "manylinux2014"
+            platform: "x86_64"
+            CUDA_BASE_IMAGE: "nvidia/cuda:12.2.2-cudnn8-devel-centos7"
+          - tag-suffix: "cuda12.1"
+            policy: "manylinux2014"
+            platform: "x86_64"
+            CUDA_BASE_IMAGE: "nvidia/cuda:12.1.1-cudnn8-devel-centos7"
+          - tag-suffix: "cuda11.8"
+            policy: "manylinux2014"
+            platform: "x86_64"
+            CUDA_BASE_IMAGE: "nvidia/cuda:11.8.0-cudnn8-devel-centos7"
+          - tag-suffix: "cpu"
+            policy: "manylinux2014"
+            platform: "x86_64"
+            CUDA_BASE_IMAGE: ""
+
+    env:
+      POLICY: ${{ matrix.policy }}
+      PLATFORM: ${{ matrix.platform }}
+      COMMIT_SHA: ${{ github.sha }}
+      DOCKER_REPO: "${{ matrix.policy }}_${{ matrix.platform }}_${{ matrix.tag-suffix }}"
+      TEST_TAG: ${{ matrix.policy }}_${{ matrix.platform }}_${{ matrix.tag-suffix }}:${{ github.sha }}
+      CUDA_BASE_IMAGE: ${{ matrix.CUDA_BASE_IMAGE }}
+
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v3
+      # 1.1 Login to ACR
+      - name: Login to ACR with the AccessKey pair
+        uses: aliyun/acr-login@v1
+        with:
+          login-server: https://registry.${{env.REGION_ID}}.aliyuncs.com
+          username: "${{ secrets.ACR_USERNAME }}"
+          password: "${{ secrets.ACR_PASSWORD }}"
+      - name: Login to DockerHub
+        uses: docker/login-action@v1
+        with:
+          username: ${{ secrets.DOCKERHUB_USERNAME }}
+          password: ${{ secrets.DOCKERHUB_TOKEN }}
+
+      - name: Set up emulation
+        if: matrix.platform != 'i686' && matrix.platform != 'x86_64'
+        uses: docker/setup-qemu-action@v2
+        with:
+          platforms: ${{ matrix.platform }}
+
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v2
+
+      # build.sh exports the resolved build arguments through GITHUB_ENV so that
+      # the "Build and push" step below can pick them up.
+      - name: Build
+        run: ./build.sh
+      - name: Build and push
+        uses: docker/build-push-action@v2
+        with:
+          push: true
+          # Honour the manual "useCache" switch; push-triggered runs always cache.
+          no-cache: ${{ github.event_name == 'workflow_dispatch' && !inputs.useCache }}
+          tags: |
+            ${{ env.DOCKER_HUB_NAMESPACE }}/${{ env.DOCKER_REPO }}:latest
+            ${{ env.DOCKER_HUB_NAMESPACE }}/${{ env.DOCKER_REPO }}:${{ env.COMMIT_SHA }}
+            ${{ env.ACR_REGISTRY }}/${{ env.ACR_NAMESPACE }}/${{ env.DOCKER_REPO }}:latest
+            ${{ env.ACR_REGISTRY }}/${{ env.ACR_NAMESPACE }}/${{ env.DOCKER_REPO }}:${{ env.COMMIT_SHA }}
+          cache-from: type=registry,ref=${{ env.DOCKER_HUB_NAMESPACE }}/${{ env.DOCKER_REPO }}:latest
+          cache-to: type=inline
+          context: ./docker/
+          # Names without a value are read from the environment of this step.
+          build-args: |
+            POLICY
+            PLATFORM
+            BASEIMAGE
+            DEVTOOLSET_ROOTPATH
+            PREPEND_PATH
+            LD_LIBRARY_PATH_ARG
+
+  all_passed:
+    needs: [build_manylinux]
+    runs-on: ubuntu-latest
+    steps:
+      - run: echo "All jobs passed"
diff --git a/.gitignore b/.gitignore
index a6a86cf99..dc2ec39a5 100644
--- a/.gitignore
+++ b/.gitignore
@@ -61,3 +61,4 @@ docker/sources
 
 # buildx cache
 .buildx-cache-*/
+.vscode/
diff --git a/build.sh b/build.sh
index 6ec2c1173..610902b01 100755
--- a/build.sh
+++ b/build.sh
@@ -37,7 +37,7 @@ fi
 
 # setup BASEIMAGE and its specific properties
 if [ "${POLICY}" == "manylinux2014" ]; then
-	BASEIMAGE="quay.io/pypa/manylinux2014_base:2024.11.03-3"
+	BASEIMAGE="${CUDA_BASE_IMAGE:-quay.io/pypa/manylinux2014_base:2024.11.03-3}"
 	DEVTOOLSET_ROOTPATH="/opt/rh/devtoolset-10/root"
 	PREPEND_PATH="${DEVTOOLSET_ROOTPATH}/usr/bin:"
 	if [ "${PLATFORM}" == "i686" ]; then
@@ -87,22 +87,23 @@ if [ "${MANYLINUX_BUILD_FRONTEND}" == "docker" ]; then
 elif [ "${MANYLINUX_BUILD_FRONTEND}" == "podman" ]; then
 	podman build ${BUILD_ARGS_COMMON}
 elif [ "${MANYLINUX_BUILD_FRONTEND}" == "docker-buildx" ]; then
-	USE_LOCAL_CACHE=1
-	docker buildx build \
-		--load \
-		--cache-from=type=local,src=$(pwd)/.buildx-cache-${POLICY}_${PLATFORM} \
-		--cache-to=type=local,dest=$(pwd)/.buildx-cache-staging-${POLICY}_${PLATFORM},mode=max \
-		${BUILD_ARGS_COMMON}
+	# The image is built by the CI workflow (docker/build-push-action), which
+	# consumes the build arguments exported below.
+	:
 else
 	echo "Unsupported build frontend: '${MANYLINUX_BUILD_FRONTEND}'"
 	exit 1
 fi
 
-docker run --rm -v $(pwd)/tests:/tests:ro quay.io/pypa/${POLICY}_${PLATFORM}:${COMMIT_SHA} /tests/run_tests.sh
-
-if [ ${USE_LOCAL_CACHE} -ne 0 ]; then
-	if [ -d $(pwd)/.buildx-cache-${POLICY}_${PLATFORM} ]; then
-		rm -rf $(pwd)/.buildx-cache-${POLICY}_${PLATFORM}
-	fi
-	mv $(pwd)/.buildx-cache-staging-${POLICY}_${PLATFORM} $(pwd)/.buildx-cache-${POLICY}_${PLATFORM}
-fi
+# Hand the resolved build arguments to later workflow steps. GITHUB_ENV is only
+# set inside GitHub Actions, so this is skipped for local runs.
+if [ -n "${GITHUB_ENV:-}" ]; then
+	{
+		echo "POLICY=${POLICY}"
+		echo "PLATFORM=${PLATFORM}"
+		echo "BASEIMAGE=${BASEIMAGE}"
+		echo "DEVTOOLSET_ROOTPATH=${DEVTOOLSET_ROOTPATH}"
+		echo "PREPEND_PATH=${PREPEND_PATH}"
+		echo "LD_LIBRARY_PATH_ARG=${LD_LIBRARY_PATH_ARG}"
+	} >> "${GITHUB_ENV}"
+fi
diff --git a/docker/Dockerfile b/docker/Dockerfile
index 8edbad587..43d4652e2 100644
--- a/docker/Dockerfile
+++ b/docker/Dockerfile
@@ -181,4 +181,17 @@ RUN --mount=type=bind,target=/build_cpython36,from=build_cpython36 \
 
 ENV SSL_CERT_FILE=/opt/_internal/certs.pem
 
+# OneFlow build toolchain installed on top of the manylinux image.
+ARG BAZEL_URL="https://github.com/bazelbuild/bazel/releases/download/3.4.1/bazel-3.4.1-linux-x86_64"
+# -f makes curl fail on an HTTP error instead of saving the error page as the binary.
+RUN curl -fsSL "$BAZEL_URL" -o /usr/local/bin/bazel \
+    && chmod +x /usr/local/bin/bazel
+RUN yum install -y wget nasm rdma-core-devel rsync gdb ninja-build openblas-static 'devtoolset-7-gcc*' vim ccache htop fuse-devel
+RUN yum install -y devtoolset-10-libasan-devel devtoolset-10-libubsan-devel devtoolset-10-libtsan-devel
+RUN yum install -y perl-IPC-Cmd
+
+# The downloaded AppImage is clang-tidy, so install it under that name.
+RUN wget https://github.com/Oneflow-Inc/llvm-project/releases/download/maybe-14.0.4/clang-tidy-14.AppImage -O /usr/local/bin/clang-tidy && \
+    chmod +x /usr/local/bin/clang-tidy
+
 CMD ["/bin/bash"]
diff --git a/docker/build_scripts/fixup-mirrors.sh b/docker/build_scripts/fixup-mirrors.sh
index 719fe0fb4..6eb05120c 100755
--- a/docker/build_scripts/fixup-mirrors.sh
+++ b/docker/build_scripts/fixup-mirrors.sh
@@ -6,6 +6,8 @@ set -exuo pipefail
 if [ "${AUDITWHEEL_POLICY}" == "manylinux2014" ] && [ "${AUDITWHEEL_ARCH}" != "s390x" ]; then
 	# Centos 7 is EOL and is no longer available from the usual mirrors, so switch
 	# to https://vault.centos.org
+	# Make sure the file exists so the sed below does not fail when it is missing.
+	touch /etc/yum/pluginconf.d/fastestmirror.conf
 	sed -i 's/enabled=1/enabled=0/g' /etc/yum/pluginconf.d/fastestmirror.conf
 	sed -i 's/^mirrorlist/#mirrorlist/g' /etc/yum.repos.d/*.repo
 	sed -i 's;^.*baseurl=http://mirror;baseurl=https://vault;g' /etc/yum.repos.d/*.repo
diff --git a/docker/build_scripts/install-runtime-packages.sh b/docker/build_scripts/install-runtime-packages.sh
index 160b335ba..921a56f7e 100755
--- a/docker/build_scripts/install-runtime-packages.sh
+++ b/docker/build_scripts/install-runtime-packages.sh
@@ -34,6 +34,15 @@ source $MY_DIR/build_utils.sh
 # MANYLINUX_DEPS: Install development packages (except for libgcc which is provided by gcc install)
 if [ "${AUDITWHEEL_POLICY}" == "manylinux2014" ] || [ "${AUDITWHEEL_POLICY}" == "manylinux_2_28" ]; then
 	MANYLINUX_DEPS="glibc-devel libstdc++-devel glib2-devel libX11-devel libXext-devel libXrender-devel mesa-libGL-devel libICE-devel libSM-devel zlib-devel expat-devel"
+	fixup-mirrors
+	# Lock the versions of the CUDA, cuDNN and NCCL packages (presumably the ones
+	# shipped in the CUDA base image -- confirm). The patterns are quoted so the
+	# shell passes them to yum unexpanded.
+	yum -y install yum-plugin-versionlock
+	yum versionlock add 'libcudnn*'
+	yum versionlock add 'cuda-*'
+	yum versionlock add libnccl
+	yum versionlock add libnccl-devel
 elif [ "${BASE_POLICY}" == "musllinux" ]; then
 	MANYLINUX_DEPS="musl-dev libstdc++ glib-dev libx11-dev libxext-dev libxrender-dev mesa-dev libice-dev libsm-dev zlib-dev expat-dev"
 else