Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/Dockerfile.buildwheel
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
# This file is a part of the vllm-ascend project.
#
ARG PY_VERSION=3.11
FROM quay.io/ascend/manylinux:8.2.rc1-910b-manylinux_2_28-py${PY_VERSION}
FROM quay.io/ascend/manylinux:8.3.rc1-910b-manylinux_2_28-py${PY_VERSION}

ARG COMPILE_CUSTOM_KERNELS=1

Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/_accuracy_test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ jobs:
runs-on: ${{ inputs.runner }}
name: ${{ inputs.model_name }} accuracy
container:
image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.2.rc1-910b-ubuntu22.04-py3.11
image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc1-910b-ubuntu22.04-py3.11
env:
VLLM_USE_MODELSCOPE: True
# 1. If version specified (work_dispatch), do specified branch accuracy test
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/_e2e_nightly_multi_node.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ on:
required: false
type: string
description: base image for pods
default: "swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.2.rc1-910b-ubuntu22.04-py3.11"
default: "swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc1-910b-ubuntu22.04-py3.11"
config_file_path:
required: true
type: string
Expand Down Expand Up @@ -69,7 +69,7 @@ jobs:
# This is the runner with no NPU for k8s controller
runs-on: ${{ inputs.runner }}
container:
image: m.daocloud.io/quay.io/ascend/cann:8.2.rc1-a3-ubuntu22.04-py3.11
image: m.daocloud.io/quay.io/ascend/cann:8.3.rc1-a3-ubuntu22.04-py3.11
env:
KUBECONFIG: /tmp/kubeconfig
KUBECTL: /root/.cache/.kube/kubectl
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/_e2e_nightly_single_node.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ on:
image:
required: false
type: string
default: "swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.2.rc1-910b-ubuntu22.04-py3.11"
default: "swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc1-910b-ubuntu22.04-py3.11"
tests:
required: true
type: string
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/_kill_lws_resources.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ jobs:
# This is a runner with no NPU for k8s controller
runs-on: ${{ inputs.runner }}
container:
image: m.daocloud.io/quay.io/ascend/cann:8.2.rc1-a3-ubuntu22.04-py3.11
image: m.daocloud.io/quay.io/ascend/cann:8.3.rc1-a3-ubuntu22.04-py3.11
env:
KUBECONFIG: /tmp/kubeconfig
KUBECTL: /root/.cache/.kube/kubectl
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/accuracy_test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -70,5 +70,5 @@ jobs:
with:
vllm: v0.11.0
runner: linux-aarch64-${{ matrix.runner }}
image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.2.rc1-910b-ubuntu22.04-py3.11
image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc1-910b-ubuntu22.04-py3.11
model_name: ${{ matrix.model_name }}
2 changes: 1 addition & 1 deletion .github/workflows/nightly_benchmarks.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ jobs:
vllm_use_v1: 1
max-parallel: 1
container:
image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.2.rc1-910b-ubuntu22.04-py3.11
image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc1-910b-ubuntu22.04-py3.11
volumes:
- /usr/local/dcmi:/usr/local/dcmi
- /usr/local/bin/npu-smi:/usr/local/bin/npu-smi
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/vllm_ascend_dist.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ jobs:
name: vLLM Ascend test
runs-on: ${{ matrix.os }}
container:
image: m.daocloud.io/quay.io/ascend/cann:8.2.rc1-a3-ubuntu22.04-py3.11
image: m.daocloud.io/quay.io/ascend/cann:8.3.rc1-a3-ubuntu22.04-py3.11
env:
DEBIAN_FRONTEND: noninteractive
steps:
Expand Down
3 changes: 2 additions & 1 deletion .github/workflows/vllm_ascend_test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,7 @@ jobs:
if: ${{ needs.lint.result == 'success' && (needs.changes.outputs.e2e_tracker == 'true' || needs.changes.outputs.ut_tracker == 'true') }}
runs-on: ubuntu-latest
container:
# fixme: vllm-ascend install failed with 8.3.rc1 on github action
image: quay.io/ascend/cann:8.2.rc1-910b-ubuntu22.04-py3.11
env:
VLLM_LOGGING_LEVEL: ERROR
Expand Down Expand Up @@ -146,5 +147,5 @@ jobs:
with:
vllm: ${{ matrix.vllm_version }}
runner: linux-aarch64-a2
image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.2.rc1-910b-ubuntu22.04-py3.11
image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc1-910b-ubuntu22.04-py3.11
type: light
2 changes: 1 addition & 1 deletion .github/workflows/vllm_ascend_test_310p.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ jobs:
runs-on: ${{ matrix.os }}
container:
# TODO(yikun): Remove m.daocloud.io prefix when infra proxy ready
image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.2.rc1-310p-ubuntu22.04-py3.11
image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc1-310p-ubuntu22.04-py3.11
env:
VLLM_LOGGING_LEVEL: ERROR
VLLM_USE_MODELSCOPE: True
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/vllm_ascend_test_full.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -76,5 +76,5 @@ jobs:
with:
vllm: ${{ matrix.vllm_version }}
runner: linux-aarch64-a2
image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.2.rc1-910b-ubuntu22.04-py3.11
image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc1-910b-ubuntu22.04-py3.11
type: full
2 changes: 1 addition & 1 deletion .github/workflows/vllm_ascend_test_full_vllm_main.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -41,5 +41,5 @@ jobs:
with:
vllm: main
runner: linux-aarch64-a2
image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.2.rc1-910b-ubuntu22.04-py3.11
image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc1-910b-ubuntu22.04-py3.11
type: full
2 changes: 1 addition & 1 deletion .github/workflows/vllm_ascend_test_models.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ jobs:
with:
vllm: v0.11.0
runner: linux-aarch64-${{ matrix.runner }}
image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.2.rc1-910b-ubuntu22.04-py3.11
image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc1-910b-ubuntu22.04-py3.11
model_name: ${{ matrix.model_name }}
upload: ${{ github.event_name == 'workflow_dispatch' && github.event.inputs.vllm-ascend-version == 'latest' }}

Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/vllm_ascend_test_nightly_a2.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ jobs:
with:
soc_version: a2
runner: linux-aarch64-a2-0
image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.2.rc1-910b-ubuntu22.04-py3.11
image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc1-910b-ubuntu22.04-py3.11
replicas: 1
size: ${{ matrix.test_config.size }}
config_file_path: ${{ matrix.test_config.config_file_path }}
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/vllm_ascend_test_nightly_a3.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ jobs:
with:
vllm: v0.11.0
runner: ${{ matrix.test_config.os }}
image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.2.rc1-a3-ubuntu22.04-py3.11
image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc1-a3-ubuntu22.04-py3.11
tests: ${{ matrix.test_config.tests }}

multi-node-tests:
Expand Down Expand Up @@ -113,7 +113,7 @@ jobs:
with:
soc_version: a3
runner: linux-aarch64-a3-0
image: m.daocloud.io/quay.io/ascend/cann:8.2.rc1-a3-ubuntu22.04-py3.11
image: m.daocloud.io/quay.io/ascend/cann:8.3.rc1-a3-ubuntu22.04-py3.11
replicas: 1
size: ${{ matrix.test_config.size }}
config_file_path: ${{ matrix.test_config.config_file_path }}
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/vllm_ascend_test_pd.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ jobs:
runs-on: linux-arm64-npu-static-8

container:
image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.2.rc1-910b-ubuntu22.04-py3.11
image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc1-910b-ubuntu22.04-py3.11
volumes:
- /usr/local/dcmi:/usr/local/dcmi
- /usr/local/bin/npu-smi:/usr/local/bin/npu-smi
Expand Down
2 changes: 1 addition & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
# This file is a part of the vllm-ascend project.
#

FROM quay.io/ascend/cann:8.2.rc1-910b-ubuntu22.04-py3.11
FROM quay.io/ascend/cann:8.3.rc1-910b-ubuntu22.04-py3.11

ARG PIP_INDEX_URL="https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple"
ARG COMPILE_CUSTOM_KERNELS=1
Expand Down
2 changes: 1 addition & 1 deletion Dockerfile.310p
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
# This file is a part of the vllm-ascend project.
#

FROM quay.io/ascend/cann:8.2.rc1-310p-ubuntu22.04-py3.11
FROM quay.io/ascend/cann:8.3.rc1-310p-ubuntu22.04-py3.11

ARG PIP_INDEX_URL="https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple"
ARG COMPILE_CUSTOM_KERNELS=1
Expand Down
2 changes: 1 addition & 1 deletion Dockerfile.310p.openEuler
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
# This file is a part of the vllm-ascend project.
#

FROM quay.io/ascend/cann:8.2.rc1-310p-openeuler24.03-py3.11
FROM quay.io/ascend/cann:8.3.rc1-310p-openeuler24.03-py3.11

ARG PIP_INDEX_URL="https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple"
ARG COMPILE_CUSTOM_KERNELS=1
Expand Down
2 changes: 1 addition & 1 deletion Dockerfile.a3
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
# This file is a part of the vllm-ascend project.
#

FROM quay.io/ascend/cann:8.2.rc1-a3-ubuntu22.04-py3.11
FROM quay.io/ascend/cann:8.3.rc1-a3-ubuntu22.04-py3.11

ARG PIP_INDEX_URL="https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple"
ARG COMPILE_CUSTOM_KERNELS=1
Expand Down
2 changes: 1 addition & 1 deletion Dockerfile.a3.openEuler
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
# This file is a part of the vllm-ascend project.
#

FROM quay.io/ascend/cann:8.2.rc1-a3-openeuler24.03-py3.11
FROM quay.io/ascend/cann:8.3.rc1-a3-openeuler24.03-py3.11

ARG PIP_INDEX_URL="https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple"
ARG COMPILE_CUSTOM_KERNELS=1
Expand Down
2 changes: 1 addition & 1 deletion Dockerfile.openEuler
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
# This file is a part of the vllm-ascend project.
#

FROM quay.io/ascend/cann:8.2.rc1-910b-openeuler24.03-py3.11
FROM quay.io/ascend/cann:8.3.rc1-910b-openeuler24.03-py3.11

ARG PIP_INDEX_URL="https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple"
ARG COMPILE_CUSTOM_KERNELS=1
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ By using vLLM Ascend plugin, popular open-source models, including Transformer-l
- OS: Linux
- Software:
* Python >= 3.9, < 3.12
* CANN >= 8.2.rc1 (Ascend HDK version refers to [here](https://www.hiascend.com/document/detail/zh/canncommercial/82RC1/releasenote/releasenote_0000.html))
* CANN >= 8.3.rc1 (Ascend HDK version refers to [here](https://www.hiascend.com/document/detail/zh/canncommercial/83RC1/releasenote/releasenote_0000.html))
* PyTorch == 2.7.1, torch-npu == 2.7.1
* vLLM (the same version as vllm-ascend)

Expand Down
2 changes: 1 addition & 1 deletion README.zh.md
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ vLLM 昇腾插件 (`vllm-ascend`) 是一个由社区维护的让vLLM在Ascend NP
- 操作系统:Linux
- 软件:
* Python >= 3.9, < 3.12
* CANN >= 8.2.rc1 (Ascend HDK 版本参考[这里](https://www.hiascend.com/document/detail/zh/canncommercial/82RC1/releasenote/releasenote_0000.html))
* CANN >= 8.3.rc1 (Ascend HDK 版本参考[这里](https://www.hiascend.com/document/detail/zh/canncommercial/83RC1/releasenote/releasenote_0000.html))
* PyTorch == 2.7.1, torch-npu == 2.7.1
* vLLM (与vllm-ascend版本一致)

Expand Down
2 changes: 1 addition & 1 deletion docs/source/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@
'pip_vllm_ascend_version': "0.11.0rc0",
'pip_vllm_version': "0.11.0",
# CANN image tag
'cann_image_tag': "8.2.rc1-910b-ubuntu22.04-py3.11",
'cann_image_tag': "8.3.rc1-910b-ubuntu22.04-py3.11",
# vllm version in ci
'ci_vllm_version': 'v0.11.0',
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ currently, the multi-node test workflow defined in the [vllm_ascend_test_nightly
uses: ./.github/workflows/_e2e_nightly_multi_node.yaml
with:
soc_version: a3
image: m.daocloud.io/quay.io/ascend/cann:8.2.rc1-a3-ubuntu22.04-py3.11
image: m.daocloud.io/quay.io/ascend/cann:8.3.rc1-a3-ubuntu22.04-py3.11
replicas: 1
size: ${{ matrix.test_config.size }}
config_file_path: ${{ matrix.test_config.config_file_path }}
Expand Down
24 changes: 12 additions & 12 deletions docs/source/installation.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,8 @@ This document describes how to install vllm-ascend manually.

| Software | Supported version | Note |
|---------------|----------------------------------|-------------------------------------------|
| Ascend HDK | Refer to [here](https://www.hiascend.com/document/detail/zh/canncommercial/82RC1/releasenote/releasenote_0000.html) | Required for CANN |
| CANN | >= 8.2.RC1 | Required for vllm-ascend and torch-npu |
| Ascend HDK | Refer to [here](https://www.hiascend.com/document/detail/zh/canncommercial/83RC1/releasenote/releasenote_0000.html) | Required for CANN |
| CANN | >= 8.3.RC1 | Required for vllm-ascend and torch-npu |
| torch-npu | == 2.7.1 | Required for vllm-ascend, No need to install manually, it will be auto installed in below steps |
| torch | == 2.7.1 | Required for torch-npu and vllm |

Expand Down Expand Up @@ -80,19 +80,19 @@ source vllm-ascend-env/bin/activate
pip3 install -i https://pypi.tuna.tsinghua.edu.cn/simple attrs 'numpy<2.0.0' decorator sympy cffi pyyaml pathlib2 psutil protobuf scipy requests absl-py wheel typing_extensions

# Download and install the CANN package.
wget --header="Referer: https://www.hiascend.com/" https://ascend-repo.obs.cn-east-2.myhuaweicloud.com/CANN/CANN%208.2.RC1/Ascend-cann-toolkit_8.2.RC1_linux-"$(uname -i)".run
chmod +x ./Ascend-cann-toolkit_8.2.RC1_linux-"$(uname -i)".run
./Ascend-cann-toolkit_8.2.RC1_linux-"$(uname -i)".run --full
# https://ascend-repo.obs.cn-east-2.myhuaweicloud.com/Milan-ASL/Milan-ASL%20V100R001C22B800TP052/Ascend-cann-kernels-910b_8.2.rc1_linux-aarch64.run
wget --header="Referer: https://www.hiascend.com/" https://ascend-repo.obs.cn-east-2.myhuaweicloud.com/CANN/CANN%208.3.RC1/Ascend-cann-toolkit_8.3.RC1_linux-"$(uname -i)".run
chmod +x ./Ascend-cann-toolkit_8.3.RC1_linux-"$(uname -i)".run
./Ascend-cann-toolkit_8.3.RC1_linux-"$(uname -i)".run --full
# https://ascend-repo.obs.cn-east-2.myhuaweicloud.com/Milan-ASL/Milan-ASL%20V100R001C22B800TP052/Ascend-cann-kernels-910b_8.3.rc1_linux-aarch64.run

source /usr/local/Ascend/ascend-toolkit/set_env.sh
wget --header="Referer: https://www.hiascend.com/" https://ascend-repo.obs.cn-east-2.myhuaweicloud.com/CANN/CANN%208.2.RC1/Ascend-cann-kernels-910b_8.2.RC1_linux-"$(uname -i)".run
chmod +x ./Ascend-cann-kernels-910b_8.2.RC1_linux-"$(uname -i)".run
./Ascend-cann-kernels-910b_8.2.RC1_linux-"$(uname -i)".run --install
wget --header="Referer: https://www.hiascend.com/" https://ascend-repo.obs.cn-east-2.myhuaweicloud.com/CANN/CANN%208.3.RC1/Ascend-cann-kernels-910b_8.3.RC1_linux-"$(uname -i)".run
chmod +x ./Ascend-cann-kernels-910b_8.3.RC1_linux-"$(uname -i)".run
./Ascend-cann-kernels-910b_8.3.RC1_linux-"$(uname -i)".run --install

wget --header="Referer: https://www.hiascend.com/" https://ascend-repo.obs.cn-east-2.myhuaweicloud.com/CANN/CANN%208.2.RC1/Ascend-cann-nnal_8.2.RC1_linux-"$(uname -i)".run
chmod +x ./Ascend-cann-nnal_8.2.RC1_linux-"$(uname -i)".run
./Ascend-cann-nnal_8.2.RC1_linux-"$(uname -i)".run --install
wget --header="Referer: https://www.hiascend.com/" https://ascend-repo.obs.cn-east-2.myhuaweicloud.com/CANN/CANN%208.3.RC1/Ascend-cann-nnal_8.3.RC1_linux-"$(uname -i)".run
chmod +x ./Ascend-cann-nnal_8.3.RC1_linux-"$(uname -i)".run
./Ascend-cann-nnal_8.3.RC1_linux-"$(uname -i)".run --install

source /usr/local/Ascend/nnal/atb/set_env.sh
```
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

* Software:
* Python >= 3.9, < 3.12
* CANN >= 8.2.rc1
* CANN >= 8.3.rc1
* PyTorch == 2.7.1, torch-npu == 2.7.1
* vLLM (same version as vllm-ascend)
* mooncake-transfer-engine reference documentation: https://github.com/kvcache-ai/Mooncake/blob/main/doc/zh/ascend_transport.md
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

* Software:
* Python >= 3.9, < 3.12
* CANN >= 8.2.rc1
* CANN >= 8.3.rc1
* PyTorch == 2.7.1, torch-npu == 2.7.1
* vLLM:main branch
* vLLM-Ascend:main branch
Expand Down
4 changes: 2 additions & 2 deletions tests/e2e/nightly/multi_node/scripts/lws.yaml.jinja2
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ spec:
spec:
containers:
- name: vllm-leader
image: {{ image | default("m.daocloud.io/quay.io/ascend/cann:8.2.rc1-a3-ubuntu22.04-py3.11") }}
image: {{ image | default("m.daocloud.io/quay.io/ascend/cann:8.3.rc1-a3-ubuntu22.04-py3.11") }}
env:
- name: CONFIG_YAML_PATH
value: {{ config_file_path | default("DeepSeek-V3.yaml") }}
Expand Down Expand Up @@ -75,7 +75,7 @@ spec:
spec:
containers:
- name: vllm-worker
image: {{ image | default("m.daocloud.io/quay.io/ascend/cann:8.2.rc1-a3-ubuntu22.04-py3.11") }}
image: {{ image | default("m.daocloud.io/quay.io/ascend/cann:8.3.rc1-a3-ubuntu22.04-py3.11") }}
env:
- name: CONFIG_YAML_PATH
value: {{ config_file_path | default("DeepSeek-V3.yaml") }}
Expand Down
2 changes: 1 addition & 1 deletion tests/e2e/vllm_interface/vllm_test.cfg
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
# Base docker image used to build the vllm-ascend e2e test image, which is built in the vLLM repository
BASE_IMAGE_NAME="quay.io/ascend/cann:8.2.rc1-910b-ubuntu22.04-py3.11"
BASE_IMAGE_NAME="quay.io/ascend/cann:8.3.rc1-910b-ubuntu22.04-py3.11"
47 changes: 4 additions & 43 deletions tests/ut/attention/test_attention_mask.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,11 +74,10 @@ def test_get_attn_mask(self):
attn_mask = attention_mask_builder.get_attn_mask(
max_seq_len=2048, dtype=torch.float16, device=torch.device("cpu"))
self.assertEqual(attn_mask.shape, (2048, 2048))
self.assertEqual(attn_mask[0][-1],
torch.tensor(float("-inf"), dtype=torch.float16))
self.assertEqual(attention_mask_builder._seq_len_cached, 2048)
self.assertEqual(attn_mask[0][-1], torch.tensor(True))
self.assertEqual(attention_mask_builder._seq_len_cached, 1024)
self.assertEqual(attention_mask_builder.attn_mask_cache.shape,
(2048, 2048))
(1024, 1024))
self.assertEqual(attention_mask_builder.attn_mask_cache[0][-1],
torch.tensor(float("-inf"), dtype=torch.float16))

Expand All @@ -91,43 +90,5 @@ def test_get_splitfuse_attn_mask(self):
dtype=torch.float16,
device=torch.device("cpu"),
)
self.assertEqual(attn_mask.shape, (6, 100))
self.assertEqual(attn_mask.shape, (2048, 2048))
self.assertEqual(attention_mask_builder._seq_len_cached, 1024)

attn_mask = attention_mask_builder.get_splitfuse_attn_mask(
seq_lens=torch.tensor([10, 3000, 2000]),
position=torch.tensor([7, 8, 9, 2999, 1999]),
dtype=torch.float16,
device=torch.device("cpu"),
)
self.assertEqual(attn_mask.shape, (5, 3000))
self.assertEqual(attention_mask_builder._seq_len_cached, 3000)

# splitfuse_attn_mask now only supports data types: torch.float16 and torch.bfloat16
# otherwise raise ValueError
with self.assertRaises(ValueError):
attn_mask = attention_mask_builder.get_splitfuse_attn_mask(
seq_lens=torch.tensor([10, 20, 100]),
position=torch.tensor([7, 8, 9, 18, 19, 99]),
dtype=torch.int8,
device=torch.device("cpu"),
)

def test_mask_value_cleanliness(self):
attention_mask_builder = AttentionMaskBuilder(max_seq_len=6,
dtype=torch.bfloat16)
self.assertEqual(attention_mask_builder.attn_mask_cache[-2][-1],
torch.tensor(1, dtype=torch.bfloat16))

attn_mask = attention_mask_builder.get_splitfuse_attn_mask(
seq_lens=torch.tensor([6]),
position=torch.tensor([3, 4, 5]),
dtype=torch.bfloat16,
device=torch.device("cpu"),
)
self.assertEqual(
attn_mask[-2][-1],
torch.tensor(-10000, dtype=torch.bfloat16,
device=attn_mask.device))
self.assertEqual(attention_mask_builder.attn_mask_cache[-2][-1],
torch.tensor(1, dtype=torch.bfloat16))
Loading
Loading