Skip to content

Commit f846bd2

Browse files
authored
[CI] Add multi-node test case for a2 (#3805)
### What this PR does / why we need it? This patch adds a multi-node test case for a2. ### Does this PR introduce _any_ user-facing change? ### How was this patch tested? - vLLM version: v0.11.0rc3 - vLLM main: vllm-project/vllm@c9461e0 --------- Signed-off-by: wangli <wangli858794774@gmail.com>
1 parent 9030106 commit f846bd2

File tree

5 files changed

+97
-8
lines changed

5 files changed

+97
-8
lines changed

.github/workflows/_e2e_nightly_multi_node.yaml

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,10 @@ on:
77
required: true
88
type: string
99
description: use a2 or a3
10+
runner:
11+
required: false
12+
type: string
13+
default: linux-aarch64-a3-0
1014
image:
1115
required: false
1216
type: string
@@ -62,7 +66,7 @@ concurrency:
6266
jobs:
6367
e2e:
6468
# This is a runner with no NPU for k8s controller
65-
runs-on: linux-aarch64-a3-0
69+
runs-on: ${{ inputs.runner }}
6670
container:
6771
image: m.daocloud.io/quay.io/ascend/cann:8.2.rc1-a3-ubuntu22.04-py3.11
6872
env:
@@ -90,8 +94,7 @@ jobs:
9094
kubectl version --client=true
9195
9296
# A2 and A3 share this step; the caller supplies the matching kubeconfig via the KUBECONFIG_B64 secret
93-
- name: Setup kubeconfig for A3
94-
if: inputs.soc_version == 'a3'
97+
- name: Decode kubeconfig from secrets
9598
run: |
9699
# Decode and save kubeconfig
97100
echo "${{ secrets.KUBECONFIG_B64 }}" | base64 -d > $KUBECONFIG
@@ -129,6 +132,12 @@ jobs:
129132
fi
130133
done
131134
135+
if [ "${{ inputs.soc_version }}" = "a3" ]; then
136+
npu_per_node=16
137+
else
138+
npu_per_node=8
139+
fi
140+
132141
jinja2 tests/e2e/nightly/multi_node/scripts/lws.yaml.jinja2 \
133142
-D size="$size" \
134143
-D replicas="$replicas" \
@@ -138,6 +147,7 @@ jobs:
138147
-D vllm_ascend_remote_url="$vllm_ascend_remote_url" \
139148
-D vllm_ascend_ref="$vllm_ascend_ref" \
140149
-D result_file_path="$result_file_path" \
150+
-D npu_per_node="$npu_per_node" \
141151
--outfile lws.yaml
142152
143153
kubectl apply -f ./lws.yaml

.github/workflows/vllm_ascend_test_nightly_a2.yaml

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,3 +61,25 @@ jobs:
6161
vllm: v0.11.0
6262
runner: ${{ matrix.test_config.os }}
6363
tests: ${{ matrix.test_config.tests }}
64+
65+
multi-node-tests:
66+
needs: single-node-tests
67+
if: always() && (github.event_name == 'schedule' || github.event_name == 'workflow_dispatch')
68+
strategy:
69+
fail-fast: false
70+
max-parallel: 1
71+
matrix:
72+
test_config:
73+
- name: multi-node-deepseek-dp
74+
config_file_path: tests/e2e/nightly/multi_node/config/models/DeepSeek-R1-W8A8-A2.yaml
75+
size: 2
76+
uses: ./.github/workflows/_e2e_nightly_multi_node.yaml
77+
with:
78+
soc_version: a2
79+
runner: linux-aarch64-a2-0
80+
image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.2.rc1-910b-ubuntu22.04-py3.11
81+
replicas: 1
82+
size: ${{ matrix.test_config.size }}
83+
config_file_path: ${{ matrix.test_config.config_file_path }}
84+
secrets:
85+
KUBECONFIG_B64: ${{ secrets.KUBECONFIG_A2_B64 }}

.github/workflows/vllm_ascend_test_nightly_a3.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -104,10 +104,10 @@ jobs:
104104
uses: ./.github/workflows/_e2e_nightly_multi_node.yaml
105105
with:
106106
soc_version: a3
107+
runner: linux-aarch64-a3-0
107108
image: m.daocloud.io/quay.io/ascend/cann:8.2.rc1-a3-ubuntu22.04-py3.11
108109
replicas: 1
109110
size: ${{ matrix.test_config.size }}
110111
config_file_path: ${{ matrix.test_config.config_file_path }}
111112
secrets:
112113
KUBECONFIG_B64: ${{ secrets.KUBECONFIG_B64 }}
113-
Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
test_name: "test DeepSeek-R1-W8A8 on A2"
2+
model: "vllm-ascend/DeepSeek-R1-0528-W8A8"
3+
num_nodes: 2
4+
npu_per_node: 8
5+
env_common:
6+
VLLM_USE_MODELSCOPE: true
7+
HCCL_BUFFSIZE: 1024
8+
SERVER_PORT: 8080
9+
OMP_PROC_BIND: false
10+
OMP_NUM_THREADS: 10
11+
12+
13+
deployment:
14+
-
15+
server_cmd: >
16+
vllm serve vllm-ascend/DeepSeek-R1-0528-W8A8
17+
--host 0.0.0.0
18+
--port $SERVER_PORT
19+
--data-parallel-size 4
20+
--data-parallel-size-local 2
21+
--data-parallel-address $LOCAL_IP
22+
--data-parallel-rpc-port 13399
23+
--no-enable-prefix-caching
24+
--max-num-seqs 16
25+
--tensor-parallel-size 4
26+
--max-model-len 36864
27+
--max-num-batched-tokens 6000
28+
--enable-expert-parallel
29+
--trust-remote-code
30+
--quantization ascend
31+
--gpu-memory-utilization 0.9
32+
--enforce-eager
33+
--speculative-config '{"num_speculative_tokens": 1, "method":"deepseek_mtp"}' \
34+
--additional-config '{"ascend_scheduler_config":{"enabled":false},"chunked_prefill_for_mla":true,"enable_weight_nz_layout":true}'
35+
36+
-
37+
server_cmd: >
38+
vllm serve vllm-ascend/DeepSeek-R1-0528-W8A8
39+
--headless
40+
--data-parallel-size 4
41+
--data-parallel-rpc-port 13399
42+
--data-parallel-size-local 2
43+
--data-parallel-start-rank 2
44+
--data-parallel-address $MASTER_IP
45+
--no-enable-prefix-caching
46+
--max-num-seqs 16
47+
--tensor-parallel-size 4
48+
--max-model-len 36864
49+
--max-num-batched-tokens 6000
50+
--enable-expert-parallel
51+
--trust-remote-code
52+
--quantization ascend
53+
--gpu-memory-utilization 0.9
54+
--enforce-eager
55+
--speculative-config '{"num_speculative_tokens": 1, "method":"deepseek_mtp"}' \
56+
--additional-config '{"ascend_scheduler_config":{"enabled":false},"chunked_prefill_for_mla":true,"enable_weight_nz_layout":true}'
57+
benchmarks:

tests/e2e/nightly/multi_node/scripts/lws.yaml.jinja2

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -37,11 +37,11 @@ spec:
3737
bash /root/.cache/tests/run.sh
3838
resources:
3939
limits:
40-
huawei.com/ascend-1980: "16"
40+
huawei.com/ascend-1980: {{ npu_per_node | default("16") }}
4141
memory: 512Gi
4242
ephemeral-storage: 100Gi
4343
requests:
44-
huawei.com/ascend-1980: "16"
44+
huawei.com/ascend-1980: {{ npu_per_node | default("16") }}
4545
ephemeral-storage: 100Gi
4646
cpu: 125
4747
ports:
@@ -95,11 +95,11 @@ spec:
9595
bash /root/.cache/tests/run.sh
9696
resources:
9797
limits:
98-
huawei.com/ascend-1980: "16"
98+
huawei.com/ascend-1980: {{ npu_per_node | default("16") }}
9999
memory: 512Gi
100100
ephemeral-storage: 100Gi
101101
requests:
102-
huawei.com/ascend-1980: "16"
102+
huawei.com/ascend-1980: {{ npu_per_node | default("16") }}
103103
ephemeral-storage: 100Gi
104104
cpu: 125
105105
volumeMounts:

0 commit comments

Comments
 (0)