8 changes: 5 additions & 3 deletions .github/workflows/_e2e_test.yaml
@@ -74,8 +74,8 @@ jobs:
           PYTORCH_NPU_ALLOC_CONF: max_split_size_mb:256
         if: ${{ inputs.type == 'light' }}
         run: |
-          pytest -sv tests/e2e/singlecard/test_aclgraph.py
-          pytest -sv tests/e2e/singlecard/test_quantization.py
+          # pytest -sv tests/e2e/singlecard/test_aclgraph.py
+          # pytest -sv tests/e2e/singlecard/test_quantization.py
           pytest -sv tests/e2e/singlecard/test_vlm.py::test_multimodal_vl
 
       - name: Run e2e test
@@ -171,13 +171,15 @@ jobs:
         if: ${{ inputs.type == 'light' }}
         run: |
           pytest -sv tests/e2e/multicard/test_qwen3_moe.py::test_models_distributed_Qwen3_MOE_TP2_WITH_EP
+          pytest -sv tests/e2e/multicard/test_torchair_graph_mode.py::test_e2e_deepseekv2lite_with_torchair
 
       - name: Run vllm-project/vllm-ascend test (full)
         env:
           VLLM_WORKER_MULTIPROC_METHOD: spawn
           VLLM_USE_MODELSCOPE: True
         if: ${{ inputs.type == 'full' }}
         run: |
+          pytest -sv tests/e2e/multicard/test_torchair_graph_mode.py
           pytest -sv tests/e2e/multicard/test_data_parallel.py
           pytest -sv tests/e2e/multicard/test_expert_parallel.py
           # pytest -sv tests/e2e/multicard/test_external_launcher.py
@@ -199,4 +201,4 @@ jobs:
           pytest -sv tests/e2e/multicard/test_pipeline_parallel.py
           pytest -sv tests/e2e/multicard/test_prefix_caching.py
           pytest -sv tests/e2e/multicard/test_qwen3_moe.py
-          pytest -sv tests/e2e/multicard/test_torchair_graph_mode.py

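For local verification, the light-suite selection above can be reproduced with a short driver script. This is a sketch under stated assumptions, not part of the PR: it assumes pytest is installed and that the script runs from the vllm-ascend repository root on a multicard Ascend machine.

# Sketch: run the same multicard light-suite selection as the CI step above.
# Assumes pytest is installed and the working directory is the repo root.
import sys

import pytest

if __name__ == "__main__":
    sys.exit(
        pytest.main([
            "-sv",
            "tests/e2e/multicard/test_qwen3_moe.py"
            "::test_models_distributed_Qwen3_MOE_TP2_WITH_EP",
            "tests/e2e/multicard/test_torchair_graph_mode.py"
            "::test_e2e_deepseekv2lite_with_torchair",
        ]))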
63 changes: 63 additions & 0 deletions tests/e2e/multicard/test_torchair_graph_mode.py
@@ -225,3 +225,66 @@ def test_e2e_qwen2_with_torchair():

def test_e2e_qwen3_moe_with_torchair():
    _qwen_torchair_test_fixture("Qwen/Qwen3-30B-A3B", 2, True)


# Test DeepSeek-V2-Lite with torchair graph mode.
def _deepseek_v2_lite_torchair_test_fixture(
    additional_config: Dict,
    *,
    tensor_parallel_size=2,
    use_v1_scheduler=False,
):
    example_prompts = [
        "Hello, my name is",
        "The president of the United States is",
        "The capital of France is",
        "The future of AI is",
    ]

    kwargs = {}
    if not use_v1_scheduler:
        kwargs = {
            "ascend_scheduler_config": {
                "enable": True,
            },
            "refresh": True,
        }
    additional_config.update(**kwargs)

    with VllmRunner(
            "deepseek-ai/DeepSeek-V2-Lite",
            dtype="half",
            tensor_parallel_size=tensor_parallel_size,
            distributed_executor_backend="mp",
            additional_config=additional_config,
    ) as vllm_model:
        vllm_output = vllm_model.generate_greedy(example_prompts, 5)

    # NOTE: the deepseek-ai/DeepSeek-V2-Lite checkpoint used here carries
    # random weights and only 2 hidden layers, so golden-output comparison
    # would be unreliable; we only assert that each output is non-empty.
    # This check should only be tightened once accuracy improves with the
    # official DeepSeek-V2-Lite weights.
    for i in range(len(vllm_output)):
        generated_text = vllm_output[i][1]
        assert len(
            generated_text.strip()) > 0, f"The {i}-th output is empty"


def test_e2e_deepseekv2lite_with_torchair():
    additional_config = {
        "torchair_graph_config": {
            "enabled": True,
        },
    }
    _deepseek_v2_lite_torchair_test_fixture(additional_config)


def test_e2e_deepseekv2lite_with_torchair_v1scheduler():
    additional_config = {
        "torchair_graph_config": {
            "enabled": True,
        },
    }
    _deepseek_v2_lite_torchair_test_fixture(additional_config,
                                            use_v1_scheduler=True)
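
As a usage note, the additional_config dictionary exercised by these tests has the same shape an end user would pass when constructing an engine. The following is a minimal sketch, assuming a vLLM build with the vllm-ascend plugin installed, at least two visible NPUs, and that LLM forwards additional_config to the Ascend platform the way the VllmRunner helper does above; the prompt and token budget are illustrative only.

# Sketch: enable torchair graph mode via the public vLLM entry point,
# mirroring the config the fixture above passes through VllmRunner.
# Assumes the vllm-ascend plugin is installed and 2 NPUs are available.
from vllm import LLM, SamplingParams

llm = LLM(
    model="deepseek-ai/DeepSeek-V2-Lite",
    tensor_parallel_size=2,
    additional_config={
        "torchair_graph_config": {
            "enabled": True,
        },
    },
)
outputs = llm.generate(["Hello, my name is"],
                       SamplingParams(temperature=0.0, max_tokens=5))
print(outputs[0].outputs[0].text)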