8 changes: 5 additions & 3 deletions .github/workflows/_e2e_test.yaml
@@ -74,8 +74,8 @@ jobs:
           PYTORCH_NPU_ALLOC_CONF: max_split_size_mb:256
         if: ${{ inputs.type == 'light' }}
         run: |
-          pytest -sv tests/e2e/singlecard/test_aclgraph.py
-          pytest -sv tests/e2e/singlecard/test_quantization.py
+          # pytest -sv tests/e2e/singlecard/test_aclgraph.py
+          # pytest -sv tests/e2e/singlecard/test_quantization.py
           pytest -sv tests/e2e/singlecard/test_vlm.py::test_multimodal_vl
 
       - name: Run e2e test
@@ -171,13 +171,15 @@ jobs:
         if: ${{ inputs.type == 'light' }}
         run: |
           pytest -sv tests/e2e/multicard/test_qwen3_moe.py::test_models_distributed_Qwen3_MOE_TP2_WITH_EP
+          pytest -sv tests/e2e/multicard/test_torchair_graph_mode.py::test_e2e_deepseekv2lite_with_torchair
 
       - name: Run vllm-project/vllm-ascend test (full)
         env:
           VLLM_WORKER_MULTIPROC_METHOD: spawn
           VLLM_USE_MODELSCOPE: True
         if: ${{ inputs.type == 'full' }}
         run: |
+          pytest -sv tests/e2e/multicard/test_torchair_graph_mode.py
           pytest -sv tests/e2e/multicard/test_data_parallel.py
           pytest -sv tests/e2e/multicard/test_expert_parallel.py
           # pytest -sv tests/e2e/multicard/test_external_launcher.py
@@ -199,4 +201,4 @@ jobs:
           pytest -sv tests/e2e/multicard/test_pipeline_parallel.py
           pytest -sv tests/e2e/multicard/test_prefix_caching.py
           pytest -sv tests/e2e/multicard/test_qwen3_moe.py
-          pytest -sv tests/e2e/multicard/test_torchair_graph_mode.py

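For local verification, the light-suite selection above can be reproduced with a short driver script. This is a sketch under stated assumptions, not part of the PR: it assumes pytest is installed and that the script runs from the vllm-ascend repository root on a multicard Ascend machine.

# Sketch: run the same multicard light-suite selection as the CI step above.
# Assumes pytest is installed and the working directory is the repo root.
import sys

import pytest

if __name__ == "__main__":
    sys.exit(
        pytest.main([
            "-sv",
            "tests/e2e/multicard/test_qwen3_moe.py"
            "::test_models_distributed_Qwen3_MOE_TP2_WITH_EP",
            "tests/e2e/multicard/test_torchair_graph_mode.py"
            "::test_e2e_deepseekv2lite_with_torchair",
        ]))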
63 changes: 63 additions & 0 deletions tests/e2e/multicard/test_torchair_graph_mode.py
@@ -225,3 +225,66 @@ def test_e2e_qwen2_with_torchair():

def test_e2e_qwen3_moe_with_torchair():
    _qwen_torchair_test_fixture("Qwen/Qwen3-30B-A3B", 2, True)


# Test DeepSeek-V2-Lite with torchair graph mode.
def _deepseek_v2_lite_torchair_test_fixture(
    additional_config: Dict,
    *,
    tensor_parallel_size=2,
    use_v1_scheduler=False,
):
    example_prompts = [
        "Hello, my name is",
        "The president of the United States is",
        "The capital of France is",
        "The future of AI is",
    ]

    kwargs = {}
    if not use_v1_scheduler:
        kwargs = {
            "ascend_scheduler_config": {
                "enable": True,
            },
            "refresh": True,
        }
    additional_config.update(**kwargs)

    with VllmRunner(
            "deepseek-ai/DeepSeek-V2-Lite",
            dtype="half",
            tensor_parallel_size=tensor_parallel_size,
            distributed_executor_backend="mp",
            additional_config=additional_config,
    ) as vllm_model:
        vllm_output = vllm_model.generate_greedy(example_prompts, 5)

    # NOTE: the deepseek-ai/DeepSeek-V2-Lite checkpoint used here carries
    # random weights and only 2 hidden layers, so golden-output comparison
    # would be unreliable; we only assert that each output is non-empty.
    # This check should only be tightened once accuracy improves with the
    # official DeepSeek-V2-Lite weights.
    for i in range(len(vllm_output)):
        generated_text = vllm_output[i][1]
        assert len(
            generated_text.strip()) > 0, f"The {i}-th output is empty"


def test_e2e_deepseekv2lite_with_torchair():
    additional_config = {
        "torchair_graph_config": {
            "enabled": True,
        },
    }
    _deepseek_v2_lite_torchair_test_fixture(additional_config)


def test_e2e_deepseekv2lite_with_torchair_v1scheduler():
    additional_config = {
        "torchair_graph_config": {
            "enabled": True,
        },
    }
    _deepseek_v2_lite_torchair_test_fixture(additional_config,
                                            use_v1_scheduler=True)
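
As a usage note, the additional_config dictionary exercised by these tests has the same shape an end user would pass when constructing an engine. The following is a minimal sketch, assuming a vLLM build with the vllm-ascend plugin installed, at least two visible NPUs, and that LLM forwards additional_config to the Ascend platform the way the VllmRunner helper does above; the prompt and token budget are illustrative only.

# Sketch: enable torchair graph mode via the public vLLM entry point,
# mirroring the config the fixture above passes through VllmRunner.
# Assumes the vllm-ascend plugin is installed and 2 NPUs are available.
from vllm import LLM, SamplingParams

llm = LLM(
    model="deepseek-ai/DeepSeek-V2-Lite",
    tensor_parallel_size=2,
    additional_config={
        "torchair_graph_config": {
            "enabled": True,
        },
    },
)
outputs = llm.generate(["Hello, my name is"],
                       SamplingParams(temperature=0.0, max_tokens=5))
print(outputs[0].outputs[0].text)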