Skip to content

Commit a6f8422

Browse files
committed
add acc test
Signed-off-by: MrZ20 <2609716663@qq.com>
1 parent 14ca1e5 commit a6f8422

File tree

7 files changed

+75
-2
lines changed

7 files changed

+75
-2
lines changed

.github/workflows/accuracy_test.yaml

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -53,12 +53,23 @@ jobs:
5353
model_name: Qwen2-Audio-7B-Instruct
5454
- runner: a2-2
5555
model_name: Qwen3-30B-A3B
56-
- runner: a2-2
57-
model_name: Qwen3-VL-30B-A3B-Instruct
56+
# This model is temporarily disabled because of a known bug; re-add it once the bug is fixed.
57+
# - runner: a2-2
58+
# model_name: Qwen3-VL-30B-A3B-Instruct
5859
- runner: a2-2
5960
model_name: DeepSeek-V2-Lite
6061
- runner: a2-4
6162
model_name: Qwen3-Next-80B-A3B-Instruct
63+
- runner: a2-1
64+
model_name: Qwen3-VL-8B-Instruct
65+
- runner: a2-1
66+
model_name: Qwen2.5-Omni-7B
67+
- runner: a2-1
68+
model_name: Meta-Llama-3.1-8B-Instruct
69+
- runner: a2-2
70+
model_name: ERNIE-4.5-21B-A3B-PT
71+
- runner: a2-1
72+
model_name: Mistral-7B-Instruct-v0.1
6273
fail-fast: false
6374
# test will be triggered when tag 'accuracy-test' & 'ready-for-test'
6475
if: >-
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
model_name: "PaddlePaddle/ERNIE-4.5-21B-A3B-PT"
2+
tasks:
3+
- name: "gsm8k"
4+
metrics:
5+
- name: "exact_match,flexible-extract"
6+
value: 0.72
7+
- name: "ceval-valid"
8+
metrics:
9+
- name: "acc,none"
10+
value: 0.85
11+
num_fewshot: 5
12+
tensor_parallel_size: 2
13+
batch_size: 16
14+
gpu_memory_utilization: 0.6
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
model_name: "LLM-Research/Meta-Llama-3.1-8B-Instruct"
2+
hardware: "Atlas A2 Series"
3+
tasks:
4+
- name: "gsm8k"
5+
metrics:
6+
- name: "exact_match,strict-match"
7+
value: 0.82
8+
- name: "exact_match,flexible-extract"
9+
value: 0.84
10+
11+
num_fewshot: 5
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
model_name: "AI-ModelScope/Mistral-7B-Instruct-v0.1"
2+
runner: "linux-aarch64-a2-1"
3+
hardware: "Atlas A2 Series"
4+
tasks:
5+
- name: "gsm8k"
6+
metrics:
7+
- name: "exact_match,strict-match"
8+
value: 0.35
9+
- name: "exact_match,flexible-extract"
10+
value: 0.38
11+
trust_remote_code: True
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
model_name: "Qwen/Qwen2.5-Omni-7B"
2+
hardware: "Atlas A2 Series"
3+
model: "vllm-vlm"
4+
tasks:
5+
- name: "mmmu_val"
6+
metrics:
7+
- name: "acc,none"
8+
value: 0.52
9+
max_model_len: 8192
10+
gpu_memory_utilization: 0.7
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
model_name: "Qwen/Qwen3-VL-8B-Instruct"
2+
hardware: "Atlas A2 Series"
3+
model: "vllm-vlm"
4+
tasks:
5+
- name: "mmmu_val"
6+
metrics:
7+
- name: "acc,none"
8+
value: 0.55
9+
max_model_len: 8192
10+
batch_size: 32
11+
gpu_memory_utilization: 0.7

tests/e2e/models/configs/accuracy.txt

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,3 +6,8 @@ Qwen2-7B.yaml
66
Qwen2-VL-7B-Instruct.yaml
77
Qwen2-Audio-7B-Instruct.yaml
88
Qwen3-VL-30B-A3B-Instruct.yaml
9+
Qwen3-VL-8B-Instruct.yaml
10+
Qwen2.5-Omni-7B.yaml
11+
Meta-Llama-3.1-8B-Instruct.yaml
12+
ERNIE-4.5-21B-A3B-PT.yaml
13+
Mistral-7B-Instruct-v0.1.yaml

0 commit comments

Comments
 (0)