File tree Expand file tree Collapse file tree 7 files changed +75
-2
lines changed Expand file tree Collapse file tree 7 files changed +75
-2
lines changed Original file line number Diff line number Diff line change @@ -53,12 +53,23 @@ jobs:
5353 model_name : Qwen2-Audio-7B-Instruct
5454 - runner : a2-2
5555 model_name : Qwen3-30B-A3B
56- - runner : a2-2
57- model_name : Qwen3-VL-30B-A3B-Instruct
56+ # This model is temporarily disabled because of a bug; re-add it once the bug is fixed
57+ # - runner: a2-2
58+ # model_name: Qwen3-VL-30B-A3B-Instruct
5859 - runner : a2-2
5960 model_name : DeepSeek-V2-Lite
6061 - runner : a2-4
6162 model_name : Qwen3-Next-80B-A3B-Instruct
63+ - runner : a2-1
64+ model_name : Qwen3-VL-8B-Instruct
65+ - runner : a2-1
66+ model_name : Qwen2.5-Omni-7B
67+ - runner : a2-1
68+ model_name : Meta-Llama-3.1-8B-Instruct
69+ - runner : a2-2
70+ model_name : ERNIE-4.5-21B-A3B-PT
71+ - runner : a2-1
72+ model_name : Mistral-7B-Instruct-v0.1
6273 fail-fast : false
6374 # The test is triggered when the tags 'accuracy-test' & 'ready-for-test' are present
6475 if : >-
Original file line number Diff line number Diff line change 1+ model_name : " PaddlePaddle/ERNIE-4.5-21B-A3B-PT"
2+ tasks :
3+ - name : " gsm8k"
4+ metrics :
5+ - name : " exact_match,flexible-extract"
6+ value : 0.72
7+ - name : " ceval-valid"
8+ metrics :
9+ - name : " acc,none"
10+ value : 0.85
11+ num_fewshot : 5
12+ tensor_parallel_size : 2
13+ batch_size : 16
14+ gpu_memory_utilization : 0.6
Original file line number Diff line number Diff line change 1+ model_name : " LLM-Research/Meta-Llama-3.1-8B-Instruct"
2+ hardware : " Atlas A2 Series"
3+ tasks :
4+ - name : " gsm8k"
5+ metrics :
6+ - name : " exact_match,strict-match"
7+ value : 0.82
8+ - name : " exact_match,flexible-extract"
9+ value : 0.84
10+
11+ num_fewshot : 5
Original file line number Diff line number Diff line change 1+ model_name : " AI-ModelScope/Mistral-7B-Instruct-v0.1"
2+ runner : " linux-aarch64-a2-1"
3+ hardware : " Atlas A2 Series"
4+ tasks :
5+ - name : " gsm8k"
6+ metrics :
7+ - name : " exact_match,strict-match"
8+ value : 0.35
9+ - name : " exact_match,flexible-extract"
10+ value : 0.38
11+ trust_remote_code : True
Original file line number Diff line number Diff line change 1+ model_name : " Qwen/Qwen2.5-Omni-7B"
2+ hardware : " Atlas A2 Series"
3+ model : " vllm-vlm"
4+ tasks :
5+ - name : " mmmu_val"
6+ metrics :
7+ - name : " acc,none"
8+ value : 0.52
9+ max_model_len : 8192
10+ gpu_memory_utilization : 0.7
Original file line number Diff line number Diff line change 1+ model_name : " Qwen/Qwen3-VL-8B-Instruct"
2+ hardware : " Atlas A2 Series"
3+ model : " vllm-vlm"
4+ tasks :
5+ - name : " mmmu_val"
6+ metrics :
7+ - name : " acc,none"
8+ value : 0.55
9+ max_model_len : 8192
10+ batch_size : 32
11+ gpu_memory_utilization : 0.7
Original file line number Diff line number Diff line change @@ -6,3 +6,8 @@ Qwen2-7B.yaml
66Qwen2-VL-7B-Instruct.yaml
77Qwen2-Audio-7B-Instruct.yaml
88Qwen3-VL-30B-A3B-Instruct.yaml
9+ Qwen3-VL-8B-Instruct.yaml
10+ Qwen2.5-Omni-7B.yaml
11+ Meta-Llama-3.1-8B-Instruct.yaml
12+ ERNIE-4.5-21B-A3B-PT.yaml
13+ Mistral-7B-Instruct-v0.1.yaml
You can’t perform that action at this time.
0 commit comments