File tree Expand file tree Collapse file tree 4 files changed +785
-18
lines changed
site-src/performance/benchmark Expand file tree Collapse file tree 4 files changed +785
-18
# Image, node-selection and resource settings grouped under `job`.
# NOTE(review): nesting was reconstructed from a flattened diff view;
# confirm against the chart templates that nodeSelector/resources
# belong under `job`.
job:
  image:
    repository: quay.io/inference-perf/inference-perf
    # Must be the empty string (not whitespace) for the default to apply.
    tag: ""  # Defaults to .Chart.AppVersion
  nodeSelector: {}
  # Example resources:
  # resources:
  #   requests:
  #     cpu: "1"
  #     memory: "4Gi"
  #   limits:
  #     cpu: "2"
  #     memory: "8Gi"
  resources: {}
15+
logLevel: INFO

# A GCS bucket path that points to the dataset file.
# The file will be copied from this path to the local file system
# at /dataset/dataset.json for use during the run.
# NOTE: For this dataset to be used, config.data.path must also be
# explicitly set to /dataset/dataset.json.
# Leave as the empty string when no GCS dataset is used.
gcsPath: ""

# hfToken optionally creates a secret with the specified token.
# Can be set using helm install --set hfToken=<token>
# (--set keys are case-sensitive, so the key must be spelled `hfToken`).
# Leave as the empty string to skip creating the secret.
hfToken: ""
27+
# Benchmark configuration.
# NOTE(review): nesting was reconstructed from a flattened diff view;
# confirm it against the inference-perf configuration schema.
config:
  load:
    type: constant
    interval: 15
    # Three stages with increasing rate (10, 20, 30), each with duration 20.
    stages:
      - rate: 10
        duration: 20
      - rate: 20
        duration: 20
      - rate: 30
        duration: 20
  api:
    type: completion
    streaming: true
  server:
    type: vllm
    model_name: meta-llama/Llama-3.1-8B-Instruct
    # NOTE(review): 0.0.0.0 looks like a placeholder; presumably this is
    # overridden with the real model server address — confirm.
    base_url: http://0.0.0.0:8000
    ignore_eos: true
  tokenizer:
    # Same model identifier as server.model_name above.
    pretrained_model_name_or_path: meta-llama/Llama-3.1-8B-Instruct
  data:
    type: shareGPT
  metrics:
    type: prometheus
    prometheus:
      google_managed: true
  report:
    request_lifecycle:
      summary: true
      per_stage: true
      per_request: true
    prometheus:
      summary: true
      per_stage: true
You can’t perform that action at this time.
0 commit comments