Skip to content

Commit e6af4b2

Browse files
committed
Rework how warmup, cooldown, and rampup work due to issues identified during testing and review
Signed-off-by: Mark Kurtz <mark.kurtz@neuralmagic.com>
1 parent 9514423 commit e6af4b2

File tree

23 files changed

+810
-405
lines changed

23 files changed

+810
-405
lines changed

src/guidellm/__main__.py

Lines changed: 29 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -291,22 +291,45 @@ def benchmark():
291291
"--warmup",
292292
"--warmup-percent", # legacy alias
293293
"warmup",
294-
type=float,
295294
default=BenchmarkGenerativeTextArgs.get_default("warmup"),
295+
callback=cli_tools.parse_json,
296296
help=(
297-
"Warmup specification: if in (0,1) = percent, if >=1 = number of "
298-
"requests/seconds (depends on active constraint)."
297+
"Warmup specification: int, float, or dict as string "
298+
"(json or key=value). "
299+
"Controls time or requests before measurement starts. "
300+
"Numeric in (0, 1): percent of duration or request count. "
301+
"Numeric >=1: duration in seconds or request count. "
302+
"Advanced config: see TransientPhaseConfig schema."
299303
),
300304
)
301305
@click.option(
302306
"--cooldown",
303307
"--cooldown-percent", # legacy alias
304308
"cooldown",
305-
type=float,
306309
default=BenchmarkGenerativeTextArgs.get_default("cooldown"),
310+
callback=cli_tools.parse_json,
311+
help=(
312+
"Cooldown specification: int, float, or dict as string "
313+
"(json or key=value). "
314+
"Controls time or requests after measurement ends. "
315+
"Numeric in (0, 1): percent of duration or request count. "
316+
"Numeric >=1: duration in seconds or request count. "
317+
"Advanced config: see TransientPhaseConfig schema."
318+
),
319+
)
320+
@click.option(
321+
"--rampup",
322+
default=BenchmarkGenerativeTextArgs.get_default("rampup"),
323+
callback=cli_tools.parse_json,
307324
help=(
308-
"Cooldown specification: if in (0,1) = percent, if >=1 = number of "
309-
"requests/seconds (depends on active constraint)."
325+
"Rampup specification: int, float, or dict as string "
326+
"(json or key=value). "
327+
"Controls time to linearly ramp up requests. "
328+
"Only for Throughput/Concurrent strategies, "
329+
"not Synchronous/Rate-based. "
330+
"Numeric in (0, 1): percent of duration. "
331+
"Numeric >=1: duration in seconds. "
332+
"Advanced config: see TransientPhaseConfig schema."
310333
),
311334
)
312335
@click.option(

src/guidellm/benchmark/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
GenerativeBenchmarkerHTML,
1919
GenerativeBenchmarkerOutput,
2020
)
21-
from .profile import (
21+
from .profiles import (
2222
AsyncProfile,
2323
ConcurrentProfile,
2424
Profile,

src/guidellm/benchmark/benchmarker.py

Lines changed: 23 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,11 @@
11
"""
22
Benchmark execution orchestration and lifecycle management.
33
4-
Provides the core benchmarking engine coordinating request scheduling,
5-
data aggregation, and result compilation across execution strategies
6-
and environments. The Benchmarker manages the complete benchmark lifecycle
7-
from request submission through result compilation while supporting
8-
thread-safe singleton operations for consistent state management.
4+
Provides the core benchmarking engine that coordinates request scheduling,
5+
data aggregation, and result compilation across execution strategies and
6+
environments. The Benchmarker manages the complete benchmark lifecycle from
7+
request submission through result compilation while implementing thread-safe
8+
singleton operations for consistent state management across concurrent workflows.
99
"""
1010

1111
from __future__ import annotations
@@ -15,13 +15,14 @@
1515
from collections.abc import AsyncIterator, Iterable
1616
from typing import Generic
1717

18-
from guidellm.benchmark.profile import Profile
18+
from guidellm.benchmark.profiles import Profile
1919
from guidellm.benchmark.progress import BenchmarkerProgress
2020
from guidellm.benchmark.schemas import (
2121
BenchmarkAccumulatorT,
2222
BenchmarkConfig,
2323
BenchmarkT,
2424
)
25+
from guidellm.benchmark.schemas.base import TransientPhaseConfig
2526
from guidellm.logger import logger
2627
from guidellm.scheduler import (
2728
BackendInterface,
@@ -45,13 +46,12 @@ class Benchmarker(
4546
ThreadSafeSingletonMixin,
4647
):
4748
"""
48-
Abstract benchmark orchestrator for request processing workflows.
49+
Orchestrates benchmark execution across scheduling strategies.
4950
50-
Coordinates benchmarking runs across scheduling strategies, aggregating
51-
metrics and compiling results. Manages the complete benchmark lifecycle
52-
from request submission through result compilation while implementing a
53-
thread-safe singleton pattern for consistent state across concurrent
54-
operations.
51+
Coordinates benchmarking runs by managing request scheduling, metric aggregation,
52+
and result compilation. Implements a thread-safe singleton pattern to ensure
53+
consistent state management across concurrent operations while supporting multiple
54+
scheduling strategies and execution environments.
5555
"""
5656

5757
async def run(
@@ -62,29 +62,30 @@ async def run(
6262
backend: BackendInterface[RequestT, ResponseT],
6363
profile: Profile,
6464
environment: Environment,
65+
warmup: TransientPhaseConfig,
66+
cooldown: TransientPhaseConfig,
67+
sample_requests: int | None = 20,
68+
prefer_response_metrics: bool = True,
6569
progress: (
6670
BenchmarkerProgress[BenchmarkAccumulatorT, BenchmarkT] | None
6771
) = None,
68-
sample_requests: int | None = 20,
69-
warmup: float | None = None,
70-
cooldown: float | None = None,
71-
prefer_response_metrics: bool = True,
7272
) -> AsyncIterator[BenchmarkT]:
7373
"""
74-
Execute benchmark runs across scheduling strategies defined in the profile.
74+
Execute benchmark runs across scheduling strategies in the profile.
7575
7676
:param accumulator_class: Class for accumulating metrics during execution
7777
:param benchmark_class: Class for constructing final benchmark results
7878
:param requests: Request datasets to process across strategies
7979
:param backend: Backend interface for executing requests
8080
:param profile: Profile defining scheduling strategies and constraints
8181
:param environment: Environment for execution coordination
82-
:param progress: Optional tracker for benchmark lifecycle events
83-
:param sample_requests: Number of requests to sample for estimation
84-
:param warmup: Warmup duration in seconds before benchmarking
85-
:param cooldown: Cooldown duration in seconds after benchmarking
82+
:param warmup: Warmup phase configuration before benchmarking
83+
:param cooldown: Cooldown phase configuration after benchmarking
84+
:param sample_requests: Number of requests to sample for estimation,
85+
defaults to 20
8686
:param prefer_response_metrics: Whether to prefer response metrics over
87-
request metrics
87+
request metrics, defaults to True
88+
:param progress: Optional tracker for benchmark lifecycle events
8889
:yield: Compiled benchmark result for each strategy execution
8990
:raises Exception: If benchmark execution or compilation fails
9091
"""
@@ -138,7 +139,6 @@ async def run(
138139
requests=requests,
139140
backend=backend,
140141
strategy=strategy,
141-
startup_duration=warmup if warmup and warmup >= 1 else 0.0,
142142
env=environment,
143143
**constraints or {},
144144
):

src/guidellm/benchmark/entrypoints.py

Lines changed: 35 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -22,14 +22,15 @@
2222
from guidellm.backends import Backend, BackendType
2323
from guidellm.benchmark.benchmarker import Benchmarker
2424
from guidellm.benchmark.outputs import GenerativeBenchmarkerOutput
25-
from guidellm.benchmark.profile import Profile, ProfileType
25+
from guidellm.benchmark.profiles import Profile, ProfileType
2626
from guidellm.benchmark.progress import GenerativeConsoleBenchmarkerProgress
2727
from guidellm.benchmark.schemas import (
2828
BenchmarkGenerativeTextArgs,
2929
GenerativeBenchmark,
3030
GenerativeBenchmarkAccumulator,
3131
GenerativeBenchmarksReport,
3232
)
33+
from guidellm.benchmark.schemas.base import TransientPhaseConfig
3334
from guidellm.data import (
3435
DataLoader,
3536
DatasetPreprocessor,
@@ -312,6 +313,7 @@ async def resolve_profile(
312313
profile: StrategyType | ProfileType | Profile,
313314
rate: list[float] | None,
314315
random_seed: int,
316+
rampup: TransientPhaseConfig,
315317
constraints: MutableMapping[str, ConstraintInitializer | Any],
316318
max_seconds: int | float | None,
317319
max_requests: int | None,
@@ -330,6 +332,7 @@ async def resolve_profile(
330332
:param profile: Profile type identifier or pre-configured Profile instance
331333
:param rate: Request rate(s) for the benchmark execution
332334
:param random_seed: Seed for reproducible random operations
335+
:param rampup: Ramp-up phase configuration for the benchmark execution
333336
:param constraints: Dictionary of constraint initializers for benchmark limits
334337
:param max_seconds: Maximum duration in seconds for the benchmark
335338
:param max_requests: Maximum number of requests to process
@@ -355,18 +358,28 @@ async def resolve_profile(
355358
}.items():
356359
if val is not None:
357360
constraints[key] = val
361+
rampup_duration, _ = rampup.compute_limits(
362+
max_requests=max_requests, max_seconds=max_seconds
363+
)
364+
358365
if not isinstance(profile, Profile):
359366
profile = Profile.create(
360367
rate_type=profile,
361368
rate=rate,
362369
random_seed=random_seed,
370+
rampup_duration=rampup_duration or 0.0,
363371
constraints={**constraints},
364372
)
365373
elif constraints:
366374
raise ValueError(
367375
"Constraints must be empty when providing a Profile instance. "
368376
f"Provided constraints: {constraints} ; provided profile: {profile}"
369377
)
378+
elif rampup_duration is not None:
379+
raise ValueError(
380+
"Ramp-up duration must not be set when providing a Profile instance. "
381+
f"Provided rampup: {rampup} ; provided profile: {profile}"
382+
)
370383

371384
if console_step:
372385
console_step.finish(
@@ -459,10 +472,29 @@ async def benchmark_generative_text(
459472
console=console,
460473
**(args.dataloader_kwargs or {}),
461474
)
475+
476+
rampup = TransientPhaseConfig.create_from_value(args.rampup)
477+
rampup.mode = "duration"
478+
warmup = TransientPhaseConfig.create_from_value(args.warmup)
479+
cooldown = TransientPhaseConfig.create_from_value(args.cooldown)
480+
if console:
481+
console.print_update(
482+
title="Resolved transient phase configurations",
483+
details="\n".join(
484+
[
485+
f"Rampup: {rampup}",
486+
f"Warmup: {warmup}",
487+
f"Cooldown: {cooldown}",
488+
]
489+
),
490+
status="success",
491+
)
492+
462493
profile = await resolve_profile(
463494
profile=args.profile,
464495
rate=args.rate,
465496
random_seed=args.random_seed,
497+
rampup=rampup,
466498
constraints=constraints,
467499
max_seconds=args.max_seconds,
468500
max_requests=args.max_requests,
@@ -496,8 +528,8 @@ async def benchmark_generative_text(
496528
environment=NonDistributedEnvironment(),
497529
progress=progress,
498530
sample_requests=args.sample_requests,
499-
warmup=args.warmup,
500-
cooldown=args.cooldown,
531+
warmup=warmup,
532+
cooldown=cooldown,
501533
prefer_response_metrics=args.prefer_response_metrics,
502534
):
503535
if benchmark:

0 commit comments

Comments
 (0)