vllm-project
diff --git a/src/guidellm/__main__.py b/src/guidellm/__main__.py
Lines changed: 29 additions & 6 deletions
diff --git a/src/guidellm/benchmark/__init__.py b/src/guidellm/benchmark/__init__.py
Lines changed: 1 addition & 1 deletion
diff --git a/src/guidellm/benchmark/benchmarker.py b/src/guidellm/benchmark/benchmarker.py
Lines changed: 23 additions & 23 deletions
diff --git a/src/guidellm/benchmark/entrypoints.py b/src/guidellm/benchmark/entrypoints.py
Lines changed: 35 additions & 3 deletions
@@ -291,22 +291,45 @@ def benchmark():
     "--warmup",
     "--warmup-percent",  # legacy alias
     "warmup",
-    type=float,
     default=BenchmarkGenerativeTextArgs.get_default("warmup"),
+    callback=cli_tools.parse_json,
     help=(
-        "Warmup specification: if in (0,1) = percent, if >=1 = number of "
-        "requests/seconds (depends on active constraint)."
+        "Warmup specification: int, float, or dict as string "
+        "(json or key=value). "
+        "Controls time or requests before measurement starts. "
+        "Numeric in (0, 1): percent of duration or request count. "
+        "Numeric >=1: duration in seconds or request count. "
+        "Advanced config: see TransientPhaseConfig schema."
     ),
 )
 @click.option(
     "--cooldown",
     "--cooldown-percent",  # legacy alias
     "cooldown",
-    type=float,
     default=BenchmarkGenerativeTextArgs.get_default("cooldown"),
+    callback=cli_tools.parse_json,
+    help=(
+        "Cooldown specification: int, float, or dict as string "
+        "(json or key=value). "
+        "Controls time or requests after measurement ends. "
+        "Numeric in (0, 1): percent of duration or request count. "
+        "Numeric >=1: duration in seconds or request count. "
+        "Advanced config: see TransientPhaseConfig schema."
+    ),
+)
+@click.option(
+    "--rampup",
+    default=BenchmarkGenerativeTextArgs.get_default("rampup"),
+    callback=cli_tools.parse_json,
     help=(
-        "Cooldown specification: if in (0,1) = percent, if >=1 = number of "
-        "requests/seconds (depends on active constraint)."
+        "Rampup specification: int, float, or dict as string "
+        "(json or key=value). "
+        "Controls time to linearly ramp up requests. "
+        "Only for Throughput/Concurrent strategies, "
+        "not Synchronous/Rate-based. "
+        "Numeric in (0, 1): percent of duration. "
+        "Numeric >=1: duration in seconds. "
+        "Advanced config: see TransientPhaseConfig schema."
     ),
 )
 @click.option(
 
@@ -18,7 +18,7 @@
     GenerativeBenchmarkerHTML,
     GenerativeBenchmarkerOutput,
 )
-from .profile import (
+from .profiles import (
     AsyncProfile,
     ConcurrentProfile,
     Profile,
 
@@ -1,11 +1,11 @@
 """
 Benchmark execution orchestration and lifecycle management.
 
-Provides the core benchmarking engine coordinating request scheduling,
-data aggregation, and result compilation across execution strategies
-and environments. The Benchmarker manages the complete benchmark lifecycle
-from request submission through result compilation while supporting
-thread-safe singleton operations for consistent state management.
+Provides the core benchmarking engine that coordinates request scheduling,
+data aggregation, and result compilation across execution strategies and
+environments. The Benchmarker manages the complete benchmark lifecycle from
+request submission through result compilation while implementing thread-safe
+singleton operations for consistent state management across concurrent workflows.
 """
 
 from __future__ import annotations
@@ -15,13 +15,14 @@
 from collections.abc import AsyncIterator, Iterable
 from typing import Generic
 
-from guidellm.benchmark.profile import Profile
+from guidellm.benchmark.profiles import Profile
 from guidellm.benchmark.progress import BenchmarkerProgress
 from guidellm.benchmark.schemas import (
     BenchmarkAccumulatorT,
     BenchmarkConfig,
     BenchmarkT,
 )
+from guidellm.benchmark.schemas.base import TransientPhaseConfig
 from guidellm.logger import logger
 from guidellm.scheduler import (
     BackendInterface,
@@ -45,13 +46,12 @@ class Benchmarker(
     ThreadSafeSingletonMixin,
 ):
     """
-    Abstract benchmark orchestrator for request processing workflows.
+    Orchestrates benchmark execution across scheduling strategies.
 
-    Coordinates benchmarking runs across scheduling strategies, aggregating
-    metrics and compiling results. Manages the complete benchmark lifecycle
-    from request submission through result compilation while implementing a
-    thread-safe singleton pattern for consistent state across concurrent
-    operations.
+    Coordinates benchmarking runs by managing request scheduling, metric aggregation,
+    and result compilation. Implements a thread-safe singleton pattern to ensure
+    consistent state management across concurrent operations while supporting multiple
+    scheduling strategies and execution environments.
     """
 
     async def run(
@@ -62,29 +62,30 @@ async def run(
         backend: BackendInterface[RequestT, ResponseT],
         profile: Profile,
         environment: Environment,
+        warmup: TransientPhaseConfig,
+        cooldown: TransientPhaseConfig,
+        sample_requests: int | None = 20,
+        prefer_response_metrics: bool = True,
         progress: (
             BenchmarkerProgress[BenchmarkAccumulatorT, BenchmarkT] | None
         ) = None,
-        sample_requests: int | None = 20,
-        warmup: float | None = None,
-        cooldown: float | None = None,
-        prefer_response_metrics: bool = True,
     ) -> AsyncIterator[BenchmarkT]:
         """
-        Execute benchmark runs across scheduling strategies defined in the profile.
+        Execute benchmark runs across scheduling strategies in the profile.
 
         :param accumulator_class: Class for accumulating metrics during execution
         :param benchmark_class: Class for constructing final benchmark results
         :param requests: Request datasets to process across strategies
         :param backend: Backend interface for executing requests
         :param profile: Profile defining scheduling strategies and constraints
         :param environment: Environment for execution coordination
-        :param progress: Optional tracker for benchmark lifecycle events
-        :param sample_requests: Number of requests to sample for estimation
-        :param warmup: Warmup duration in seconds before benchmarking
-        :param cooldown: Cooldown duration in seconds after benchmarking
+        :param warmup: Warmup phase configuration before benchmarking
+        :param cooldown: Cooldown phase configuration after benchmarking
+        :param sample_requests: Number of requests to sample for estimation,
+            defaults to 20
         :param prefer_response_metrics: Whether to prefer response metrics over
-            request metrics
+            request metrics, defaults to True
+        :param progress: Optional tracker for benchmark lifecycle events
         :yield: Compiled benchmark result for each strategy execution
         :raises Exception: If benchmark execution or compilation fails
         """
@@ -138,7 +139,6 @@ async def run(
                     requests=requests,
                     backend=backend,
                     strategy=strategy,
-                    startup_duration=warmup if warmup and warmup >= 1 else 0.0,
                     env=environment,
                     **constraints or {},
                 ):
 
@@ -22,14 +22,15 @@
 from guidellm.backends import Backend, BackendType
 from guidellm.benchmark.benchmarker import Benchmarker
 from guidellm.benchmark.outputs import GenerativeBenchmarkerOutput
-from guidellm.benchmark.profile import Profile, ProfileType
+from guidellm.benchmark.profiles import Profile, ProfileType
 from guidellm.benchmark.progress import GenerativeConsoleBenchmarkerProgress
 from guidellm.benchmark.schemas import (
     BenchmarkGenerativeTextArgs,
     GenerativeBenchmark,
     GenerativeBenchmarkAccumulator,
     GenerativeBenchmarksReport,
 )
+from guidellm.benchmark.schemas.base import TransientPhaseConfig
 from guidellm.data import (
     DataLoader,
     DatasetPreprocessor,
@@ -312,6 +313,7 @@ async def resolve_profile(
     profile: StrategyType | ProfileType | Profile,
     rate: list[float] | None,
     random_seed: int,
+    rampup: TransientPhaseConfig,
     constraints: MutableMapping[str, ConstraintInitializer | Any],
     max_seconds: int | float | None,
     max_requests: int | None,
@@ -330,6 +332,7 @@ async def resolve_profile(
     :param profile: Profile type identifier or pre-configured Profile instance
     :param rate: Request rate(s) for the benchmark execution
     :param random_seed: Seed for reproducible random operations
+    :param rampup: Ramp-up phase configuration for the benchmark execution
     :param constraints: Dictionary of constraint initializers for benchmark limits
     :param max_seconds: Maximum duration in seconds for the benchmark
     :param max_requests: Maximum number of requests to process
@@ -355,18 +358,28 @@ async def resolve_profile(
     }.items():
         if val is not None:
             constraints[key] = val
+    rampup_duration, _ = rampup.compute_limits(
+        max_requests=max_requests, max_seconds=max_seconds
+    )
+
     if not isinstance(profile, Profile):
         profile = Profile.create(
             rate_type=profile,
             rate=rate,
             random_seed=random_seed,
+            rampup_duration=rampup_duration or 0.0,
             constraints={**constraints},
         )
     elif constraints:
         raise ValueError(
             "Constraints must be empty when providing a Profile instance. "
             f"Provided constraints: {constraints} ; provided profile: {profile}"
         )
+    elif rampup_duration is not None:
+        raise ValueError(
+            "Ramp-up duration must not be set when providing a Profile instance. "
+            f"Provided rampup: {rampup} ; provided profile: {profile}"
+        )
 
     if console_step:
         console_step.finish(
@@ -459,10 +472,29 @@ async def benchmark_generative_text(
         console=console,
         **(args.dataloader_kwargs or {}),
     )
+
+    rampup = TransientPhaseConfig.create_from_value(args.rampup)
+    rampup.mode = "duration"
+    warmup = TransientPhaseConfig.create_from_value(args.warmup)
+    cooldown = TransientPhaseConfig.create_from_value(args.cooldown)
+    if console:
+        console.print_update(
+            title="Resolved transient phase configurations",
+            details="\n".join(
+                [
+                    f"Rampup: {rampup}",
+                    f"Warmup: {warmup}",
+                    f"Cooldown: {cooldown}",
+                ]
+            ),
+            status="success",
+        )
+
     profile = await resolve_profile(
         profile=args.profile,
         rate=args.rate,
         random_seed=args.random_seed,
+        rampup=rampup,
         constraints=constraints,
         max_seconds=args.max_seconds,
         max_requests=args.max_requests,
@@ -496,8 +528,8 @@ async def benchmark_generative_text(
         environment=NonDistributedEnvironment(),
         progress=progress,
         sample_requests=args.sample_requests,
-        warmup=args.warmup,
-        cooldown=args.cooldown,
+        warmup=warmup,
+        cooldown=cooldown,
         prefer_response_metrics=args.prefer_response_metrics,
     ):
         if benchmark:
| Original file line number | Diff line number | Diff line change |
| --- | --- | --- |
|  |  | `@@ -18,7 +18,7 @@` |
| `18` | `18` | `GenerativeBenchmarkerHTML,` |
| `19` | `19` | `GenerativeBenchmarkerOutput,` |
| `20` | `20` | `)` |
| `21` |  | `-from .profile import (` |
|  | `21` | `+from .profiles import (` |
| `22` | `22` | `AsyncProfile,` |
| `23` | `23` | `ConcurrentProfile,` |
| `24` | `24` | `Profile,` |