
Commit 518f3b8

[Benchmarks] add RR benchmarks with test

1 parent c238ace

File tree

4 files changed: +252 -4 lines changed

devops/scripts/benchmarks/benches/compute.py

Lines changed: 90 additions & 0 deletions
@@ -269,6 +269,56 @@ def benchmarks(self) -> list[Benchmark]:
                 )
             )
 
+        record_and_replay_params = product(list(PROFILERS), [0, 1], [0, 1])
+        for profiler_type, emulate, instantiate in record_and_replay_params:
+            benches += [
+                RecordAndReplay(
+                    self,
+                    RUNTIMES.LEVEL_ZERO,
+                    profiler_type,
+                    nForksInLvl=2,
+                    nLvls=4,
+                    nCmdSetsInLvl=10,
+                    nInstantiations=10,
+                    nAppendKern=10,
+                    nAppendCopy=1,
+                    mRec=1,
+                    mInst=instantiate,
+                    mDest=0,
+                    emulate=emulate,
+                ),
+                RecordAndReplay(
+                    self,
+                    RUNTIMES.LEVEL_ZERO,
+                    profiler_type,
+                    nForksInLvl=1,
+                    nLvls=1,
+                    nCmdSetsInLvl=10,
+                    nInstantiations=10,
+                    nAppendKern=10,
+                    nAppendCopy=10,
+                    mRec=1,
+                    mInst=instantiate,
+                    mDest=0,
+                    emulate=emulate,
+                ),
+                RecordAndReplay(
+                    self,
+                    RUNTIMES.LEVEL_ZERO,
+                    profiler_type,
+                    nForksInLvl=1,
+                    nLvls=4,
+                    nCmdSetsInLvl=1,
+                    nInstantiations=0,
+                    nAppendKern=1,
+                    nAppendCopy=0,
+                    mRec=1,
+                    mInst=instantiate,
+                    mDest=0,
+                    emulate=emulate,
+                ),
+            ]
+
         # Add UR-specific benchmarks
         benches += [
             # TODO: multithread_benchmark_ur fails with segfault
@@ -647,6 +697,46 @@ def bin_args(self, run_trace: TracingType = TracingType.NONE) -> list[str]:
         ]
 
 
+class RecordAndReplay(ComputeBenchmark):
+    def __init__(self, bench, runtime: RUNTIMES, profiler_type, **kwargs):
+        self.rr_params = kwargs
+        self.iterations_regular = 1000
+        self.iterations_trace = 10
+        super().__init__(
+            bench,
+            f"record_and_replay_benchmark_{runtime.value}",
+            "RecordGraph",
+            runtime,
+            profiler_type,
+        )
+
+    def name(self):
+        ret = [self.profiler_type.value]
+        for k, v in self.rr_params.items():
+            if k[0] == "n":  # numeric parameter
+                ret.append(f"{k[1:]} {v}")
+            elif k[0] == "m":
+                if v != 0:  # measure parameter
+                    ret.append(f"{k[1:]}")
+            else:  # boolean parameter
+                if v != 0:
+                    ret.append(k)
+        ret.sort()
+        return f"{self.bench_name} {self.test} " + ", ".join(ret)
+
+    def display_name(self) -> str:
+        return self.name()
+
+    def description(self) -> str:
+        return f"{self.runtime.value} Graphs record and replay"
+
+    def get_tags(self):
+        return ["L0"]
+
+    def bin_args(self, run_trace: TracingType = TracingType.NONE) -> list[str]:
+        return [f"--{k}={v}" for k, v in self.rr_params.items()]
+
+
 class QueueInOrderMemcpy(ComputeBenchmark):
     def __init__(self, bench, isCopyOnly, source, destination, size, profiler_type):
         self.isCopyOnly = isCopyOnly
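To see how the keyword arguments above turn into the benchmark name that the E2E test later filters on, here is a minimal standalone re-creation of the same naming rule. The RecordAndReplay.name() method in the diff is the authoritative version; the helper rr_name below and the "timer" profiler value are illustrative assumptions taken from the expected case name in the test.

    # Sketch of the naming rule: "n*" kwargs become "<name> <value>",
    # "m*" kwargs are listed (without the prefix) only when non-zero,
    # remaining flags are listed by key when non-zero; entries are sorted.
    def rr_name(profiler_value, bench_name, test, **rr_params):  # hypothetical helper
        parts = [profiler_value]
        for k, v in rr_params.items():
            if k[0] == "n":          # numeric parameter
                parts.append(f"{k[1:]} {v}")
            elif k[0] == "m":        # measure flag
                if v != 0:
                    parts.append(k[1:])
            elif v != 0:             # boolean-style flag
                parts.append(k)
        parts.sort()
        return f"{bench_name} {test} " + ", ".join(parts)

    # Reproduces the case name asserted in test_record_and_replay below
    print(
        rr_name(
            "timer", "record_and_replay_benchmark_l0", "RecordGraph",
            nForksInLvl=2, nLvls=4, nCmdSetsInLvl=10, nInstantiations=10,
            nAppendKern=10, nAppendCopy=1, mRec=1, mInst=0, mDest=0, emulate=0,
        )
    )
    # -> record_and_replay_benchmark_l0 RecordGraph AppendCopy 1, AppendKern 10,
    #    CmdSetsInLvl 10, ForksInLvl 2, Instantiations 10, Lvls 4, Rec, timer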

devops/scripts/benchmarks/git_project.py

Lines changed: 6 additions & 1 deletion

@@ -2,7 +2,7 @@
 # Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions.
 # See LICENSE.TXT
 # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-
+import os
 from pathlib import Path
 import shutil
 

@@ -167,6 +167,11 @@ def _setup_repo(self) -> bool:
         Returns:
             bool: True if the repository was cloned or updated, False if it was already up-to-date.
         """
+        if os.environ.get("LLVM_BENCHMARKS_UNIT_TESTING") == "1":
+            log.debug(
+                f"Skipping git operations during unit testing of {self._name} (LLVM_BENCHMARKS_UNIT_TESTING=1)."
+            )
+            return False
         if not self.src_dir.exists():
             self._git_clone()
             return True
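The new guard gives any caller an escape hatch that keeps _setup_repo from cloning or updating repositories: export the variable before the benchmark driver runs. A minimal sketch of that usage, assuming an invocation similar to the E2E harness further down; the workdir path and the --dry-run flag here are illustrative assumptions, not the test's actual arguments.

    import os
    import subprocess

    # Run the benchmark driver with git clones/updates disabled; _setup_repo()
    # returns False early when LLVM_BENCHMARKS_UNIT_TESTING=1 (see guard above).
    env = dict(os.environ, LLVM_BENCHMARKS_UNIT_TESTING="1")
    subprocess.run(
        ["./devops/scripts/benchmarks/main.py", "./tmp-workdir", "--dry-run"],  # illustrative args
        env=env,
        check=True,
    )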

devops/scripts/benchmarks/main.py

Lines changed: 10 additions & 3 deletions

@@ -137,10 +137,13 @@ def process_results(
             stddev_threshold_override
             if stddev_threshold_override is not None
             else options.stddev_threshold
-        ) * mean_value
+        )
+        threshold_scaled = threshold * mean_value
 
-        if stddev > threshold:
-            log.warning(f"stddev {stddev} above the threshold {threshold} for {label}")
+        if stddev > threshold_scaled:
+            log.warning(
+                f"stddev {stddev} above the threshold {threshold_scaled} ({threshold} times {mean_value}) for {label}"
+            )
             valid_results = False
 
         rlist.sort(key=lambda res: res.value)

@@ -228,6 +231,9 @@ def main(directory, additional_env_vars, compare_names, filter):
             benchmark for benchmark in s.benchmarks() if benchmark.enabled()
         ]
         if filter:
+            log.debug(
+                f"Filtering {len(suite_benchmarks)} benchmarks in {s.name()} suite for {filter.pattern}"
+            )
             suite_benchmarks = [
                 benchmark
                 for benchmark in suite_benchmarks

@@ -713,6 +719,7 @@ def validate_and_parse_env_args(env_args):
     options.dry_run = args.dry_run
     options.umf = args.umf
     options.iterations_stddev = args.iterations_stddev
+    options.stddev_threshold = args.stddev_threshold
     options.build_igc = args.build_igc
     options.current_run_name = args.relative_perf
     options.cudnn_directory = args.cudnn_directory
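The first hunk changes the comparison to scale the relative threshold by the mean instead of folding the multiplication into threshold itself, so the warning can report both the raw ratio and the scaled value. A small worked sketch with made-up numbers (the variable names mirror the diff; the values are illustrative only):

    # Illustrative numbers: a 2% relative threshold on a 1500 ns mean allows
    # a standard deviation of up to 30 ns before the run is flagged.
    stddev_threshold_override = None
    options_stddev_threshold = 0.02   # stand-in for options.stddev_threshold
    mean_value = 1500.0               # ns
    stddev = 45.0                     # ns

    threshold = (
        stddev_threshold_override
        if stddev_threshold_override is not None
        else options_stddev_threshold
    )
    threshold_scaled = threshold * mean_value  # 30.0

    if stddev > threshold_scaled:
        print(f"stddev {stddev} above the threshold {threshold_scaled} "
              f"({threshold} times {mean_value})")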
Lines changed: 146 additions & 0 deletions

@@ -0,0 +1,146 @@
+import os
+import shutil
+import unittest
+import logging
+
+import subprocess
+import json
+from collections import namedtuple
+
+DataJson = namedtuple("DataJson", ["runs", "metadata", "tags", "names"])
+DataJsonRun = namedtuple("DataJsonRun", ["name", "results"])
+DataJsonResult = namedtuple(
+    "DataJsonResult", ["name", "label", "suite", "value", "unit"]
+)
+
+
+class App:
+    def __init__(self):
+        self.TMP_DIR = os.path.dirname(__file__)
+        self.OUTPUT_DIR = os.path.join(self.TMP_DIR, "tmp-output")
+        self.RESULTS_DIR = os.path.join(self.TMP_DIR, "tmp-results")
+        self.WORKDIR_DIR = os.path.join(self.TMP_DIR, "tmp-workdir")
+
+    def prepare_dirs(self):
+        for d in [self.RESULTS_DIR, self.OUTPUT_DIR, self.WORKDIR_DIR]:
+            os.makedirs(d)
+
+        # when UT does not want to build compute-benchmarks from scratch, it can provide prebuilt path
+        cb_targetpath = os.environ.get("COMPUTE_BENCHMARKS_BUILD_PATH")
+        if cb_targetpath and os.path.isdir(cb_targetpath):
+            cb_build_dir = os.path.join(self.WORKDIR_DIR, "compute-benchmarks-build")
+            os.symlink(cb_targetpath, cb_build_dir)
+            with open(
+                os.path.join(self.WORKDIR_DIR, "BENCH_WORKDIR_VERSION"), "w"
+            ) as f:
+                f.write("2.0")  # TODO: take from main.INTERNAL_WORKDIR_VERSION
+
+    def remove_dirs(self):
+        for d in [self.RESULTS_DIR, self.OUTPUT_DIR, self.WORKDIR_DIR]:
+            if os.path.exists(d):
+                shutil.rmtree(d)
+
+    def run_main(self, *args):
+
+        # TODO: not yet tested: "--detect-version", "sycl,compute_runtime"
+
+        return subprocess.run(
+            [
+                "./devops/scripts/benchmarks/main.py",
+                self.WORKDIR_DIR,
+                "--sycl",
+                os.environ.get("ONEAPI_ROOT"),
+                "--adapter",
+                "opencl",
+                "--save",
+                "testplik",
+                "--output-html",
+                "remote",
+                "--results-dir",
+                self.RESULTS_DIR,
+                "--output-dir",
+                self.OUTPUT_DIR,
+                "--preset",
+                "Minimal",
+                "--timestamp-override",
+                "20240102_030405",
+                "--stddev-threshold",
+                "999999999.9",
+                "--exit-on-failure",
+                *args,
+            ]
+        )
+
+    def get_output(self):
+        output_file = os.path.join(self.OUTPUT_DIR, "data.json")
+        with open(output_file) as f:
+            out = json.load(f)
+        return DataJson(
+            runs=[
+                DataJsonRun(
+                    name=run["name"],
+                    results=[
+                        DataJsonResult(
+                            name=r["name"],
+                            label=r["label"],
+                            suite=r["suite"],
+                            value=r["value"],
+                            unit=r["unit"],
+                        )
+                        for r in run["results"]
+                    ],
+                )
+                for run in out["benchmarkRuns"]
+            ],
+            metadata=out["benchmarkMetadata"],
+            tags=out["benchmarkTags"],
+            names=out["defaultCompareNames"],
+        )
+
+
+# add "--verbose" for debug logs
+
+
+class TestE2E(unittest.TestCase):
+    def setUp(self):
+        # Load test data
+        self.app = App()
+        self.app.remove_dirs()
+        self.app.prepare_dirs()
+
+        # clean directory with input, output
+
+    def tearDown(self):
+        self.app.remove_dirs()
+
+    def test_record_and_replay(self):
+        caseName = "RecordGraph AppendCopy 1, AppendKern 10, CmdSetsInLvl 10, ForksInLvl 2, Instantiations 10, Lvls 4, Rec, timer"
+        run_result = self.app.run_main("--filter", caseName)
+        self.assertEqual(run_result.returncode, 0, "Subprocess did not exit cleanly")
+
+        out = self.app.get_output()
+
+        testName = "record_and_replay_benchmark_l0 " + caseName
+        self.assertIn(testName, [r.name for r in out.runs[0].results])
+
+        metadata = out.metadata[testName]
+        self.assertEqual(metadata["type"], "benchmark")
+        self.assertEqual(set(metadata["tags"]), {"L0"})
+
+    def test_submit_kernel(self):
+        caseName = "SubmitKernel out of order with measure completion KernelExecTime=20"
+        run_result = self.app.run_main("--filter", caseName + "$")
+        self.assertEqual(run_result.returncode, 0, "Subprocess did not exit cleanly")
+
+        out = self.app.get_output()
+
+        testName = "api_overhead_benchmark_l0 " + caseName
+        self.assertIn(testName, [r.name for r in out.runs[0].results])
+
+        metadata = out.metadata[testName]
+        self.assertEqual(metadata["type"], "benchmark")
+        self.assertEqual(set(metadata["tags"]), {"L0", "latency", "micro", "submit"})
+
+
+if __name__ == "__main__":
+    unittest.main()
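For completeness, a sketch of how this E2E suite might be launched locally. The environment variable names come from the code above; the concrete paths and the test file's location are placeholders, since the new file's path is not captured in this view.

    import os
    import subprocess

    # Placeholders: point ONEAPI_ROOT at an existing oneAPI install and,
    # optionally, COMPUTE_BENCHMARKS_BUILD_PATH at a prebuilt compute-benchmarks tree.
    env = dict(
        os.environ,
        ONEAPI_ROOT="/opt/intel/oneapi",
        COMPUTE_BENCHMARKS_BUILD_PATH="/path/to/compute-benchmarks/build",
    )

    # Substitute the real path of the new test file for this placeholder.
    subprocess.run(
        ["python", "-m", "unittest", "-v", "path/to/test_e2e.py"],
        env=env,
        check=True,
    )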
