From 020578fd99b6f38239deed23a10b16490994e36f Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 9 Oct 2025 06:42:42 +0000 Subject: [PATCH 1/8] Initial plan From 16ce1d8de8c148c125d627238e98bf190c74641e Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 9 Oct 2025 06:49:29 +0000 Subject: [PATCH 2/8] Add preamble function to reset locks in examples 10 and 11 Co-authored-by: neoblizz <9790745+neoblizz@users.noreply.github.com> --- .../10_gemm_all_scatter_wg_specialization/benchmark.py | 7 ++++++- .../11_gemm_all_scatter_producer_consumer/benchmark.py | 7 ++++++- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/examples/10_gemm_all_scatter_wg_specialization/benchmark.py b/examples/10_gemm_all_scatter_wg_specialization/benchmark.py index bb49bacb..515acd40 100755 --- a/examples/10_gemm_all_scatter_wg_specialization/benchmark.py +++ b/examples/10_gemm_all_scatter_wg_specialization/benchmark.py @@ -135,6 +135,11 @@ def _worker(local_rank: int, world_size: int, init_url: str, args: dict): # Allocate Timestamps timestamps = Timestamps(num_tiles=total_tiles) + def preamble(): + shmem.barrier() + locks.zero_() + shmem.barrier() + def run_experiment(): nonlocal local_C nonlocal global_C @@ -222,7 +227,7 @@ def run_experiment(): matmul.set_debug(False) shmem.info("Benchmarking...") perf = lambda ms: 2 * args["M"] * args["N"] * args["K"] * 1e-12 / (ms * 1e-3) - triton_ms = iris.do_bench(run_experiment, shmem.barrier) + triton_ms = iris.do_bench(run_experiment, shmem.barrier, preamble) triton_tflops = perf(triton_ms) algo_string = "all_scatter" shmem.info( diff --git a/examples/11_gemm_all_scatter_producer_consumer/benchmark.py b/examples/11_gemm_all_scatter_producer_consumer/benchmark.py index 8059c26f..264d5f79 100755 --- a/examples/11_gemm_all_scatter_producer_consumer/benchmark.py +++ b/examples/11_gemm_all_scatter_producer_consumer/benchmark.py @@ -144,6 +144,11 @@ def _worker(local_rank: int, world_size: int, init_url: str, args: dict): # Allocate Timestamps timestamps = Timestamps(num_tiles=total_tiles) + def preamble(): + shmem.barrier() + locks.zero_() + shmem.barrier() + def run_experiment(): nonlocal C nonlocal kernel_timing @@ -253,7 +258,7 @@ def run_experiment(): matmul.set_debug(False) shmem.info("Benchmarking...") perf = lambda ms: 2 * args["M"] * args["N"] * args["K"] * 1e-12 / (ms * 1e-3) - triton_ms = iris.do_bench(run_experiment, shmem.barrier) + triton_ms = iris.do_bench(run_experiment, shmem.barrier, preamble) triton_tflops = perf(triton_ms) algo_string = "all_scatter" shmem.info( From 8d15de7602af8c0f51beb25c3621420d13aa742f Mon Sep 17 00:00:00 2001 From: Muhammad Awad <112003944+mawad-amd@users.noreply.github.com> Date: Thu, 9 Oct 2025 15:28:07 -0700 Subject: [PATCH 3/8] Use proper synchronization instead of loads and stores (#215) --- examples/10_gemm_all_scatter_wg_specialization/benchmark.py | 2 +- .../gemm_all_scatter_wg_specialization.py | 5 ++--- examples/11_gemm_all_scatter_producer_consumer/benchmark.py | 2 +- .../gemm_all_scatter_producer_consumer.py | 5 ++--- 4 files changed, 6 insertions(+), 8 deletions(-) diff --git a/examples/10_gemm_all_scatter_wg_specialization/benchmark.py b/examples/10_gemm_all_scatter_wg_specialization/benchmark.py index af44fd8a..1b8417dc 100755 --- a/examples/10_gemm_all_scatter_wg_specialization/benchmark.py +++ b/examples/10_gemm_all_scatter_wg_specialization/benchmark.py @@ -136,7 +136,7 @@ def 
_worker(local_rank: int, world_size: int, init_url: str, args: dict): total_blocks_N = triton.cdiv(args["n"], args["BLK_N"]) total_tiles = total_blocks_M * total_blocks_N - locks = shmem.zeros((total_tiles,), device="cuda", dtype=torch.int8) + locks = shmem.zeros((total_tiles,), device="cuda", dtype=torch.int32) bias = None diff --git a/examples/10_gemm_all_scatter_wg_specialization/gemm_all_scatter_wg_specialization.py b/examples/10_gemm_all_scatter_wg_specialization/gemm_all_scatter_wg_specialization.py index ac2d2e35..cf6b1ca5 100644 --- a/examples/10_gemm_all_scatter_wg_specialization/gemm_all_scatter_wg_specialization.py +++ b/examples/10_gemm_all_scatter_wg_specialization/gemm_all_scatter_wg_specialization.py @@ -142,8 +142,7 @@ def persistent_gemm_all_scatter_wg_specialization( tl.atomic_max(mm_end_timestamp_ptr + tile_id, timestamp) tl.store(c_global + global_offset, c, mask=sub_mask, cache_modifier=".wt") - tl.debug_barrier() - tl.store(locks + tile_id, 1, cache_modifier=".wt") + tl.atomic_xchg(locks + tile_id, 1, sem="release", scope="gpu") else: # pid >= GEMM_SMS COMM_SMS = NUM_SMS - GEMM_SMS @@ -165,7 +164,7 @@ def persistent_gemm_all_scatter_wg_specialization( global_offset = rm[:, None] * stride_cm_global + (rn[None, :] + cur_rank * N) * stride_cn_global # End: masks/offset calculations. - while tl.load(locks + tile_id, cache_modifier=".cv", volatile=True) != 1: + while tl.atomic_xchg(locks + tile_id, 0, sem="acquire", scope="gpu") != 1: pass for remote_rank in range(world_size): diff --git a/examples/11_gemm_all_scatter_producer_consumer/benchmark.py b/examples/11_gemm_all_scatter_producer_consumer/benchmark.py index 6b92c728..561a37dc 100755 --- a/examples/11_gemm_all_scatter_producer_consumer/benchmark.py +++ b/examples/11_gemm_all_scatter_producer_consumer/benchmark.py @@ -136,7 +136,7 @@ def _worker(local_rank: int, world_size: int, init_url: str, args: dict): total_blocks_N = triton.cdiv(args["n"], args["BLK_N"]) total_tiles = total_blocks_M * total_blocks_N - locks = shmem.zeros((total_tiles,), device="cuda", dtype=torch.int8) + locks = shmem.zeros((total_tiles,), device="cuda", dtype=torch.int32) bias = None diff --git a/examples/11_gemm_all_scatter_producer_consumer/gemm_all_scatter_producer_consumer.py b/examples/11_gemm_all_scatter_producer_consumer/gemm_all_scatter_producer_consumer.py index a8311943..87d7f675 100644 --- a/examples/11_gemm_all_scatter_producer_consumer/gemm_all_scatter_producer_consumer.py +++ b/examples/11_gemm_all_scatter_producer_consumer/gemm_all_scatter_producer_consumer.py @@ -133,8 +133,7 @@ def persistent_gemm( tl.atomic_max(mm_end_timestamp_ptr + tile_id, timestamp) tl.store(C + global_offset, c, mask=sub_mask, cache_modifier=".wt") - tl.debug_barrier() - tl.store(locks + tile_id, 1, cache_modifier=".wt") + tl.atomic_xchg(locks + tile_id, 1, sem="release", scope="gpu") @triton.jit() @@ -185,7 +184,7 @@ def persistent_all_scatter( global_offset = rm[:, None] * stride_cm_global + (rn[None, :] + cur_rank * N) * stride_cn_global # End: masks/offset calculations. 
- while tl.load(locks + tile_id, cache_modifier=".cv", volatile=True) != 1: + while tl.atomic_xchg(locks + tile_id, 0, sem="acquire", scope="gpu") != 1: pass for remote_rank in range(world_size): From a5ec30c5edaa67c3013ccad2f632a1fdf20a6df9 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 10 Oct 2025 01:01:26 +0000 Subject: [PATCH 4/8] Fix atomic operations: use atomic_cas for non-destructive polling Co-authored-by: neoblizz <9790745+neoblizz@users.noreply.github.com> --- .../gemm_all_scatter_wg_specialization.py | 2 +- .../gemm_all_scatter_producer_consumer.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/10_gemm_all_scatter_wg_specialization/gemm_all_scatter_wg_specialization.py b/examples/10_gemm_all_scatter_wg_specialization/gemm_all_scatter_wg_specialization.py index cf6b1ca5..643768f7 100644 --- a/examples/10_gemm_all_scatter_wg_specialization/gemm_all_scatter_wg_specialization.py +++ b/examples/10_gemm_all_scatter_wg_specialization/gemm_all_scatter_wg_specialization.py @@ -164,7 +164,7 @@ def persistent_gemm_all_scatter_wg_specialization( global_offset = rm[:, None] * stride_cm_global + (rn[None, :] + cur_rank * N) * stride_cn_global # End: masks/offset calculations. - while tl.atomic_xchg(locks + tile_id, 0, sem="acquire", scope="gpu") != 1: + while tl.atomic_cas(locks + tile_id, 0, 0, sem="acquire", scope="gpu") != 1: pass for remote_rank in range(world_size): diff --git a/examples/11_gemm_all_scatter_producer_consumer/gemm_all_scatter_producer_consumer.py b/examples/11_gemm_all_scatter_producer_consumer/gemm_all_scatter_producer_consumer.py index 87d7f675..51bd0bd8 100644 --- a/examples/11_gemm_all_scatter_producer_consumer/gemm_all_scatter_producer_consumer.py +++ b/examples/11_gemm_all_scatter_producer_consumer/gemm_all_scatter_producer_consumer.py @@ -184,7 +184,7 @@ def persistent_all_scatter( global_offset = rm[:, None] * stride_cm_global + (rn[None, :] + cur_rank * N) * stride_cn_global # End: masks/offset calculations. - while tl.atomic_xchg(locks + tile_id, 0, sem="acquire", scope="gpu") != 1: + while tl.atomic_cas(locks + tile_id, 0, 0, sem="acquire", scope="gpu") != 1: pass for remote_rank in range(world_size): From f32e69a5254ffd00bb4058a935b034fae8b6e05b Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 10 Oct 2025 03:32:55 +0000 Subject: [PATCH 5/8] Fix atomic_cas polling: use (1, 0) to atomically consume the flag Co-authored-by: mawad-amd <112003944+mawad-amd@users.noreply.github.com> --- .../gemm_all_scatter_wg_specialization.py | 3 ++- .../gemm_all_scatter_producer_consumer.py | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/examples/10_gemm_all_scatter_wg_specialization/gemm_all_scatter_wg_specialization.py b/examples/10_gemm_all_scatter_wg_specialization/gemm_all_scatter_wg_specialization.py index 643768f7..5652a4ec 100644 --- a/examples/10_gemm_all_scatter_wg_specialization/gemm_all_scatter_wg_specialization.py +++ b/examples/10_gemm_all_scatter_wg_specialization/gemm_all_scatter_wg_specialization.py @@ -164,7 +164,8 @@ def persistent_gemm_all_scatter_wg_specialization( global_offset = rm[:, None] * stride_cm_global + (rn[None, :] + cur_rank * N) * stride_cn_global # End: masks/offset calculations. 
- while tl.atomic_cas(locks + tile_id, 0, 0, sem="acquire", scope="gpu") != 1: + # Poll until lock is set to 1, then atomically reset it to 0 + while tl.atomic_cas(locks + tile_id, 1, 0, sem="acquire", scope="gpu") != 1: pass for remote_rank in range(world_size): diff --git a/examples/11_gemm_all_scatter_producer_consumer/gemm_all_scatter_producer_consumer.py b/examples/11_gemm_all_scatter_producer_consumer/gemm_all_scatter_producer_consumer.py index 51bd0bd8..b101c85b 100644 --- a/examples/11_gemm_all_scatter_producer_consumer/gemm_all_scatter_producer_consumer.py +++ b/examples/11_gemm_all_scatter_producer_consumer/gemm_all_scatter_producer_consumer.py @@ -184,7 +184,8 @@ def persistent_all_scatter( global_offset = rm[:, None] * stride_cm_global + (rn[None, :] + cur_rank * N) * stride_cn_global # End: masks/offset calculations. - while tl.atomic_cas(locks + tile_id, 0, 0, sem="acquire", scope="gpu") != 1: + # Poll until lock is set to 1, then atomically reset it to 0 + while tl.atomic_cas(locks + tile_id, 1, 0, sem="acquire", scope="gpu") != 1: pass for remote_rank in range(world_size): From 7479b4be49b4e03dd4b1c8635d64de916b42f316 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 10 Oct 2025 04:30:15 +0000 Subject: [PATCH 6/8] Use volatile load spin loop followed by atomic_cas with dependency Co-authored-by: mawad-amd <112003944+mawad-amd@users.noreply.github.com> --- .../gemm_all_scatter_wg_specialization.py | 9 ++++++--- .../gemm_all_scatter_producer_consumer.py | 9 ++++++--- 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/examples/10_gemm_all_scatter_wg_specialization/gemm_all_scatter_wg_specialization.py b/examples/10_gemm_all_scatter_wg_specialization/gemm_all_scatter_wg_specialization.py index 5652a4ec..aac520da 100644 --- a/examples/10_gemm_all_scatter_wg_specialization/gemm_all_scatter_wg_specialization.py +++ b/examples/10_gemm_all_scatter_wg_specialization/gemm_all_scatter_wg_specialization.py @@ -164,9 +164,12 @@ def persistent_gemm_all_scatter_wg_specialization( global_offset = rm[:, None] * stride_cm_global + (rn[None, :] + cur_rank * N) * stride_cn_global # End: masks/offset calculations. - # Poll until lock is set to 1, then atomically reset it to 0 - while tl.atomic_cas(locks + tile_id, 1, 0, sem="acquire", scope="gpu") != 1: - pass + # Spin on volatile load until flag is non-zero (cheap) + flag_val = tl.load(locks + tile_id, cache_modifier=".cv", volatile=True) + while flag_val == 0: + flag_val = tl.load(locks + tile_id, cache_modifier=".cv", volatile=True) + # Use atomic_cas with dependency on loaded value to prevent reordering + tl.atomic_cas(locks + tile_id, flag_val, 0, sem="acquire", scope="gpu") for remote_rank in range(world_size): if remote_rank != cur_rank: diff --git a/examples/11_gemm_all_scatter_producer_consumer/gemm_all_scatter_producer_consumer.py b/examples/11_gemm_all_scatter_producer_consumer/gemm_all_scatter_producer_consumer.py index b101c85b..3620f061 100644 --- a/examples/11_gemm_all_scatter_producer_consumer/gemm_all_scatter_producer_consumer.py +++ b/examples/11_gemm_all_scatter_producer_consumer/gemm_all_scatter_producer_consumer.py @@ -184,9 +184,12 @@ def persistent_all_scatter( global_offset = rm[:, None] * stride_cm_global + (rn[None, :] + cur_rank * N) * stride_cn_global # End: masks/offset calculations. 
- # Poll until lock is set to 1, then atomically reset it to 0 - while tl.atomic_cas(locks + tile_id, 1, 0, sem="acquire", scope="gpu") != 1: - pass + # Spin on volatile load until flag is non-zero (cheap) + flag_val = tl.load(locks + tile_id, cache_modifier=".cv", volatile=True) + while flag_val == 0: + flag_val = tl.load(locks + tile_id, cache_modifier=".cv", volatile=True) + # Use atomic_cas with dependency on loaded value to prevent reordering + tl.atomic_cas(locks + tile_id, flag_val, 0, sem="acquire", scope="gpu") for remote_rank in range(world_size): if remote_rank != cur_rank: From c66903c3006d485020a5c7b51626b13e34554a67 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 10 Oct 2025 05:29:44 +0000 Subject: [PATCH 7/8] Add port cleanup to CI tests to prevent lingering connections Co-authored-by: mawad-amd <112003944+mawad-amd@users.noreply.github.com> --- .github/scripts/run_perf_benchmark.sh | 15 ++++++++++++++- .github/scripts/run_tests.sh | 12 ++++++++++++ .../workflows/iris-external-validation-test.yml | 5 ++++- tests/run_tests_distributed.py | 5 +++++ 4 files changed, 35 insertions(+), 2 deletions(-) diff --git a/.github/scripts/run_perf_benchmark.sh b/.github/scripts/run_perf_benchmark.sh index 7be18d84..ba3b6a26 100755 --- a/.github/scripts/run_perf_benchmark.sh +++ b/.github/scripts/run_perf_benchmark.sh @@ -10,6 +10,20 @@ BENCHMARK_ARGS="$@" # Create overlay image in workspace (will be auto-cleaned by GitHub Actions) OVERLAY="iris_overlay_perf_${EXAMPLE_PATH//\//_}.img" +# Cleanup function +cleanup() { + echo "Cleaning up processes and ports..." + # Kill any lingering Python processes from this benchmark + pkill -9 -f "benchmark.py" 2>/dev/null || true + # Give the system time to release ports + sleep 1 + # Cleanup overlay image + rm -f "${OVERLAY}" 2>/dev/null || true +} + +# Set trap to ensure cleanup happens on exit (success or failure) +trap cleanup EXIT INT TERM + echo "::group::Creating overlay image" apptainer overlay create --size 1024 --create-dir /var/cache/iris "${OVERLAY}" echo "::endgroup::" @@ -60,4 +74,3 @@ fi echo "✅ Performance test passed! TFLOPs: $TFLOPS (threshold: >$TFLOPS_THRESHOLD)" echo "::endgroup::" - diff --git a/.github/scripts/run_tests.sh b/.github/scripts/run_tests.sh index fd7b9388..041fcd26 100755 --- a/.github/scripts/run_tests.sh +++ b/.github/scripts/run_tests.sh @@ -13,6 +13,18 @@ if [ -z "$NUM_RANKS" ]; then exit 1 fi +# Function to cleanup ports and processes on exit +cleanup() { + echo "Cleaning up ports and processes..." + # Kill any lingering Python processes from this test session + pkill -9 -f "run_tests_distributed.py" 2>/dev/null || true + # Give the system time to release ports + sleep 1 +} + +# Set trap to ensure cleanup happens on exit (success or failure) +trap cleanup EXIT INT TERM + # Run examples tests one at a time using distributed wrapper echo 'Running examples tests one at a time...' 
for test_file in tests/examples/test_*.py; do diff --git a/.github/workflows/iris-external-validation-test.yml b/.github/workflows/iris-external-validation-test.yml index 57c904d3..69a16303 100644 --- a/.github/workflows/iris-external-validation-test.yml +++ b/.github/workflows/iris-external-validation-test.yml @@ -67,7 +67,10 @@ jobs: set -e pip install git+https://github.com/${{ github.repository }}.git@${{ github.sha }} wget -O test_iris_distributed.py https://gist.githubusercontent.com/mawad-amd/6375dc078e39e256828f379e03310ec7/raw/a527c3192bee4615292769e340b1c73676f6945a/test_iris_distributed.py - python test_iris_distributed.py + python test_iris_distributed.py || { echo 'Test failed, cleaning up...'; pkill -9 -f python 2>/dev/null || true; exit 1; } + # Ensure cleanup of any lingering processes + pkill -9 -f python 2>/dev/null || true + sleep 1 " echo "::endgroup::" diff --git a/tests/run_tests_distributed.py b/tests/run_tests_distributed.py index e3254556..bc267fb7 100755 --- a/tests/run_tests_distributed.py +++ b/tests/run_tests_distributed.py @@ -110,6 +110,11 @@ def main(): except Exception: # Any other unhandled exception = failure sys.exit(1) + finally: + # Ensure cleanup: wait briefly to allow port to be released + import time + + time.sleep(0.5) if __name__ == "__main__": From 7d7179b72b1970a0a2ac59f631e2bd7165836fd9 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 10 Oct 2025 05:34:14 +0000 Subject: [PATCH 8/8] Revert "Add port cleanup to CI tests to prevent lingering connections" Co-authored-by: mawad-amd <112003944+mawad-amd@users.noreply.github.com> --- .github/scripts/run_perf_benchmark.sh | 15 +-------------- .github/scripts/run_tests.sh | 12 ------------ .../workflows/iris-external-validation-test.yml | 5 +---- tests/run_tests_distributed.py | 5 ----- 4 files changed, 2 insertions(+), 35 deletions(-) diff --git a/.github/scripts/run_perf_benchmark.sh b/.github/scripts/run_perf_benchmark.sh index ba3b6a26..7be18d84 100755 --- a/.github/scripts/run_perf_benchmark.sh +++ b/.github/scripts/run_perf_benchmark.sh @@ -10,20 +10,6 @@ BENCHMARK_ARGS="$@" # Create overlay image in workspace (will be auto-cleaned by GitHub Actions) OVERLAY="iris_overlay_perf_${EXAMPLE_PATH//\//_}.img" -# Cleanup function -cleanup() { - echo "Cleaning up processes and ports..." - # Kill any lingering Python processes from this benchmark - pkill -9 -f "benchmark.py" 2>/dev/null || true - # Give the system time to release ports - sleep 1 - # Cleanup overlay image - rm -f "${OVERLAY}" 2>/dev/null || true -} - -# Set trap to ensure cleanup happens on exit (success or failure) -trap cleanup EXIT INT TERM - echo "::group::Creating overlay image" apptainer overlay create --size 1024 --create-dir /var/cache/iris "${OVERLAY}" echo "::endgroup::" @@ -74,3 +60,4 @@ fi echo "✅ Performance test passed! TFLOPs: $TFLOPS (threshold: >$TFLOPS_THRESHOLD)" echo "::endgroup::" + diff --git a/.github/scripts/run_tests.sh b/.github/scripts/run_tests.sh index 041fcd26..fd7b9388 100755 --- a/.github/scripts/run_tests.sh +++ b/.github/scripts/run_tests.sh @@ -13,18 +13,6 @@ if [ -z "$NUM_RANKS" ]; then exit 1 fi -# Function to cleanup ports and processes on exit -cleanup() { - echo "Cleaning up ports and processes..." 
- # Kill any lingering Python processes from this test session - pkill -9 -f "run_tests_distributed.py" 2>/dev/null || true - # Give the system time to release ports - sleep 1 -} - -# Set trap to ensure cleanup happens on exit (success or failure) -trap cleanup EXIT INT TERM - # Run examples tests one at a time using distributed wrapper echo 'Running examples tests one at a time...' for test_file in tests/examples/test_*.py; do diff --git a/.github/workflows/iris-external-validation-test.yml b/.github/workflows/iris-external-validation-test.yml index 69a16303..57c904d3 100644 --- a/.github/workflows/iris-external-validation-test.yml +++ b/.github/workflows/iris-external-validation-test.yml @@ -67,10 +67,7 @@ jobs: set -e pip install git+https://github.com/${{ github.repository }}.git@${{ github.sha }} wget -O test_iris_distributed.py https://gist.githubusercontent.com/mawad-amd/6375dc078e39e256828f379e03310ec7/raw/a527c3192bee4615292769e340b1c73676f6945a/test_iris_distributed.py - python test_iris_distributed.py || { echo 'Test failed, cleaning up...'; pkill -9 -f python 2>/dev/null || true; exit 1; } - # Ensure cleanup of any lingering processes - pkill -9 -f python 2>/dev/null || true - sleep 1 + python test_iris_distributed.py " echo "::endgroup::" diff --git a/tests/run_tests_distributed.py b/tests/run_tests_distributed.py index bc267fb7..e3254556 100755 --- a/tests/run_tests_distributed.py +++ b/tests/run_tests_distributed.py @@ -110,11 +110,6 @@ def main(): except Exception: # Any other unhandled exception = failure sys.exit(1) - finally: - # Ensure cleanup: wait briefly to allow port to be released - import time - - time.sleep(0.5) if __name__ == "__main__":
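
For reference, below is a minimal standalone sketch (not part of any patch above) of the handshake that patches 3-6 converge on: the producer publishes its tile and then sets a per-tile int32 flag with a release-semantics atomic_xchg, while the consumer spins on a cheap volatile load and then issues an acquire-semantics atomic_cas that consumes the flag before touching the data. The kernel and tensor names (produce, consume, data, flag) are illustrative assumptions rather than Iris APIs, and the sem/scope keyword arguments assume a Triton release that supports them.

    # Minimal standalone sketch of the release/acquire flag handshake from
    # patches 3-6. Names (produce, consume, data, flag) are illustrative only
    # and are NOT part of the Iris API or of the diffs above.
    import torch
    import triton
    import triton.language as tl


    @triton.jit
    def produce(data_ptr, flag_ptr, value):
        # Write the payload, then publish it by setting the flag with release
        # semantics so the payload store cannot be reordered after the flag.
        tl.store(data_ptr, value, cache_modifier=".wt")
        tl.atomic_xchg(flag_ptr, 1, sem="release", scope="gpu")


    @triton.jit
    def consume(data_ptr, flag_ptr, out_ptr):
        # Spin on a volatile load until the flag becomes non-zero (cheap poll),
        # mirroring the loop introduced in patch 6.
        flag_val = tl.load(flag_ptr, cache_modifier=".cv", volatile=True)
        while flag_val == 0:
            flag_val = tl.load(flag_ptr, cache_modifier=".cv", volatile=True)
        # Acquire atomic on the observed value resets the flag to 0 and orders
        # the payload load below after the synchronization.
        tl.atomic_cas(flag_ptr, flag_val, 0, sem="acquire", scope="gpu")
        tl.store(out_ptr, tl.load(data_ptr, cache_modifier=".cv", volatile=True))


    if __name__ == "__main__":
        # Single-GPU demo; in the examples the flag buffer lives in symmetric
        # (shmem) memory, is int32 as of patch 3, and is zeroed in a preamble
        # between iris.do_bench iterations as of patch 2.
        data = torch.zeros(1, device="cuda", dtype=torch.float32)
        flag = torch.zeros(1, device="cuda", dtype=torch.int32)
        out = torch.zeros(1, device="cuda", dtype=torch.float32)
        produce[(1,)](data, flag, 42.0)
        consume[(1,)](data, flag, out)
        print(out)  # expected: tensor([42.], device='cuda:0')

Per the comments introduced in patch 6, the volatile-load spin keeps the polling inexpensive, while the single acquire atomic_cas on the loaded value both consumes the flag and prevents the subsequent data loads from being hoisted above the wait.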