diff --git a/benchmarks/nvidia-sdpa/Dockerfile b/benchmarks/nvidia-sdpa/Dockerfile
new file mode 100644
index 0000000..db1d0f1
--- /dev/null
+++ b/benchmarks/nvidia-sdpa/Dockerfile
@@ -0,0 +1,28 @@
+# Arm (aarch64) variant of the cudnn-frontend SDPA benchmark image.
+FROM nvcr.io/nvidia/pytorch:25.09-py3
+
+# --no-cache-dir keeps pip's download cache out of the image layer.
+RUN pip install --no-cache-dir --upgrade pip && \
+    pip install --no-cache-dir seaborn
+
+# Register NVIDIA's CUDA apt repository via the sbsa (Arm server) keyring and
+# install cuDNN 9 for CUDA 13. The keyring .deb and the apt package lists are
+# removed in this same RUN so they do not persist in the image layer.
+RUN apt-get update && \
+    wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/sbsa/cuda-keyring_1.1-1_all.deb && \
+    dpkg -i cuda-keyring_1.1-1_all.deb && \
+    rm -f cuda-keyring_1.1-1_all.deb && \
+    apt-get update && \
+    apt-get -y install cudnn9-cuda-13 && \
+    rm -rf /var/lib/apt/lists/*
+
+# Drop the pip-installed cudnn package (presumably so the apt-installed cuDNN
+# above is the one picked up at runtime; confirm).
+RUN pip uninstall -y cudnn
+
+COPY benchmark_bf16_sdpa.py benchmark_fp8_sdpa.py benchmark_single_sdpa.py ./
+
+# Search the aarch64 system library directory before any inherited entries.
+ENV LD_LIBRARY_PATH=/usr/lib/aarch64-linux-gnu/:$LD_LIBRARY_PATH
+
+WORKDIR /workspace
diff --git a/benchmarks/nvidia-sdpa/README.md b/benchmarks/nvidia-sdpa/README.md
new file mode 100644
index 0000000..eb119fd
--- /dev/null
+++ b/benchmarks/nvidia-sdpa/README.md
@@ -0,0 +1,13 @@
+## Scaled Dot Product Attention Benchmark
+
+The upstream NVIDIA benchmark, which is part of the cudnn-frontend package (found at https://github.com/NVIDIA/cudnn-frontend/tree/main/benchmark/sdpa_benchmark_training), uses x86_64-specific packages, which do not work on GB300 because Grace CPUs are Arm (aarch64).
+
+In this repository you'll find a simple fixed Dockerfile which can be used on NVIDIA Grace-based systems.
+
+Steps:
+1. Clone the repository
+   - `git clone https://github.com/NVIDIA/cudnn-frontend`
+2. Replace the Dockerfile at `cudnn-frontend/benchmark/sdpa_benchmark_training/Dockerfile` with the one from this repo.
+3. From that directory, follow the upstream instructions as normal
+   - `docker build -t cudnn_attention_benchmark .`
+   - `docker run -it --gpus all --rm -v $(pwd):/workspace cudnn_attention_benchmark`