diff --git a/docs/conf.py b/docs/conf.py index 35fa7a00..7453ac9b 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -95,7 +95,6 @@ "triton.language", "numpy", "iris._distributed_helpers", - "iris.hip", ] # Napoleon settings for Google/NumPy docstring parsing diff --git a/docs/reference/api-hip-module.md b/docs/reference/api-hip-module.md new file mode 100644 index 00000000..3724a8c5 --- /dev/null +++ b/docs/reference/api-hip-module.md @@ -0,0 +1,18 @@ +# HIP Module API + +Low-level HIP runtime integration for AMD GPU device management and memory operations. + +This module provides public APIs for querying device attributes. + +## Device Attributes + +### get_wall_clock_rate +```{eval-rst} +.. autofunction:: iris.hip.get_wall_clock_rate +``` + +### get_num_xcc +```{eval-rst} +.. autofunction:: iris.hip.get_num_xcc +``` + diff --git a/docs/reference/api-reference.md b/docs/reference/api-reference.md index 5aab4e30..6460a44e 100644 --- a/docs/reference/api-reference.md +++ b/docs/reference/api-reference.md @@ -5,10 +5,12 @@ Explore Iris APIs. 
The reference is broken down into focused sections to mirror - The `Iris` class itself (constructor and helper utilities) - Tensor-like creation methods on the `Iris` context - Triton device-side functions for remote memory ops and atomics +- HIP runtime integration for low-level device management Use the links below to navigate: - [Iris Class (ctor & helpers)](api-iris-class.md) - [Tensor Creation](api-tensor-creation.md) - [Triton Device Functions](api-device-functions.md) +- [HIP Module](api-hip-module.md) diff --git a/docs/sphinx/_toc.yml b/docs/sphinx/_toc.yml index 5b4cdead..a3761d77 100644 --- a/docs/sphinx/_toc.yml +++ b/docs/sphinx/_toc.yml @@ -15,3 +15,4 @@ subtrees: - file: reference/api-iris-class.md - file: reference/api-tensor-creation.md - file: reference/api-device-functions.md + - file: reference/api-hip-module.md diff --git a/iris/hip.py b/iris/hip.py index 89807860..454c002c 100644 --- a/iris/hip.py +++ b/iris/hip.py @@ -1,6 +1,28 @@ # SPDX-License-Identifier: MIT # Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved. +""" +HIP Runtime Integration Module + +This module provides low-level HIP runtime integration for AMD GPUs, +offering Python bindings to essential HIP runtime functions through ctypes. +It enables device management, memory operations, and inter-process communication +for multi-GPU programming. + +Key Features: +- Device enumeration and management +- IPC (Inter-Process Communication) memory handles +- Device attribute queries (compute units, architecture, XCC count) +- Fine-grained and coarse-grained memory allocation +- ROCm version detection + +Example: + >>> import iris.hip as hip + >>> num_devices = hip.count_devices() + >>> hip.set_device(0) + >>> cu_count = hip.get_cu_count() +""" + import ctypes import numpy as np import sys @@ -182,6 +204,22 @@ def get_rocm_version(): def get_wall_clock_rate(device_id): + """ + Get the wall clock rate (frequency of the constant-rate wall clock timer) for a HIP device. 
+ + Args: + device_id (int): The device ID to query. + + Returns: + int: The wall clock rate in kHz. + + Raises: + RuntimeError: If the HIP runtime call fails. + + Example: + >>> clock_rate = get_wall_clock_rate(0) + >>> print(f"Wall clock rate: {clock_rate} kHz") + """ wall_clock_rate = ctypes.c_int() if _is_amd_backend: @@ -212,6 +250,26 @@ def get_arch_string(device_id=None): def get_num_xcc(device_id=None): + """ + Get the number of XCCs (Compute Dies) for a HIP device. + + XCC (Accelerated Compute Core) refers to the compute dies in MI300 series GPUs. + For ROCm versions before 7.0, returns a default value of 8. + For CUDA/NVIDIA devices, returns 1 as XCC is AMD-specific. + + Args: + device_id (int, optional): The device ID to query. If None, uses the current device. + + Returns: + int: The number of XCCs on the device. + + Raises: + RuntimeError: If the HIP runtime call fails. + + Example: + >>> xcc_count = get_num_xcc() + >>> print(f"Number of XCCs: {xcc_count}") + """ if device_id is None: device_id = get_device_id()