Skip to content

Commit 496eb5b

Browse files
Add doc-strings to KernelOccupancy methods.
1 parent e2adc57 commit 496eb5b

File tree

1 file changed

+73
-5
lines changed

1 file changed

+73
-5
lines changed

cuda_core/cuda/core/experimental/_module.py

Lines changed: 73 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -205,7 +205,30 @@ def _init(cls, handle):
205205
return self
206206

207207
def max_active_blocks_per_multiprocessor(self, block_size: int, dynamic_shared_memory_size: int) -> int:
208-
"""int : Occupancy of the kernel"""
208+
"""Occupancy of the kernel.
209+
210+
Returns the maximum number of active blocks per multiprocessor for this kernel.
211+
212+
Parameters
213+
----------
214+
block_size: int
215+
Block size parameter used to launch this kernel.
216+
dynamic_shared_memory_size: int
217+
The amount of dynamic shared memory, in bytes, needed by each block.
218+
Use `0` if the block does not need dynamic shared memory.
219+
220+
Returns
221+
-------
222+
int
223+
The maximum number of active blocks per multiprocessor.
224+
225+
Note
226+
----
227+
The ratio of the product of the maximum number of active blocks per multiprocessor
228+
and the block size to the maximum number of threads per multiprocessor is known as
229+
the theoretical multiprocessor utilization (occupancy).
230+
231+
"""
209232
return handle_return(
210233
driver.cuOccupancyMaxActiveBlocksPerMultiprocessor(self._handle, block_size, dynamic_shared_memory_size)
211234
)
@@ -262,20 +285,65 @@ def max_potential_block_size(
262285
return MaxPotentialBlockSizeOccupancyResult(min_grid_size=min_grid_size, max_block_size=max_block_size)
263286

264287
def available_dynamic_shared_memory_per_block(self, num_blocks_per_multiprocessor: int, block_size: int) -> int:
265-
"""int: Dynamic shared memory available per block for given launch configuration."""
288+
"""Dynamic shared memory available per block for given launch configuration.
289+
290+
The amount of dynamic shared memory available per block, in bytes, for the given kernel launch configuration.
291+
292+
Parameters
293+
----------
294+
num_blocks_per_multiprocessor: int
295+
Number of blocks to be concurrently executing on a multiprocessor.
296+
block_size: int
297+
Block size parameter used to launch this kernel.
298+
299+
Returns
300+
-------
301+
int
302+
Dynamic shared memory available per block for given launch configuration.
303+
"""
266304
return handle_return(
267305
driver.cuOccupancyAvailableDynamicSMemPerBlock(self._handle, num_blocks_per_multiprocessor, block_size)
268306
)
269307

270308
def max_potential_cluster_size(self, config: LaunchConfig, stream: Optional[Stream] = None) -> int:
271-
""" "int: The maximum cluster size that can be launched for this kernel and launch configuration"""
309+
"""Maximum potential cluster size.
310+
311+
The maximum potential cluster size for this kernel and given launch configuration.
312+
313+
Parameters
314+
----------
315+
config: :obj:`~_launch_config.LaunchConfig`
316+
Kernel launch configuration. Cluster dimensions in the configuration are ignored.
317+
stream: :obj:`~Stream`, optional
318+
The stream on which this kernel is to be launched.
319+
320+
Returns
321+
-------
322+
int
323+
The maximum cluster size that can be launched for this kernel and launch configuration.
324+
"""
272325
drv_cfg = _to_native_launch_config(config)
273326
if stream is not None:
274327
drv_cfg.hStream = stream.handle
275328
return handle_return(driver.cuOccupancyMaxPotentialClusterSize(self._handle, drv_cfg))
276329

277330
def max_active_clusters(self, config: LaunchConfig, stream: Optional[Stream] = None) -> int:
278-
""" "int: The maximum number of clusters that could co-exist on the target device"""
331+
"""Maximum number of active clusters on the target device.
332+
333+
The maximum number of clusters that could concurrently execute on the target device.
334+
335+
Parameters
336+
----------
337+
config: :obj:`~_launch_config.LaunchConfig`
338+
Kernel launch configuration.
339+
stream: :obj:`~Stream`, optional
340+
The stream on which this kernel is to be launched.
341+
342+
Returns
343+
-------
344+
int
345+
The maximum number of clusters that could co-exist on the target device.
346+
"""
279347
drv_cfg = _to_native_launch_config(config)
280348
if stream is not None:
281349
drv_cfg.hStream = stream.handle
@@ -351,7 +419,7 @@ def arguments_info(self) -> list[ParamInfo]:
351419

352420
@property
353421
def occupancy(self) -> KernelOccupancy:
354-
"""Get the read-only attributes of this kernel."""
422+
"""Get the occupancy information for launching this kernel."""
355423
if self._occupancy is None:
356424
self._occupancy = KernelOccupancy._init(self._handle)
357425
return self._occupancy

0 commit comments

Comments
 (0)