A simple and extensible template for CUDA projects.
- Easy CMake integration — build separate CPU and GPU static libraries.
- Smart GPU memory management via `memory_cuda.cuh`:
  - `cuda::unique_ptr<T>`: a GPU-aware smart pointer that owns device memory allocated with `cudaMalloc()` and automatically releases it using `cudaFree()` when it goes out of scope.
  - `cuda::shared_ptr<T>`: a reference-counted smart pointer that provides shared ownership of GPU-allocated memory. Multiple instances can safely share the same device pointer. This implementation is thread-safe, thanks to atomic reference counting.
- Safe-by-default — dereferencing on the host side is explicitly disabled (`operator*`, `->`, and `[]` are deleted).
#include "memory_cuda.cuh"
// Doubles one element of `data` per thread, in place.
//
// Launch layout: the element index is taken from threadIdx.x alone, so the
// kernel is meant for a single 1-D block. There is no bounds check: `data`
// must hold at least blockDim.x floats.
__global__ void kernel(float* data) {
    const int element = threadIdx.x;
    float* const slot = data + element;
    *slot *= 2.0f;
}
// Example entry point: allocates a device buffer through the smart pointer,
// runs `kernel` over it, and reports failure through the exit code.
// Returns 0 on success and 1 if any CUDA call or the kernel launch fails.
int main() {
    // Element count and thread count must match: the kernel has no bounds check.
    constexpr int kCount = 10;

    // Allocate GPU memory and wrap it in a smart pointer; it is released
    // automatically when d_ptr goes out of scope.
    // NOTE(review): assumes the argument is an element count — confirm
    // against memory_cuda.cuh.
    auto d_ptr = cuda::make_unique<float>(kCount);

    // Give the buffer a defined value so the kernel does not read
    // indeterminate memory (all-zero bytes is 0.0f).
    if (cudaMemset(d_ptr.get(), 0, kCount * sizeof(float)) != cudaSuccess) {
        return 1;
    }

    // Launch a kernel that uses the allocated device memory
    kernel<<<1, kCount>>>(d_ptr.get());

    // A kernel launch returns nothing; configuration errors are only
    // visible through cudaGetLastError().
    if (cudaGetLastError() != cudaSuccess) {
        return 1;
    }

    // Faults raised while the kernel runs surface at the next synchronizing call.
    if (cudaDeviceSynchronize() != cudaSuccess) {
        return 1;
    }
    return 0;
}
This project is licensed under GPL-3.0 license. See the LICENSE file for the full license text.