Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 27 additions & 0 deletions .github/workflows/cuda.yml
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,33 @@ jobs:
export LD_LIBRARY_PATH=/opt/conda/lib:$LD_LIBRARY_PATH
PYTHON_EXECUTABLE=python source .ci/scripts/test_model.sh "${{ matrix.model }}" cmake cuda

# CI job: build ExecuTorch with CUDA enabled, then build and run the AOTI
# shim unit tests (backends/cuda/runtime/shims/tests) on a GPU runner.
# NOTE(review): indentation here reflects the diff-page scrape, not the
# original workflow file — verify against .github/workflows/cuda.yml.
test-cuda-shims:
name: test-cuda-shims
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
permissions:
id-token: write
contents: read
with:
timeout: 90
# GPU runner; CUDA 12.6 matches the arch-version below.
runner: linux.g5.4xlarge.nvidia.gpu
gpu-arch-type: cuda
gpu-arch-version: 12.6
use-custom-docker-registry: false
submodules: recursive
# For PRs, test the PR head commit rather than the merge commit.
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
script: |
set -eux
# Install requirements
bash ./install_requirements.sh

# Build ExecuTorch with CUDA support
cmake --workflow --preset llm-release-cuda

# Build and run CUDA shim tests (uses the preset defined in that
# directory's CMakePresets.json: configure -> build -> ctest).
pushd backends/cuda/runtime/shims/tests
cmake --workflow --preset default
popd

export-model-cuda-artifact:
name: export-model-cuda-artifact
# Skip this job if the pull request is from a fork (HuggingFace secrets are not available)
Expand Down
35 changes: 32 additions & 3 deletions backends/cuda/runtime/shims/memory.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -119,8 +119,6 @@ AOTITorchError aoti_torch_create_tensor_from_blob_v2(
int32_t layout,
const uint8_t* opaque_metadata,
int64_t opaque_metadata_size) {
// TODO(gasoonjia): verify given data is on the target device
(void)device_type;
(void)opaque_metadata;
(void)layout;
(void)opaque_metadata_size;
Expand Down Expand Up @@ -154,6 +152,34 @@ AOTITorchError aoti_torch_create_tensor_from_blob_v2(
// Storage offset must be 0 since from_blob cannot handle different offsets
ET_CHECK_OK_OR_RETURN_ERROR(validate_storage_offset(storage_offset));

// Verify that data pointer location matches the requested device_type
cudaPointerAttributes data_attributes{};
ET_CUDA_CHECK_OR_RETURN_ERROR(
cudaPointerGetAttributes(&data_attributes, data));

bool data_is_on_device = data_attributes.type == cudaMemoryTypeDevice;
bool data_is_on_host = data_attributes.type == cudaMemoryTypeHost ||
data_attributes.type == cudaMemoryTypeUnregistered;
bool requested_device =
device_type == static_cast<int32_t>(SupportedDevices::CUDA);
bool requested_cpu =
device_type == static_cast<int32_t>(SupportedDevices::CPU);

// Error if data location doesn't match requested device type
ET_CHECK_OR_RETURN_ERROR(
!(data_is_on_device && requested_cpu),
InvalidArgument,
"aoti_torch_create_tensor_from_blob_v2 failed: data pointer %p is on CUDA "
"but device_type is CPU. Data must be on CPU for CPU tensors.",
data);

ET_CHECK_OR_RETURN_ERROR(
!(data_is_on_host && requested_device),
InvalidArgument,
"aoti_torch_create_tensor_from_blob_v2 failed: data pointer %p is on CPU "
"but device_type is CUDA. Data must be on GPU for CUDA tensors.",
data);

// Convert sizes to the format expected by ExecutorTorch using SizesType
std::vector<executorch::aten::SizesType> sizes =
convert_sizes_to_vector(ndim, sizes_ptr);
Expand Down Expand Up @@ -305,7 +331,10 @@ void clear_all_tensors() {
// tensors set should now be empty, but ensure it's cleared
tensors.clear();

ET_LOG(Info, "Cleared all tensors");
// Clear memory tracking map (includes leftover NOT_OWN entries)
memory_to_n_tensor.clear();

ET_LOG(Info, "Cleared all tensors and memory tracking");
}

AOTITorchError aoti_torch_delete_tensor_object(Tensor* tensor) {
Expand Down
3 changes: 3 additions & 0 deletions backends/cuda/runtime/shims/memory.h
Original file line number Diff line number Diff line change
Expand Up @@ -167,6 +167,9 @@ AOTITorchError aoti_torch_new_tensor_handle(

// Function to clear all tensors from internal storage
AOTI_SHIM_EXPORT void clear_all_tensors();

// Function to clear memory tracking map (for test cleanup)
AOTI_SHIM_EXPORT void clear_memory_tracking();
} // extern "C"

} // namespace executorch::backends::cuda
69 changes: 69 additions & 0 deletions backends/cuda/runtime/shims/tests/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

cmake_minimum_required(VERSION 3.19)
project(aoti_cuda_shim_tests LANGUAGES CXX CUDA)

# C++17 for all host code; device code is compiled by the CUDA language
# support enabled in project() above.
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)

# CUDA runtime headers/libraries (provides the CUDA::cudart imported target).
find_package(CUDAToolkit REQUIRED)

# Fetch GoogleTest, pinned to a release tag. GIT_SHALLOW keeps the clone
# small, which matters in CI where this project is configured from scratch.
include(FetchContent)
FetchContent_Declare(
  googletest
  GIT_REPOSITORY https://github.com/google/googletest.git
  GIT_TAG v1.14.0
  GIT_SHALLOW TRUE
)
# For Windows: Prevent overriding the parent project's compiler/linker settings
set(gtest_force_shared_crt
    ON
    CACHE BOOL "" FORCE
)
FetchContent_MakeAvailable(googletest)

# ExecuTorch source root is five levels up from this directory; normalize
# the ../.. chain to a clean absolute path before it is used in include dirs.
if(NOT EXECUTORCH_ROOT)
  get_filename_component(
    EXECUTORCH_ROOT "${CMAKE_CURRENT_SOURCE_DIR}/../../../../.." ABSOLUTE
  )
endif()

# Locate the installed ExecuTorch package (must be built with CUDA support
# beforehand — see the README in this directory).
find_package(executorch CONFIG REQUIRED HINTS ${CMAKE_INSTALL_PREFIX})

# One test executable per shim under test; each name is both the target name
# and the source file stem (<name>.cpp). Listed one per line so additions
# show up cleanly in diffs.
set(CUDA_SHIM_TESTS
    test_aoti_torch_create_tensor_from_blob_v2
    test_aoti_torch_empty_strided
    test_aoti_torch_delete_tensor_object
    test_aoti_torch__reinterpret_tensor
    test_aoti_torch_copy_
    test_aoti_torch_new_tensor_handle
)

enable_testing()

foreach(test_name IN LISTS CUDA_SHIM_TESTS)
  add_executable(${test_name} ${test_name}.cpp)

  # Sources include headers as <executorch/...>, so the parent of the repo
  # root must be on the include path; the root itself is added for headers
  # referenced relative to the repo.
  target_include_directories(
    ${test_name} PRIVATE ${EXECUTORCH_ROOT}/.. ${EXECUTORCH_ROOT}
                         ${CUDAToolkit_INCLUDE_DIRS}
  )

  target_link_libraries(
    ${test_name}
    PRIVATE GTest::gtest
            GTest::gtest_main
            aoti_cuda_shims
            aoti_cuda_backend
            cuda_tensor_maker
            cuda_platform
            executorch_core
            extension_tensor
            CUDA::cudart
  )

  # Register with CTest; COMMAND uses the target name so CMake substitutes
  # the built binary's full path.
  add_test(NAME ${test_name} COMMAND ${test_name})
endforeach()
95 changes: 95 additions & 0 deletions backends/cuda/runtime/shims/tests/CMakePresets.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
{
"version": 6,
"configurePresets": [
{
"name": "default",
"displayName": "CUDA Shim Tests",
"binaryDir": "${sourceDir}/../../../../../cmake-out/backends/cuda/runtime/shims/tests",
"cacheVariables": {
"CMAKE_BUILD_TYPE": "Release",
"CMAKE_PREFIX_PATH": "${sourceDir}/../../../../../cmake-out"
},
"condition": {
"type": "inList",
"string": "${hostSystemName}",
"list": ["Linux", "Windows"]
}
},
{
"name": "debug",
"displayName": "CUDA Shim Tests (Debug)",
"inherits": ["default"],
"cacheVariables": {
"CMAKE_BUILD_TYPE": "Debug"
}
}
],
"buildPresets": [
{
"name": "default",
"displayName": "Build CUDA Shim Tests",
"configurePreset": "default"
},
{
"name": "debug",
"displayName": "Build CUDA Shim Tests (Debug)",
"configurePreset": "debug"
}
],
"workflowPresets": [
{
"name": "default",
"displayName": "Configure, build, and test CUDA Shim Tests",
"steps": [
{
"type": "configure",
"name": "default"
},
{
"type": "build",
"name": "default"
},
{
"type": "test",
"name": "default"
}
]
},
{
"name": "debug",
"displayName": "Configure, build, and test CUDA Shim Tests (Debug)",
"steps": [
{
"type": "configure",
"name": "debug"
},
{
"type": "build",
"name": "debug"
},
{
"type": "test",
"name": "debug"
}
]
}
],
"testPresets": [
{
"name": "default",
"displayName": "Run all CUDA Shim Tests",
"configurePreset": "default",
"output": {
"outputOnFailure": true
}
},
{
"name": "debug",
"displayName": "Run all CUDA Shim Tests (Debug)",
"configurePreset": "debug",
"output": {
"outputOnFailure": true
}
}
]
}
94 changes: 94 additions & 0 deletions backends/cuda/runtime/shims/tests/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
# CUDA AOTI Shim Tests

Unit tests for the CUDA AOTI (Ahead-Of-Time Inductor) shim functions used by the ExecuTorch CUDA backend.

## Prerequisites

1. **CUDA Toolkit**: Ensure CUDA is installed and available
2. **ExecuTorch with CUDA**: Build and install ExecuTorch with CUDA support first

## Building ExecuTorch with CUDA

From the ExecuTorch root directory:

```bash
# Release build
cmake --workflow --preset llm-release-cuda

# Or debug build (recommended for debugging test failures)
cmake --workflow --preset llm-debug-cuda
```

## Building and Running the Tests

### Option 1: Using CMake Presets (Recommended)

From this directory (`backends/cuda/runtime/shims/tests/`):

```bash
# Release build
cmake --workflow --preset default

# Debug build
cmake --workflow --preset debug
```

### Option 2: Manual CMake Commands

From the ExecuTorch root directory:

```bash
# Configure
cmake -B cmake-out/backends/cuda/runtime/shims/tests \
-S backends/cuda/runtime/shims/tests \
-DCMAKE_PREFIX_PATH=$(pwd)/cmake-out \
-DCMAKE_BUILD_TYPE=Debug

# Build
cmake --build cmake-out/backends/cuda/runtime/shims/tests -j$(nproc)
```

### Run Specific Test Cases

Use Google Test filters to run specific test cases:

```bash
# From the build directory
cd cmake-out/backends/cuda/runtime/shims/tests
# Run only device mismatch tests
./test_aoti_torch_create_tensor_from_blob_v2 --gtest_filter="*DeviceMismatch*"

# Run a single test
./test_aoti_torch_create_tensor_from_blob_v2 --gtest_filter="AOTITorchCreateTensorFromBlobV2Test.BasicFunctionalityCUDA"

# List all available tests
./test_aoti_torch_create_tensor_from_blob_v2 --gtest_list_tests
```

## Troubleshooting

### CUDA Not Available

If tests are skipped with "CUDA not available", ensure:
- CUDA drivers are installed
- A CUDA-capable GPU is present
- `nvidia-smi` shows the GPU

### Link Errors

If you get link errors, ensure ExecuTorch was built with CUDA support:
```bash
cmake --workflow --preset llm-release-cuda
```

### Test Failures

For debugging test failures, build with debug mode:
```bash
cmake --workflow --preset debug
```

Then run with `--gtest_break_on_failure`, which stops at the first failing assertion (useful when attached to a debugger):
```bash
./test_aoti_torch_create_tensor_from_blob_v2 --gtest_break_on_failure
```
Loading
Loading