From 499534b72948b375cfc424e36a7dfece58c4d258 Mon Sep 17 00:00:00 2001 From: Eric Schweitz Date: Mon, 6 Oct 2025 10:18:06 -0700 Subject: [PATCH 1/3] [runtime] Fix interface bugs. Fixes kernel_builder to use the correct declaration of altLaunchKernel and pass it the correct arguments. Signed-off-by: Eric Schweitz --- runtime/cudaq/builder/kernel_builder.cpp | 22 ++++--- runtime/cudaq/platform/nvqpp_interface.h | 74 +++++++++++++++++++++++ runtime/cudaq/platform/quantum_platform.h | 2 +- 3 files changed, 89 insertions(+), 9 deletions(-) create mode 100644 runtime/cudaq/platform/nvqpp_interface.h diff --git a/runtime/cudaq/builder/kernel_builder.cpp b/runtime/cudaq/builder/kernel_builder.cpp index ee7202db46f..efd28f10804 100644 --- a/runtime/cudaq/builder/kernel_builder.cpp +++ b/runtime/cudaq/builder/kernel_builder.cpp @@ -17,6 +17,7 @@ #include "cudaq/Optimizer/Dialect/Quake/QuakeDialect.h" #include "cudaq/Optimizer/Dialect/Quake/QuakeOps.h" #include "cudaq/Optimizer/Transforms/Passes.h" +#include "cudaq/platform/nvqpp_interface.h" #include "mlir/Dialect/LLVMIR/LLVMDialect.h" #include "mlir/Dialect/Math/IR/Math.h" #include "mlir/ExecutionEngine/ExecutionEngine.h" @@ -30,16 +31,10 @@ #include "mlir/Support/LogicalResult.h" #include "mlir/Target/LLVMIR/ModuleTranslation.h" #include "mlir/Transforms/Passes.h" - #include using namespace mlir; -extern "C" { -void altLaunchKernel(const char *kernelName, void (*kernelFunc)(void *), - void *kernelArgs, std::uint64_t argsSize); -} - namespace cudaq::details { /// @brief Track unique measurement register names. @@ -1105,9 +1100,20 @@ void invokeCode(ImplicitLocOpBuilder &builder, ExecutionEngine *jit, } // Invoke and free the args memory. - auto thunk = reinterpret_cast(*thunkPtr); + auto thunk = reinterpret_cast(*thunkPtr); + + // Extract the result offset, which we named. 
+ auto roName = properName + ".resultOffset"; + auto roPtr = jit->lookup(roName); + if (!roPtr) + throw std::runtime_error( + "cudaq::builder failed to get result offset function"); + + // Invoke and free the args memory. + auto resultOffset = reinterpret_cast(*roPtr); - altLaunchKernel(properName.data(), thunk, rawArgs, size); + [[maybe_unused]] auto uncheckedResult = + altLaunchKernel(properName.data(), thunk, rawArgs, size, resultOffset); std::free(rawArgs); // TODO: any return values are dropped on the floor here. } diff --git a/runtime/cudaq/platform/nvqpp_interface.h b/runtime/cudaq/platform/nvqpp_interface.h new file mode 100644 index 00000000000..84cdeb4f84c --- /dev/null +++ b/runtime/cudaq/platform/nvqpp_interface.h @@ -0,0 +1,74 @@ +/****************************************************************-*- C++ -*-**** + * Copyright (c) 2025 NVIDIA Corporation & Affiliates. * + * All rights reserved. * + * * + * This source code and the accompanying materials are made available under * + * the terms of the Apache License 2.0 which accompanies this distribution. * + ******************************************************************************/ + +#pragma once + +#include "common/ThunkInterface.h" +#include +#include + +namespace cudaq { + +/// Entry point for the auto-generated kernel execution path. TODO: Needs to be +/// tied to the quantum platform instance somehow. Note that the compiler cannot +/// provide that information. +extern "C" { +// Client-server (legacy) interface. +[[nodiscard]] KernelThunkResultType +altLaunchKernel(const char *kernelName, KernelThunkType kernel, void *args, + std::uint64_t argsSize, std::uint64_t resultOffset); + +// Streamlined interface for launching kernels. Argument synthesis and JIT +// compilation *must* happen on the local machine. +[[nodiscard]] KernelThunkResultType +streamlinedLaunchKernel(const char *kernelName, + const std::vector &rawArgs); + +// Hybrid of the client-server and streamlined approaches. 
It lets JIT +// compilation happen either early or late and can handle return values from +// each kernel launch. +[[nodiscard]] KernelThunkResultType +hybridLaunchKernel(const char *kernelName, KernelThunkType kernel, void *args, + std::uint64_t argsSize, std::uint64_t resultOffset, + const std::vector &rawArgs); + +//===----------------------------------------------------------------------===// +// Launch module entry points. +// +// In some environments (e.g., Python), the ModuleOp of the source can be +// provided immediately to be launched, unlike with statically compiled systems +// (C++). These entry points allow the managed runtime to provide the ModuleOp +// directly. +//===----------------------------------------------------------------------===// + +// Client-server interface. The caller must provide an mlir::ModuleOp and the +// exact name of the entry point kernel function to be called, which is +// typically the .thunk unmarshalling function. Passing short names is +// considered incorrect. +[[nodiscard]] KernelThunkResultType +altLaunchModule(const char *exactEntryPointName, void *moduleOp, void *args, + std::uint64_t argsSize, std::uint64_t resultOffset); + +// Streamlined interface for launching kernels. Argument synthesis and JIT +// compilation *must* happen on the local machine. The caller must provide an +// mlir::ModuleOp and the exact name of the entry point kernel function to be +// called, +[[nodiscard]] KernelThunkResultType +streamlinedLaunchModule(const char *exactEntryPointName, void *moduleOp, + const std::vector &rawArgs); + +// Hybrid of the client-server and streamlined approaches. Letting JIT +// compilation happen either early or late and can handle return values from +// each kernel launch. 
The caller must provide an mlir::ModuleOp and the exact +// name of the entry point kernel function to be called, +[[nodiscard]] KernelThunkResultType +hybridLaunchModule(const char *exactEntryPointName, void *moduleOp, void *args, + std::uint64_t argsSize, std::uint64_t resultOffset, + const std::vector &rawArgs); +} // extern "C" +} // namespace cudaq diff --git a/runtime/cudaq/platform/quantum_platform.h b/runtime/cudaq/platform/quantum_platform.h index 3bfa43ac152..c52e7065ec7 100644 --- a/runtime/cudaq/platform/quantum_platform.h +++ b/runtime/cudaq/platform/quantum_platform.h @@ -15,6 +15,7 @@ #include "common/ThunkInterface.h" #include "cudaq/remote_capabilities.h" #include "cudaq/utils/cudaq_utils.h" +#include "nvqpp_interface.h" #include #include #include @@ -22,7 +23,6 @@ #include #include #include -#include namespace cudaq { From ac6ca134177042c69d9f11fdc19da3dbd0ef2654 Mon Sep 17 00:00:00 2001 From: Eric Schweitz Date: Mon, 6 Oct 2025 10:22:47 -0700 Subject: [PATCH 2/3] Remove declarations needed for reworked runtime. (Will be added later.) Signed-off-by: Eric Schweitz --- runtime/cudaq/platform/nvqpp_interface.h | 34 ------------------------ 1 file changed, 34 deletions(-) diff --git a/runtime/cudaq/platform/nvqpp_interface.h b/runtime/cudaq/platform/nvqpp_interface.h index 84cdeb4f84c..d7ed15dda9d 100644 --- a/runtime/cudaq/platform/nvqpp_interface.h +++ b/runtime/cudaq/platform/nvqpp_interface.h @@ -36,39 +36,5 @@ streamlinedLaunchKernel(const char *kernelName, hybridLaunchKernel(const char *kernelName, KernelThunkType kernel, void *args, std::uint64_t argsSize, std::uint64_t resultOffset, const std::vector &rawArgs); - -//===----------------------------------------------------------------------===// -// Launch module entry points. -// -// In some environments (e.g., Python), the ModuleOp of the source can be -// provided immediately to be launched, unlike with statically compiled systems -// (C++). 
These entry points allow the managed runtime to provide the ModuleOp -// directly. -//===----------------------------------------------------------------------===// - -// Client-server interface. The caller must provide an mlir::ModuleOp and the -// exact name of the entry point kernel function to be called, which is -// typically the .thunk unmarshalling function. Passing short names is -// considered incorrect. -[[nodiscard]] KernelThunkResultType -altLaunchModule(const char *exactEntryPointName, void *moduleOp, void *args, - std::uint64_t argsSize, std::uint64_t resultOffset); - -// Streamlined interface for launching kernels. Argument synthesis and JIT -// compilation *must* happen on the local machine. The caller must provide an -// mlir::ModuleOp and the exact name of the entry point kernel function to be -// called, -[[nodiscard]] KernelThunkResultType -streamlinedLaunchModule(const char *exactEntryPointName, void *moduleOp, - const std::vector &rawArgs); - -// Hybrid of the client-server and streamlined approaches. Letting JIT -// compilation happen either early or late and can handle return values from -// each kernel launch. The caller must provide an mlir::ModuleOp and the exact -// name of the entry point kernel function to be called, -[[nodiscard]] KernelThunkResultType -hybridLaunchModule(const char *exactEntryPointName, void *moduleOp, void *args, - std::uint64_t argsSize, std::uint64_t resultOffset, - const std::vector &rawArgs); } // extern "C" } // namespace cudaq From 24f0561e00d9858b530967994da314185d81be23 Mon Sep 17 00:00:00 2001 From: Eric Schweitz Date: Mon, 6 Oct 2025 10:31:04 -0700 Subject: [PATCH 3/3] Fix spelling. 
Signed-off-by: Eric Schweitz --- runtime/cudaq/builder/kernel_builder.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/runtime/cudaq/builder/kernel_builder.cpp b/runtime/cudaq/builder/kernel_builder.cpp index efd28f10804..4002e57397f 100644 --- a/runtime/cudaq/builder/kernel_builder.cpp +++ b/runtime/cudaq/builder/kernel_builder.cpp @@ -1103,7 +1103,7 @@ void invokeCode(ImplicitLocOpBuilder &builder, ExecutionEngine *jit, auto thunk = reinterpret_cast(*thunkPtr); // Extract the result offset, which we named. - auto roName = properName + ".resultOffset"; + auto roName = properName + ".returnOffset"; auto roPtr = jit->lookup(roName); if (!roPtr) throw std::runtime_error(