From 232f3cf33eaf50f0683a5c73d8cd907716d525bc Mon Sep 17 00:00:00 2001 From: Peter Harris Date: Mon, 2 Dec 2024 15:03:28 +0000 Subject: [PATCH 1/8] Add preliminary gpu_timeline layer --- .../source/layer_device_functions.hpp | 2 + layer_gpu_timeline/CMakeLists.txt | 4 +- layer_gpu_timeline/README_LAYER.md | 164 +++++++ .../docs/command_buffer_model.md | 155 +++++++ layer_gpu_timeline/source/CMakeLists.txt | 13 +- layer_gpu_timeline/source/device.cpp | 14 + layer_gpu_timeline/source/device.hpp | 39 +- .../source/layer_device_functions.hpp | 388 ++++++++++++++++ .../layer_device_functions_command_buffer.cpp | 161 +++++++ .../layer_device_functions_command_pool.cpp | 105 +++++ .../source/layer_device_functions_debug.cpp | 123 +++++ .../layer_device_functions_dispatch.cpp | 131 ++++++ .../layer_device_functions_draw_call.cpp | 251 ++++++++++ .../source/layer_device_functions_queue.cpp | 129 ++++++ .../layer_device_functions_render_pass.cpp | 433 ++++++++++++++++++ .../layer_device_functions_trace_rays.cpp | 109 +++++ layer_gpu_timeline/source/timeline_comms.cpp | 51 +++ layer_gpu_timeline/source/timeline_comms.hpp | 42 ++ lgl_host_server.py | 18 +- lglpy/service_gpu_timeline.py | 42 ++ source_common/CMakeLists.txt | 1 + source_common/comms/comms_module.cpp | 7 + source_common/trackers/CMakeLists.txt | 46 ++ source_common/trackers/command_buffer.cpp | 185 ++++++++ source_common/trackers/command_buffer.hpp | 218 +++++++++ source_common/trackers/device.cpp | 175 +++++++ source_common/trackers/device.hpp | 208 +++++++++ .../trackers/layer_command_stream.cpp | 161 +++++++ .../trackers/layer_command_stream.hpp | 196 ++++++++ source_common/trackers/queue.cpp | 67 +++ source_common/trackers/queue.hpp | 99 ++++ source_common/trackers/render_pass.cpp | 386 ++++++++++++++++ source_common/trackers/render_pass.hpp | 194 ++++++++ source_common/trackers/stats.hpp | 227 +++++++++ source_common/utils/misc.hpp | 61 ++- 35 files changed, 4588 insertions(+), 17 deletions(-) create mode 
100644 layer_gpu_timeline/README_LAYER.md create mode 100644 layer_gpu_timeline/docs/command_buffer_model.md create mode 100644 layer_gpu_timeline/source/layer_device_functions.hpp create mode 100644 layer_gpu_timeline/source/layer_device_functions_command_buffer.cpp create mode 100644 layer_gpu_timeline/source/layer_device_functions_command_pool.cpp create mode 100644 layer_gpu_timeline/source/layer_device_functions_debug.cpp create mode 100644 layer_gpu_timeline/source/layer_device_functions_dispatch.cpp create mode 100644 layer_gpu_timeline/source/layer_device_functions_draw_call.cpp create mode 100644 layer_gpu_timeline/source/layer_device_functions_queue.cpp create mode 100644 layer_gpu_timeline/source/layer_device_functions_render_pass.cpp create mode 100644 layer_gpu_timeline/source/layer_device_functions_trace_rays.cpp create mode 100644 layer_gpu_timeline/source/timeline_comms.cpp create mode 100644 layer_gpu_timeline/source/timeline_comms.hpp create mode 100644 lglpy/service_gpu_timeline.py create mode 100644 source_common/trackers/CMakeLists.txt create mode 100644 source_common/trackers/command_buffer.cpp create mode 100644 source_common/trackers/command_buffer.hpp create mode 100644 source_common/trackers/device.cpp create mode 100644 source_common/trackers/device.hpp create mode 100644 source_common/trackers/layer_command_stream.cpp create mode 100644 source_common/trackers/layer_command_stream.hpp create mode 100644 source_common/trackers/queue.cpp create mode 100644 source_common/trackers/queue.hpp create mode 100644 source_common/trackers/render_pass.cpp create mode 100644 source_common/trackers/render_pass.hpp create mode 100644 source_common/trackers/stats.hpp diff --git a/layer_example/source/layer_device_functions.hpp b/layer_example/source/layer_device_functions.hpp index df321c2..f3403c7 100644 --- a/layer_example/source/layer_device_functions.hpp +++ b/layer_example/source/layer_device_functions.hpp @@ -23,6 +23,8 @@ * 
---------------------------------------------------------------------------- */ +#include + #include "framework/utils.hpp" /* See Vulkan API for documentation. */ diff --git a/layer_gpu_timeline/CMakeLists.txt b/layer_gpu_timeline/CMakeLists.txt index b36ba8c..f3d8cd2 100644 --- a/layer_gpu_timeline/CMakeLists.txt +++ b/layer_gpu_timeline/CMakeLists.txt @@ -35,5 +35,7 @@ set(LGL_CONFIG_LOG 1) include(../source_common/compiler_helper.cmake) # Build steps -add_subdirectory(source) +add_subdirectory(../source_common/comms source_common/comms) add_subdirectory(../source_common/framework source_common/framework) +add_subdirectory(../source_common/trackers source_common/trackers) +add_subdirectory(source) diff --git a/layer_gpu_timeline/README_LAYER.md b/layer_gpu_timeline/README_LAYER.md new file mode 100644 index 0000000..8f8a56c --- /dev/null +++ b/layer_gpu_timeline/README_LAYER.md @@ -0,0 +1,164 @@ +# Layer: GPU Timeline + +This layer is used with Arm GPUs for tracking submitted schedulable workloads +and emitting semantic information about them. This data can be combined with +the raw workload execution timing information captured using the Android +Perfetto service, providing developers with a richer debug visualization. + +## What devices? + +The Arm GPU driver integration with the Perfetto render stages scheduler event +trace is supported at production quality since the r47p0 driver version. +However, associating semantics from this layer relies on a further integration +with debug labels which requires an r51p0 or later driver version. + +## What workloads? + +A schedulable workload is the smallest workload that the Arm GPU command stream +scheduler will issue to the GPU hardware work queues. 
This includes the +following workload types: + +* Render passes, split into: + * Vertex or Binning phase + * Fragment or Main phase +* Compute dispatches +* Trace rays +* Transfers to a buffer +* Transfers to an image + +Most workloads are dispatched using a single API call, and are trivial to +manage in the layer. However, render passes are more complex and need extra +handling. In particular: + +* Render passes are issued using multiple API calls. +* Useful render pass properties, such as draw count, are not known until the + render pass recording has ended. +* Dynamic render passes using `vkCmdBeginRendering()` and `vkCmdEndRendering()` + can be suspended and resumed across command buffer boundaries. Properties + such as draw count are not defined by the scope of a single command buffer. + +## Tracking workloads + +This layer tracks workloads encoded in command buffers, and emits semantic +metadata for each workload via a communications side-channel. A host tool +combines the semantic data stream with the Perfetto data stream, using debug +label tags injected by the layer as a common cross-reference to link across +the streams. + +### Workload labelling + +Command stream labelling is implemented using `vkCmdDebugMarkerBeginEXT()` +and `vkCmdDebugMarkerEndEXT()`, wrapping one layer-owned `tagID` label around +each semantic workload. This `tagID` can unambiguously refer to this workload +encoding, and metadata that we do not expect to change per submit will be +emitted using the matching `tagID` as the sole identifier. + +_**TODO:** Dynamic `submitID` tracking is not yet implemented._ + +The `tagID` label is encoded into the recorded command buffer which means, for +reusable command buffers, it is not an unambiguous identifier of a specific +running workload. To allow us to disambiguate specific workload instances, the +layer can optionally add an outer wrapper of `submitID` labels around each +submitted command buffer. 
This wrapper is only generated if the submit contains +any command buffers that require the generation of a per-submit annex (see the +following section for when this is needed). + +The `submitID.tagID` pair of IDs uniquely identifies a specific running +workload, and can be used to attach an instance-specific metadata annex to a +specific submitted workload rather than to the shared recorded command buffer. + +### Workload metadata for split render passes + +_**TODO:** Split render pass tracking is not yet implemented._ + +Dynamic render passes can be split across multiple Begin/End pairs, including +being split across command buffer boundaries. If these splits occur within a +single primary command buffer, or its secondaries, it is handled transparently +by the layer and it appears as a single message as if no splits occurred. If +these splits occur across primary command buffer boundaries, then some +additional work is required. + +In our design a `tagID` debug marker is only started when the render pass first +starts (not on resume), and stopped at the end of the render pass (not on +suspend). The same `tagID` is used to refer to all parts of the render pass, +no matter how many times it was suspended and resumed. + +If a render pass splits across command buffers, we cannot precompute metrics +based on `tagID` alone, even if the command buffers are one-time use. This is +because we do not know what combination of submitted command buffers will be +used, and so we cannot know what the render pass contains until submit time. +Split render passes will emit a `submitID.tagID` metadata annex containing +the parameters that can only be known at submit time. + +### Workload metadata for compute dispatches + +_**TODO:** Compute workgroup parsing from the SPIR-V is not yet implemented._ + +Compute workload dispatch is simple to track, but one of the metadata items we +want to export is the total size of the work space (work_group_count * +work_group_size). 
+ +The work group count is defined by the API call, but may be an indirect +parameter (see indirect tracking below). + +The work group size is defined by the program pipeline, and is defined in the +SPIR-V via a literal or a build-time specialization constant. To support this +use case we will need to parse the SPIR-V when the pipeline is built, if +SPIR-V is available. + +### Workload metadata for indirect calls + +_**TODO:** Indirect parameter tracking is not yet implemented._ + +One of the valuable pieces of metadata that we want to present is the size of +each workload. For render passes this is captured at API call time, but for +other workloads the size can be an indirect parameter that is not known when +the triggering API call is made. + +To capture indirect parameters we insert a transfer that copies the indirect +parameters into a layer-owned buffer. To ensure exclusive use of the buffer and +avoid data corruption, each buffer region used is unique to a specific `tagID`. +Attempting to submit the same command buffer multiple times will result in +the workload being serialized to avoid racy access to the buffer. Once the +buffer has been retrieved by the layer, a metadata annex containing the +indirect parameters will be emitted using the `submitID.tagID` pair. This may +be some time later than the original submit. + +### Workload metadata for user-defined labels + +The workload metadata captures user-defined labels that the application +provides using `vkCmdDebugMarkerBeginEXT()` and `vkCmdDebugMarkerEndEXT()`. +These are a stack-based debug mechanism where `Begin` pushes a new entry on +to the stack, and `End` pops the most recent level off the stack. + +Workloads are labelled with the stack values that existed when the workload +was started. For render passes this is the value on the stack when, e.g., +`vkCmdBeginRenderPass()` was called. We do not capture any labels that exist +inside the render pass. 
+ +The debug label stack belongs to the queue, not to the command buffer, so the +value of the label stack is not known until submit time. The debug information +for a specific `submitID.tagID` pair is therefore provided as an annex at +submit time once the stack can be resolved. + +## Message protocol + +For each workload in a command buffer, or part-workload in the case of a +suspended render pass, we record a JSON metadata blob containing the payload +we want to send. + +The low level protocol message contains: + +* Message type `uint8_t` +* Sequence ID `uint64_t` (optional, implied by message type) +* Tag ID `uint64_t` +* JSON length `uint32_t` +* JSON payload `uint8_t[]` + +Each workload will read whatever properties it can from the `tagID` metadata +and will then merge in all fields from any subsequent `sequenceID.tagID` +metadata that matches. + +- - - + +_Copyright © 2024, Arm Limited and contributors._ diff --git a/layer_gpu_timeline/docs/command_buffer_model.md b/layer_gpu_timeline/docs/command_buffer_model.md new file mode 100644 index 0000000..e7422d7 --- /dev/null +++ b/layer_gpu_timeline/docs/command_buffer_model.md @@ -0,0 +1,155 @@ +# Layer: GPU Timeline - Command Buffer Modelling + +One of the main challenges of this layer driver is modelling behavior in queues +and command buffers that is not known until submit time, and then taking +appropriate actions based on the combination of both the head state of the +queue and the content of the pre-recorded command buffers. + +Our design to solve this is a lightweight software command stream which is +recorded when a command buffer is recorded, and then executed when the +command buffer is submitted to the queue. Just like a real hardware command +stream these commands can update state or trigger some other action we need +performed. + +## Layer commands + +**MARKER_BEGIN(const std::string\*):** + +* Push a new marker into the queue debug label stack. 
+ +**MARKER_END():** + +* Pop the latest marker from the queue debug label stack. + +**RENDERPASS_BEGIN(const json\*):** + +* Set the current workload to a new render pass with the passed metadata. + +**RENDERPASS_RESUME(const json\*):** + +* Update the current workload, which must be a render pass, with extra + draw count metadata. + +**COMPUTE_DISPATCH_BEGIN(const json\*):** + +* Set the current workload to a new compute dispatch with the passed metadata. + +**TRACE_RAYS_BEGIN(const json\*):** + +* Set the current workload to a new trace rays with the passed metadata. + +**BUFFER_TRANSFER_BEGIN(const json\*):** + +* Set the current workload to a new buffer transfer. + +**IMAGE_TRANSFER(const json\*):** + +* Set the current workload to a new image transfer. + +**WORKLOAD_END():** + +* Mark the current workload as complete, and emit a built metadata entry for + it. + +## Layer command recording + +Command buffer recording is effectively building two separate state +structures for the layer. + +The first is a per-workload or per-restart JSON structure that contains the +metadata we need for that workload. For partial workloads - e.g. a dynamic +render pass begin that has been suspended - this metadata will be partial and +rely on later restart metadata to complete it. + +The second is the layer "command stream" that contains the bytecode commands +to execute when the command buffer is submitted to the queue. These commands +are very simple, consisting of a list of command+pointer pairs, where the +pointer value may be unused by some commands. Commands are stored in a +std::vector, but we reserve enough memory to store 256 commands without +reallocating which is enough for the majority of command buffers we see in +real applications. + +The command stream for a secondary command buffer is inlined into the primary +command buffer during recording. 
+ +### Recording sequence + +When application records a new workload: + + * A `tagID` is assigned and recorded using `vkCmdMarkerBegin()` label in the + Vulkan command stream _before_ the new workload is written to the command + stream. + * If workload is using indirect parameters, then a transfer job to copy + indirect parameters into a layer-owned buffer is emitted _before_ the new + workload. No additional barrier is needed because application barriers must + have already ensured that the indirect parameter buffer is valid. + * A proxy workload object is created in the layer storing the assigned + `tagID` and all settings that are known at command recording time. + * A layer command stream command is recorded into the submit time stream + indicating `_BEGIN` with a pointer to the proxy workload. Note that + this JSON may be modified later for some workloads. + * If workload is using indirect parameters, a layer command stream command is + recorded into the resolve time stream, which will handle cleanup and + emitting the `submitID.tagID` annex message for the indirect data. + * If the command buffer is not ONE_TIME_SUBMIT, if any workload is using + indirect parameters, or contains incomplete render passes, the command + buffer is marked as needing a `submitID` wrapper. + * The user command is written to the Vulkan command stream. + +When application resumes a render pass workload: + + * A `tagID` of zero is assigned, but not emitted to the command stream. + * A layer command stream command is recorded into the submit time stream + indicating `_RESUME` with a pointer to the proxy workload. Note that + this JSON may be modified later for some workloads. + * The user command is written to the Vulkan command stream. + +When application ends a workload: + + * For render pass workloads, any statistics accumulated since the last begin + are rolled up into the proxy workload object. 
+ * For render pass workloads, the user command is written to the Vulkan + command stream. + * The command stream label scope is closed using `vkCmdMarkerEnd()`. + +## Layer command playback + +The persistent state for command playback belongs to the queues the command +buffers are submitted to. The command stream bytecode is run by a bytecode +interpreter associated with the state of the current queue, giving the +interpreter access to the current `submitID` and queue debug label stack. + +### Submitting sequence + +For each command buffer in the user submit: + +* If the command buffer needs a `submitID` we allocate a unique `submitID` and + create two new command buffers that will wrap the user command buffer with an + additional stack layer of debug label containing the `s` string. We will + inject a layer command stream async command to handle freeing the command + buffers. +* The tool will process the submit-time layer commands, executing each command + to either update some state or emit a metadata message. +* If there are any async layer commands, either recorded in the command buffer + or from the wrapping command buffers, we will need to add an async handler. + This cannot safely use the user fence or depend on any user object lifetime, + so we will add a layer-owned timeline semaphore to the submit which we can + wait on to determine when it is safe to trigger the async work. + +## Future: Async commands + +One of our longer-term goals is to be able to capture indirect parameters, +which will be available after-the-fact once the GPU has processed the command +buffer. Once we have the data we can emit an annex message containing +parameters for each indirect `submitID.tagID` pair in the command buffer. + +We need to be able to emit the metadata after the commands are complete, +and correctly synchronize use of the indirect capture staging buffer +if command buffers are reissued. 
My current thinking is that we would +implement this using additional layer commands that are processed on submit, +including support for async commands that run in a separate thread and +wait on the command buffer completion fence before running. + +- - - + +_Copyright © 2024, Arm Limited and contributors._ diff --git a/layer_gpu_timeline/source/CMakeLists.txt b/layer_gpu_timeline/source/CMakeLists.txt index d267712..45e55c3 100644 --- a/layer_gpu_timeline/source/CMakeLists.txt +++ b/layer_gpu_timeline/source/CMakeLists.txt @@ -43,7 +43,16 @@ add_library( ${VK_LAYER} SHARED ${PROJECT_SOURCE_DIR}/../source_common/framework/entry.cpp device.cpp - instance.cpp) + instance.cpp + layer_device_functions_command_buffer.cpp + layer_device_functions_command_pool.cpp + layer_device_functions_debug.cpp + layer_device_functions_dispatch.cpp + layer_device_functions_draw_call.cpp + layer_device_functions_queue.cpp + layer_device_functions_render_pass.cpp + layer_device_functions_trace_rays.cpp + timeline_comms.cpp) target_include_directories( ${VK_LAYER} PRIVATE @@ -59,7 +68,9 @@ lgl_set_build_options(${VK_LAYER}) target_link_libraries( ${VK_LAYER} + lib_layer_comms lib_layer_framework + lib_layer_trackers $<$:log>) if (CMAKE_BUILD_TYPE STREQUAL "Release") diff --git a/layer_gpu_timeline/source/device.cpp b/layer_gpu_timeline/source/device.cpp index 580e339..271aab3 100644 --- a/layer_gpu_timeline/source/device.cpp +++ b/layer_gpu_timeline/source/device.cpp @@ -29,6 +29,7 @@ #include #include +#include "comms/comms_module.hpp" #include "framework/utils.hpp" #include "device.hpp" @@ -36,6 +37,12 @@ static std::unordered_map> g_devices; +/* See header for documentation. */ +std::unique_ptr Device::commsModule; + +/* See header for documentation. */ +std::unique_ptr Device::commsWrapper; + /* See header for documentation. 
*/ void Device::store( VkDevice handle, @@ -90,6 +97,13 @@ Device::Device( device(_device) { initDriverDeviceDispatchTable(device, nlayerGetProcAddress, driver); + + // Init the shared comms module for the first device built + if (!commsModule) + { + commsModule = std::make_unique("lglcomms"); + commsWrapper = std::make_unique(*commsModule); + } } /* See header for documentation. */ diff --git a/layer_gpu_timeline/source/device.hpp b/layer_gpu_timeline/source/device.hpp index d6ecad7..b04ace1 100644 --- a/layer_gpu_timeline/source/device.hpp +++ b/layer_gpu_timeline/source/device.hpp @@ -56,9 +56,12 @@ #include +#include "comms/comms_module.hpp" #include "framework/device_dispatch_table.hpp" +#include "trackers/device.hpp" #include "instance.hpp" +#include "timeline_comms.hpp" /** * @brief This class implements the layer state tracker for a single device. @@ -127,7 +130,29 @@ class Device */ ~Device(); + /** + * @brief Callback for sending messages + */ + void onWorkloadSubmit(const std::string& message) + { + commsWrapper->txMessage(message); + } + + /** + * @brief Get the state tracker for this device. + */ + Tracker::Device& getStateTracker() + { + return stateTracker; + } + public: + /** + * @brief The driver function dispatch table. + */ + DeviceDispatchTable driver {}; + +private: /** * @brief The instance this device is created with. */ @@ -144,7 +169,17 @@ class Device const VkDevice device; /** - * @brief The driver function dispatch table. + * @brief State tracking for this device. */ - DeviceDispatchTable driver {}; + Tracker::Device stateTracker; + + /** + * @brief Communications module. + */ + static std::unique_ptr commsModule; + + /** + * @brief Communications module message encoder. 
+ */ + static std::unique_ptr commsWrapper; }; diff --git a/layer_gpu_timeline/source/layer_device_functions.hpp b/layer_gpu_timeline/source/layer_device_functions.hpp new file mode 100644 index 0000000..129f1e6 --- /dev/null +++ b/layer_gpu_timeline/source/layer_device_functions.hpp @@ -0,0 +1,388 @@ +/* + * SPDX-License-Identifier: MIT + * ---------------------------------------------------------------------------- + * Copyright (c) 2024 Arm Limited + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * ---------------------------------------------------------------------------- + */ + +#include + +#include "framework/utils.hpp" + +// Functions for command pools + +/* See Vulkan API for documentation. 
*/ +template <> +VKAPI_ATTR VkResult VKAPI_CALL layer_vkCreateCommandPool( + VkDevice device, + const VkCommandPoolCreateInfo* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkCommandPool* pCommandPool); + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR VkResult VKAPI_CALL layer_vkResetCommandPool( + VkDevice device, + VkCommandPool commandPool, + VkCommandPoolResetFlags flags); + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkDestroyCommandPool( + VkDevice device, + VkCommandPool commandPool, + const VkAllocationCallbacks* pAllocator); + +// Functions for command buffers + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR VkResult VKAPI_CALL layer_vkAllocateCommandBuffers( + VkDevice device, + const VkCommandBufferAllocateInfo* pAllocateInfo, + VkCommandBuffer* pCommandBuffers); + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR VkResult layer_vkBeginCommandBuffer( + VkCommandBuffer commandBuffer, + const VkCommandBufferBeginInfo* pBeginInfo); + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdExecuteCommands( + VkCommandBuffer commandBuffer, + uint32_t commandBufferCount, + const VkCommandBuffer* pCommandBuffers); + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR VkResult VKAPI_CALL layer_vkResetCommandBuffer( + VkCommandBuffer commandBuffer, + VkCommandBufferResetFlags flags); + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkFreeCommandBuffers( + VkDevice device, + VkCommandPool commandPool, + uint32_t commandBufferCount, + const VkCommandBuffer* pCommandBuffers); + +// Functions for render passes + +/* See Vulkan API for documentation. 
*/ +template <> +VKAPI_ATTR VkResult VKAPI_CALL layer_vkCreateRenderPass( + VkDevice device, + const VkRenderPassCreateInfo* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkRenderPass* pRenderPass); + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR VkResult VKAPI_CALL layer_vkCreateRenderPass2( + VkDevice device, + const VkRenderPassCreateInfo2* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkRenderPass* pRenderPass); + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR VkResult VKAPI_CALL layer_vkCreateRenderPass2KHR( + VkDevice device, + const VkRenderPassCreateInfo2* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkRenderPass* pRenderPass); + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkDestroyRenderPass( + VkDevice device, + VkRenderPass renderPass, + const VkAllocationCallbacks* pAllocator); + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdBeginRenderPass( + VkCommandBuffer commandBuffer, + const VkRenderPassBeginInfo* pRenderPassBegin, + VkSubpassContents contents); + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdBeginRenderPass2( + VkCommandBuffer commandBuffer, + const VkRenderPassBeginInfo* pRenderPassBegin, + const VkSubpassBeginInfo* pSubpassBeginInfo); + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdBeginRenderPass2KHR( + VkCommandBuffer commandBuffer, + const VkRenderPassBeginInfo* pRenderPassBegin, + const VkSubpassBeginInfo* pSubpassBeginInfo); + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdBeginRendering( + VkCommandBuffer commandBuffer, + const VkRenderingInfo* pRenderingInfo); + +/* See Vulkan API for documentation. 
*/ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdBeginRenderingKHR( + VkCommandBuffer commandBuffer, + const VkRenderingInfo* pRenderingInfo); + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdEndRenderPass( + VkCommandBuffer commandBuffer); + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdEndRendering( + VkCommandBuffer commandBuffer); + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdEndRenderingKHR( + VkCommandBuffer commandBuffer); + +// Functions for draw calls + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdDraw( + VkCommandBuffer commandBuffer, + uint32_t vertexCount, + uint32_t instanceCount, + uint32_t firstVertex, + uint32_t firstInstance); + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdDrawIndexed( + VkCommandBuffer commandBuffer, + uint32_t indexCount, + uint32_t instanceCount, + uint32_t firstIndex, + int32_t vertexOffset, + uint32_t firstInstance); + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdDrawIndexedIndirect( + VkCommandBuffer commandBuffer, + VkBuffer buffer, + VkDeviceSize offset, + uint32_t drawCount, + uint32_t stride); + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdDrawIndexedIndirectCount( + VkCommandBuffer commandBuffer, + VkBuffer buffer, + VkDeviceSize offset, + VkBuffer countBuffer, + VkDeviceSize countBufferOffset, + uint32_t maxDrawCount, + uint32_t stride); + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdDrawIndexedIndirectCountKHR( + VkCommandBuffer commandBuffer, + VkBuffer buffer, + VkDeviceSize offset, + VkBuffer countBuffer, + VkDeviceSize countBufferOffset, + uint32_t maxDrawCount, + uint32_t stride); + +/* See Vulkan API for documentation. 
*/ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdDrawIndirect( + VkCommandBuffer commandBuffer, + VkBuffer buffer, + VkDeviceSize offset, + uint32_t drawCount, + uint32_t stride); + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdDrawIndirectByteCountEXT( + VkCommandBuffer commandBuffer, + uint32_t instanceCount, + uint32_t firstInstance, + VkBuffer counterBuffer, + VkDeviceSize counterBufferOffset, + uint32_t counterOffset, + uint32_t vertexStride); + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdDrawIndirectCount( + VkCommandBuffer commandBuffer, + VkBuffer buffer, + VkDeviceSize offset, + VkBuffer countBuffer, + VkDeviceSize countBufferOffset, + uint32_t maxDrawCount, + uint32_t stride); + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdDrawIndirectCountKHR( + VkCommandBuffer commandBuffer, + VkBuffer buffer, + VkDeviceSize offset, + VkBuffer countBuffer, + VkDeviceSize countBufferOffset, + uint32_t maxDrawCount, + uint32_t stride); + +// Functions for compute dispatches + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdDispatch( + VkCommandBuffer commandBuffer, + uint32_t groupCountX, + uint32_t groupCountY, + uint32_t groupCountZ); + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdDispatchBase( + VkCommandBuffer commandBuffer, + uint32_t baseGroupX, + uint32_t baseGroupY, + uint32_t baseGroupZ, + uint32_t groupCountX, + uint32_t groupCountY, + uint32_t groupCountZ); + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdDispatchBaseKHR( + VkCommandBuffer commandBuffer, + uint32_t baseGroupX, + uint32_t baseGroupY, + uint32_t baseGroupZ, + uint32_t groupCountX, + uint32_t groupCountY, + uint32_t groupCountZ); + +/* See Vulkan API for documentation. 
*/
+template <>
+VKAPI_ATTR void VKAPI_CALL layer_vkCmdDispatchIndirect(
+    VkCommandBuffer commandBuffer,
+    VkBuffer buffer,
+    VkDeviceSize offset);
+
+// Commands for trace rays
+
+/* See Vulkan API for documentation. */
+template <>
+VKAPI_ATTR void VKAPI_CALL layer_vkCmdTraceRaysIndirect2KHR(
+    VkCommandBuffer commandBuffer,
+    VkDeviceAddress indirectDeviceAddress);
+
+/* See Vulkan API for documentation. */
+template <>
+VKAPI_ATTR void VKAPI_CALL layer_vkCmdTraceRaysIndirectKHR(
+    VkCommandBuffer commandBuffer,
+    const VkStridedDeviceAddressRegionKHR* pRaygenShaderBindingTable,
+    const VkStridedDeviceAddressRegionKHR* pMissShaderBindingTable,
+    const VkStridedDeviceAddressRegionKHR* pHitShaderBindingTable,
+    const VkStridedDeviceAddressRegionKHR* pCallableShaderBindingTable,
+    VkDeviceAddress indirectDeviceAddress);
+
+/* See Vulkan API for documentation. */
+template <>
+VKAPI_ATTR void VKAPI_CALL layer_vkCmdTraceRaysKHR(
+    VkCommandBuffer commandBuffer,
+    const VkStridedDeviceAddressRegionKHR* pRaygenShaderBindingTable,
+    const VkStridedDeviceAddressRegionKHR* pMissShaderBindingTable,
+    const VkStridedDeviceAddressRegionKHR* pHitShaderBindingTable,
+    const VkStridedDeviceAddressRegionKHR* pCallableShaderBindingTable,
+    uint32_t width,
+    uint32_t height,
+    uint32_t depth);
+
+// Functions for debug
+//
+// The debug marker and debug label intercepts pass the user label to the
+// tracked command buffer; they do not forward the call to the driver.
+
+/* See Vulkan API for documentation. */
+template<>
+VKAPI_ATTR void VKAPI_CALL layer_vkCmdDebugMarkerBeginEXT(
+    VkCommandBuffer commandBuffer,
+    const VkDebugMarkerMarkerInfoEXT* pMarkerInfo);
+
+/* See Vulkan API for documentation. */
+template<>
+VKAPI_ATTR void VKAPI_CALL layer_vkCmdDebugMarkerEndEXT(
+    VkCommandBuffer commandBuffer);
+
+/* See Vulkan API for documentation. */
+template<>
+VKAPI_ATTR void VKAPI_CALL layer_vkCmdBeginDebugUtilsLabelEXT(
+    VkCommandBuffer commandBuffer,
+    const VkDebugUtilsLabelEXT* pLabelInfo);
+
+/* See Vulkan API for documentation.
*/
+template <>
+VKAPI_ATTR void VKAPI_CALL layer_vkCmdEndDebugUtilsLabelEXT(
+    VkCommandBuffer commandBuffer);
+
+// Functions for queues
+//
+// All queue intercepts forward the call to the driver and return its result.
+
+/* See Vulkan API for documentation. */
+template<>
+VKAPI_ATTR VkResult VKAPI_CALL layer_vkQueuePresentKHR(
+    VkQueue queue,
+    const VkPresentInfoKHR* pPresentInfo);
+
+/* See Vulkan API for documentation. */
+template<>
+VKAPI_ATTR VkResult VKAPI_CALL layer_vkQueueSubmit(
+    VkQueue queue,
+    uint32_t submitCount,
+    const VkSubmitInfo* pSubmits,
+    VkFence fence);
+
+/* See Vulkan API for documentation. */
+template<>
+VKAPI_ATTR VkResult VKAPI_CALL layer_vkQueueSubmit2(
+    VkQueue queue,
+    uint32_t submitCount,
+    const VkSubmitInfo2* pSubmits,
+    VkFence fence);
+
+/* See Vulkan API for documentation. */
+template<>
+VKAPI_ATTR VkResult VKAPI_CALL layer_vkQueueSubmit2KHR(
+    VkQueue queue,
+    uint32_t submitCount,
+    const VkSubmitInfo2* pSubmits,
+    VkFence fence);
diff --git a/layer_gpu_timeline/source/layer_device_functions_command_buffer.cpp b/layer_gpu_timeline/source/layer_device_functions_command_buffer.cpp
new file mode 100644
index 0000000..75fb36b
--- /dev/null
+++ b/layer_gpu_timeline/source/layer_device_functions_command_buffer.cpp
@@ -0,0 +1,161 @@
+/*
+ * SPDX-License-Identifier: MIT
+ * ----------------------------------------------------------------------------
+ * Copyright (c) 2024 Arm Limited
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ * ----------------------------------------------------------------------------
+ */
+
+#include
+#include
+#include
+
+#include "device.hpp"
+#include "layer_device_functions.hpp"
+
+extern std::mutex g_vulkanLock;
+
+/*
+ * Intercept for vkAllocateCommandBuffers; see Vulkan API for documentation.
+ *
+ * Forwards the allocation to the driver and, only if the driver returns
+ * VK_SUCCESS, registers every returned command buffer with the layer's state
+ * tracker against pAllocateInfo->commandPool. A driver failure is returned
+ * unchanged without touching the tracker.
+ */
+template <>
+VKAPI_ATTR VkResult VKAPI_CALL layer_vkAllocateCommandBuffers(
+    VkDevice device,
+    const VkCommandBufferAllocateInfo* pAllocateInfo,
+    VkCommandBuffer* pCommandBuffers
+) {
+    LAYER_TRACE(__func__);
+
+    // Hold the lock to access layer-wide global store
+    std::unique_lock lock { g_vulkanLock };
+    auto* layer = Device::retrieve(device);
+
+    // Release the lock to call into the driver
+    lock.unlock();
+    VkResult result = layer->driver.vkAllocateCommandBuffers(
+        device, pAllocateInfo, pCommandBuffers);
+    if (result != VK_SUCCESS)
+    {
+        return result;
+    }
+
+    // Retake the lock to access layer-wide global store
+    lock.lock();
+    auto& tracker = layer->getStateTracker();
+    for (uint32_t i = 0; i < pAllocateInfo->commandBufferCount; i++)
+    {
+        tracker.allocateCommandBuffer(
+            pAllocateInfo->commandPool, pCommandBuffers[i]);
+    }
+
+    return result;
+}
+
+/* See Vulkan API for documentation.
*/
+template <>
+VKAPI_ATTR VkResult VKAPI_CALL layer_vkBeginCommandBuffer(
+    VkCommandBuffer commandBuffer,
+    const VkCommandBufferBeginInfo* pBeginInfo
+) {
+    LAYER_TRACE(__func__);
+
+    // Hold the lock to access layer-wide global store
+    std::unique_lock lock { g_vulkanLock };
+    auto* layer = Device::retrieve(commandBuffer);
+
+    // Reset the tracked state for this command buffer before the driver
+    // starts recording into it
+    auto& tracker = layer->getStateTracker();
+    auto& cmdBuffer = tracker.getCommandBuffer(commandBuffer);
+    cmdBuffer.reset();
+
+    // Release the lock to call into the driver
+    lock.unlock();
+    return layer->driver.vkBeginCommandBuffer(commandBuffer, pBeginInfo);
+}
+
+/*
+ * Intercept for vkResetCommandBuffer; see Vulkan API for documentation.
+ *
+ * Resets the tracked state for the command buffer, then forwards the reset
+ * to the driver and returns the driver's result.
+ */
+template <>
+VKAPI_ATTR VkResult VKAPI_CALL layer_vkResetCommandBuffer(
+    VkCommandBuffer commandBuffer,
+    VkCommandBufferResetFlags flags
+) {
+    LAYER_TRACE(__func__);
+
+    // Hold the lock to access layer-wide global store
+    std::unique_lock lock { g_vulkanLock };
+    auto* layer = Device::retrieve(commandBuffer);
+
+    auto& tracker = layer->getStateTracker();
+    auto& cmdBuffer = tracker.getCommandBuffer(commandBuffer);
+    cmdBuffer.reset();
+
+    // Release the lock to call into the driver
+    lock.unlock();
+    return layer->driver.vkResetCommandBuffer(commandBuffer, flags);
+}
+
+/*
+ * Intercept for vkFreeCommandBuffers; see Vulkan API for documentation.
+ *
+ * Removes every listed command buffer from the layer's state tracker, then
+ * forwards the free to the driver.
+ */
+template <>
+VKAPI_ATTR void VKAPI_CALL layer_vkFreeCommandBuffers(
+    VkDevice device,
+    VkCommandPool commandPool,
+    uint32_t commandBufferCount,
+    const VkCommandBuffer* pCommandBuffers
+) {
+    LAYER_TRACE(__func__);
+
+    // Hold the lock to access layer-wide global store
+    std::unique_lock lock { g_vulkanLock };
+    auto* layer = Device::retrieve(device);
+
+    // NOTE(review): Vulkan allows NULL entries in pCommandBuffers - confirm
+    // that the tracker's freeCommandBuffer() tolerates a null handle
+    auto& tracker = layer->getStateTracker();
+    for (uint32_t i = 0; i < commandBufferCount; i++)
+    {
+        tracker.freeCommandBuffer(commandPool, pCommandBuffers[i]);
+    }
+
+    // Release the lock to call into the driver
+    lock.unlock();
+    layer->driver.vkFreeCommandBuffers(
+        device, commandPool, commandBufferCount, pCommandBuffers);
+}
+
+/* See Vulkan API for documentation.
*/
+template <>
+VKAPI_ATTR void VKAPI_CALL layer_vkCmdExecuteCommands(
+    VkCommandBuffer commandBuffer,
+    uint32_t commandBufferCount,
+    const VkCommandBuffer* pCommandBuffers
+) {
+    LAYER_TRACE(__func__);
+
+    // Hold the lock to access layer-wide global store and device-wide data
+    std::unique_lock lock { g_vulkanLock };
+    auto* layer = Device::retrieve(commandBuffer);
+
+    auto& tracker = layer->getStateTracker();
+    auto& primary = tracker.getCommandBuffer(commandBuffer);
+
+    // Hand each tracked secondary to the tracked primary, in array order
+    for (uint32_t i = 0; i < commandBufferCount; i++)
+    {
+        auto& secondary = tracker.getCommandBuffer(pCommandBuffers[i]);
+        primary.executeCommands(secondary);
+    }
+
+    // Release the lock to call into the main driver
+    lock.unlock();
+    layer->driver.vkCmdExecuteCommands(
+        commandBuffer, commandBufferCount, pCommandBuffers);
+}
diff --git a/layer_gpu_timeline/source/layer_device_functions_command_pool.cpp b/layer_gpu_timeline/source/layer_device_functions_command_pool.cpp
new file mode 100644
index 0000000..31bc1b4
--- /dev/null
+++ b/layer_gpu_timeline/source/layer_device_functions_command_pool.cpp
@@ -0,0 +1,105 @@
+/*
+ * SPDX-License-Identifier: MIT
+ * ----------------------------------------------------------------------------
+ * Copyright (c) 2024 Arm Limited
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ * ----------------------------------------------------------------------------
+ */
+
+#include
+#include
+#include
+
+#include "device.hpp"
+#include "layer_device_functions.hpp"
+
+extern std::mutex g_vulkanLock;
+
+/*
+ * Intercept for vkCreateCommandPool; see Vulkan API for documentation.
+ *
+ * Forwards the creation to the driver and, only if the driver returns
+ * VK_SUCCESS, registers the new pool with the layer's state tracker. A
+ * driver failure is returned unchanged without touching the tracker.
+ */
+template <>
+VKAPI_ATTR VkResult VKAPI_CALL layer_vkCreateCommandPool(
+    VkDevice device,
+    const VkCommandPoolCreateInfo* pCreateInfo,
+    const VkAllocationCallbacks* pAllocator,
+    VkCommandPool* pCommandPool
+) {
+    LAYER_TRACE(__func__);
+
+    // Hold the lock to access layer-wide global store
+    std::unique_lock lock { g_vulkanLock };
+    auto* layer = Device::retrieve(device);
+
+    // Release the lock to call into the driver
+    lock.unlock();
+    VkResult result = layer->driver.vkCreateCommandPool(
+        device, pCreateInfo, pAllocator, pCommandPool);
+    if (result != VK_SUCCESS)
+    {
+        return result;
+    }
+
+    // Retake the lock to access layer-wide global store
+    lock.lock();
+    auto& tracker = layer->getStateTracker();
+    tracker.createCommandPool(*pCommandPool);
+    return result;
+}
+
+/* See Vulkan API for documentation.
*/
+template <>
+VKAPI_ATTR VkResult VKAPI_CALL layer_vkResetCommandPool(
+    VkDevice device,
+    VkCommandPool commandPool,
+    VkCommandPoolResetFlags flags
+) {
+    LAYER_TRACE(__func__);
+
+    // Hold the lock to access layer-wide global store
+    std::unique_lock lock { g_vulkanLock };
+    auto* layer = Device::retrieve(device);
+
+    // Reset the tracked state for this pool before the driver resets it
+    auto& tracker = layer->getStateTracker();
+    tracker.getCommandPool(commandPool).reset();
+
+    // Release the lock to call into the driver
+    lock.unlock();
+    return layer->driver.vkResetCommandPool(device, commandPool, flags);
+}
+
+/*
+ * Intercept for vkDestroyCommandPool; see Vulkan API for documentation.
+ *
+ * Removes the pool from the layer's state tracker, then forwards the
+ * destruction to the driver.
+ */
+template <>
+VKAPI_ATTR void VKAPI_CALL layer_vkDestroyCommandPool(
+    VkDevice device,
+    VkCommandPool commandPool,
+    const VkAllocationCallbacks* pAllocator
+) {
+    LAYER_TRACE(__func__);
+
+    // Hold the lock to access layer-wide global store
+    std::unique_lock lock { g_vulkanLock };
+    auto* layer = Device::retrieve(device);
+
+    auto& tracker = layer->getStateTracker();
+    tracker.destroyCommandPool(commandPool);
+
+    // Release the lock to call into the driver
+    lock.unlock();
+    layer->driver.vkDestroyCommandPool(device, commandPool, pAllocator);
+}
diff --git a/layer_gpu_timeline/source/layer_device_functions_debug.cpp b/layer_gpu_timeline/source/layer_device_functions_debug.cpp
new file mode 100644
index 0000000..4c1e1d9
--- /dev/null
+++ b/layer_gpu_timeline/source/layer_device_functions_debug.cpp
@@ -0,0 +1,123 @@
+/*
+ * SPDX-License-Identifier: MIT
+ * ----------------------------------------------------------------------------
+ * Copyright (c) 2024 Arm Limited
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to
do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ * ----------------------------------------------------------------------------
+ */
+
+#include
+#include
+#include
+
+#include "device.hpp"
+#include "layer_device_functions.hpp"
+
+extern std::mutex g_vulkanLock;
+
+/*
+ * Intercept for vkCmdDebugMarkerBeginEXT; see Vulkan API for documentation.
+ *
+ * Passes the marker name to the tracked command buffer. The call is
+ * deliberately not forwarded to the driver.
+ */
+template<>
+VKAPI_ATTR void VKAPI_CALL layer_vkCmdDebugMarkerBeginEXT(
+    VkCommandBuffer commandBuffer,
+    const VkDebugMarkerMarkerInfoEXT* pMarkerInfo
+) {
+    LAYER_TRACE(__func__);
+
+    // Hold the lock to access layer-wide global store
+    std::unique_lock lock { g_vulkanLock };
+    auto* layer = Device::retrieve(commandBuffer);
+
+    auto& tracker = layer->getStateTracker();
+    auto& cb = tracker.getCommandBuffer(commandBuffer);
+
+    // Record the start of the user debug marker region in the tracker
+    cb.debugMarkerBegin(pMarkerInfo->pMarkerName);
+
+    // Note that we do not call the driver for user labels - they are
+    // emitted via the comms side-channel for each workload to avoid
+    // polluting the layer's use of the driver for tag labelling
+}
+
+/* See Vulkan API for documentation.
*/
+template<>
+VKAPI_ATTR void VKAPI_CALL layer_vkCmdDebugMarkerEndEXT(
+    VkCommandBuffer commandBuffer
+) {
+    LAYER_TRACE(__func__);
+
+    // Hold the lock to access layer-wide global store
+    std::unique_lock lock { g_vulkanLock };
+    auto* layer = Device::retrieve(commandBuffer);
+
+    auto& tracker = layer->getStateTracker();
+    auto& cb = tracker.getCommandBuffer(commandBuffer);
+
+    // Record the end of the user debug marker region in the tracker
+    cb.debugMarkerEnd();
+
+    // Note that we do not call the driver for user labels - they are
+    // emitted via the comms side-channel for each workload to avoid
+    // polluting the layer's use of the driver for tag labelling
+}
+
+/*
+ * Intercept for vkCmdBeginDebugUtilsLabelEXT; see Vulkan API for documentation.
+ *
+ * Passes the label name to the tracked command buffer. The call is
+ * deliberately not forwarded to the driver.
+ */
+template<>
+VKAPI_ATTR void VKAPI_CALL layer_vkCmdBeginDebugUtilsLabelEXT(
+    VkCommandBuffer commandBuffer,
+    const VkDebugUtilsLabelEXT* pLabelInfo
+) {
+    LAYER_TRACE(__func__);
+
+    // Hold the lock to access layer-wide global store
+    std::unique_lock lock { g_vulkanLock };
+    auto* layer = Device::retrieve(commandBuffer);
+
+    auto& tracker = layer->getStateTracker();
+    auto& cb = tracker.getCommandBuffer(commandBuffer);
+
+    // Record the start of the user debug label region in the tracker
+    cb.debugMarkerBegin(pLabelInfo->pLabelName);
+
+    // Note that we do not call the driver for user labels - they are
+    // emitted via the comms side-channel for each workload to avoid
+    // polluting the layer's use of the driver for tag labelling
+}
+
+/* See Vulkan API for documentation.
*/
+template<>
+VKAPI_ATTR void VKAPI_CALL layer_vkCmdEndDebugUtilsLabelEXT(
+    VkCommandBuffer commandBuffer
+) {
+    LAYER_TRACE(__func__);
+
+    // Hold the lock to access layer-wide global store
+    std::unique_lock lock { g_vulkanLock };
+    auto* layer = Device::retrieve(commandBuffer);
+
+    auto& tracker = layer->getStateTracker();
+    auto& cb = tracker.getCommandBuffer(commandBuffer);
+
+    // Record the end of the user debug label region in the tracker
+    cb.debugMarkerEnd();
+
+    // Note that we do not call the driver for user labels - they are
+    // emitted via the comms side-channel for each workload to avoid
+    // polluting the layer's use of the driver for tag labelling
+}
diff --git a/layer_gpu_timeline/source/layer_device_functions_dispatch.cpp b/layer_gpu_timeline/source/layer_device_functions_dispatch.cpp
new file mode 100644
index 0000000..88bf568
--- /dev/null
+++ b/layer_gpu_timeline/source/layer_device_functions_dispatch.cpp
@@ -0,0 +1,131 @@
+/*
+ * SPDX-License-Identifier: MIT
+ * ----------------------------------------------------------------------------
+ * Copyright (c) 2024 Arm Limited
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ * ----------------------------------------------------------------------------
+ */
+
+#include
+#include
+#include
+
+#include "device.hpp"
+#include "layer_device_functions.hpp"
+
+extern std::mutex g_vulkanLock;
+
+/*
+ * Increment the dispatch count in the stats of a tracked command buffer.
+ *
+ * Every caller in this file holds g_vulkanLock while calling this.
+ *
+ * layer:         The layer device that owns the state tracker.
+ * commandBuffer: The command buffer the dispatch is recorded into.
+ */
+static void registerDispatch(
+    Device* layer,
+    VkCommandBuffer commandBuffer
+) {
+    auto& state = layer->getStateTracker();
+    auto& stats = state.getCommandBuffer(commandBuffer).getStats();
+    stats.incDispatchCount();
+}
+
+/* Counts the dispatch, then forwards it to the driver. See Vulkan API. */
+template <>
+VKAPI_ATTR void VKAPI_CALL layer_vkCmdDispatch(
+    VkCommandBuffer commandBuffer,
+    uint32_t groupCountX,
+    uint32_t groupCountY,
+    uint32_t groupCountZ
+) {
+    LAYER_TRACE(__func__);
+
+    // Hold the lock to access layer-wide global store
+    std::unique_lock lock { g_vulkanLock };
+    auto* layer = Device::retrieve(commandBuffer);
+
+    registerDispatch(layer, commandBuffer);
+
+    // Release the lock to call into the driver
+    lock.unlock();
+    layer->driver.vkCmdDispatch(commandBuffer, groupCountX, groupCountY, groupCountZ);
+}
+
+/* See Vulkan API for documentation.
*/
+template <>
+VKAPI_ATTR void VKAPI_CALL layer_vkCmdDispatchBase(
+    VkCommandBuffer commandBuffer,
+    uint32_t baseGroupX,
+    uint32_t baseGroupY,
+    uint32_t baseGroupZ,
+    uint32_t groupCountX,
+    uint32_t groupCountY,
+    uint32_t groupCountZ
+) {
+    LAYER_TRACE(__func__);
+
+    // Hold the lock to access layer-wide global store
+    std::unique_lock lock { g_vulkanLock };
+    auto* layer = Device::retrieve(commandBuffer);
+
+    // Count this dispatch in the stats of the tracked command buffer
+    registerDispatch(layer, commandBuffer);
+
+    // Release the lock to call into the driver
+    lock.unlock();
+    layer->driver.vkCmdDispatchBase(commandBuffer, baseGroupX, baseGroupY, baseGroupZ, groupCountX, groupCountY, groupCountZ);
+}
+
+/* Counts the dispatch, then forwards it to the driver. See Vulkan API. */
+template <>
+VKAPI_ATTR void VKAPI_CALL layer_vkCmdDispatchBaseKHR(
+    VkCommandBuffer commandBuffer,
+    uint32_t baseGroupX,
+    uint32_t baseGroupY,
+    uint32_t baseGroupZ,
+    uint32_t groupCountX,
+    uint32_t groupCountY,
+    uint32_t groupCountZ
+) {
+    LAYER_TRACE(__func__);
+
+    // Hold the lock to access layer-wide global store
+    std::unique_lock lock { g_vulkanLock };
+    auto* layer = Device::retrieve(commandBuffer);
+
+    registerDispatch(layer, commandBuffer);
+
+    // Release the lock to call into the driver
+    lock.unlock();
+    layer->driver.vkCmdDispatchBaseKHR(commandBuffer, baseGroupX, baseGroupY, baseGroupZ, groupCountX, groupCountY, groupCountZ);
+}
+
+/* See Vulkan API for documentation.
*/
+template <>
+VKAPI_ATTR void VKAPI_CALL layer_vkCmdDispatchIndirect(
+    VkCommandBuffer commandBuffer,
+    VkBuffer buffer,
+    VkDeviceSize offset
+) {
+    LAYER_TRACE(__func__);
+
+    // Hold the lock to access layer-wide global store
+    std::unique_lock lock { g_vulkanLock };
+    auto* layer = Device::retrieve(commandBuffer);
+
+    // Count this dispatch in the stats of the tracked command buffer
+    registerDispatch(layer, commandBuffer);
+
+    // Release the lock to call into the driver
+    lock.unlock();
+    layer->driver.vkCmdDispatchIndirect(commandBuffer, buffer, offset);
+}
diff --git a/layer_gpu_timeline/source/layer_device_functions_draw_call.cpp b/layer_gpu_timeline/source/layer_device_functions_draw_call.cpp
new file mode 100644
index 0000000..49cf669
--- /dev/null
+++ b/layer_gpu_timeline/source/layer_device_functions_draw_call.cpp
@@ -0,0 +1,251 @@
+/*
+ * SPDX-License-Identifier: MIT
+ * ----------------------------------------------------------------------------
+ * Copyright (c) 2024 Arm Limited
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ * ----------------------------------------------------------------------------
+ */
+
+#include
+#include
+#include
+
+#include "device.hpp"
+#include "layer_device_functions.hpp"
+
+extern std::mutex g_vulkanLock;
+
+/*
+ * Increment the draw call count in the stats of a tracked command buffer.
+ *
+ * Every caller in this file holds g_vulkanLock while calling this.
+ *
+ * layer:         The layer device that owns the state tracker.
+ * commandBuffer: The command buffer the draw call is recorded into.
+ */
+static void registerDrawCall(
+    Device* layer,
+    VkCommandBuffer commandBuffer
+) {
+    auto& state = layer->getStateTracker();
+    auto& stats = state.getCommandBuffer(commandBuffer).getStats();
+    stats.incDrawCallCount();
+}
+
+/* Counts the draw call, then forwards it to the driver. See Vulkan API. */
+template <>
+VKAPI_ATTR void VKAPI_CALL layer_vkCmdDraw(
+    VkCommandBuffer commandBuffer,
+    uint32_t vertexCount,
+    uint32_t instanceCount,
+    uint32_t firstVertex,
+    uint32_t firstInstance
+) {
+    LAYER_TRACE(__func__);
+
+    // Hold the lock to access layer-wide global store
+    std::unique_lock lock { g_vulkanLock };
+    auto* layer = Device::retrieve(commandBuffer);
+
+    registerDrawCall(layer, commandBuffer);
+
+    // Release the lock to call into the driver
+    lock.unlock();
+    layer->driver.vkCmdDraw(commandBuffer, vertexCount, instanceCount, firstVertex, firstInstance);
+}
+
+/* See Vulkan API for documentation.
*/
+template <>
+VKAPI_ATTR void VKAPI_CALL layer_vkCmdDrawIndexed(
+    VkCommandBuffer commandBuffer,
+    uint32_t indexCount,
+    uint32_t instanceCount,
+    uint32_t firstIndex,
+    int32_t vertexOffset,
+    uint32_t firstInstance
+) {
+    LAYER_TRACE(__func__);
+
+    // Hold the lock to access layer-wide global store
+    std::unique_lock lock { g_vulkanLock };
+    auto* layer = Device::retrieve(commandBuffer);
+
+    // Count this draw call in the stats of the tracked command buffer
+    registerDrawCall(layer, commandBuffer);
+
+    // Release the lock to call into the driver
+    lock.unlock();
+    layer->driver.vkCmdDrawIndexed(commandBuffer, indexCount, instanceCount, firstIndex, vertexOffset, firstInstance);
+}
+
+/* Counts the draw call, then forwards it to the driver. See Vulkan API. */
+template <>
+VKAPI_ATTR void VKAPI_CALL layer_vkCmdDrawIndexedIndirect(
+    VkCommandBuffer commandBuffer,
+    VkBuffer buffer,
+    VkDeviceSize offset,
+    uint32_t drawCount,
+    uint32_t stride
+) {
+    LAYER_TRACE(__func__);
+
+    // Hold the lock to access layer-wide global store
+    std::unique_lock lock { g_vulkanLock };
+    auto* layer = Device::retrieve(commandBuffer);
+
+    registerDrawCall(layer, commandBuffer);
+
+    // Release the lock to call into the driver
+    lock.unlock();
+    layer->driver.vkCmdDrawIndexedIndirect(commandBuffer, buffer, offset, drawCount, stride);
+}
+
+/* Counts the draw call, then forwards it to the driver. See Vulkan API. */
+template <>
+VKAPI_ATTR void VKAPI_CALL layer_vkCmdDrawIndexedIndirectCount(
+    VkCommandBuffer commandBuffer,
+    VkBuffer buffer,
+    VkDeviceSize offset,
+    VkBuffer countBuffer,
+    VkDeviceSize countBufferOffset,
+    uint32_t maxDrawCount,
+    uint32_t stride
+) {
+    LAYER_TRACE(__func__);
+
+    // Hold the lock to access layer-wide global store
+    std::unique_lock lock { g_vulkanLock };
+    auto* layer = Device::retrieve(commandBuffer);
+
+    registerDrawCall(layer, commandBuffer);
+
+    // Release the lock to call into the driver
+    lock.unlock();
+    layer->driver.vkCmdDrawIndexedIndirectCount(commandBuffer, buffer, offset, countBuffer, countBufferOffset, maxDrawCount, stride);
+}
+
+/* See Vulkan API for documentation.
*/
+template <>
+VKAPI_ATTR void VKAPI_CALL layer_vkCmdDrawIndexedIndirectCountKHR(
+    VkCommandBuffer commandBuffer,
+    VkBuffer buffer,
+    VkDeviceSize offset,
+    VkBuffer countBuffer,
+    VkDeviceSize countBufferOffset,
+    uint32_t maxDrawCount,
+    uint32_t stride
+) {
+    LAYER_TRACE(__func__);
+
+    // Hold the lock to access layer-wide global store
+    std::unique_lock lock { g_vulkanLock };
+    auto* layer = Device::retrieve(commandBuffer);
+
+    // Count this draw call in the stats of the tracked command buffer
+    registerDrawCall(layer, commandBuffer);
+
+    // Release the lock to call into the driver
+    lock.unlock();
+    layer->driver.vkCmdDrawIndexedIndirectCountKHR(commandBuffer, buffer, offset, countBuffer, countBufferOffset, maxDrawCount, stride);
+}
+
+/* Counts the draw call, then forwards it to the driver. See Vulkan API. */
+template <>
+VKAPI_ATTR void VKAPI_CALL layer_vkCmdDrawIndirect(
+    VkCommandBuffer commandBuffer,
+    VkBuffer buffer,
+    VkDeviceSize offset,
+    uint32_t drawCount,
+    uint32_t stride
+) {
+    LAYER_TRACE(__func__);
+
+    // Hold the lock to access layer-wide global store
+    std::unique_lock lock { g_vulkanLock };
+    auto* layer = Device::retrieve(commandBuffer);
+
+    registerDrawCall(layer, commandBuffer);
+
+    // Release the lock to call into the driver
+    lock.unlock();
+    layer->driver.vkCmdDrawIndirect(commandBuffer, buffer, offset, drawCount, stride);
+}
+
+/* See Vulkan API for documentation.
*/
+template <>
+VKAPI_ATTR void VKAPI_CALL layer_vkCmdDrawIndirectByteCountEXT(
+    VkCommandBuffer commandBuffer,
+    uint32_t instanceCount,
+    uint32_t firstInstance,
+    VkBuffer counterBuffer,
+    VkDeviceSize counterBufferOffset,
+    uint32_t counterOffset,
+    uint32_t vertexStride
+) {
+    LAYER_TRACE(__func__);
+
+    // Hold the lock to access layer-wide global store
+    std::unique_lock lock { g_vulkanLock };
+    auto* layer = Device::retrieve(commandBuffer);
+
+    // Count this draw call in the stats of the tracked command buffer
+    registerDrawCall(layer, commandBuffer);
+
+    // Release the lock to call into the driver
+    lock.unlock();
+    layer->driver.vkCmdDrawIndirectByteCountEXT(commandBuffer, instanceCount, firstInstance, counterBuffer, counterBufferOffset, counterOffset, vertexStride);
+}
+
+/* Counts the draw call, then forwards it to the driver. See Vulkan API. */
+template <>
+VKAPI_ATTR void VKAPI_CALL layer_vkCmdDrawIndirectCount(
+    VkCommandBuffer commandBuffer,
+    VkBuffer buffer,
+    VkDeviceSize offset,
+    VkBuffer countBuffer,
+    VkDeviceSize countBufferOffset,
+    uint32_t maxDrawCount,
+    uint32_t stride
+) {
+    LAYER_TRACE(__func__);
+
+    // Hold the lock to access layer-wide global store
+    std::unique_lock lock { g_vulkanLock };
+    auto* layer = Device::retrieve(commandBuffer);
+
+    registerDrawCall(layer, commandBuffer);
+
+    // Release the lock to call into the driver
+    lock.unlock();
+    layer->driver.vkCmdDrawIndirectCount(commandBuffer, buffer, offset, countBuffer, countBufferOffset, maxDrawCount, stride);
+}
+
+/* See Vulkan API for documentation.
*/
+template <>
+VKAPI_ATTR void VKAPI_CALL layer_vkCmdDrawIndirectCountKHR(
+    VkCommandBuffer commandBuffer,
+    VkBuffer buffer,
+    VkDeviceSize offset,
+    VkBuffer countBuffer,
+    VkDeviceSize countBufferOffset,
+    uint32_t maxDrawCount,
+    uint32_t stride
+) {
+    LAYER_TRACE(__func__);
+
+    // Hold the lock to access layer-wide global store
+    std::unique_lock lock { g_vulkanLock };
+    auto* layer = Device::retrieve(commandBuffer);
+
+    // Count this draw call in the stats of the tracked command buffer
+    registerDrawCall(layer, commandBuffer);
+
+    // Release the lock to call into the driver
+    lock.unlock();
+    layer->driver.vkCmdDrawIndirectCountKHR(commandBuffer, buffer, offset, countBuffer, countBufferOffset, maxDrawCount, stride);
+}
diff --git a/layer_gpu_timeline/source/layer_device_functions_queue.cpp b/layer_gpu_timeline/source/layer_device_functions_queue.cpp
new file mode 100644
index 0000000..906a39e
--- /dev/null
+++ b/layer_gpu_timeline/source/layer_device_functions_queue.cpp
@@ -0,0 +1,129 @@
+/*
+ * SPDX-License-Identifier: MIT
+ * ----------------------------------------------------------------------------
+ * Copyright (c) 2024 Arm Limited
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ * ----------------------------------------------------------------------------
+ */
+
+#include
+#include
+#include
+
+#include "device.hpp"
+#include "layer_device_functions.hpp"
+#include "utils/misc.hpp"
+
+extern std::mutex g_vulkanLock;
+
+/*
+ * Intercept for vkQueuePresentKHR; see Vulkan API for documentation.
+ *
+ * Notifies the layer's state tracker of the present, then forwards the call
+ * to the driver and returns the driver's result.
+ */
+template<>
+VKAPI_ATTR VkResult VKAPI_CALL layer_vkQueuePresentKHR(
+    VkQueue queue,
+    const VkPresentInfoKHR* pPresentInfo
+) {
+    LAYER_TRACE(__func__);
+
+    // Hold the lock to access layer-wide global store
+    std::unique_lock lock { g_vulkanLock };
+    auto* layer = Device::retrieve(queue);
+
+    auto& tracker = layer->getStateTracker();
+    tracker.queuePresent();
+
+    // Release the lock to call into the driver
+    lock.unlock();
+    return layer->driver.vkQueuePresentKHR(queue, pPresentInfo);
+}
+
+/* See Vulkan API for documentation.
*/
+template<>
+VKAPI_ATTR VkResult VKAPI_CALL layer_vkQueueSubmit(
+    VkQueue queue,
+    uint32_t submitCount,
+    const VkSubmitInfo* pSubmits,
+    VkFence fence
+) {
+    LAYER_TRACE(__func__);
+
+    // Hold the lock to access layer-wide global store
+    std::unique_lock lock { g_vulkanLock };
+    auto* layer = Device::retrieve(queue);
+
+    // Callback the tracked queue invokes while running a command stream
+    using namespace std::placeholders;
+    auto onSubmit = std::bind(&Device::onWorkloadSubmit, layer, _1);
+
+    auto& tracker = layer->getStateTracker();
+    auto& trackQueue = tracker.getQueue(queue);
+
+    // Run the command stream of every submitted command buffer, in
+    // submission order, through the tracked queue
+    for (uint32_t i = 0; i < submitCount; i++)
+    {
+        const auto& submit = pSubmits[i];
+        for (uint32_t j = 0; j < submit.commandBufferCount; j++)
+        {
+            auto& trackCB = tracker.getCommandBuffer(submit.pCommandBuffers[j]);
+            const auto& LCS = trackCB.getSubmitCommandStream();
+
+            trackQueue.runSubmitCommandStream(LCS, onSubmit);
+        }
+    }
+
+    // Release the lock to call into the driver
+    lock.unlock();
+    return layer->driver.vkQueueSubmit(queue, submitCount, pSubmits, fence);
+}
+
+/*
+ * Intercept for vkQueueSubmit2; see Vulkan API for documentation.
+ *
+ * Runs the command stream of every submitted command buffer through the
+ * tracked queue, exactly as layer_vkQueueSubmit does, then forwards the
+ * submit to the driver and returns the driver's result.
+ */
+template<>
+VKAPI_ATTR VkResult VKAPI_CALL layer_vkQueueSubmit2(
+    VkQueue queue,
+    uint32_t submitCount,
+    const VkSubmitInfo2* pSubmits,
+    VkFence fence
+) {
+    LAYER_TRACE(__func__);
+
+    // Hold the lock to access layer-wide global store
+    std::unique_lock lock { g_vulkanLock };
+    auto* layer = Device::retrieve(queue);
+
+    // Callback the tracked queue invokes while running a command stream
+    using namespace std::placeholders;
+    auto onSubmit = std::bind(&Device::onWorkloadSubmit, layer, _1);
+
+    auto& tracker = layer->getStateTracker();
+    auto& trackQueue = tracker.getQueue(queue);
+
+    // VkSubmitInfo2 wraps each command buffer in a VkCommandBufferSubmitInfo
+    for (uint32_t i = 0; i < submitCount; i++)
+    {
+        const auto& submit = pSubmits[i];
+        for (uint32_t j = 0; j < submit.commandBufferInfoCount; j++)
+        {
+            auto& trackCB = tracker.getCommandBuffer(submit.pCommandBufferInfos[j].commandBuffer);
+            const auto& LCS = trackCB.getSubmitCommandStream();
+
+            trackQueue.runSubmitCommandStream(LCS, onSubmit);
+        }
+    }
+
+    // Release the lock to call into the driver
+    lock.unlock();
+    return layer->driver.vkQueueSubmit2(queue, submitCount, pSubmits, fence);
+}
+
+/* See Vulkan API for documentation.
*/
+template<>
+VKAPI_ATTR VkResult VKAPI_CALL layer_vkQueueSubmit2KHR(
+    VkQueue queue,
+    uint32_t submitCount,
+    const VkSubmitInfo2* pSubmits,
+    VkFence fence
+) {
+    LAYER_TRACE(__func__);
+
+    // Hold the lock to access layer-wide global store
+    std::unique_lock lock { g_vulkanLock };
+    auto* layer = Device::retrieve(queue);
+
+    // Callback handed to the queue tracker, bound to this layer device
+    using namespace std::placeholders;
+    auto onSubmit = std::bind(&Device::onWorkloadSubmit, layer, _1);
+
+    auto& tracker = layer->getStateTracker();
+    auto& trackQueue = tracker.getQueue(queue);
+
+    // Run the layer command stream of every submitted command buffer, as the
+    // vkQueueSubmit intercept does; VkSubmitInfo2 wraps each command buffer
+    // in a VkCommandBufferSubmitInfo
+    for (uint32_t i = 0; i < submitCount; i++)
+    {
+        const auto& submit = pSubmits[i];
+        for (uint32_t j = 0; j < submit.commandBufferInfoCount; j++)
+        {
+            auto& trackCB = tracker.getCommandBuffer(submit.pCommandBufferInfos[j].commandBuffer);
+            const auto& LCS = trackCB.getSubmitCommandStream();
+
+            trackQueue.runSubmitCommandStream(LCS, onSubmit);
+        }
+    }
+
+    // Release the lock to call into the driver
+    lock.unlock();
+    return layer->driver.vkQueueSubmit2KHR(queue, submitCount, pSubmits, fence);
+}
diff --git a/layer_gpu_timeline/source/layer_device_functions_render_pass.cpp b/layer_gpu_timeline/source/layer_device_functions_render_pass.cpp
new file mode 100644
index 0000000..3486da3
--- /dev/null
+++ b/layer_gpu_timeline/source/layer_device_functions_render_pass.cpp
@@ -0,0 +1,433 @@
+/*
+ * SPDX-License-Identifier: MIT
+ * ----------------------------------------------------------------------------
+ * Copyright (c) 2024 Arm Limited
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ * ----------------------------------------------------------------------------
+ */
+
+#include
+#include
+#include
+
+#include "device.hpp"
+#include "layer_device_functions.hpp"
+
+#include "framework/utils.hpp"
+#include "trackers/render_pass.hpp"
+
+extern std::mutex g_vulkanLock;
+
+/**
+ * @brief Layer intercept for vkCreateRenderPass.
+ *
+ * Creates the render pass in the driver and, only if that succeeds,
+ * registers the new handle and its create info with the state tracker.
+ *
+ * See Vulkan API for documentation of the parameters.
+ *
+ * @return The driver's error code on failure, otherwise VK_SUCCESS.
+ */
+template <>
+VKAPI_ATTR VkResult VKAPI_CALL layer_vkCreateRenderPass(
+    VkDevice device,
+    const VkRenderPassCreateInfo* pCreateInfo,
+    const VkAllocationCallbacks* pAllocator,
+    VkRenderPass* pRenderPass
+) {
+    LAYER_TRACE(__func__);
+
+    // Hold the lock to access layer-wide global store
+    std::unique_lock lock { g_vulkanLock };
+    auto* layer = Device::retrieve(device);
+
+    // Release the lock to call into the driver
+    lock.unlock();
+    VkResult ret = layer->driver.vkCreateRenderPass(device, pCreateInfo, pAllocator, pRenderPass);
+    if (ret != VK_SUCCESS)
+    {
+        // Nothing was created, so there is nothing to track
+        return ret;
+    }
+
+    // Retake the lock to access layer-wide global store
+    lock.lock();
+    auto& tracker = layer->getStateTracker();
+    tracker.createRenderPass(*pRenderPass, *pCreateInfo);
+    return VK_SUCCESS;
+}
+
+/* See Vulkan API for documentation. 
*/
+template <>
+VKAPI_ATTR VkResult VKAPI_CALL layer_vkCreateRenderPass2(
+    VkDevice device,
+    const VkRenderPassCreateInfo2* pCreateInfo,
+    const VkAllocationCallbacks* pAllocator,
+    VkRenderPass* pRenderPass
+) {
+    LAYER_TRACE(__func__);
+
+    // Hold the lock to access layer-wide global store
+    std::unique_lock lock { g_vulkanLock };
+    auto* layer = Device::retrieve(device);
+
+    // Release the lock to call into the driver
+    lock.unlock();
+    VkResult ret = layer->driver.vkCreateRenderPass2(device, pCreateInfo, pAllocator, pRenderPass);
+    if (ret != VK_SUCCESS)
+    {
+        // Nothing was created, so there is nothing to track
+        return ret;
+    }
+
+    // Retake the lock to access layer-wide global store
+    lock.lock();
+    auto& tracker = layer->getStateTracker();
+    tracker.createRenderPass(*pRenderPass, *pCreateInfo);
+    return VK_SUCCESS;
+}
+
+/**
+ * @brief Layer intercept for vkCreateRenderPass2KHR.
+ *
+ * Creates the render pass in the driver and, only if that succeeds,
+ * registers the new handle and its create info with the state tracker.
+ *
+ * See Vulkan API for documentation of the parameters.
+ *
+ * @return The driver's error code on failure, otherwise VK_SUCCESS.
+ */
+template <>
+VKAPI_ATTR VkResult VKAPI_CALL layer_vkCreateRenderPass2KHR(
+    VkDevice device,
+    const VkRenderPassCreateInfo2* pCreateInfo,
+    const VkAllocationCallbacks* pAllocator,
+    VkRenderPass* pRenderPass
+) {
+    LAYER_TRACE(__func__);
+
+    // Hold the lock to access layer-wide global store
+    std::unique_lock lock { g_vulkanLock };
+    auto* layer = Device::retrieve(device);
+
+    // Release the lock to call into the driver
+    lock.unlock();
+    VkResult ret = layer->driver.vkCreateRenderPass2KHR(device, pCreateInfo, pAllocator, pRenderPass);
+    if (ret != VK_SUCCESS)
+    {
+        // Nothing was created, so there is nothing to track
+        return ret;
+    }
+
+    // Retake the lock to access layer-wide global store
+    lock.lock();
+    auto& tracker = layer->getStateTracker();
+    tracker.createRenderPass(*pRenderPass, *pCreateInfo);
+    return VK_SUCCESS;
+}
+
+/* See Vulkan API for documentation. 
*/
+template <>
+VKAPI_ATTR void VKAPI_CALL layer_vkDestroyRenderPass(
+    VkDevice device,
+    VkRenderPass renderPass,
+    const VkAllocationCallbacks* pAllocator
+) {
+    LAYER_TRACE(__func__);
+
+    // Hold the lock to access layer-wide global store
+    std::unique_lock lock { g_vulkanLock };
+    auto* layer = Device::retrieve(device);
+
+    // Stop tracking the render pass before the driver destroys it
+    auto& tracker = layer->getStateTracker();
+    tracker.destroyRenderPass(renderPass);
+
+    // Release the lock to call into the driver
+    lock.unlock();
+    layer->driver.vkDestroyRenderPass(device, renderPass, pAllocator);
+}
+
+/**
+ * @brief Layer intercept for vkCmdBeginRenderPass.
+ *
+ * Records the start of the render pass in the command buffer tracker,
+ * which returns a tag ID, then emits a debug label of the form "t<tagID>"
+ * into the command buffer before forwarding the call to the driver.
+ *
+ * See Vulkan API for documentation of the parameters.
+ */
+template <>
+VKAPI_ATTR void VKAPI_CALL layer_vkCmdBeginRenderPass(
+    VkCommandBuffer commandBuffer,
+    const VkRenderPassBeginInfo* pRenderPassBegin,
+    VkSubpassContents contents
+) {
+    LAYER_TRACE(__func__);
+
+    // Hold the lock to access layer-wide global store
+    std::unique_lock lock { g_vulkanLock };
+    auto* layer = Device::retrieve(commandBuffer);
+
+    auto& tracker = layer->getStateTracker();
+    auto& cb = tracker.getCommandBuffer(commandBuffer);
+
+    auto& rp = tracker.getRenderPass(pRenderPassBegin->renderPass);
+    uint32_t width = pRenderPassBegin->renderArea.extent.width;
+    uint32_t height = pRenderPassBegin->renderArea.extent.height;
+
+    // Notify the command buffer we are starting a new render pass
+    uint64_t tagID = cb.renderPassBegin(rp, width, height);
+
+    // Emit the unique workload tag into the command stream
+    std::string tagLabel = formatString("t%" PRIu64, tagID);
+    VkDebugUtilsLabelEXT tagInfo {
+        .sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT,
+        .pNext = nullptr,
+        .pLabelName = tagLabel.c_str(),
+        .color = { 0.0f, 0.0f, 0.0f, 0.0f }
+    };
+
+    // Release the lock to call into the driver
+    lock.unlock();
+    layer->driver.vkCmdBeginDebugUtilsLabelEXT(commandBuffer, &tagInfo);
+    layer->driver.vkCmdBeginRenderPass(commandBuffer, pRenderPassBegin, contents);
+}
+
+/**
+ * @brief Layer intercept for vkCmdBeginRenderPass2.
+ *
+ * Records the start of the render pass in the command buffer tracker,
+ * which returns a tag ID, then emits a debug label of the form "t<tagID>"
+ * into the command buffer before forwarding the call to the driver.
+ *
+ * See Vulkan API for documentation of the parameters.
+ */
+template <>
+VKAPI_ATTR void VKAPI_CALL layer_vkCmdBeginRenderPass2(
+    VkCommandBuffer commandBuffer,
+    const VkRenderPassBeginInfo* pRenderPassBegin,
+    const VkSubpassBeginInfo* pSubpassBeginInfo
+) {
+    LAYER_TRACE(__func__);
+
+    // Hold the lock to access layer-wide global store
+    std::unique_lock lock { g_vulkanLock };
+    auto* layer = Device::retrieve(commandBuffer);
+
+    auto& tracker = layer->getStateTracker();
+    auto& cb = tracker.getCommandBuffer(commandBuffer);
+
+    auto& rp = tracker.getRenderPass(pRenderPassBegin->renderPass);
+    uint32_t width = pRenderPassBegin->renderArea.extent.width;
+    uint32_t height = pRenderPassBegin->renderArea.extent.height;
+
+    // Notify the command buffer we are starting a new render pass
+    uint64_t tagID = cb.renderPassBegin(rp, width, height);
+
+    // Emit the unique workload tag into the command stream
+    std::string tagLabel = formatString("t%" PRIu64, tagID);
+    VkDebugUtilsLabelEXT tagInfo {
+        .sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT,
+        .pNext = nullptr,
+        .pLabelName = tagLabel.c_str(),
+        .color = { 0.0f, 0.0f, 0.0f, 0.0f }
+    };
+
+    // Release the lock to call into the driver
+    lock.unlock();
+    layer->driver.vkCmdBeginDebugUtilsLabelEXT(commandBuffer, &tagInfo);
+    layer->driver.vkCmdBeginRenderPass2(commandBuffer, pRenderPassBegin, pSubpassBeginInfo);
+}
+
+/* See Vulkan API for documentation. 
*/
+template <>
+VKAPI_ATTR void VKAPI_CALL layer_vkCmdBeginRenderPass2KHR(
+    VkCommandBuffer commandBuffer,
+    const VkRenderPassBeginInfo* pRenderPassBegin,
+    const VkSubpassBeginInfo* pSubpassBeginInfo
+) {
+    LAYER_TRACE(__func__);
+
+    // Hold the lock to access layer-wide global store
+    std::unique_lock lock { g_vulkanLock };
+    auto* layer = Device::retrieve(commandBuffer);
+
+    auto& tracker = layer->getStateTracker();
+    auto& cb = tracker.getCommandBuffer(commandBuffer);
+
+    auto& rp = tracker.getRenderPass(pRenderPassBegin->renderPass);
+    uint32_t width = pRenderPassBegin->renderArea.extent.width;
+    uint32_t height = pRenderPassBegin->renderArea.extent.height;
+
+    // Notify the command buffer we are starting a new render pass
+    uint64_t tagID = cb.renderPassBegin(rp, width, height);
+
+    // Emit the unique workload tag into the command stream
+    std::string tagLabel = formatString("t%" PRIu64, tagID);
+    VkDebugUtilsLabelEXT tagInfo {
+        .sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT,
+        .pNext = nullptr,
+        .pLabelName = tagLabel.c_str(),
+        .color = { 0.0f, 0.0f, 0.0f, 0.0f }
+    };
+
+    // Release the lock to call into the driver
+    lock.unlock();
+    layer->driver.vkCmdBeginDebugUtilsLabelEXT(commandBuffer, &tagInfo);
+    layer->driver.vkCmdBeginRenderPass2KHR(commandBuffer, pRenderPassBegin, pSubpassBeginInfo);
+}
+
+/**
+ * @brief Layer intercept for vkCmdBeginRendering.
+ *
+ * Records the start of a dynamic render pass in the command buffer tracker.
+ * A "t<tagID>" debug label is emitted only when the pass is not resuming
+ * (VK_RENDERING_RESUMING_BIT is clear); the call is then forwarded to the
+ * driver.
+ *
+ * See Vulkan API for documentation of the parameters.
+ */
+template <>
+VKAPI_ATTR void VKAPI_CALL layer_vkCmdBeginRendering(
+    VkCommandBuffer commandBuffer,
+    const VkRenderingInfo* pRenderingInfo
+) {
+    LAYER_TRACE(__func__);
+
+    // Hold the lock to access layer-wide global store
+    std::unique_lock lock { g_vulkanLock };
+    auto* layer = Device::retrieve(commandBuffer);
+
+    auto& tracker = layer->getStateTracker();
+    auto& cb = tracker.getCommandBuffer(commandBuffer);
+
+    bool resuming = pRenderingInfo->flags & VK_RENDERING_RESUMING_BIT;
+    bool suspending = pRenderingInfo->flags & VK_RENDERING_SUSPENDING_BIT;
+
+    // Extract metadata for later use ...
+    Tracker::RenderPass rp(*pRenderingInfo);
+    uint32_t width = pRenderingInfo->renderArea.extent.width;
+    uint32_t height = pRenderingInfo->renderArea.extent.height;
+
+    // Notify the command buffer we are starting a new render pass
+    uint64_t tagID = cb.renderPassBegin(
+        rp, width, height, resuming, suspending);
+
+    // Release the lock to call into the driver
+    lock.unlock();
+
+    // Emit the label only for new render passes
+    if (!resuming)
+    {
+        // Emit the unique workload tag into the command stream
+        std::string tagLabel = formatString("t%" PRIu64, tagID);
+        VkDebugUtilsLabelEXT tagInfo {
+            .sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT,
+            .pNext = nullptr,
+            .pLabelName = tagLabel.c_str(),
+            .color = { 0.0f, 0.0f, 0.0f, 0.0f }
+        };
+
+        layer->driver.vkCmdBeginDebugUtilsLabelEXT(commandBuffer, &tagInfo);
+    }
+
+    layer->driver.vkCmdBeginRendering(commandBuffer, pRenderingInfo);
+}
+
+/**
+ * @brief Layer intercept for vkCmdBeginRenderingKHR.
+ *
+ * Records the start of a dynamic render pass in the command buffer tracker.
+ * A "t<tagID>" debug label is emitted only when the pass is not resuming
+ * (VK_RENDERING_RESUMING_BIT is clear); the call is then forwarded to the
+ * driver.
+ *
+ * See Vulkan API for documentation of the parameters.
+ */
+template <>
+VKAPI_ATTR void VKAPI_CALL layer_vkCmdBeginRenderingKHR(
+    VkCommandBuffer commandBuffer,
+    const VkRenderingInfo* pRenderingInfo
+) {
+    LAYER_TRACE(__func__);
+
+    // Hold the lock to access layer-wide global store
+    std::unique_lock lock { g_vulkanLock };
+    auto* layer = Device::retrieve(commandBuffer);
+
+    auto& tracker = layer->getStateTracker();
+    auto& cb = tracker.getCommandBuffer(commandBuffer);
+
+    bool resuming = pRenderingInfo->flags & VK_RENDERING_RESUMING_BIT;
+    bool suspending = pRenderingInfo->flags & VK_RENDERING_SUSPENDING_BIT;
+
+    // Extract metadata for later use ...
+    Tracker::RenderPass rp(*pRenderingInfo);
+    uint32_t width = pRenderingInfo->renderArea.extent.width;
+    uint32_t height = pRenderingInfo->renderArea.extent.height;
+
+    // Notify the command buffer we are starting a new render pass
+    uint64_t tagID = cb.renderPassBegin(
+        rp, width, height, resuming, suspending);
+
+    // Release the lock to call into the driver
+    lock.unlock();
+
+    // Emit the label only for new render passes
+    if (!resuming)
+    {
+        // Emit the unique workload tag into the command stream
+        std::string tagLabel = formatString("t%" PRIu64, tagID);
+        VkDebugUtilsLabelEXT tagInfo {
+            .sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT,
+            .pNext = nullptr,
+            .pLabelName = tagLabel.c_str(),
+            .color = { 0.0f, 0.0f, 0.0f, 0.0f }
+        };
+
+        layer->driver.vkCmdBeginDebugUtilsLabelEXT(commandBuffer, &tagInfo);
+    }
+
+    layer->driver.vkCmdBeginRenderingKHR(commandBuffer, pRenderingInfo);
+}
+
+/**
+ * @brief Layer intercept for vkCmdEndRenderPass.
+ *
+ * Records the end of the render pass in the command buffer tracker, then
+ * forwards the call to the driver and closes the debug label that the
+ * matching begin intercept opened.
+ *
+ * See Vulkan API for documentation of the parameters.
+ */
+template <>
+VKAPI_ATTR void VKAPI_CALL layer_vkCmdEndRenderPass(
+    VkCommandBuffer commandBuffer
+) {
+    LAYER_TRACE(__func__);
+
+    // Hold the lock to access layer-wide global store
+    std::unique_lock lock { g_vulkanLock };
+    auto* layer = Device::retrieve(commandBuffer);
+
+    // Update the layer command stream in the tracker
+    auto& tracker = layer->getStateTracker();
+    LAYER_LOG(" - Command buffer: %p", (void*)commandBuffer);
+    auto& cb = tracker.getCommandBuffer(commandBuffer);
+    cb.renderPassEnd();
+
+    // Release the lock to call into the driver
+    lock.unlock();
+    layer->driver.vkCmdEndRenderPass(commandBuffer);
+    layer->driver.vkCmdEndDebugUtilsLabelEXT(commandBuffer);
+}
+
+/* See Vulkan API for documentation. 
*/
+template <>
+VKAPI_ATTR void VKAPI_CALL layer_vkCmdEndRendering(
+    VkCommandBuffer commandBuffer
+) {
+    LAYER_TRACE(__func__);
+
+    // Hold the lock to access layer-wide global store
+    std::unique_lock lock { g_vulkanLock };
+    auto* layer = Device::retrieve(commandBuffer);
+
+    // Update the layer command stream in the tracker
+    auto& tracker = layer->getStateTracker();
+    auto& cb = tracker.getCommandBuffer(commandBuffer);
+    bool suspending = cb.renderPassEnd();
+
+    // Release the lock to call into the driver
+    lock.unlock();
+    layer->driver.vkCmdEndRendering(commandBuffer);
+
+    // A suspending render pass keeps its debug label open
+    if (!suspending)
+    {
+        layer->driver.vkCmdEndDebugUtilsLabelEXT(commandBuffer);
+    }
+}
+
+/**
+ * @brief Layer intercept for vkCmdEndRenderingKHR.
+ *
+ * Records the end of the dynamic render pass in the command buffer tracker
+ * and forwards the call to the driver. The debug label is closed only when
+ * the tracker reports that the render pass is not suspending.
+ *
+ * See Vulkan API for documentation of the parameters.
+ */
+template <>
+VKAPI_ATTR void VKAPI_CALL layer_vkCmdEndRenderingKHR(
+    VkCommandBuffer commandBuffer
+) {
+    LAYER_TRACE(__func__);
+
+    // Hold the lock to access layer-wide global store
+    std::unique_lock lock { g_vulkanLock };
+    auto* layer = Device::retrieve(commandBuffer);
+
+    // Update the layer command stream in the tracker
+    auto& tracker = layer->getStateTracker();
+    auto& cb = tracker.getCommandBuffer(commandBuffer);
+    bool suspending = cb.renderPassEnd();
+
+    // Release the lock to call into the driver
+    lock.unlock();
+    layer->driver.vkCmdEndRenderingKHR(commandBuffer);
+
+    // A suspending render pass keeps its debug label open
+    if (!suspending)
+    {
+        layer->driver.vkCmdEndDebugUtilsLabelEXT(commandBuffer);
+    }
+}
diff --git a/layer_gpu_timeline/source/layer_device_functions_trace_rays.cpp b/layer_gpu_timeline/source/layer_device_functions_trace_rays.cpp
new file mode 100644
index 0000000..5373747
--- /dev/null
+++ b/layer_gpu_timeline/source/layer_device_functions_trace_rays.cpp
@@ -0,0 +1,109 @@
+/*
+ * SPDX-License-Identifier: MIT
+ * ----------------------------------------------------------------------------
+ * Copyright (c) 2024 Arm Limited
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * 
deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ * ----------------------------------------------------------------------------
+ */
+
+#include
+#include
+#include
+
+#include "device.hpp"
+#include "layer_device_functions.hpp"
+
+extern std::mutex g_vulkanLock;
+
+/**
+ * @brief Count one trace rays command against a command buffer's stats.
+ *
+ * Every caller in this file holds g_vulkanLock while calling this.
+ *
+ * @param layer           The layer device that owns the state tracker.
+ * @param commandBuffer   The command buffer being recorded.
+ */
+static void registerTraceRays(
+    Device* layer,
+    VkCommandBuffer commandBuffer
+) {
+    auto& state = layer->getStateTracker();
+    auto& stats = state.getCommandBuffer(commandBuffer).getStats();
+    stats.incTraceRaysCount();
+}
+
+/**
+ * @brief Layer intercept for vkCmdTraceRaysIndirect2KHR.
+ *
+ * Counts the trace rays command in the command buffer stats, then forwards
+ * the call to the driver unchanged.
+ *
+ * See Vulkan API for documentation of the parameters.
+ */
+template <>
+VKAPI_ATTR void VKAPI_CALL layer_vkCmdTraceRaysIndirect2KHR(
+    VkCommandBuffer commandBuffer,
+    VkDeviceAddress indirectDeviceAddress
+) {
+    LAYER_TRACE(__func__);
+
+    // Hold the lock to access layer-wide global store
+    std::unique_lock lock { g_vulkanLock };
+    auto* layer = Device::retrieve(commandBuffer);
+
+    registerTraceRays(layer, commandBuffer);
+
+    // Release the lock to call into the driver
+    lock.unlock();
+    layer->driver.vkCmdTraceRaysIndirect2KHR(commandBuffer, indirectDeviceAddress);
+}
+
+/* See Vulkan API for documentation. 
*/
+template <>
+VKAPI_ATTR void VKAPI_CALL layer_vkCmdTraceRaysIndirectKHR(
+    VkCommandBuffer commandBuffer,
+    const VkStridedDeviceAddressRegionKHR* pRaygenShaderBindingTable,
+    const VkStridedDeviceAddressRegionKHR* pMissShaderBindingTable,
+    const VkStridedDeviceAddressRegionKHR* pHitShaderBindingTable,
+    const VkStridedDeviceAddressRegionKHR* pCallableShaderBindingTable,
+    VkDeviceAddress indirectDeviceAddress
+) {
+    LAYER_TRACE(__func__);
+
+    // Hold the lock to access layer-wide global store
+    std::unique_lock lock { g_vulkanLock };
+    auto* layer = Device::retrieve(commandBuffer);
+
+    // Count the command in the command buffer stats
+    registerTraceRays(layer, commandBuffer);
+
+    // Release the lock to call into the driver
+    lock.unlock();
+    layer->driver.vkCmdTraceRaysIndirectKHR(commandBuffer, pRaygenShaderBindingTable, pMissShaderBindingTable, pHitShaderBindingTable, pCallableShaderBindingTable, indirectDeviceAddress);
+}
+
+/**
+ * @brief Layer intercept for vkCmdTraceRaysKHR.
+ *
+ * Counts the trace rays command in the command buffer stats, then forwards
+ * the call to the driver unchanged.
+ *
+ * See Vulkan API for documentation of the parameters.
+ */
+template <>
+VKAPI_ATTR void VKAPI_CALL layer_vkCmdTraceRaysKHR(
+    VkCommandBuffer commandBuffer,
+    const VkStridedDeviceAddressRegionKHR* pRaygenShaderBindingTable,
+    const VkStridedDeviceAddressRegionKHR* pMissShaderBindingTable,
+    const VkStridedDeviceAddressRegionKHR* pHitShaderBindingTable,
+    const VkStridedDeviceAddressRegionKHR* pCallableShaderBindingTable,
+    uint32_t width,
+    uint32_t height,
+    uint32_t depth
+) {
+    LAYER_TRACE(__func__);
+
+    // Hold the lock to access layer-wide global store
+    std::unique_lock lock { g_vulkanLock };
+    auto* layer = Device::retrieve(commandBuffer);
+
+    // Count the command in the command buffer stats
+    registerTraceRays(layer, commandBuffer);
+
+    // Release the lock to call into the driver
+    lock.unlock();
+    layer->driver.vkCmdTraceRaysKHR(commandBuffer, pRaygenShaderBindingTable, pMissShaderBindingTable, pHitShaderBindingTable, pCallableShaderBindingTable, width, height, depth);
+}
\ No newline at end of file
diff --git a/layer_gpu_timeline/source/timeline_comms.cpp b/layer_gpu_timeline/source/timeline_comms.cpp
new file mode 100644
index 
0000000..fbb496a --- /dev/null +++ b/layer_gpu_timeline/source/timeline_comms.cpp @@ -0,0 +1,51 @@ +/* + * SPDX-License-Identifier: MIT + * ---------------------------------------------------------------------------- + * Copyright (c) 2024 Arm Limited + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * ----------------------------------------------------------------------------
+ */
+
+#include
+
+#include "timeline_comms.hpp"
+
+/**
+ * @brief Construct the timeline comms wrapper around a comms interface.
+ *
+ * Stores a pointer to the interface and, if it is already connected, looks
+ * up the endpoint ID of the "GPUTimeline" service. If it is not connected
+ * the endpoint stays at its default of zero.
+ *
+ * @param commsIf   The comms interface used to send messages; only a
+ *                  pointer to it is stored.
+ */
+TimelineComms::TimelineComms(
+    Comms::CommsInterface& commsIf)
+{
+    comms = &commsIf;
+    if (comms->isConnected())
+    {
+        endpoint = comms->getEndpointID("GPUTimeline");
+    }
+}
+
+/**
+ * @brief Send a message to the timeline endpoint.
+ *
+ * The message is dropped if the endpoint ID is zero.
+ *
+ * @param message   The message text; its characters are copied into the
+ *                  buffer handed to the asynchronous transmit call.
+ */
+void TimelineComms::txMessage(
+    const std::string& message)
+{
+    // Message endpoint is not available
+    if (endpoint == 0)
+    {
+        return;
+    }
+
+    auto data = std::make_unique(message.begin(), message.end());
+    comms->txAsync(endpoint, std::move(data));
+}
diff --git a/layer_gpu_timeline/source/timeline_comms.hpp b/layer_gpu_timeline/source/timeline_comms.hpp
new file mode 100644
index 0000000..435f37a
--- /dev/null
+++ b/layer_gpu_timeline/source/timeline_comms.hpp
@@ -0,0 +1,42 @@
+/*
+ * SPDX-License-Identifier: MIT
+ * ----------------------------------------------------------------------------
+ * Copyright (c) 2024 Arm Limited
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ * ----------------------------------------------------------------------------
+ */
+
+#pragma once
+
+#include "comms/comms_interface.hpp"
+
+/**
+ * @brief Wrapper around the comms interface for the timeline layer.
+ */
+class TimelineComms
+{
+public:
+    /**
+     * @brief Construct a new wrapper.
+     *
+     * @param comms   The comms interface used to send messages.
+     */
+    TimelineComms(
+        Comms::CommsInterface& comms);
+
+    /**
+     * @brief Send a message.
+     *
+     * @param message   The message text to send.
+     */
+    void txMessage(
+        const std::string& message);
+
+private:
+    // Endpoint ID to send to; defaults to zero
+    Comms::EndpointID endpoint { 0 };
+
+    // Non-owning pointer to the comms interface; defaults to null
+    Comms::CommsInterface* comms { nullptr };
+};
diff --git a/lgl_host_server.py b/lgl_host_server.py index c7c1382..14d5941 100644 --- a/lgl_host_server.py +++ b/lgl_host_server.py @@ -32,6 +32,7 @@ import threading import lglpy.server +import lglpy.service_gpu_timeline import lglpy.service_test import lglpy.service_log @@ -41,13 +42,18 @@ def main(): # Register all the services with it print(f'Registering host services:') - test_service = lglpy.service_test.TestService() - endpoint_id = server.register_endpoint(test_service) - print(f' - [{endpoint_id}] = {test_service.get_service_name()}') + service = lglpy.service_test.TestService() + endpoint_id = server.register_endpoint(service) + print(f' - [{endpoint_id}] = {service.get_service_name()}') + + service = lglpy.service_log.LogService() + endpoint_id = server.register_endpoint(service) + print(f' - [{endpoint_id}] = {service.get_service_name()}') + + service = lglpy.service_gpu_timeline.GPUTimelineService() + endpoint_id = server.register_endpoint(service) + print(f' - [{endpoint_id}] = {service.get_service_name()}') - log_service = lglpy.service_log.LogService() - endpoint_id = server.register_endpoint(log_service) - print(f' - [{endpoint_id}] = {log_service.get_service_name()}') print() # Start it running diff --git a/lglpy/service_gpu_timeline.py b/lglpy/service_gpu_timeline.py new file mode 100644 index 
0000000..69ac031
--- /dev/null
+++ b/lglpy/service_gpu_timeline.py
@@ -0,0 +1,42 @@
+# SPDX-License-Identifier: MIT
+# -----------------------------------------------------------------------------
+# Copyright (c) 2024 Arm Limited
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to
+# deal in the Software without restriction, including without limitation the
+# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+# sell copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+# -----------------------------------------------------------------------------
+
+# This module implements the server-side communications module service that
+# receives GPU timeline messages and prints them to the console.
+
+from lglpy.server import Message
+
+class GPUTimelineService:
+    '''
+    Host-side service endpoint that prints received GPU timeline messages.
+    '''
+
+    def __init__(self):
+        pass
+
+    def get_service_name(self) -> str:
+        '''
+        Return the name of this service.
+        '''
+        return 'GPUTimeline'
+
+    def handle_message(self, message: Message):
+        '''
+        Print a received message to stdout.
+
+        Args:
+            message: The received message; its payload is decoded as UTF-8.
+
+        Returns:
+            Always None.
+        '''
+        payload = message.payload.decode('utf-8')
+
+        # Report the size of the raw payload: the decoded string length is a
+        # character count, which differs from the byte count for non-ASCII text
+        print(f'{message.message_type.name}: {payload} ({len(message.payload)} bytes)')
+
+        return None
diff --git a/source_common/CMakeLists.txt b/source_common/CMakeLists.txt index 1f37093..a408717 100644 --- a/source_common/CMakeLists.txt +++ b/source_common/CMakeLists.txt @@ -26,3 +26,4 @@ # Device classes which get specialized for each use case. add_subdirectory(comms) +add_subdirectory(trackers) diff --git a/source_common/comms/comms_module.cpp b/source_common/comms/comms_module.cpp index 42c815e..5ef695e 100644 --- a/source_common/comms/comms_module.cpp +++ b/source_common/comms/comms_module.cpp @@ -46,6 +46,7 @@ namespace Comms CommsModule::CommsModule( const std::string& domainAddress ) { + LAYER_LOG("Client UDS socket create"); sockfd = socket(AF_UNIX, SOCK_STREAM, 0); if (sockfd < 0) { @@ -60,6 +61,7 @@ CommsModule::CommsModule( std::strcpy(servAddr.sun_path + 1, domainAddress.c_str()); servAddr.sun_path[0] = '\0'; + LAYER_LOG("Client UDS connect"); int conn = connect( sockfd, reinterpret_cast(&servAddr), @@ -72,8 +74,13 @@ CommsModule::CommsModule( return; } + LAYER_LOG("Client make transmitter"); transmitter = std::make_unique(*this); + + LAYER_LOG("Client make receiver"); receiver = std::make_unique(*this); + + LAYER_LOG("Client make complete"); } /** See header for documentation. 
*/ diff --git a/source_common/trackers/CMakeLists.txt b/source_common/trackers/CMakeLists.txt new file mode 100644 index 0000000..8318d8c --- /dev/null +++ b/source_common/trackers/CMakeLists.txt @@ -0,0 +1,46 @@ +# SPDX-License-Identifier: MIT +# ----------------------------------------------------------------------------- +# Copyright (c) 2024 Arm Limited +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to +# deal in the Software without restriction, including without limitation the +# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or +# sell copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+# ----------------------------------------------------------------------------- + +set(LIB_BINARY lib_layer_trackers) + +add_library( + ${LIB_BINARY} STATIC + command_buffer.cpp + device.cpp + layer_command_stream.cpp + queue.cpp + render_pass.cpp) + +target_include_directories( + ${LIB_BINARY} PRIVATE + ../../khronos/vulkan/include + ../../source_third_party + ../) + +lgl_set_build_options(${LIB_BINARY}) + +# No unit tests for this module yet +#if(${LGL_UNITTEST}) +# add_subdirectory(test) +#endif() + diff --git a/source_common/trackers/command_buffer.cpp b/source_common/trackers/command_buffer.cpp new file mode 100644 index 0000000..0e4cdd8 --- /dev/null +++ b/source_common/trackers/command_buffer.cpp @@ -0,0 +1,185 @@ +/* + * SPDX-License-Identifier: MIT + * ---------------------------------------------------------------------------- + * Copyright (c) 2022-2024 Arm Limited + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * ----------------------------------------------------------------------------
+ */
+
+#include
+
+#include "trackers/command_buffer.hpp"
+#include "framework/utils.hpp"
+#include "utils/misc.hpp"
+
+namespace Tracker
+{
+
+/* See header for documentation. */
+CommandBuffer::CommandBuffer(
+    VkCommandBuffer _handle) :
+    handle(_handle)
+{
+
+}
+
+/* See header for documentation. */
+void CommandBuffer::reset()
+{
+    stats.reset();
+    workloads.clear();
+    workloadCommandStream.clear();
+
+    // Drop any render pass that was still open when the reset happened, so
+    // the next renderPassBegin() does not find a stale current render pass
+    currentRenderPass.reset();
+    renderPassStartDrawCount = 0;
+}
+
+/**
+ * @brief Begin a user debug marker range.
+ *
+ * Creates a marker workload and appends a MARKER_BEGIN instruction that
+ * references it to the layer command stream.
+ *
+ * @param marker   The label of the debug marker.
+ */
+void CommandBuffer::debugMarkerBegin(
+    std::string marker
+) {
+    // Create a workload we can reference later
+    auto workload = std::make_shared(marker);
+    workloads.push_back(workload);
+
+    // Add command to update queue debug stack on submit
+    auto instr = std::make_pair(LCSOpcode::MARKER_BEGIN, workload);
+    workloadCommandStream.push_back(instr);
+}
+
+/**
+ * @brief End a user debug marker range.
+ *
+ * Appends a MARKER_END instruction with an empty workload to the layer
+ * command stream.
+ */
+void CommandBuffer::debugMarkerEnd()
+{
+    // Add command with empty workload to update queue debug stack on submit
+    auto workload = std::shared_ptr();
+    auto instr = std::make_pair(LCSOpcode::MARKER_END, workload);
+    workloadCommandStream.push_back(instr);
+}
+
+/**
+ * @brief Begin a user render pass.
+ *
+ * A new tag ID is assigned, and the render pass count incremented, only
+ * when the render pass is not resuming.
+ *
+ * @return The tag ID of the render pass, or zero if it is resuming.
+ */ +uint64_t CommandBuffer::renderPassBegin( + const RenderPass& renderPass, + uint32_t width, + uint32_t height, + bool resuming, + bool suspending +) { + uint64_t tagID { 0 }; + + assert(!currentRenderPass); + + // Assign ID and update the stats tracker for new render passes only + if (!resuming) + { + tagID = Tracker::LCSWorkload::getTagID(); + stats.incRenderPassCount(); + } + + // Populate render pass with config information + renderPassStartDrawCount = stats.getDrawCallCount(); + + auto workload = std::make_shared( + tagID, renderPass, width, height, suspending); + + currentRenderPass = workload; + workloads.push_back(workload); + + // Add a command to the layer-side command stream + auto instr = std::make_pair(LCSOpcode::RENDERPASS_BEGIN, workload); + workloadCommandStream.push_back(instr); + + return tagID; +} + +/** + * @brief End a user render pass. + */ +bool CommandBuffer::renderPassEnd() +{ + assert(currentRenderPass); + + // Update stats based on what happened ... + uint64_t endDrawCount = stats.getDrawCallCount(); + uint64_t drawCount = endDrawCount - renderPassStartDrawCount; + currentRenderPass->setDrawCallCount(drawCount); + + // Cache the return state and clear the current render pass tracker + bool suspending = currentRenderPass->isSuspending(); + currentRenderPass.reset(); + + return suspending; +} + +/* See header for documentation. */ +void CommandBuffer::executeCommands( + CommandBuffer& secondary +) { + // Integrate secondary statistics into the primary + stats.mergeCounts(secondary.getStats()); + + // Integrate secondary layer commands + vecAppend(workloads, secondary.workloads); + vecAppend(workloadCommandStream, secondary.workloadCommandStream); +} + + +CommandPool::CommandPool( + VkCommandPool _handle) : + handle(_handle) +{ + +}; + +/* See header for documentation. 
*/ +CommandBuffer& CommandPool::allocateCommandBuffer( + VkCommandBuffer commandBuffer +) { + auto result = commandBuffers.insert({ + commandBuffer, + std::make_unique(commandBuffer) + }); + + // Validate that insertion worked + assert(result.second); + + // Return the created command buffer + return *result.first->second.get(); +} + +/* See header for documentation. */ +void CommandPool::freeCommandBuffer( + VkCommandBuffer commandBuffer +) { + commandBuffers.erase(commandBuffer); +} + +/* See header for documentation. */ +void CommandPool::reset() +{ + for (auto& commandBuffer : commandBuffers) + { + commandBuffer.second->reset(); + } +} + +} diff --git a/source_common/trackers/command_buffer.hpp b/source_common/trackers/command_buffer.hpp new file mode 100644 index 0000000..fcf5c34 --- /dev/null +++ b/source_common/trackers/command_buffer.hpp @@ -0,0 +1,218 @@ +/* + * SPDX-License-Identifier: MIT + * ---------------------------------------------------------------------------- + * Copyright (c) 2022-2024 Arm Limited + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * ---------------------------------------------------------------------------- + */ + +/** + * \file + * The declaration of Vulkan command pool and command buffer use trackers. + * + * Role summary + * ============ + * + * These trackers are used to monitor the use of command buffers in a frame, + * allowing us to monitor command buffer payloads submitted to a queue. + * + * Key properties + * ============== + * + * Command pools and Command buffers are both lock-free from a single app + * thread, relying on external synchronization above the API if multi-threaded + * use is required. + */ + +#pragma once + +#include +#include +#include +#include +#include +#include +#include + +#include "trackers/stats.hpp" +#include "trackers/layer_command_stream.hpp" + +namespace Tracker +{ + +/** + * @brief The state tracker for a single command buffer. + */ +class CommandBuffer +{ +public: + /** + * @brief Construct a new command buffer wrapping a Vulkan allocation. + * + * @param handle The Vulkan command buffer handle we are tracking. + */ + CommandBuffer( + VkCommandBuffer handle); + + /** + * @brief Get the stats object for this command buffer; + */ + Stats& getStats() + { + return stats; + } + + /** + * @brief Get the layer submit-time command stream for this command buffer. + */ + const std::vector& getSubmitCommandStream() const + { + return workloadCommandStream; + } + + /** + * @brief Begin recording a render pass. + * + * @param renderPass Render pass creation info. + * @param width Render pass extent width in pixels. + * @param height Render pass extent height in pixels. + * @param resuming If @c true this recording starts with a resume. 
+ * @param suspending If @c true this recording ends with a suspend. + * + * @return Returns the tagID assigned to this workload. Always returns 0 + * if @c resuming an existing workload. + */ + uint64_t renderPassBegin( + const RenderPass& renderPass, + uint32_t width, + uint32_t height, + bool resuming=false, + bool suspending=false); + + /** + * @brief End the current render pass workload recording. + * + * @return Returns @c true if this is a suspending dynamic render pass or + * @c false otherwise. + */ + bool renderPassEnd(); + + /** + * @brief Begin a user debug marker range. + */ + void debugMarkerBegin( + std::string marker); + + /** + * @brief End a user debug marker range. + */ + void debugMarkerEnd(); + + /** + * @brief Execute a secondary command buffer. + */ + void executeCommands( + CommandBuffer& secondary); + + /** + * @brief Reset the command buffer back into the @a Initial state. + */ + void reset(); + +private: + /** + * @brief The Vulkan API handle of this command buffer. + */ + const VkCommandBuffer handle; + + /** + * @brief The command buffer draw count at the start of the render pass. + */ + uint64_t renderPassStartDrawCount { 0 }; + + /** + * @brief The cumulative stats of the commands in this command buffer. + */ + Stats stats; + + /** + * @brief The current render pass if we are in one. + */ + std::shared_ptr currentRenderPass; + + /** + * @brief The recorded workloads. + */ + std::vector> workloads; + + /** + * @brief The recorded commands. + */ + std::vector workloadCommandStream; +}; + +/** + * @brief The state tracker for a single command pool. + */ +class CommandPool +{ +public: + /** + * @brief Construct a new command pool wrapping a Vulkan allocation. + * + * @param handle The Vulkan command pool handle we are wrapping. + */ + CommandPool( + VkCommandPool handle); + + /** + * @brief Allocate a command buffer in the pool with the given handle. + * + * @param commandBuffer The Vulkan handle of the allocated command buffer.
+ * + * \return The layer wrapper object for the command buffer. + */ + CommandBuffer& allocateCommandBuffer(VkCommandBuffer commandBuffer); + + /** + * @brief Free the command buffer in the pool with the given handle. + * + * @param commandBuffer The Vulkan handle of the command buffer to free. + */ + void freeCommandBuffer(VkCommandBuffer commandBuffer); + + /** + * @brief Reset all allocated command buffers into the @a Initial state. + */ + void reset(); + +private: + /** + * @brief The Vulkan API handle of this command pool. + */ + const VkCommandPool handle; + + /** + * @brief The command buffers currently allocated in this command pool. + */ + std::unordered_map> commandBuffers; +}; + +} diff --git a/source_common/trackers/device.cpp b/source_common/trackers/device.cpp new file mode 100644 index 0000000..a1f0687 --- /dev/null +++ b/source_common/trackers/device.cpp @@ -0,0 +1,175 @@ +/* + * SPDX-License-Identifier: MIT + * ---------------------------------------------------------------------------- + * Copyright (c) 2022-2024 Arm Limited + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * ---------------------------------------------------------------------------- + */ + +#include +#include +#include +#include +#include + +#include "trackers/device.hpp" +#include "utils/misc.hpp" + +namespace Tracker +{ + +/* See header for documentation. */ +void Device::createCommandPool( + VkCommandPool commandPool +) { + commandPools.insert({ + commandPool, + std::make_unique(commandPool) + }); +} + +/* See header for documentation. */ +CommandPool& Device::getCommandPool( + VkCommandPool commandPool +) { + assert(isInMap(commandPool, commandPools)); + return *commandPools.at(commandPool); +} + +/* See header for documentation. */ +void Device::destroyCommandPool( + VkCommandPool commandPool +) { + commandPools.erase(commandPool); +} + +/* See header for documentation. */ +void Device::allocateCommandBuffer( + VkCommandPool commandPool, + VkCommandBuffer commandBuffer +) { + // Allocate in the pool + auto& pool = getCommandPool(commandPool); + auto& buffer = pool.allocateCommandBuffer(commandBuffer); + + // Insert into the tracker lookup map + [[maybe_unused]] auto result = commandBuffers.insert({ + commandBuffer, + buffer + }); + + assert(result.second); +} + +/* See header for documentation. */ +void Device::freeCommandBuffer( + VkCommandPool commandPool, + VkCommandBuffer commandBuffer +) { + // Remove from the tracker lookup map + commandBuffers.erase(commandBuffer); + + // Remove from the command pool + auto& pool = getCommandPool(commandPool); + pool.freeCommandBuffer(commandBuffer); +} + +/* See header for documentation. 
*/ +CommandBuffer& Device::getCommandBuffer( + VkCommandBuffer commandBuffer +) { + assert(isInMap(commandBuffer, commandBuffers)); + return commandBuffers.at(commandBuffer); +} + +/* See header for documentation. */ +void Device::createRenderPass( + VkRenderPass renderPass, + const VkRenderPassCreateInfo& createInfo +) { + renderPasses.insert({ + renderPass, + std::make_unique(renderPass, createInfo) + }); +} + +/* See header for documentation. */ +void Device::createRenderPass( + VkRenderPass renderPass, + const VkRenderPassCreateInfo2& createInfo +) { + renderPasses.insert({ + renderPass, + std::make_unique(renderPass, createInfo) + }); +} + +/* See header for documentation. */ +RenderPass& Device::getRenderPass( + VkRenderPass renderPass +) { + assert(isInMap(renderPass, renderPasses)); + return *renderPasses.at(renderPass); +} + +/* See header for documentation. */ +void Device::destroyRenderPass( + VkRenderPass renderPass +) { + renderPasses.erase(renderPass); +} + +/* See header for documentation. */ +Queue& Device::getQueue( + VkQueue queue +) { + // Create a tracker for a queue on first use + if (!isInMap(queue, queues)) + { + queues.insert({ + queue, + std::make_unique(queue) + }); + } + + return *queues.at(queue); +} + +/* See header for documentation. */ +void Device::queueSubmit( + VkCommandBuffer commandBuffer +) { + auto& cbStats = getCommandBuffer(commandBuffer).getStats(); + frameStats.mergeCounts(cbStats); +} + + +/* See header for documentation. 
*/ +void Device::queuePresent() +{ + // Update cumulative statistics with the frame statistics + totalStats.incFrameCount(); + totalStats.mergeCounts(frameStats); + + // Reset the frame statistics ready for the next frame + frameStats.reset(); +} + +} diff --git a/source_common/trackers/device.hpp b/source_common/trackers/device.hpp new file mode 100644 index 0000000..4507707 --- /dev/null +++ b/source_common/trackers/device.hpp @@ -0,0 +1,208 @@ +/* + * SPDX-License-Identifier: MIT + * ---------------------------------------------------------------------------- + * Copyright (c) 2022-2024 Arm Limited + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * ---------------------------------------------------------------------------- + */ + +/** + * \file + * This module implements basic tracking of Vulkan devices. 
+ * + * Role summary + * ============ + * + * Trackers are used to monitor the use of a device and the various resources + * that exist in the scope of a device. Primarily we use them to implement + * counters that layers can use either to emit statistics or to trigger + * other layer behavior when a specific count is reached. + */ + +#pragma once + +#include +#include + +#include "trackers/command_buffer.hpp" +#include "trackers/queue.hpp" +#include "trackers/render_pass.hpp" + +namespace Tracker +{ + +/** + * @brief The state tracker for a single device. + */ +class Device +{ +public: + /** + * @brief Create a new command pool tracker within this device. + * + * @param commandPool The native handle to track. + */ + void createCommandPool( + VkCommandPool commandPool); + + /** + * @brief Get the tracker for a native command pool. + * + * @param commandPool The native handle we are tracking. + */ + CommandPool& getCommandPool( + VkCommandPool commandPool); + + /** + * @brief Create a new command buffer in a pool within this device. + * + * @param commandPool The native parent command pool handle. + * @param commandBuffer The native handle to track. + */ + void allocateCommandBuffer( + VkCommandPool commandPool, + VkCommandBuffer commandBuffer); + + /** + * @brief Free a command buffer in a pool within this device. + * + * @param commandPool The native parent command pool handle. + * @param commandBuffer The native handle to stop tracking. + */ + void freeCommandBuffer( + VkCommandPool commandPool, + VkCommandBuffer commandBuffer); + + /** + * @brief Get the tracker for a native command buffer. + * + * @param commandBuffer The native handle we are tracking. + */ + CommandBuffer& getCommandBuffer( + VkCommandBuffer commandBuffer); + + /** + * @brief Destroy a command pool within this device. + * + * @param commandPool The native handle to stop tracking.
+ */ + void destroyCommandPool( + VkCommandPool commandPool); + + /** + * @brief Get the tracker for a native queue. + * + * Note that queue trackers are created on the fly when a native queue is + * first used. We don't track queue creation as a distinct step. + * + * @param queue The native handle we are tracking. + */ + Queue& getQueue( + VkQueue queue); + + /** + * @brief Create a new render pass tracker within this device. + * + * @param renderPass The native handle to track. + * @param createInfo The render pass configuration information. + */ + void createRenderPass( + VkRenderPass renderPass, + const VkRenderPassCreateInfo& createInfo); + + /** + * @brief Create a new render pass tracker within this device. + * + * @param renderPass The native handle to track. + * @param createInfo The render pass configuration information. + */ + void createRenderPass( + VkRenderPass renderPass, + const VkRenderPassCreateInfo2& createInfo); + + /** + * @brief Get the tracker for a native render pass. + * + * @param renderPass The native handle we are tracking. + */ + RenderPass& getRenderPass( + VkRenderPass renderPass); + + /** + * @brief Destroy a render pass within this device. + * + * @param renderPass The native handle to stop tracking. + */ + void destroyRenderPass( + VkRenderPass renderPass); + + /** + * @brief Submit a command buffer to a queue within this device. + * + * @param commandBuffer The native command buffer we are tracking. + */ + void queueSubmit( + VkCommandBuffer commandBuffer); + + /** + * @brief Submit a display present command to a queue within this device. + */ + void queuePresent(); + +public: + /** + * @brief The set of all queues allocated in this device. + */ + std::unordered_map> queues; + + /** + * @brief The set of all command pools allocated in this device. + */ + std::unordered_map> commandPools; + + /** + * @brief The set of all command buffers allocated in this device. 
+ * + * Note - memory ownership is via the CommandPool, so dispatch references in this map + * must be removed before deleting the command pool that owns the buffer. + */ + std::unordered_map commandBuffers; + + /** + * @brief The set of all render passes allocated in this device. + */ + std::unordered_map> renderPasses; + + /** + * @brief The cumulative statistics for this device. + * + * Only updated on submit to a present queue. + */ + Stats totalStats; + + /** + * @brief The current frame statistics for this device. + * + * Only updated on submit to a queue. + */ + Stats frameStats; +}; + +} diff --git a/source_common/trackers/layer_command_stream.cpp b/source_common/trackers/layer_command_stream.cpp new file mode 100644 index 0000000..56b1039 --- /dev/null +++ b/source_common/trackers/layer_command_stream.cpp @@ -0,0 +1,161 @@ +/* + * SPDX-License-Identifier: MIT + * ---------------------------------------------------------------------------- + * Copyright (c) 2024 Arm Limited + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * ---------------------------------------------------------------------------- + */ + +#include + +#include "nlohmann/json.hpp" + +#include "trackers/layer_command_stream.hpp" + +using json = nlohmann::json; + +namespace Tracker +{ +/* See header for details. */ +std::atomic LCSWorkload::nextTagID { 1 }; + +LCSWorkload::LCSWorkload( + uint64_t _tagID): + tagID(_tagID) +{ + +} + +/* See header for details. */ +LCSMarker::LCSMarker( + const std::string& _label) : + LCSWorkload(0), + label(_label) +{ + +}; + +/* See header for details. */ +LCSRenderPass::LCSRenderPass( + uint64_t _tagID, + const RenderPass& renderPass, + uint32_t _width, + uint32_t _height, + bool _suspending) : + LCSWorkload(_tagID), + width(_width), + height(_height), + suspending(_suspending) +{ + // Copy these as the renderpass object may be transient. + subpassCount = renderPass.getSubpassCount(); + attachments = renderPass.getAttachments(); +} + +/* See header for details. 
*/ +std::string LCSRenderPass::getBeginMetadata( + uint64_t submitID) const +{ + json metadata = { + { "type", "renderpass" }, + { "tid", tagID }, + { "width", width }, + { "height", height }, + { "drawCallCount", drawCallCount } + }; + + if (submitID != 0) + { + metadata["sid"] = submitID; + } + + // Default is 1, so only store if we need it + if (subpassCount != 1) + { + metadata["subpassCount"] = subpassCount; + } + + json attachPoints = json::array(); + for (const auto& attachment : attachments) + { + json attachPoint { + { "binding", attachment.getAttachmentStr() }, + }; + + // Default is false, so only store if we need it + if (attachment.isLoaded()) + { + attachPoint["load"] = true; + } + + // Default is true, so only store if we need it + if (!attachment.isStored()) + { + attachPoint["store"] = false; + } + + // Default is false, so only store if we need it + if (attachment.isResolved()) + { + attachPoint["resolve"] = true; + } + + attachPoints.push_back(attachPoint); + } + + metadata["attachments"] = attachPoints; + return metadata.dump(); +} + +/* See header for details. */ +std::string LCSRenderPass::getContinuationMetadata( + uint64_t tagIDContinuation, + uint64_t submitID) const +{ + json metadata = { + { "type", "renderpass" }, + { "tid", tagIDContinuation }, + { "drawCallCount", drawCallCount } + }; + + if (submitID != 0) + { + metadata["sid"] = submitID; + } + + return metadata.dump(); +} + +/* See header for details. 
*/ +std::string LCSRenderPass::getMetadata( + uint64_t tagIDContinuation, + uint64_t submitID) const +{ + if (tagID) + { + assert(tagIDContinuation == 0); + return getBeginMetadata(submitID); + } + + assert(tagIDContinuation != 0); + return getContinuationMetadata(tagIDContinuation, submitID); +} + +} diff --git a/source_common/trackers/layer_command_stream.hpp b/source_common/trackers/layer_command_stream.hpp new file mode 100644 index 0000000..a144683 --- /dev/null +++ b/source_common/trackers/layer_command_stream.hpp @@ -0,0 +1,196 @@ +/* + * SPDX-License-Identifier: MIT + * ---------------------------------------------------------------------------- + * Copyright (c) 2022-2024 Arm Limited + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * ---------------------------------------------------------------------------- + */ + +/** + * \file + * The declaration of the layer command stream opcodes and workload types.
+ * + * Role summary + * ============ + * + * These trackers are used to monitor the use of command buffers in a frame, + * allowing us to monitor command buffer payloads submitted to a queue. + * + * Key properties + * ============== + * + * Command pools and Command buffers are both lock-free from a single app + * thread, relying on external synchronization above the API if multi-threaded + * use is required. + */ + +#pragma once + +#include +#include +#include +#include +#include +#include +#include + +#include "trackers/render_pass.hpp" +#include "trackers/stats.hpp" +#include "utils/misc.hpp" + +namespace Tracker +{ + +/** + * @brief Enumeration of layer command stream opcodes. + */ +enum class LCSOpcode +{ + MARKER_BEGIN, + MARKER_END, + RENDERPASS_BEGIN, + RENDERPASS_END, // TODO: Does this need to be an opcode? + DISPATCH, + TRACE_RAYS, + BUFFER_TRANSFER, + IMAGE_TRANSFER +}; + +/** + * @brief Baseclass representing a GPU workload in the command stream. + */ +class LCSWorkload +{ +public: + LCSWorkload( + uint64_t tagID); + + virtual ~LCSWorkload() = default; + + virtual std::string getMetadata( + uint64_t tagIDContinuation=0, + uint64_t submitID=0) const = 0; + + /** + * @brief Get a unique tagID to label a workload in a command buffer. + * + * @return The assigned ID. + */ + static uint64_t getTagID() + { + return nextTagID.fetch_add(1, std::memory_order_relaxed); + } + +protected: + /** + * @brief The assigned tagID for this workload. + * + * Render pass continuations are assigned tagID of zero. + */ + uint64_t tagID; + +private: + /** + * @brief The workload tagID allocator. + */ + static std::atomic nextTagID; +}; + +/** + * @brief Class representing a render pass workload in the command stream.
+ */ +class LCSRenderPass : public LCSWorkload +{ +public: + LCSRenderPass( + uint64_t tagID, + const RenderPass& renderPass, + uint32_t width, + uint32_t height, + bool suspending); + + virtual ~LCSRenderPass() = default; + + bool isSuspending() const + { + return suspending; + }; + + void setDrawCallCount(uint64_t count) + { + drawCallCount = count; + }; + + virtual std::string getMetadata( + uint64_t tagIDContinuation=0, + uint64_t submitID=0) const; + +private: + std::string getBeginMetadata( + uint64_t submitID=0) const; + + std::string getContinuationMetadata( + uint64_t tagIDContinuation, + uint64_t submitID=0) const; + + uint32_t width; + + uint32_t height; + + bool suspending; + + uint32_t subpassCount; + + uint64_t drawCallCount { 0 }; + + std::vector attachments; +}; + +/** + * @brief Class representing a user debug marker in the command stream. + */ +class LCSMarker : public LCSWorkload +{ +public: + LCSMarker( + const std::string& label); + + virtual ~LCSMarker() = default; + + virtual std::string getMetadata( + uint64_t tagIDContinuation=0, + uint64_t submitID=0) const + { + UNUSED(tagIDContinuation); + UNUSED(submitID); + return label; + }; + +private: + std::string label; +}; + +/** + * @brief Instructions are an opcode with a data pointer. + * + * Data pointers may be null for some opcodes.
+ */ +using LCSInstruction = std::pair>; +} diff --git a/source_common/trackers/queue.cpp b/source_common/trackers/queue.cpp new file mode 100644 index 0000000..cfd6590 --- /dev/null +++ b/source_common/trackers/queue.cpp @@ -0,0 +1,67 @@ +/* + * SPDX-License-Identifier: MIT + * ---------------------------------------------------------------------------- + * Copyright (c) 2022-2024 Arm Limited + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * ---------------------------------------------------------------------------- + */ + +#include + +#include "trackers/queue.hpp" + +namespace Tracker +{ +/* See header for details. */ +std::atomic Queue::nextSubmitID { 1 }; + +/* See header for details. */ +Queue::Queue( + VkQueue _handle): + handle(_handle) { }; + +/* See header for details. 
*/ +void Queue::runSubmitCommandStream( + const std::vector& stream, + std::function callback +) { + for (auto& instr: stream) + { + LCSOpcode opCode = instr.first; + const LCSWorkload* opData = instr.second.get(); + + if (opCode == LCSOpcode::MARKER_BEGIN) + { + debugStack.push_back(opData->getMetadata()); + } + else if (opCode == LCSOpcode::MARKER_END) + { + debugStack.pop_back(); + } + else if (opCode == LCSOpcode::RENDERPASS_BEGIN) + { + auto* workload = dynamic_cast(opData); + callback(workload->getMetadata()); + std::string log = joinString(debugStack, "|"); + } + } +} + +} diff --git a/source_common/trackers/queue.hpp b/source_common/trackers/queue.hpp new file mode 100644 index 0000000..26fc416 --- /dev/null +++ b/source_common/trackers/queue.hpp @@ -0,0 +1,99 @@ +/* + * SPDX-License-Identifier: MIT + * ---------------------------------------------------------------------------- + * Copyright (c) 2022-2024 Arm Limited + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * ---------------------------------------------------------------------------- + */ + +/** + * \file + * The declaration of Vulkan queue use trackers. + * + * Role summary + * ============ + * + * These trackers are used to monitor the use of a queue. + * + * Key properties + * ============== + * + * Queues are lock-free from a single app thread, relying on external + * synchronization above the API if multi-threaded use is required. + */ + +#pragma once + +#include +#include +#include +#include +#include + +#include "framework/utils.hpp" +#include "trackers/layer_command_stream.hpp" + +namespace Tracker +{ + +/** + * @brief The state tracker for a queue. + */ +class Queue +{ +public: + Queue( + VkQueue handle); + + /** + * @brief Execute a layer command stream. + */ + void runSubmitCommandStream( + const std::vector& stream, + std::function callback); + + /** + * @brief Get a unique submitID to label a command buffer submit. + * + * @return The assigned ID. + */ + static uint64_t getSubmitID() + { + return nextSubmitID.fetch_add(1, std::memory_order_relaxed); + } + +private: + /** + * The handle of the native queue we are wrapping. + */ + VkQueue handle; + + /** + * @brief The stack of debug labels in the tool. + */ + std::vector debugStack; + + /** + * @brief The command buffer submitID allocator. 
+ */ + static std::atomic nextSubmitID; +}; + +} diff --git a/source_common/trackers/render_pass.cpp b/source_common/trackers/render_pass.cpp new file mode 100644 index 0000000..3560612 --- /dev/null +++ b/source_common/trackers/render_pass.cpp @@ -0,0 +1,386 @@ +/* + * SPDX-License-Identifier: MIT + * ---------------------------------------------------------------------------- + * Copyright (c) 2022-2024 Arm Limited + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * ---------------------------------------------------------------------------- + */ + +#include + +#include "trackers/render_pass.hpp" + +namespace Tracker +{ + +/* See header for details. */ +RenderPassAttachment::RenderPassAttachment( + RenderPassAttachName _name, + VkAttachmentLoadOp _loadOp, + VkAttachmentStoreOp _storeOp, + bool _resolve) : + name(_name), + loadOp(_loadOp), + storeOp(_storeOp), + resolve(_resolve) +{ + +} + +/* See header for details. 
*/
+RenderPass::RenderPass(
+    VkRenderPass _handle,
+    const VkRenderPassCreateInfo& createInfo) :
+    handle(_handle)
+{
+    subpassCount = createInfo.subpassCount;
+
+    auto& lastSubpass = createInfo.pSubpasses[subpassCount - 1];
+
+    // Color attachments
+    for(uint32_t i = 0; i < lastSubpass.colorAttachmentCount; i++)
+    {
+        auto& attachRef = lastSubpass.pColorAttachments[i];
+        if (attachRef.attachment == VK_ATTACHMENT_UNUSED)
+        {
+            continue;
+        }
+
+        auto& attachDesc = createInfo.pAttachments[attachRef.attachment];
+        attachments.emplace_back(
+            static_cast<RenderPassAttachName>(i),
+            attachDesc.loadOp,
+            attachDesc.storeOp,
+            false);
+    }
+
+    // Color resolve attachments
+    for(uint32_t i = 0; i < lastSubpass.colorAttachmentCount; i++)
+    {
+        // We may not have any resolve attachments
+        if (!lastSubpass.pResolveAttachments)
+        {
+            continue;
+        }
+
+        auto& attachRef = lastSubpass.pResolveAttachments[i];
+        if (attachRef.attachment == VK_ATTACHMENT_UNUSED)
+        {
+            continue;
+        }
+
+        auto& attachDesc = createInfo.pAttachments[attachRef.attachment];
+        attachments.emplace_back(
+            static_cast<RenderPassAttachName>(i),
+            attachDesc.loadOp,
+            attachDesc.storeOp,
+            true);
+    }
+
+    // Depth+Stencil attachments
+    // TODO: Determine if this is depth/stencil/both from image format
+    if (lastSubpass.pDepthStencilAttachment)
+    {
+        auto& attachRef = *lastSubpass.pDepthStencilAttachment;
+        if (attachRef.attachment != VK_ATTACHMENT_UNUSED)
+        {
+            auto& attachDesc = createInfo.pAttachments[attachRef.attachment];
+
+            // Canonicalize read-only attachments as storeOp=NONE
+            VkAttachmentStoreOp depthStoreOp = attachDesc.storeOp;
+            switch(attachRef.layout)
+            {
+            case VK_IMAGE_LAYOUT_READ_ONLY_OPTIMAL:
+            case VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL:
+            case VK_IMAGE_LAYOUT_DEPTH_READ_ONLY_STENCIL_ATTACHMENT_OPTIMAL:
+                if (attachDesc.storeOp == VK_ATTACHMENT_STORE_OP_STORE)
+                {
+                    depthStoreOp = VK_ATTACHMENT_STORE_OP_NONE;
+                }
+                break;
+            default:
+                depthStoreOp = attachDesc.storeOp;
+                break;
+            }
+
+            attachments.emplace_back(
+                RenderPassAttachName::DEPTH,
+                attachDesc.loadOp,
+                depthStoreOp,
+                false);
+
+            // Canonicalize read-only attachments as storeOp=NONE
+            VkAttachmentStoreOp stencilStoreOp = attachDesc.stencilStoreOp;
+            switch(attachRef.layout)
+            {
+            case VK_IMAGE_LAYOUT_READ_ONLY_OPTIMAL:
+            case VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL:
+            case VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_STENCIL_READ_ONLY_OPTIMAL:
+                if (attachDesc.stencilStoreOp == VK_ATTACHMENT_STORE_OP_STORE)
+                {
+                    stencilStoreOp = VK_ATTACHMENT_STORE_OP_NONE;
+                }
+                break;
+            default:
+                stencilStoreOp = attachDesc.stencilStoreOp;
+                break;
+            }
+
+            attachments.emplace_back(
+                RenderPassAttachName::STENCIL,
+                attachDesc.stencilLoadOp,
+                stencilStoreOp,
+                false);
+        }
+    }
+}
+
+/* See header for details. */
+RenderPass::RenderPass(
+    VkRenderPass _handle,
+    const VkRenderPassCreateInfo2& createInfo) :
+    handle(_handle)
+{
+    subpassCount = createInfo.subpassCount;
+
+    auto& lastSubpass = createInfo.pSubpasses[subpassCount - 1];
+
+    // Color attachments
+    for(uint32_t i = 0; i < lastSubpass.colorAttachmentCount; i++)
+    {
+        auto& attachRef = lastSubpass.pColorAttachments[i];
+        if (attachRef.attachment == VK_ATTACHMENT_UNUSED)
+        {
+            continue;
+        }
+
+        auto& attachDesc = createInfo.pAttachments[attachRef.attachment];
+        attachments.emplace_back(
+            static_cast<RenderPassAttachName>(i),
+            attachDesc.loadOp,
+            attachDesc.storeOp,
+            false);
+    }
+
+    // Color resolve attachments
+    for(uint32_t i = 0; i < lastSubpass.colorAttachmentCount; i++)
+    {
+        // We may not have any resolve attachments
+        if (!lastSubpass.pResolveAttachments)
+        {
+            continue;
+        }
+
+        auto& attachRef = lastSubpass.pResolveAttachments[i];
+        if (attachRef.attachment == VK_ATTACHMENT_UNUSED)
+        {
+            continue;
+        }
+
+        auto& attachDesc = createInfo.pAttachments[attachRef.attachment];
+        attachments.emplace_back(
+            static_cast<RenderPassAttachName>(i),
+            attachDesc.loadOp,
+            attachDesc.storeOp,
+            true);
+    }
+
+    // Depth+Stencil attachments
+    // TODO: Determine if this is depth/stencil/both from image format
+    if (lastSubpass.pDepthStencilAttachment)
+    {
+        auto& attachRef = *lastSubpass.pDepthStencilAttachment;
+        if (attachRef.attachment != VK_ATTACHMENT_UNUSED)
+        {
+            auto& attachDesc = createInfo.pAttachments[attachRef.attachment];
+
+            // Canonicalize read-only attachments as storeOp=NONE
+            VkAttachmentStoreOp depthStoreOp = attachDesc.storeOp;
+            switch(attachRef.layout)
+            {
+            case VK_IMAGE_LAYOUT_READ_ONLY_OPTIMAL:
+            case VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL:
+            case VK_IMAGE_LAYOUT_DEPTH_READ_ONLY_STENCIL_ATTACHMENT_OPTIMAL:
+                if (attachDesc.storeOp == VK_ATTACHMENT_STORE_OP_STORE)
+                {
+                    depthStoreOp = VK_ATTACHMENT_STORE_OP_NONE;
+                }
+                break;
+            default:
+                depthStoreOp = attachDesc.storeOp;
+                break;
+            }
+
+            attachments.emplace_back(
+                RenderPassAttachName::DEPTH,
+                attachDesc.loadOp,
+                depthStoreOp,
+                false);
+
+            // Canonicalize read-only attachments as storeOp=NONE
+            VkAttachmentStoreOp stencilStoreOp = attachDesc.stencilStoreOp;
+            switch(attachRef.layout)
+            {
+            case VK_IMAGE_LAYOUT_READ_ONLY_OPTIMAL:
+            case VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL:
+            case VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_STENCIL_READ_ONLY_OPTIMAL:
+                if (attachDesc.stencilStoreOp == VK_ATTACHMENT_STORE_OP_STORE)
+                {
+                    stencilStoreOp = VK_ATTACHMENT_STORE_OP_NONE;
+                }
+                break;
+            default:
+                stencilStoreOp = attachDesc.stencilStoreOp;
+                break;
+            }
+
+            attachments.emplace_back(
+                RenderPassAttachName::STENCIL,
+                attachDesc.stencilLoadOp,
+                stencilStoreOp,
+                false);
+        }
+    }
+}
+
+/* See header for details. */
+RenderPass::RenderPass(
+    const VkRenderingInfo& createInfo) :
+    handle(VK_NULL_HANDLE)
+{
+    // No subpasses in dynamic rendering
+    subpassCount = 1;
+
+    // Color attachments
+    for(uint32_t i = 0; i < createInfo.colorAttachmentCount; i++)
+    {
+        auto& attachRef = createInfo.pColorAttachments[i];
+        if (attachRef.imageView == VK_NULL_HANDLE)
+        {
+            continue;
+        }
+
+        attachments.emplace_back(
+            static_cast<RenderPassAttachName>(i),
+            attachRef.loadOp,
+            attachRef.storeOp,
+            false);
+    }
+
+    // Color resolve attachments
+    for(uint32_t i = 0; i < createInfo.colorAttachmentCount; i++)
+    {
+        auto& attachRef = createInfo.pColorAttachments[i];
+        if ((attachRef.imageView == VK_NULL_HANDLE) ||
+            (attachRef.resolveMode == VK_RESOLVE_MODE_NONE))
+        {
+            continue;
+        }
+
+        attachments.emplace_back(
+            static_cast<RenderPassAttachName>(i),
+            VK_ATTACHMENT_LOAD_OP_DONT_CARE,
+            VK_ATTACHMENT_STORE_OP_STORE,
+            true);
+    }
+
+    // Depth attachments
+    if (createInfo.pDepthAttachment)
+    {
+        auto& attachRef = *createInfo.pDepthAttachment;
+
+        // Canonicalize read-only attachments as storeOp=NONE
+        VkAttachmentStoreOp depthStoreOp = attachRef.storeOp;
+        switch(attachRef.imageLayout)
+        {
+        case VK_IMAGE_LAYOUT_READ_ONLY_OPTIMAL:
+        case VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL:
+        case VK_IMAGE_LAYOUT_DEPTH_READ_ONLY_STENCIL_ATTACHMENT_OPTIMAL:
+            if (attachRef.storeOp == VK_ATTACHMENT_STORE_OP_STORE)
+            {
+                depthStoreOp = VK_ATTACHMENT_STORE_OP_NONE;
+            }
+            break;
+        default:
+            depthStoreOp = attachRef.storeOp;
+            break;
+        }
+
+        attachments.emplace_back(
+            RenderPassAttachName::DEPTH,
+            attachRef.loadOp,
+            depthStoreOp,
+            false);
+
+        // Depth resolve attachment
+        if ((attachRef.imageView != VK_NULL_HANDLE) &&
+            (attachRef.resolveMode != VK_RESOLVE_MODE_NONE))
+        {
+            attachments.emplace_back(
+                RenderPassAttachName::DEPTH,
+                VK_ATTACHMENT_LOAD_OP_DONT_CARE,
+                VK_ATTACHMENT_STORE_OP_STORE,
+                true);
+        }
+    }
+
+
+    // Stencil attachment
+    if (createInfo.pStencilAttachment)
+    {
+        auto& attachRef = *createInfo.pStencilAttachment;
+
+        // Canonicalize read-only attachments as storeOp=NONE
+        VkAttachmentStoreOp stencilStoreOp = attachRef.storeOp;
+        switch(attachRef.imageLayout)
+        {
+        case VK_IMAGE_LAYOUT_READ_ONLY_OPTIMAL:
+        case VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL:
+        case VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_STENCIL_READ_ONLY_OPTIMAL:
+            if (attachRef.storeOp == VK_ATTACHMENT_STORE_OP_STORE)
+            {
+                stencilStoreOp = VK_ATTACHMENT_STORE_OP_NONE;
+            }
+            break;
+        default:
+            stencilStoreOp = attachRef.storeOp;
+            break;
+        }
+
+        attachments.emplace_back(
+            RenderPassAttachName::STENCIL,
+            attachRef.loadOp,
+            stencilStoreOp,
+            false);
+
+        // Stencil resolve attachment
+        if ((attachRef.imageView != VK_NULL_HANDLE) &&
+            (attachRef.resolveMode != VK_RESOLVE_MODE_NONE))
+        {
+            attachments.emplace_back(
+                RenderPassAttachName::STENCIL,
+                VK_ATTACHMENT_LOAD_OP_DONT_CARE,
+                VK_ATTACHMENT_STORE_OP_STORE,
+                true);
+        }
+    }
+}
+
+}
\ No newline at end of file
diff --git a/source_common/trackers/render_pass.hpp b/source_common/trackers/render_pass.hpp
new file mode 100644
index 0000000..fc5044e
--- /dev/null
+++ b/source_common/trackers/render_pass.hpp
@@ -0,0 +1,194 @@
+/*
+ * SPDX-License-Identifier: MIT
+ * ----------------------------------------------------------------------------
+ * Copyright (c) 2022-2024 Arm Limited
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * ---------------------------------------------------------------------------- + */ + +/** + * \file + * The declaration of Vulkan render pass use trackers. + * + * Role summary + * ============ + * + * These trackers are used to monitor the use of a render pass. + */ + +#pragma once + +#include +#include +#include +#include + +namespace Tracker +{ + +/** + * @brief Symbolic names of render pass attachments. + */ +enum class RenderPassAttachName +{ + COLOR0 = 0, + COLOR1 = 1, + COLOR2 = 2, + COLOR3 = 3, + COLOR4 = 4, + COLOR5 = 5, + COLOR6 = 6, + COLOR7 = 7, + DEPTH = 100, + STENCIL = 200 +}; + +/** + * @brief The state tracker for a render pass. 
+ */ +class RenderPassAttachment +{ +public: + RenderPassAttachment( + RenderPassAttachName name, + VkAttachmentLoadOp loadOp, + VkAttachmentStoreOp storeOp, + bool resolve); + + std::string getAttachmentStr() const + { + switch(name) + { + case RenderPassAttachName::COLOR0: + return "C0"; + case RenderPassAttachName::COLOR1: + return "C1"; + case RenderPassAttachName::COLOR2: + return "C2"; + case RenderPassAttachName::COLOR3: + return "C3"; + case RenderPassAttachName::COLOR4: + return "C4"; + case RenderPassAttachName::COLOR5: + return "C5"; + case RenderPassAttachName::COLOR6: + return "C6"; + case RenderPassAttachName::COLOR7: + return "C7"; + case RenderPassAttachName::DEPTH: + return "D"; + case RenderPassAttachName::STENCIL: + return "S"; + default: + assert(false); + } + + return "U"; + } + + bool isLoaded() const + { + return loadOp == VK_ATTACHMENT_LOAD_OP_LOAD; + } + + bool isStored() const + { + return storeOp == VK_ATTACHMENT_STORE_OP_STORE; + } + + bool isResolved() const + { + return resolve; + } + +private: + /** + * @brief The attachment point name. + */ + RenderPassAttachName name; + + /** + * @brief The attachment load operation. + */ + VkAttachmentLoadOp loadOp; + + /** + * @brief The attachment store operation. + */ + VkAttachmentStoreOp storeOp; + + /** + * @brief Is this attachment a resolve attachment? + */ + bool resolve; +}; + +/** + * @brief The state tracker for a render pass. + */ +class RenderPass +{ +public: + RenderPass( + VkRenderPass handle, + const VkRenderPassCreateInfo& createInfo); + + RenderPass( + VkRenderPass handle, + const VkRenderPassCreateInfo2& createInfo); + + RenderPass( + const VkRenderingInfo& createInfo); + + uint32_t getSubpassCount() const + { + return subpassCount; + }; + + const std::vector& getAttachments() const + { + return attachments; + }; + +private: + /** + * @brief The handle of the native render pass we represent. + */ + VkRenderPass handle; + + /** + * @brief The render pass subpass count. 
+ */ + uint32_t subpassCount { 1 }; + + /** + * @brief The render pass attachments in this render pass. + * + * For render passes that are using multiple sub-passes this stores the + * the output attachments present in the final subpass. + * + * TODO: In future we could store more information here using the subpass + * merging feedback extension to work out how many boxes we're going to + * end up with on the timeline, and store attachments per merged chunk. + */ + std::vector attachments; +}; + +} diff --git a/source_common/trackers/stats.hpp b/source_common/trackers/stats.hpp new file mode 100644 index 0000000..2d297ed --- /dev/null +++ b/source_common/trackers/stats.hpp @@ -0,0 +1,227 @@ +/* + * SPDX-License-Identifier: MIT + * ---------------------------------------------------------------------------- + * Copyright (c) 2022-2024 Arm Limited + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * ---------------------------------------------------------------------------- + */ + +/** + * \file + * This module implements basic counter tracking of Vulkan workloads. + */ + +#pragma once + +#include +#include +#include + +namespace Tracker +{ + +/** + * @brief Statistics counters for a single device. + * + * These counters are designed to be used hierarchically, so you can use the + * API to aggregate counters into a parent tracker. Not all instances are + * required to track all statistics. + */ +class Stats +{ +public: + /** + * @brief Increment the frame counter. + */ + void incFrameCount() + { + frameCount += 1; + } + + /** + * @brief Increment the render pass counter. + */ + void incRenderPassCount() + { + renderPassCount += 1; + } + + /** + * @brief Increment the draw counter. + */ + void incDrawCallCount() + { + drawCallCount += 1; + } + + /** + * @brief Increment the compute dispatch counter. + */ + void incDispatchCount() + { + dispatchCount += 1; + } + + /** + * @brief Increment the trace rays counter. + */ + void incTraceRaysCount() + { + traceRaysCount += 1; + }; + + /** + * @brief Increment the buffer transfer counter. + */ + void incBufferTransferCount() + { + bufferTransferCount += 1; + } + + /** + * @brief Increment the image transfer counter. + */ + void incImageTransferCount() + { + imageTransferCount += 1; + } + + /** + * @brief Increment all counters with values from another stats object. 
+     */
+    void mergeCounts(const Stats& other)
+    {
+        frameCount += other.frameCount;
+        renderPassCount += other.renderPassCount;
+        drawCallCount += other.drawCallCount;
+        dispatchCount += other.dispatchCount;
+        traceRaysCount += other.traceRaysCount;
+        bufferTransferCount += other.bufferTransferCount;
+        imageTransferCount += other.imageTransferCount;
+    }
+
+    /**
+     * @brief Reset all counters to zero.
+     */
+    void reset()
+    {
+        frameCount = 0;
+        renderPassCount = 0;
+        drawCallCount = 0;
+        dispatchCount = 0;
+        traceRaysCount = 0;
+        bufferTransferCount = 0;
+        imageTransferCount = 0;
+    }
+
+    /**
+     * @brief Get the frame counter.
+     */
+    uint64_t getFrameCount() const
+    {
+        return frameCount;
+    }
+
+    /**
+     * @brief Get the render pass counter.
+     */
+    uint64_t getRenderPassCount() const
+    {
+        return renderPassCount;
+    }
+
+    /**
+     * @brief Get the draw counter.
+     */
+    uint64_t getDrawCallCount() const
+    {
+        return drawCallCount;
+    }
+
+    /**
+     * @brief Get the compute dispatch counter.
+     */
+    uint64_t getDispatchCount() const
+    {
+        return dispatchCount;
+    }
+
+    /**
+     * @brief Get the trace rays counter.
+     */
+    uint64_t getTraceRaysCount() const
+    {
+        return traceRaysCount;
+    }
+
+    /**
+     * @brief Get the buffer transfer counter.
+     */
+    uint64_t getBufferTransferCount() const
+    {
+        return bufferTransferCount;
+    }
+
+    /**
+     * @brief Get the image transfer counter.
+     */
+    uint64_t getImageTransferCount() const
+    {
+        return imageTransferCount;
+    }
+
+private:
+    /**
+     * @brief The number of frames tracked.
+     */
+    uint64_t frameCount { 0 };
+
+    /**
+     * @brief The number of render passes tracked.
+     */
+    uint64_t renderPassCount { 0 };
+
+    /**
+     * @brief The number of draw calls tracked.
+     */
+    uint64_t drawCallCount { 0 };
+
+    /**
+     * @brief The number of compute dispatches tracked.
+     */
+    uint64_t dispatchCount { 0 };
+
+    /**
+     * @brief The number of trace rays calls tracked.
+ */ + uint64_t traceRaysCount { 0 }; + + /** + * @brief The number of buffer transfers tracked. + */ + uint64_t bufferTransferCount { 0 }; + + /** + * @brief The number of image transfers tracked. + */ + uint64_t imageTransferCount { 0 }; +}; + +} diff --git a/source_common/utils/misc.hpp b/source_common/utils/misc.hpp index fb69b7b..e38e0b7 100644 --- a/source_common/utils/misc.hpp +++ b/source_common/utils/misc.hpp @@ -32,7 +32,10 @@ #include #include +#include +#include #include +#include /** * @brief Macro to stringize a value. @@ -51,7 +54,7 @@ * @param args The variadic values used to populate the template. */ template -std::string fmt_string( +std::string formatString( const std::string& format, Args ... args ) { @@ -69,6 +72,29 @@ std::string fmt_string( return std::string(buf.get(), buf.get() + size - 1); } +/** + * @brief Join a string of parts. + * + * @param parts The list of string parts to join. + * @param separator The delimiter to use when joining the parts. + */ +[[maybe_unused]] static std::string joinString( + const std::vector& parts, + const std::string& separator +) { + std::stringstream out; + for (size_t i = 0; i < parts.size(); i++) + { + out << parts[i]; + if (i != parts.size() - 1) + { + out << separator; + } + } + + return out.str(); +} + /** * @brief Test if an element exists in an iterable container. * @@ -101,22 +127,43 @@ bool isInMap( return cont.find(elem) != cont.end(); } +/** + * @brief Append all values in one vector to the back of another. + * + * @param src The destination vector to append to. + * @param dst The source vector; must not be src vector. + */ +template +void vecAppend( + std::vector& dst, + const std::vector& src +) { + // Perform a resize with some room for growth + size_t newSize = dst.size() + src.size(); + dst.reserve(newSize); + + // Merge secondary into this command buffer + dst.insert(std::end(dst), std::begin(src), std::end(src)); +} + /** * @brief Get a displayable pointer. 
* - * On 64-bit systems this strips the MTE tag. + * On 64-bit Arm systems this strips the MTE tag in the top byte. * - * @return The displayable pointer + * @return The displayable pointer. */ static inline uintptr_t getDisplayPointer( void* pointer ) { uintptr_t dispPointer = reinterpret_cast(pointer); - if constexpr(sizeof(uintptr_t) == 8) - { - dispPointer &= 0x00FFFFFFFFFFFFFFull; - } + #if defined(__aarch64__) + if constexpr(sizeof(uintptr_t) == 8) + { + dispPointer &= 0x00FFFFFFFFFFFFFFull; + } + #endif return dispPointer; } From daf83f9cf484805543cf7eef3514a0542e47c85d Mon Sep 17 00:00:00 2001 From: Peter Harris Date: Wed, 11 Dec 2024 11:24:44 +0000 Subject: [PATCH 2/8] Remove comms logging --- source_common/comms/comms_module.cpp | 7 ------- 1 file changed, 7 deletions(-) diff --git a/source_common/comms/comms_module.cpp b/source_common/comms/comms_module.cpp index 5ef695e..42c815e 100644 --- a/source_common/comms/comms_module.cpp +++ b/source_common/comms/comms_module.cpp @@ -46,7 +46,6 @@ namespace Comms CommsModule::CommsModule( const std::string& domainAddress ) { - LAYER_LOG("Client UDS socket create"); sockfd = socket(AF_UNIX, SOCK_STREAM, 0); if (sockfd < 0) { @@ -61,7 +60,6 @@ CommsModule::CommsModule( std::strcpy(servAddr.sun_path + 1, domainAddress.c_str()); servAddr.sun_path[0] = '\0'; - LAYER_LOG("Client UDS connect"); int conn = connect( sockfd, reinterpret_cast(&servAddr), @@ -74,13 +72,8 @@ CommsModule::CommsModule( return; } - LAYER_LOG("Client make transmitter"); transmitter = std::make_unique(*this); - - LAYER_LOG("Client make receiver"); receiver = std::make_unique(*this); - - LAYER_LOG("Client make complete"); } /** See header for documentation. 
*/ From d6ef99a390ce6dead492c940cfa85756750be910 Mon Sep 17 00:00:00 2001 From: Peter Harris Date: Wed, 11 Dec 2024 11:36:47 +0000 Subject: [PATCH 3/8] Remove debug logging --- .../source/layer_device_functions_render_pass.cpp | 7 ------- 1 file changed, 7 deletions(-) diff --git a/layer_gpu_timeline/source/layer_device_functions_render_pass.cpp b/layer_gpu_timeline/source/layer_device_functions_render_pass.cpp index 3486da3..1a272bb 100644 --- a/layer_gpu_timeline/source/layer_device_functions_render_pass.cpp +++ b/layer_gpu_timeline/source/layer_device_functions_render_pass.cpp @@ -156,20 +156,16 @@ VKAPI_ATTR void VKAPI_CALL layer_vkCmdBeginRenderPass( std::unique_lock lock { g_vulkanLock }; auto* layer = Device::retrieve(commandBuffer); - LAYER_LOG("A"); auto& tracker = layer->getStateTracker(); auto& cb = tracker.getCommandBuffer(commandBuffer); - LAYER_LOG("B"); auto& rp = tracker.getRenderPass(pRenderPassBegin->renderPass); uint32_t width = pRenderPassBegin->renderArea.extent.width; uint32_t height = pRenderPassBegin->renderArea.extent.height; // Notify the command buffer we are starting a new render pass - LAYER_LOG("C"); uint64_t tagID = cb.renderPassBegin(rp, width, height); - LAYER_LOG("D"); // Emit the unique workload tag into the command stream std::string tagLabel = formatString("t%" PRIu64, tagID); [[maybe_unused]] VkDebugUtilsLabelEXT tagInfo { @@ -179,11 +175,9 @@ VKAPI_ATTR void VKAPI_CALL layer_vkCmdBeginRenderPass( .color = { 0.0f, 0.0f, 0.0f, 0.0f } }; - LAYER_LOG("E"); // Release the lock to call into the driver lock.unlock(); layer->driver.vkCmdBeginDebugUtilsLabelEXT(commandBuffer, &tagInfo); - LAYER_LOG("F"); layer->driver.vkCmdBeginRenderPass(commandBuffer, pRenderPassBegin, contents); } @@ -372,7 +366,6 @@ VKAPI_ATTR void VKAPI_CALL layer_vkCmdEndRenderPass( // Update the layer command stream in the tracker auto& tracker = layer->getStateTracker(); - LAYER_LOG(" - Command buffer: %p", (void*)commandBuffer); auto& cb = 
tracker.getCommandBuffer(commandBuffer); cb.renderPassEnd(); From 47a71805315e0820084fe57ec4ff2425d2bfabe0 Mon Sep 17 00:00:00 2001 From: Peter Harris Date: Wed, 11 Dec 2024 15:01:51 +0000 Subject: [PATCH 4/8] Improve renderpass handling --- layer_gpu_timeline/android_build.sh | 2 +- .../docs/command_buffer_model.md | 2 +- layer_gpu_timeline/source/CMakeLists.txt | 1 + layer_gpu_timeline/source/device.hpp | 8 +++ .../source/layer_device_functions_queue.cpp | 57 ++++++++++++++++++- lgl_host_server.py | 7 ++- lglpy/server.py | 2 +- lglpy/service_gpu_timeline.py | 55 +++++++++++++++++- source_common/trackers/command_buffer.cpp | 4 +- .../trackers/layer_command_stream.cpp | 17 +++++- .../trackers/layer_command_stream.hpp | 23 ++++++-- source_common/trackers/queue.cpp | 29 +++++++++- source_common/trackers/queue.hpp | 11 +++- 13 files changed, 195 insertions(+), 23 deletions(-) diff --git a/layer_gpu_timeline/android_build.sh b/layer_gpu_timeline/android_build.sh index 960b2b0..5bdbff5 100644 --- a/layer_gpu_timeline/android_build.sh +++ b/layer_gpu_timeline/android_build.sh @@ -67,7 +67,7 @@ cmake \ -DCMAKE_TOOLCHAIN_FILE="${ANDROID_NDK_HOME}/build/cmake/android.toolchain.cmake" \ .. -make -j1 +make -j8 popd diff --git a/layer_gpu_timeline/docs/command_buffer_model.md b/layer_gpu_timeline/docs/command_buffer_model.md index e7422d7..f317b51 100644 --- a/layer_gpu_timeline/docs/command_buffer_model.md +++ b/layer_gpu_timeline/docs/command_buffer_model.md @@ -21,7 +21,7 @@ performed. * Pop the latest marker from the queue debug label stack. -**RENDERPASS_BEGIN(const json\*):** +**RENDER_PASS(const json\*):** * Set the current workload to a new render pass with the passed metadata. 
diff --git a/layer_gpu_timeline/source/CMakeLists.txt b/layer_gpu_timeline/source/CMakeLists.txt index 45e55c3..ca4fc25 100644 --- a/layer_gpu_timeline/source/CMakeLists.txt +++ b/layer_gpu_timeline/source/CMakeLists.txt @@ -57,6 +57,7 @@ add_library( target_include_directories( ${VK_LAYER} PRIVATE ${PROJECT_SOURCE_DIR}/../source_common + ${PROJECT_SOURCE_DIR}/../source_third_party ${CMAKE_CURRENT_BINARY_DIR} .) diff --git a/layer_gpu_timeline/source/device.hpp b/layer_gpu_timeline/source/device.hpp index b04ace1..2d5460d 100644 --- a/layer_gpu_timeline/source/device.hpp +++ b/layer_gpu_timeline/source/device.hpp @@ -130,6 +130,14 @@ class Device */ ~Device(); + /** + * @brief Callback for sending messages + */ + void onFrame(const std::string& message) + { + commsWrapper->txMessage(message); + } + /** * @brief Callback for sending messages */ diff --git a/layer_gpu_timeline/source/layer_device_functions_queue.cpp b/layer_gpu_timeline/source/layer_device_functions_queue.cpp index 906a39e..30ca611 100644 --- a/layer_gpu_timeline/source/layer_device_functions_queue.cpp +++ b/layer_gpu_timeline/source/layer_device_functions_queue.cpp @@ -27,9 +27,15 @@ #include #include +#include "utils/misc.hpp" +#include "nlohmann/json.hpp" + #include "device.hpp" #include "layer_device_functions.hpp" -#include "utils/misc.hpp" + +using json = nlohmann::json; + +using namespace std::placeholders; extern std::mutex g_vulkanLock; @@ -48,6 +54,15 @@ VKAPI_ATTR VkResult VKAPI_CALL layer_vkQueuePresentKHR( auto& tracker = layer->getStateTracker(); tracker.queuePresent(); + // This is run with the lock held to ensure that all queue submit + // messages are sent sequentially to the host tool + json frame { + { "type", "frame" }, + { "fid", tracker.totalStats.getFrameCount() } + }; + + layer->onFrame(frame.dump()); + // Release the lock to call into the driver lock.unlock(); return layer->driver.vkQueuePresentKHR(queue, pPresentInfo); @@ -67,12 +82,13 @@ VKAPI_ATTR VkResult VKAPI_CALL 
layer_vkQueueSubmit( std::unique_lock lock { g_vulkanLock }; auto* layer = Device::retrieve(queue); - using namespace std::placeholders; auto onSubmit = std::bind(&Device::onWorkloadSubmit, layer, _1); auto& tracker = layer->getStateTracker(); auto& trackQueue = tracker.getQueue(queue); + // This is run with the lock held to ensure that all queue submit + // messages are sent sequentially to the host tool for (uint32_t i = 0; i < submitCount; i++) { const auto& submit = pSubmits[i]; @@ -80,7 +96,6 @@ VKAPI_ATTR VkResult VKAPI_CALL layer_vkQueueSubmit( { auto& trackCB = tracker.getCommandBuffer(submit.pCommandBuffers[j]); const auto& LCS = trackCB.getSubmitCommandStream(); - trackQueue.runSubmitCommandStream(LCS, onSubmit); } } @@ -104,6 +119,24 @@ VKAPI_ATTR VkResult VKAPI_CALL layer_vkQueueSubmit2( std::unique_lock lock { g_vulkanLock }; auto* layer = Device::retrieve(queue); + auto onSubmit = std::bind(&Device::onWorkloadSubmit, layer, _1); + + auto& tracker = layer->getStateTracker(); + auto& trackQueue = tracker.getQueue(queue); + + // This is run with the lock held to ensure that all queue submit + // messages are sent sequentially to the host tool + for (uint32_t i = 0; i < submitCount; i++) + { + const auto& submit = pSubmits[i]; + for (uint32_t j = 0; j < submit.commandBufferInfoCount; j++) + { + auto& trackCB = tracker.getCommandBuffer(submit.pCommandBufferInfos[j].commandBuffer); + const auto& LCS = trackCB.getSubmitCommandStream(); + trackQueue.runSubmitCommandStream(LCS, onSubmit); + } + } + // Release the lock to call into the driver lock.unlock(); return layer->driver.vkQueueSubmit2(queue, submitCount, pSubmits, fence); @@ -123,6 +156,24 @@ VKAPI_ATTR VkResult VKAPI_CALL layer_vkQueueSubmit2KHR( std::unique_lock lock { g_vulkanLock }; auto* layer = Device::retrieve(queue); + auto onSubmit = std::bind(&Device::onWorkloadSubmit, layer, _1); + + auto& tracker = layer->getStateTracker(); + auto& trackQueue = tracker.getQueue(queue); + + // This is run 
with the lock held to ensure that all queue submit + // messages are sent sequentially to the host tool + for (uint32_t i = 0; i < submitCount; i++) + { + const auto& submit = pSubmits[i]; + for (uint32_t j = 0; j < submit.commandBufferInfoCount; j++) + { + auto& trackCB = tracker.getCommandBuffer(submit.pCommandBufferInfos[j].commandBuffer); + const auto& LCS = trackCB.getSubmitCommandStream(); + trackQueue.runSubmitCommandStream(LCS, onSubmit); + } + } + // Release the lock to call into the driver lock.unlock(); return layer->driver.vkQueueSubmit2KHR(queue, submitCount, pSubmits, fence); diff --git a/lgl_host_server.py b/lgl_host_server.py index 14d5941..3893fec 100644 --- a/lgl_host_server.py +++ b/lgl_host_server.py @@ -57,14 +57,15 @@ def main(): print() # Start it running - serverThread = threading.Thread(target=server.run) + serverThread = threading.Thread(target=server.run, daemon=True) serverThread.start() # Press to exit try: - input("Press any key to exit ...") + input("Press any key to exit ...\n\n") except KeyboardInterrupt: - server.stop() + print("Exiting ...") + sys.exit(0) return 0 diff --git a/lglpy/server.py b/lglpy/server.py index e10c58c..127e708 100644 --- a/lglpy/server.py +++ b/lglpy/server.py @@ -123,7 +123,7 @@ def run(self): # Accept connections from outside while not self.shutdown: - print('Waiting for connection') + print('Waiting for client connection') try: sockfd, _ = listen_sockfd.accept() except OSError: diff --git a/lglpy/service_gpu_timeline.py b/lglpy/service_gpu_timeline.py index 69ac031..1c707f2 100644 --- a/lglpy/service_gpu_timeline.py +++ b/lglpy/service_gpu_timeline.py @@ -25,18 +25,69 @@ # implements a basic message endpoint for testing. 
from lglpy.server import Message +import json +import struct class GPUTimelineService: def __init__(self): - pass + self.frame = { + "frame": 0, + "workloads": [ + + ] + } + + # TODO: Make file name configurable + self.fileHandle = open('malivision.gputl', 'wb') def get_service_name(self) -> str: return 'GPUTimeline' + def handle_frame(self, msg): + print(json.dumps(self.frame, indent=4)) + + # Write frame packet to the file + lastFrame = json.dumps(self.frame).encode('utf-8') + length = struct.pack('size()) + { + metadata["label"] = *debugLabel; + } + // Default is 1, so only store if we need it if (subpassCount != 1) { @@ -126,6 +132,7 @@ std::string LCSRenderPass::getBeginMetadata( /* See header for details. */ std::string LCSRenderPass::getContinuationMetadata( + const std::string* debugLabel, uint64_t tagIDContinuation, uint64_t submitID) const { @@ -135,6 +142,11 @@ std::string LCSRenderPass::getContinuationMetadata( { "drawCallCount", drawCallCount } }; + if (debugLabel && debugLabel->size()) + { + metadata["label"] = *debugLabel; + } + if (submitID != 0) { metadata["sid"] = submitID; @@ -145,17 +157,18 @@ std::string LCSRenderPass::getContinuationMetadata( /* See header for details. 
*/ std::string LCSRenderPass::getMetadata( + const std::string* debugLabel, uint64_t tagIDContinuation, uint64_t submitID) const { if (tagID) { assert(tagIDContinuation == 0); - return getBeginMetadata(submitID); + return getBeginMetadata(debugLabel, submitID); } assert(tagIDContinuation != 0); - return getContinuationMetadata(tagIDContinuation, submitID); + return getContinuationMetadata(debugLabel, tagIDContinuation, submitID); } } diff --git a/source_common/trackers/layer_command_stream.hpp b/source_common/trackers/layer_command_stream.hpp index a144683..9f3635d 100644 --- a/source_common/trackers/layer_command_stream.hpp +++ b/source_common/trackers/layer_command_stream.hpp @@ -65,8 +65,7 @@ enum class LCSOpcode { MARKER_BEGIN, MARKER_END, - RENDERPASS_BEGIN, - RENDERPASS_END, // TODO: Does this need to be an opcode? + RENDER_PASS, DISPATCH, TRACE_RAYS, BUFFER_TRANSFER, @@ -85,15 +84,26 @@ class LCSWorkload virtual ~LCSWorkload() = default; virtual std::string getMetadata( + const std::string* debugLabel=nullptr, uint64_t tagIDContinuation=0, uint64_t submitID=0) const = 0; + /** + * @brief Get this workload's tagID. + * + * @return The assigned ID. + */ + uint64_t getTagID() const + { + return tagID; + } + /** * @brief Get a unique tagID to label a workload in a command buffer. * * @return The assigned ID.
*/ - static uint64_t getTagID() + static uint64_t assignTagID() { return nextTagID.fetch_add(1, std::memory_order_relaxed); } @@ -139,15 +149,18 @@ class LCSRenderPass : public LCSWorkload }; virtual std::string getMetadata( + const std::string* debugLabel=nullptr, uint64_t tagIDContinuation=0, uint64_t submitID=0) const; private: std::string getBeginMetadata( + const std::string* debugLabel=nullptr, uint64_t submitID=0) const; std::string getContinuationMetadata( - uint64_t tagIDContinuation, + const std::string* debugLabel=nullptr, + uint64_t tagIDContinuation=0, uint64_t submitID=0) const; uint32_t width; @@ -175,9 +188,11 @@ class LCSMarker : public LCSWorkload virtual ~LCSMarker() = default; virtual std::string getMetadata( + const std::string* debugLabel=nullptr, uint64_t tagIDContinuation=0, uint64_t submitID=0) const { + UNUSED(debugLabel); UNUSED(tagIDContinuation); UNUSED(submitID); return label; diff --git a/source_common/trackers/queue.cpp b/source_common/trackers/queue.cpp index cfd6590..358d71a 100644 --- a/source_common/trackers/queue.cpp +++ b/source_common/trackers/queue.cpp @@ -55,11 +55,36 @@ void Queue::runSubmitCommandStream( { debugStack.pop_back(); } - else if (opCode == LCSOpcode::RENDERPASS_BEGIN) + else if (opCode == LCSOpcode::RENDER_PASS) { auto* workload = dynamic_cast(opData); - callback(workload->getMetadata()); + uint64_t tagID = workload->getTagID(); + + // Build the debug info std::string log = joinString(debugStack, "|"); + + // Workload is a new render pass + if (tagID > 0) + { + assert(lastRenderPassTagID == 0); + callback(workload->getMetadata(&log)); + + lastRenderPassTagID = 0; + if (workload->isSuspending()) + { + lastRenderPassTagID = tagID; + } + } + // Workload is a continuation + else + { + assert(lastRenderPassTagID != 0); + callback(workload->getMetadata(nullptr, lastRenderPassTagID)); + if (!workload->isSuspending()) + { + lastRenderPassTagID = 0; + } + } } } } diff --git a/source_common/trackers/queue.hpp 
b/source_common/trackers/queue.hpp index 26fc416..ff62b87 100644 --- a/source_common/trackers/queue.hpp +++ b/source_common/trackers/queue.hpp @@ -74,7 +74,7 @@ class Queue * * @return The assigned ID. */ - static uint64_t getSubmitID() + static uint64_t assignSubmitID() { return nextSubmitID.fetch_add(1, std::memory_order_relaxed); } @@ -86,14 +86,21 @@ class Queue VkQueue handle; /** - * @brief The stack of debug labels in the tool. + * @brief The stack of user debug labels for this queue. */ std::vector debugStack; + /** + * @brief The last non-zero renderpass tagID submitted. + */ + uint64_t lastRenderPassTagID { 0 }; + /** * @brief The command buffer submitID allocator. */ static std::atomic nextSubmitID; + + }; } From 829a39f09c8bb884f397b5e237771a1197ecee69 Mon Sep 17 00:00:00 2001 From: Peter Harris Date: Thu, 12 Dec 2024 13:19:55 +0000 Subject: [PATCH 5/8] Add compute --- .../layer_device_functions_command_buffer.cpp | 1 + .../layer_device_functions_dispatch.cpp | 80 ++++++++++++++++--- .../layer_device_functions_render_pass.cpp | 2 +- lglpy/service_gpu_timeline.py | 23 ++++-- source_common/trackers/command_buffer.cpp | 51 ++++++++---- source_common/trackers/command_buffer.hpp | 27 +++++++ .../trackers/layer_command_stream.cpp | 64 +++++++++++++-- .../trackers/layer_command_stream.hpp | 30 ++++++- source_common/trackers/queue.cpp | 6 ++ source_common/trackers/render_pass.cpp | 1 - 10 files changed, 246 insertions(+), 39 deletions(-) diff --git a/layer_gpu_timeline/source/layer_device_functions_command_buffer.cpp b/layer_gpu_timeline/source/layer_device_functions_command_buffer.cpp index 75fb36b..f0cc338 100644 --- a/layer_gpu_timeline/source/layer_device_functions_command_buffer.cpp +++ b/layer_gpu_timeline/source/layer_device_functions_command_buffer.cpp @@ -79,6 +79,7 @@ VKAPI_ATTR VkResult layer_vkBeginCommandBuffer( auto& tracker = layer->getStateTracker(); auto& cmdBuffer = tracker.getCommandBuffer(commandBuffer); cmdBuffer.reset(); + 
cmdBuffer.begin(pBeginInfo->flags & VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT); // Release the lock to call into the driver lock.unlock(); diff --git a/layer_gpu_timeline/source/layer_device_functions_dispatch.cpp b/layer_gpu_timeline/source/layer_device_functions_dispatch.cpp index 88bf568..0e3774f 100644 --- a/layer_gpu_timeline/source/layer_device_functions_dispatch.cpp +++ b/layer_gpu_timeline/source/layer_device_functions_dispatch.cpp @@ -32,13 +32,16 @@ extern std::mutex g_vulkanLock; -static void registerDispatch( +static uint64_t registerDispatch( Device* layer, - VkCommandBuffer commandBuffer + VkCommandBuffer commandBuffer, + int64_t groupX, + int64_t groupY, + int64_t groupZ ) { - auto& state = layer->getStateTracker(); - auto& stats = state.getCommandBuffer(commandBuffer).getStats(); - stats.incDispatchCount(); + auto& tracker = layer->getStateTracker(); + auto& cb = tracker.getCommandBuffer(commandBuffer); + return cb.dispatch(groupX, groupY, groupZ); } /* See Vulkan API for documentation. */ @@ -55,11 +58,27 @@ VKAPI_ATTR void VKAPI_CALL layer_vkCmdDispatch( std::unique_lock lock { g_vulkanLock }; auto* layer = Device::retrieve(commandBuffer); - registerDispatch(layer, commandBuffer); + uint64_t tagID = registerDispatch( + layer, + commandBuffer, + static_cast(groupCountX), + static_cast(groupCountY), + static_cast(groupCountZ)); + + // Emit the unique workload tag into the command stream + std::string tagLabel = formatString("t%" PRIu64, tagID); + VkDebugUtilsLabelEXT tagInfo { + .sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT, + .pNext = nullptr, + .pLabelName = tagLabel.c_str(), + .color = { 0.0f, 0.0f, 0.0f, 0.0f } + }; // Release the lock to call into the driver lock.unlock(); + layer->driver.vkCmdBeginDebugUtilsLabelEXT(commandBuffer, &tagInfo); layer->driver.vkCmdDispatch(commandBuffer, groupCountX, groupCountY, groupCountZ); + layer->driver.vkCmdEndDebugUtilsLabelEXT(commandBuffer); } /* See Vulkan API for documentation. 
*/ @@ -79,11 +98,27 @@ VKAPI_ATTR void VKAPI_CALL layer_vkCmdDispatchBase( std::unique_lock lock { g_vulkanLock }; auto* layer = Device::retrieve(commandBuffer); - registerDispatch(layer, commandBuffer); + uint64_t tagID = registerDispatch( + layer, + commandBuffer, + static_cast(groupCountX), + static_cast(groupCountY), + static_cast(groupCountZ)); + + // Emit the unique workload tag into the command stream + std::string tagLabel = formatString("t%" PRIu64, tagID); + VkDebugUtilsLabelEXT tagInfo { + .sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT, + .pNext = nullptr, + .pLabelName = tagLabel.c_str(), + .color = { 0.0f, 0.0f, 0.0f, 0.0f } + }; // Release the lock to call into the driver lock.unlock(); + layer->driver.vkCmdBeginDebugUtilsLabelEXT(commandBuffer, &tagInfo); layer->driver.vkCmdDispatchBase(commandBuffer, baseGroupX, baseGroupY, baseGroupZ, groupCountX, groupCountY, groupCountZ); + layer->driver.vkCmdEndDebugUtilsLabelEXT(commandBuffer); } /* See Vulkan API for documentation. 
*/ @@ -103,11 +138,27 @@ VKAPI_ATTR void VKAPI_CALL layer_vkCmdDispatchBaseKHR( std::unique_lock lock { g_vulkanLock }; auto* layer = Device::retrieve(commandBuffer); - registerDispatch(layer, commandBuffer); + uint64_t tagID = registerDispatch( + layer, + commandBuffer, + static_cast(groupCountX), + static_cast(groupCountY), + static_cast(groupCountZ)); + + // Emit the unique workload tag into the command stream + std::string tagLabel = formatString("t%" PRIu64, tagID); + VkDebugUtilsLabelEXT tagInfo { + .sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT, + .pNext = nullptr, + .pLabelName = tagLabel.c_str(), + .color = { 0.0f, 0.0f, 0.0f, 0.0f } + }; // Release the lock to call into the driver lock.unlock(); + layer->driver.vkCmdBeginDebugUtilsLabelEXT(commandBuffer, &tagInfo); layer->driver.vkCmdDispatchBaseKHR(commandBuffer, baseGroupX, baseGroupY, baseGroupZ, groupCountX, groupCountY, groupCountZ); + layer->driver.vkCmdEndDebugUtilsLabelEXT(commandBuffer); } /* See Vulkan API for documentation. 
*/ @@ -123,9 +174,20 @@ VKAPI_ATTR void VKAPI_CALL layer_vkCmdDispatchIndirect( std::unique_lock lock { g_vulkanLock }; auto* layer = Device::retrieve(commandBuffer); - registerDispatch(layer, commandBuffer); + uint64_t tagID = registerDispatch(layer, commandBuffer, -1, -1, -1); + + // Emit the unique workload tag into the command stream + std::string tagLabel = formatString("t%" PRIu64, tagID); + VkDebugUtilsLabelEXT tagInfo { + .sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT, + .pNext = nullptr, + .pLabelName = tagLabel.c_str(), + .color = { 0.0f, 0.0f, 0.0f, 0.0f } + }; // Release the lock to call into the driver lock.unlock(); + layer->driver.vkCmdBeginDebugUtilsLabelEXT(commandBuffer, &tagInfo); layer->driver.vkCmdDispatchIndirect(commandBuffer, buffer, offset); + layer->driver.vkCmdEndDebugUtilsLabelEXT(commandBuffer); } diff --git a/layer_gpu_timeline/source/layer_device_functions_render_pass.cpp b/layer_gpu_timeline/source/layer_device_functions_render_pass.cpp index 1a272bb..b59f305 100644 --- a/layer_gpu_timeline/source/layer_device_functions_render_pass.cpp +++ b/layer_gpu_timeline/source/layer_device_functions_render_pass.cpp @@ -168,7 +168,7 @@ VKAPI_ATTR void VKAPI_CALL layer_vkCmdBeginRenderPass( // Emit the unique workload tag into the command stream std::string tagLabel = formatString("t%" PRIu64, tagID); - [[maybe_unused]] VkDebugUtilsLabelEXT tagInfo { + VkDebugUtilsLabelEXT tagInfo { .sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT, .pNext = nullptr, .pLabelName = tagLabel.c_str(), diff --git a/lglpy/service_gpu_timeline.py b/lglpy/service_gpu_timeline.py index 1c707f2..0f619ae 100644 --- a/lglpy/service_gpu_timeline.py +++ b/lglpy/service_gpu_timeline.py @@ -33,9 +33,7 @@ class GPUTimelineService: def __init__(self): self.frame = { "frame": 0, - "workloads": [ - - ] + "workloads": [] } # TODO: Make file name configurable @@ -45,8 +43,6 @@ def get_service_name(self) -> str: return 'GPUTimeline' def handle_frame(self, msg): - 
print(json.dumps(self.frame, indent=4)) - # Write frame packet to the file lastFrame = json.dumps(self.frame).encode('utf-8') length = struct.pack('( - tagID, renderPass, width, height, suspending); + tagID, renderPass, width, height, suspending, oneTimeSubmit); currentRenderPass = workload; workloads.push_back(workload); @@ -111,9 +114,7 @@ uint64_t CommandBuffer::renderPassBegin( return tagID; } -/** - * @brief End a user render pass. - */ +/* See header for documentation. */ bool CommandBuffer::renderPassEnd() { assert(currentRenderPass); @@ -130,6 +131,28 @@ bool CommandBuffer::renderPassEnd() return suspending; } +/* See header for documentation. */ +uint64_t CommandBuffer::dispatch( + int64_t xGroups, + int64_t yGroups, + int64_t zGroups +) { + LAYER_LOG("Creating LCSDispatch workload"); + uint64_t tagID = Tracker::LCSWorkload::assignTagID(); + stats.incDispatchCount(); + + // Add a workload to the command buffer + auto workload = std::make_shared( + tagID, xGroups, yGroups, zGroups); + workloads.push_back(workload); + + // Add a command to the layer-side command stream + auto instr = std::make_pair(LCSOpcode::DISPATCH, workload); + workloadCommandStream.push_back(instr); + + return tagID; +} + /* See header for documentation. */ void CommandBuffer::executeCommands( CommandBuffer& secondary @@ -142,7 +165,7 @@ void CommandBuffer::executeCommands( vecAppend(workloadCommandStream, secondary.workloadCommandStream); } - +/* See header for documentation. */ CommandPool::CommandPool( VkCommandPool _handle) : handle(_handle) diff --git a/source_common/trackers/command_buffer.hpp b/source_common/trackers/command_buffer.hpp index fcf5c34..c4b4828 100644 --- a/source_common/trackers/command_buffer.hpp +++ b/source_common/trackers/command_buffer.hpp @@ -114,6 +114,20 @@ class CommandBuffer */ bool renderPassEnd(); + /** + * @brief Capture a compute dispatch. + * + * @param xGroups Number of groups in X dimension, or -1 if unknown.
+ * @param yGroups Number of groups in Y dimension, or -1 if unknown. + * @param zGroups Number of groups in Z dimension, or -1 if unknown. + * + * @return Returns the tagID assigned to this workload. + */ + uint64_t dispatch( + int64_t xGroups, + int64_t yGroups, + int64_t zGroups); + /** * @brief Begin a user debug marker range. */ @@ -136,12 +150,25 @@ class CommandBuffer */ void reset(); + /** + * @brief Begin recording back into the @a Recording state. + * + * @param oneTimeSubmit Is this a one-time submit recording. + */ + void begin( + bool oneTimeSubmit); + private: /** * @brief The Vulkan API handle of this command buffer. */ const VkCommandBuffer handle; + /** + * @brief Is this command buffer recording one-time-submit? + */ + bool oneTimeSubmit { false }; + /** * @brief The command buffer draw count at the start of the render pass. */ diff --git a/source_common/trackers/layer_command_stream.cpp b/source_common/trackers/layer_command_stream.cpp index a3dfccd..2b29d07 100644 --- a/source_common/trackers/layer_command_stream.cpp +++ b/source_common/trackers/layer_command_stream.cpp @@ -58,11 +58,13 @@ LCSRenderPass::LCSRenderPass( const RenderPass& renderPass, uint32_t _width, uint32_t _height, - bool _suspending) : + bool _suspending, + bool _oneTimeSubmit) : LCSWorkload(_tagID), width(_width), height(_height), - suspending(_suspending) + suspending(_suspending), + oneTimeSubmit(_oneTimeSubmit) { // Copy these as the renderpass object may be transient. subpassCount = renderPass.getSubpassCount(); @@ -74,12 +76,22 @@ std::string LCSRenderPass::getBeginMetadata( const std::string* debugLabel, uint64_t submitID) const { + // Draw count for a multi-submit command buffer cannot be reliably + // associated with a single tagID if restartable across command buffer + // boundaries because different command buffer submit combinations can + // result in different draw counts for the same starting tagID. 
+ int64_t drawCount = static_cast(drawCallCount); + if (!oneTimeSubmit && suspending) + { + drawCount = -1; + } + json metadata = { { "type", "renderpass" }, { "tid", tagID }, { "width", width }, { "height", height }, - { "drawCallCount", drawCallCount } + { "drawCallCount", drawCount } }; if (submitID != 0) @@ -105,19 +117,19 @@ std::string LCSRenderPass::getBeginMetadata( { "binding", attachment.getAttachmentStr() }, }; - // Default is false, so only store if we need it + // Default is false, so only serialize if we need it if (attachment.isLoaded()) { attachPoint["load"] = true; } - // Default is true, so only store if we need it + // Default is true, so only serialize if we need it if (!attachment.isStored()) { attachPoint["store"] = false; } - // Default is false, so only store if we need it + // Default is false, so only serialize if we need it if (attachment.isResolved()) { attachPoint["resolve"] = true; @@ -171,4 +183,44 @@ std::string LCSRenderPass::getMetadata( return getContinuationMetadata(debugLabel, tagIDContinuation, submitID); } +/* See header for details. */ +LCSDispatch::LCSDispatch( + uint64_t _tagID, + int64_t _xGroups, + int64_t _yGroups, + int64_t _zGroups) : + LCSWorkload(_tagID), + xGroups(_xGroups), + yGroups(_yGroups), + zGroups(_zGroups) +{ + +} + +/* See header for details. 
*/ +std::string LCSDispatch::getMetadata( + const std::string* debugLabel, + uint64_t tagIDContinuation, + uint64_t submitID +) const { + UNUSED(tagIDContinuation); + UNUSED(submitID); + + json metadata = { + { "type", "dispatch" }, + { "tid", tagID }, + { "xGroups", xGroups }, + { "yGroups", yGroups }, + { "zGroups", zGroups } + }; + + if (debugLabel && debugLabel->size()) + { + metadata["label"] = *debugLabel; + } + + return metadata.dump(); +} + + } diff --git a/source_common/trackers/layer_command_stream.hpp b/source_common/trackers/layer_command_stream.hpp index 9f3635d..246558d 100644 --- a/source_common/trackers/layer_command_stream.hpp +++ b/source_common/trackers/layer_command_stream.hpp @@ -134,7 +134,8 @@ class LCSRenderPass : public LCSWorkload const RenderPass& renderPass, uint32_t width, uint32_t height, - bool suspending); + bool suspending, + bool oneTimeSubmit); virtual ~LCSRenderPass() = default; @@ -169,6 +170,8 @@ class LCSRenderPass : public LCSWorkload bool suspending; + bool oneTimeSubmit; + uint32_t subpassCount; uint64_t drawCallCount { 0 }; @@ -176,6 +179,31 @@ class LCSRenderPass : public LCSWorkload std::vector attachments; }; +/** + * @brief Class representing a compute dispatch workload in the command stream. + */ +class LCSDispatch : public LCSWorkload +{ +public: + LCSDispatch( + uint64_t tagID, + int64_t xGroups, + int64_t yGroups, + int64_t zGroups); + + virtual ~LCSDispatch() = default; + + virtual std::string getMetadata( + const std::string* debugLabel=nullptr, + uint64_t tagIDContinuation=0, + uint64_t submitID=0) const; + +private: + int64_t xGroups; + int64_t yGroups; + int64_t zGroups; +}; + /** * @brief Baseclass representing a GPU workload in the command stream.
*/ diff --git a/source_common/trackers/queue.cpp b/source_common/trackers/queue.cpp index 358d71a..756ea07 100644 --- a/source_common/trackers/queue.cpp +++ b/source_common/trackers/queue.cpp @@ -86,6 +86,12 @@ void Queue::runSubmitCommandStream( } } } + else if (opCode == LCSOpcode::DISPATCH) + { + uint64_t tagID = opData->getTagID(); + std::string log = joinString(debugStack, "|"); + callback(opData->getMetadata(&log, tagID)); + } } } diff --git a/source_common/trackers/render_pass.cpp b/source_common/trackers/render_pass.cpp index 3560612..181cf53 100644 --- a/source_common/trackers/render_pass.cpp +++ b/source_common/trackers/render_pass.cpp @@ -341,7 +341,6 @@ RenderPass::RenderPass( } } - // Stencil attachment if (createInfo.pStencilAttachment) { From 9446e54e936f363c5867c8a74363d4bb913e0e09 Mon Sep 17 00:00:00 2001 From: Peter Harris Date: Thu, 12 Dec 2024 19:59:33 +0000 Subject: [PATCH 6/8] Add transfers --- .../source/layer_device_functions.hpp | 2 - layer_gpu_timeline/source/CMakeLists.txt | 1 + .../source/layer_device_functions.hpp | 120 +++++ .../layer_device_functions_dispatch.cpp | 61 +-- .../layer_device_functions_trace_rays.cpp | 40 +- .../layer_device_functions_transfer.cpp | 476 ++++++++++++++++++ lglpy/service_gpu_timeline.py | 7 +- source_common/trackers/command_buffer.cpp | 62 ++- source_common/trackers/command_buffer.hpp | 38 ++ .../trackers/layer_command_stream.cpp | 110 ++++ .../trackers/layer_command_stream.hpp | 71 +++ source_common/trackers/queue.cpp | 5 +- 12 files changed, 937 insertions(+), 56 deletions(-) create mode 100644 layer_gpu_timeline/source/layer_device_functions_transfer.cpp diff --git a/layer_example/source/layer_device_functions.hpp b/layer_example/source/layer_device_functions.hpp index f3403c7..df321c2 100644 --- a/layer_example/source/layer_device_functions.hpp +++ b/layer_example/source/layer_device_functions.hpp @@ -23,8 +23,6 @@ * ---------------------------------------------------------------------------- */ 
-#include - #include "framework/utils.hpp" /* See Vulkan API for documentation. */ diff --git a/layer_gpu_timeline/source/CMakeLists.txt b/layer_gpu_timeline/source/CMakeLists.txt index ca4fc25..b8212d1 100644 --- a/layer_gpu_timeline/source/CMakeLists.txt +++ b/layer_gpu_timeline/source/CMakeLists.txt @@ -52,6 +52,7 @@ add_library( layer_device_functions_queue.cpp layer_device_functions_render_pass.cpp layer_device_functions_trace_rays.cpp + layer_device_functions_transfer.cpp timeline_comms.cpp) target_include_directories( diff --git a/layer_gpu_timeline/source/layer_device_functions.hpp b/layer_gpu_timeline/source/layer_device_functions.hpp index 129f1e6..3806398 100644 --- a/layer_gpu_timeline/source/layer_device_functions.hpp +++ b/layer_gpu_timeline/source/layer_device_functions.hpp @@ -331,6 +331,126 @@ VKAPI_ATTR void VKAPI_CALL layer_vkCmdTraceRaysKHR( uint32_t height, uint32_t depth); + +// Commands for transfers + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdFillBuffer( + VkCommandBuffer commandBuffer, + VkBuffer dstBuffer, + VkDeviceSize dstOffset, + VkDeviceSize size, + uint32_t data); + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdClearColorImage( + VkCommandBuffer commandBuffer, + VkImage image, + VkImageLayout imageLayout, + const VkClearColorValue* pColor, + uint32_t rangeCount, + const VkImageSubresourceRange* pRanges); + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdClearDepthStencilImage( + VkCommandBuffer commandBuffer, + VkImage image, + VkImageLayout imageLayout, + const VkClearDepthStencilValue* pDepthStencil, + uint32_t rangeCount, + const VkImageSubresourceRange* pRanges); + +/* See Vulkan API for documentation. 
*/ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdCopyBuffer( + VkCommandBuffer commandBuffer, + VkBuffer srcBuffer, + VkBuffer dstBuffer, + uint32_t regionCount, + const VkBufferCopy* pRegions); + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdCopyBuffer2( + VkCommandBuffer commandBuffer, + const VkCopyBufferInfo2* pCopyBufferInfo); + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdCopyBuffer2KHR( + VkCommandBuffer commandBuffer, + const VkCopyBufferInfo2* pCopyBufferInfo); + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdCopyBufferToImage( + VkCommandBuffer commandBuffer, + VkBuffer srcBuffer, + VkImage dstImage, + VkImageLayout dstImageLayout, + uint32_t regionCount, + const VkBufferImageCopy* pRegions); + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdCopyBufferToImage2( + VkCommandBuffer commandBuffer, + const VkCopyBufferToImageInfo2* pCopyBufferToImageInfo); + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdCopyBufferToImage2KHR( + VkCommandBuffer commandBuffer, + const VkCopyBufferToImageInfo2* pCopyBufferToImageInfo); + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdCopyImage( + VkCommandBuffer commandBuffer, + VkImage srcImage, + VkImageLayout srcImageLayout, + VkImage dstImage, + VkImageLayout dstImageLayout, + uint32_t regionCount, + const VkImageCopy* pRegions); + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdCopyImage2( + VkCommandBuffer commandBuffer, + const VkCopyImageInfo2* pCopyImageInfo); + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdCopyImage2KHR( + VkCommandBuffer commandBuffer, + const VkCopyImageInfo2* pCopyImageInfo); + +/* See Vulkan API for documentation. 
*/ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdCopyImageToBuffer( + VkCommandBuffer commandBuffer, + VkImage srcImage, + VkImageLayout srcImageLayout, + VkBuffer dstBuffer, + uint32_t regionCount, + const VkBufferImageCopy* pRegions); + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdCopyImageToBuffer2( + VkCommandBuffer commandBuffer, + const VkCopyImageToBufferInfo2* pCopyImageToBufferInfo); + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdCopyImageToBuffer2KHR( + VkCommandBuffer commandBuffer, + const VkCopyImageToBufferInfo2* pCopyImageToBufferInfo); + // Functions for debug /* See Vulkan API for documentation. */ diff --git a/layer_gpu_timeline/source/layer_device_functions_dispatch.cpp b/layer_gpu_timeline/source/layer_device_functions_dispatch.cpp index 0e3774f..7555501 100644 --- a/layer_gpu_timeline/source/layer_device_functions_dispatch.cpp +++ b/layer_gpu_timeline/source/layer_device_functions_dispatch.cpp @@ -44,6 +44,23 @@ static uint64_t registerDispatch( return cb.dispatch(groupX, groupY, groupZ); } +static void emitStartTag( + Device* layer, + VkCommandBuffer commandBuffer, + uint64_t tagID +) { + // Emit the unique workload tag into the command stream + std::string tagLabel = formatString("t%" PRIu64, tagID); + VkDebugUtilsLabelEXT tagInfo { + .sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT, + .pNext = nullptr, + .pLabelName = tagLabel.c_str(), + .color = { 0.0f, 0.0f, 0.0f, 0.0f } + }; + + layer->driver.vkCmdBeginDebugUtilsLabelEXT(commandBuffer, &tagInfo); +} + /* See Vulkan API for documentation. 
*/ template <> VKAPI_ATTR void VKAPI_CALL layer_vkCmdDispatch( @@ -65,18 +82,9 @@ VKAPI_ATTR void VKAPI_CALL layer_vkCmdDispatch( static_cast(groupCountY), static_cast(groupCountZ)); - // Emit the unique workload tag into the command stream - std::string tagLabel = formatString("t%" PRIu64, tagID); - VkDebugUtilsLabelEXT tagInfo { - .sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT, - .pNext = nullptr, - .pLabelName = tagLabel.c_str(), - .color = { 0.0f, 0.0f, 0.0f, 0.0f } - }; - // Release the lock to call into the driver lock.unlock(); - layer->driver.vkCmdBeginDebugUtilsLabelEXT(commandBuffer, &tagInfo); + emitStartTag(layer, commandBuffer, tagID); layer->driver.vkCmdDispatch(commandBuffer, groupCountX, groupCountY, groupCountZ); layer->driver.vkCmdEndDebugUtilsLabelEXT(commandBuffer); } @@ -105,18 +113,9 @@ VKAPI_ATTR void VKAPI_CALL layer_vkCmdDispatchBase( static_cast(groupCountY), static_cast(groupCountZ)); - // Emit the unique workload tag into the command stream - std::string tagLabel = formatString("t%" PRIu64, tagID); - VkDebugUtilsLabelEXT tagInfo { - .sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT, - .pNext = nullptr, - .pLabelName = tagLabel.c_str(), - .color = { 0.0f, 0.0f, 0.0f, 0.0f } - }; - // Release the lock to call into the driver lock.unlock(); - layer->driver.vkCmdBeginDebugUtilsLabelEXT(commandBuffer, &tagInfo); + emitStartTag(layer, commandBuffer, tagID); layer->driver.vkCmdDispatchBase(commandBuffer, baseGroupX, baseGroupY, baseGroupZ, groupCountX, groupCountY, groupCountZ); layer->driver.vkCmdEndDebugUtilsLabelEXT(commandBuffer); } @@ -145,18 +144,9 @@ VKAPI_ATTR void VKAPI_CALL layer_vkCmdDispatchBaseKHR( static_cast(groupCountY), static_cast(groupCountZ)); - // Emit the unique workload tag into the command stream - std::string tagLabel = formatString("t%" PRIu64, tagID); - VkDebugUtilsLabelEXT tagInfo { - .sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT, - .pNext = nullptr, - .pLabelName = tagLabel.c_str(), - .color = { 0.0f, 0.0f, 
0.0f, 0.0f } - }; - // Release the lock to call into the driver lock.unlock(); - layer->driver.vkCmdBeginDebugUtilsLabelEXT(commandBuffer, &tagInfo); + emitStartTag(layer, commandBuffer, tagID); layer->driver.vkCmdDispatchBaseKHR(commandBuffer, baseGroupX, baseGroupY, baseGroupZ, groupCountX, groupCountY, groupCountZ); layer->driver.vkCmdEndDebugUtilsLabelEXT(commandBuffer); } @@ -176,18 +166,9 @@ VKAPI_ATTR void VKAPI_CALL layer_vkCmdDispatchIndirect( uint64_t tagID = registerDispatch(layer, commandBuffer, -1, -1, -1); - // Emit the unique workload tag into the command stream - std::string tagLabel = formatString("t%" PRIu64, tagID); - VkDebugUtilsLabelEXT tagInfo { - .sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT, - .pNext = nullptr, - .pLabelName = tagLabel.c_str(), - .color = { 0.0f, 0.0f, 0.0f, 0.0f } - }; - // Release the lock to call into the driver lock.unlock(); - layer->driver.vkCmdBeginDebugUtilsLabelEXT(commandBuffer, &tagInfo); + emitStartTag(layer, commandBuffer, tagID); layer->driver.vkCmdDispatchIndirect(commandBuffer, buffer, offset); layer->driver.vkCmdEndDebugUtilsLabelEXT(commandBuffer); } diff --git a/layer_gpu_timeline/source/layer_device_functions_trace_rays.cpp b/layer_gpu_timeline/source/layer_device_functions_trace_rays.cpp index 5373747..2d99a3b 100644 --- a/layer_gpu_timeline/source/layer_device_functions_trace_rays.cpp +++ b/layer_gpu_timeline/source/layer_device_functions_trace_rays.cpp @@ -32,13 +32,33 @@ extern std::mutex g_vulkanLock; -static void registerTraceRays( +static uint64_t registerTraceRays( Device* layer, - VkCommandBuffer commandBuffer + VkCommandBuffer commandBuffer, + int64_t itemsX, + int64_t itemsY, + int64_t itemsZ +) { + auto& tracker = layer->getStateTracker(); + auto& cb = tracker.getCommandBuffer(commandBuffer); + return cb.traceRays(itemsX, itemsY, itemsZ); +} + +static void emitStartTag( + Device* layer, + VkCommandBuffer commandBuffer, + uint64_t tagID ) { - auto& state = layer->getStateTracker(); - auto& 
stats = state.getCommandBuffer(commandBuffer).getStats(); - stats.incTraceRaysCount(); + // Emit the unique workload tag into the command stream + std::string tagLabel = formatString("t%" PRIu64, tagID); + VkDebugUtilsLabelEXT tagInfo { + .sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT, + .pNext = nullptr, + .pLabelName = tagLabel.c_str(), + .color = { 0.0f, 0.0f, 0.0f, 0.0f } + }; + + layer->driver.vkCmdBeginDebugUtilsLabelEXT(commandBuffer, &tagInfo); } /* See Vulkan API for documentation. */ @@ -53,11 +73,13 @@ VKAPI_ATTR void VKAPI_CALL layer_vkCmdTraceRaysIndirect2KHR( std::unique_lock lock { g_vulkanLock }; auto* layer = Device::retrieve(commandBuffer); - registerTraceRays(layer, commandBuffer); + uint64_t tagID = registerTraceRays(layer, commandBuffer, -1, -1, -1); // Release the lock to call into the driver lock.unlock(); + emitStartTag(layer, commandBuffer, tagID); layer->driver.vkCmdTraceRaysIndirect2KHR(commandBuffer, indirectDeviceAddress); + layer->driver.vkCmdEndDebugUtilsLabelEXT(commandBuffer); } /* See Vulkan API for documentation. 
*/ @@ -76,10 +98,12 @@ VKAPI_ATTR void VKAPI_CALL layer_vkCmdTraceRaysIndirectKHR( std::unique_lock lock { g_vulkanLock }; auto* layer = Device::retrieve(commandBuffer); - registerTraceRays(layer, commandBuffer); + uint64_t tagID = registerTraceRays(layer, commandBuffer, -1, -1, -1); // Release the lock to call into the driver lock.unlock(); + emitStartTag(layer, commandBuffer, tagID); layer->driver.vkCmdTraceRaysIndirectKHR(commandBuffer, pRaygenShaderBindingTable, pMissShaderBindingTable, pHitShaderBindingTable, pCallableShaderBindingTable, indirectDeviceAddress); + layer->driver.vkCmdEndDebugUtilsLabelEXT(commandBuffer); } @@ -101,9 +125,11 @@ VKAPI_ATTR void VKAPI_CALL layer_vkCmdTraceRaysKHR( std::unique_lock lock { g_vulkanLock }; auto* layer = Device::retrieve(commandBuffer); - registerTraceRays(layer, commandBuffer); + uint64_t tagID = registerTraceRays(layer, commandBuffer, width, height, depth); // Release the lock to call into the driver lock.unlock(); + emitStartTag(layer, commandBuffer, tagID); layer->driver.vkCmdTraceRaysKHR(commandBuffer, pRaygenShaderBindingTable, pMissShaderBindingTable, pHitShaderBindingTable, pCallableShaderBindingTable, width, height, depth); + layer->driver.vkCmdEndDebugUtilsLabelEXT(commandBuffer); } \ No newline at end of file diff --git a/layer_gpu_timeline/source/layer_device_functions_transfer.cpp b/layer_gpu_timeline/source/layer_device_functions_transfer.cpp new file mode 100644 index 0000000..066c23f --- /dev/null +++ b/layer_gpu_timeline/source/layer_device_functions_transfer.cpp @@ -0,0 +1,476 @@ +/* + * SPDX-License-Identifier: MIT + * ---------------------------------------------------------------------------- + * Copyright (c) 2024 Arm Limited + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the
Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * ---------------------------------------------------------------------------- + */ + +#include +#include +#include + +#include "device.hpp" +#include "layer_device_functions.hpp" + +extern std::mutex g_vulkanLock; + +static uint64_t registerBufferTransfer( + Device* layer, + VkCommandBuffer commandBuffer, + const std::string& transferType, + int64_t byteCount +) { + auto& tracker = layer->getStateTracker(); + auto& cb = tracker.getCommandBuffer(commandBuffer); + return cb.bufferTransfer(transferType, byteCount); +} + +static uint64_t registerImageTransfer( + Device* layer, + VkCommandBuffer commandBuffer, + const std::string& transferType, + int64_t pixelCount +) { + auto& tracker = layer->getStateTracker(); + auto& cb = tracker.getCommandBuffer(commandBuffer); + return cb.imageTransfer(transferType, pixelCount); +} + +static void emitStartTag( + Device* layer, + VkCommandBuffer commandBuffer, + uint64_t tagID +) { + // Emit the unique workload tag into the command stream + std::string tagLabel = formatString("t%" PRIu64, tagID); + VkDebugUtilsLabelEXT tagInfo { + .sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT, + .pNext = nullptr, + .pLabelName = tagLabel.c_str(), + .color = { 0.0f, 0.0f, 0.0f, 0.0f } + }; + + layer->driver.vkCmdBeginDebugUtilsLabelEXT(commandBuffer, 
&tagInfo); +} + +// Commands for transfers + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdFillBuffer( + VkCommandBuffer commandBuffer, + VkBuffer dstBuffer, + VkDeviceSize dstOffset, + VkDeviceSize size, + uint32_t data +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(commandBuffer); + + uint64_t tagID = registerBufferTransfer( + layer, + commandBuffer, + "Fill buffer", + -1); + + // Release the lock to call into the driver + lock.unlock(); + emitStartTag(layer, commandBuffer, tagID); + layer->driver.vkCmdFillBuffer(commandBuffer, dstBuffer, dstOffset, size, data); + layer->driver.vkCmdEndDebugUtilsLabelEXT(commandBuffer); +} + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdClearColorImage( + VkCommandBuffer commandBuffer, + VkImage image, + VkImageLayout imageLayout, + const VkClearColorValue* pColor, + uint32_t rangeCount, + const VkImageSubresourceRange* pRanges +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(commandBuffer); + + uint64_t tagID = registerImageTransfer( + layer, + commandBuffer, + "Clear image", + -1); + + // Release the lock to call into the driver + lock.unlock(); + emitStartTag(layer, commandBuffer, tagID); + layer->driver.vkCmdClearColorImage(commandBuffer, image, imageLayout, pColor, rangeCount, pRanges); + layer->driver.vkCmdEndDebugUtilsLabelEXT(commandBuffer); +} + +/* See Vulkan API for documentation. 
*/ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdClearDepthStencilImage( + VkCommandBuffer commandBuffer, + VkImage image, + VkImageLayout imageLayout, + const VkClearDepthStencilValue* pDepthStencil, + uint32_t rangeCount, + const VkImageSubresourceRange* pRanges +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(commandBuffer); + + uint64_t tagID = registerImageTransfer( + layer, + commandBuffer, + "Clear image", + -1); + + // Release the lock to call into the driver + lock.unlock(); + emitStartTag(layer, commandBuffer, tagID); + layer->driver.vkCmdClearDepthStencilImage(commandBuffer, image, imageLayout, pDepthStencil, rangeCount, pRanges); + layer->driver.vkCmdEndDebugUtilsLabelEXT(commandBuffer); +} + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdCopyBuffer( + VkCommandBuffer commandBuffer, + VkBuffer srcBuffer, + VkBuffer dstBuffer, + uint32_t regionCount, + const VkBufferCopy* pRegions +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(commandBuffer); + + uint64_t tagID = registerBufferTransfer( + layer, + commandBuffer, + "Copy buffer", + -1); + + // Release the lock to call into the driver + lock.unlock(); + emitStartTag(layer, commandBuffer, tagID); + layer->driver.vkCmdCopyBuffer(commandBuffer, srcBuffer, dstBuffer, regionCount, pRegions); + layer->driver.vkCmdEndDebugUtilsLabelEXT(commandBuffer); +} + +/* See Vulkan API for documentation. 
*/ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdCopyBuffer2( + VkCommandBuffer commandBuffer, + const VkCopyBufferInfo2* pCopyBufferInfo +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(commandBuffer); + + uint64_t tagID = registerBufferTransfer( + layer, + commandBuffer, + "Copy buffer", + -1); + + // Release the lock to call into the driver + lock.unlock(); + emitStartTag(layer, commandBuffer, tagID); + layer->driver.vkCmdCopyBuffer2(commandBuffer, pCopyBufferInfo); + layer->driver.vkCmdEndDebugUtilsLabelEXT(commandBuffer); +} + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdCopyBuffer2KHR( + VkCommandBuffer commandBuffer, + const VkCopyBufferInfo2* pCopyBufferInfo +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(commandBuffer); + + uint64_t tagID = registerBufferTransfer( + layer, + commandBuffer, + "Copy buffer", + -1); + + // Release the lock to call into the driver + lock.unlock(); + emitStartTag(layer, commandBuffer, tagID); + layer->driver.vkCmdCopyBuffer2KHR(commandBuffer, pCopyBufferInfo); + layer->driver.vkCmdEndDebugUtilsLabelEXT(commandBuffer); +} + +/* See Vulkan API for documentation. 
*/ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdCopyBufferToImage( + VkCommandBuffer commandBuffer, + VkBuffer srcBuffer, + VkImage dstImage, + VkImageLayout dstImageLayout, + uint32_t regionCount, + const VkBufferImageCopy* pRegions +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(commandBuffer); + + uint64_t tagID = registerImageTransfer( + layer, + commandBuffer, + "Copy image", + -1); + + // Release the lock to call into the driver + lock.unlock(); + emitStartTag(layer, commandBuffer, tagID); + layer->driver.vkCmdCopyBufferToImage(commandBuffer, srcBuffer, dstImage, dstImageLayout, regionCount, pRegions); + layer->driver.vkCmdEndDebugUtilsLabelEXT(commandBuffer); +} + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdCopyBufferToImage2( + VkCommandBuffer commandBuffer, + const VkCopyBufferToImageInfo2* pCopyBufferToImageInfo +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(commandBuffer); + + uint64_t tagID = registerImageTransfer( + layer, + commandBuffer, + "Copy image", + -1); + + // Release the lock to call into the driver + lock.unlock(); + emitStartTag(layer, commandBuffer, tagID); + layer->driver.vkCmdCopyBufferToImage2(commandBuffer, pCopyBufferToImageInfo); + layer->driver.vkCmdEndDebugUtilsLabelEXT(commandBuffer); +} + +/* See Vulkan API for documentation. 
*/ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdCopyBufferToImage2KHR( + VkCommandBuffer commandBuffer, + const VkCopyBufferToImageInfo2* pCopyBufferToImageInfo +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(commandBuffer); + + uint64_t tagID = registerImageTransfer( + layer, + commandBuffer, + "Copy image", + -1); + + // Release the lock to call into the driver + lock.unlock(); + emitStartTag(layer, commandBuffer, tagID); + layer->driver.vkCmdCopyBufferToImage2KHR(commandBuffer, pCopyBufferToImageInfo); + layer->driver.vkCmdEndDebugUtilsLabelEXT(commandBuffer); +} + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdCopyImage( + VkCommandBuffer commandBuffer, + VkImage srcImage, + VkImageLayout srcImageLayout, + VkImage dstImage, + VkImageLayout dstImageLayout, + uint32_t regionCount, + const VkImageCopy* pRegions +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(commandBuffer); + + uint64_t tagID = registerImageTransfer( + layer, + commandBuffer, + "Copy image", + -1); + + // Release the lock to call into the driver + lock.unlock(); + emitStartTag(layer, commandBuffer, tagID); + layer->driver.vkCmdCopyImage(commandBuffer, srcImage, srcImageLayout, dstImage, dstImageLayout, regionCount, pRegions); + layer->driver.vkCmdEndDebugUtilsLabelEXT(commandBuffer); +} + +/* See Vulkan API for documentation. 
*/ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdCopyImage2( + VkCommandBuffer commandBuffer, + const VkCopyImageInfo2* pCopyImageInfo +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(commandBuffer); + + uint64_t tagID = registerImageTransfer( + layer, + commandBuffer, + "Copy image", + -1); + + // Release the lock to call into the driver + lock.unlock(); + emitStartTag(layer, commandBuffer, tagID); + layer->driver.vkCmdCopyImage2(commandBuffer, pCopyImageInfo); + layer->driver.vkCmdEndDebugUtilsLabelEXT(commandBuffer); +} + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdCopyImage2KHR( + VkCommandBuffer commandBuffer, + const VkCopyImageInfo2* pCopyImageInfo +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(commandBuffer); + + uint64_t tagID = registerImageTransfer( + layer, + commandBuffer, + "Copy image", + -1); + + // Release the lock to call into the driver + lock.unlock(); + emitStartTag(layer, commandBuffer, tagID); + layer->driver.vkCmdCopyImage2KHR(commandBuffer, pCopyImageInfo); + layer->driver.vkCmdEndDebugUtilsLabelEXT(commandBuffer); +} + +/* See Vulkan API for documentation. 
*/ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdCopyImageToBuffer( + VkCommandBuffer commandBuffer, + VkImage srcImage, + VkImageLayout srcImageLayout, + VkBuffer dstBuffer, + uint32_t regionCount, + const VkBufferImageCopy* pRegions +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(commandBuffer); + + uint64_t tagID = registerBufferTransfer( + layer, + commandBuffer, + "Copy buffer", + -1); + + // Release the lock to call into the driver + lock.unlock(); + emitStartTag(layer, commandBuffer, tagID); + layer->driver.vkCmdCopyImageToBuffer(commandBuffer, srcImage, srcImageLayout, dstBuffer, regionCount, pRegions); + layer->driver.vkCmdEndDebugUtilsLabelEXT(commandBuffer); +} + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdCopyImageToBuffer2( + VkCommandBuffer commandBuffer, + const VkCopyImageToBufferInfo2* pCopyImageToBufferInfo +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(commandBuffer); + + uint64_t tagID = registerBufferTransfer( + layer, + commandBuffer, + "Copy buffer", + -1); + + // Release the lock to call into the driver + lock.unlock(); + emitStartTag(layer, commandBuffer, tagID); + layer->driver.vkCmdCopyImageToBuffer2(commandBuffer, pCopyImageToBufferInfo); + layer->driver.vkCmdEndDebugUtilsLabelEXT(commandBuffer); +} + +/* See Vulkan API for documentation. 
*/ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdCopyImageToBuffer2KHR( + VkCommandBuffer commandBuffer, + const VkCopyImageToBufferInfo2* pCopyImageToBufferInfo +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Device::retrieve(commandBuffer); + + uint64_t tagID = registerBufferTransfer( + layer, + commandBuffer, + "Copy buffer", + -1); + + // Release the lock to call into the driver + lock.unlock(); + emitStartTag(layer, commandBuffer, tagID); + layer->driver.vkCmdCopyImageToBuffer2KHR(commandBuffer, pCopyImageToBufferInfo); + layer->driver.vkCmdEndDebugUtilsLabelEXT(commandBuffer); +} diff --git a/lglpy/service_gpu_timeline.py b/lglpy/service_gpu_timeline.py index 0f619ae..9ead818 100644 --- a/lglpy/service_gpu_timeline.py +++ b/lglpy/service_gpu_timeline.py @@ -77,8 +77,7 @@ def handle_renderpass(self, msg): else: self.frame['workloads'].append(msg) - def handle_dispatch(self, msg): - # Find the last workload + def handle_generic(self, msg): self.frame['workloads'].append(msg) def handle_message(self, message: Message): @@ -93,8 +92,8 @@ def handle_message(self, message: Message): elif payloadType == 'renderpass': self.handle_renderpass(parsedPayload) - elif payloadType == 'dispatch': - self.handle_dispatch(parsedPayload) + elif payloadType in ('dispatch', 'tracerays', 'imagetransfer', 'buffertransfer'): + self.handle_generic(parsedPayload) else: assert False, f'Unknown payload type {payloadType}' diff --git a/source_common/trackers/command_buffer.cpp b/source_common/trackers/command_buffer.cpp index ef47730..b1b5d3c 100644 --- a/source_common/trackers/command_buffer.cpp +++ b/source_common/trackers/command_buffer.cpp @@ -137,7 +137,6 @@ uint64_t CommandBuffer::dispatch( int64_t yGroups, int64_t zGroups ) { - LAYER_LOG("Creating LCSDispatch workload"); uint64_t tagID = Tracker::LCSWorkload::assignTagID(); stats.incDispatchCount(); @@ -153,6 +152,67 @@ uint64_t 
CommandBuffer::dispatch( return tagID; } +/* See header for documentation. */ +uint64_t CommandBuffer::traceRays( + int64_t xItems, + int64_t yItems, + int64_t zItems +) { + uint64_t tagID = Tracker::LCSWorkload::assignTagID(); + stats.incTraceRaysCount(); + + // Add a workload to the command buffer + auto workload = std::make_shared<LCSTraceRays>( + tagID, xItems, yItems, zItems); + workloads.push_back(workload); + + // Add a command to the layer-side command stream + auto instr = std::make_pair(LCSOpcode::TRACE_RAYS, workload); + workloadCommandStream.push_back(instr); + + return tagID; +} + +/* See header for documentation. */ +uint64_t CommandBuffer::imageTransfer( + const std::string& transferType, + int64_t pixelCount +) { + uint64_t tagID = Tracker::LCSWorkload::assignTagID(); + stats.incImageTransferCount(); + + // Add a workload to the command buffer + auto workload = std::make_shared<LCSImageTransfer>( + tagID, transferType, pixelCount); + workloads.push_back(workload); + + // Add a command to the layer-side command stream + auto instr = std::make_pair(LCSOpcode::IMAGE_TRANSFER, workload); + workloadCommandStream.push_back(instr); + + return tagID; +} + +/* See header for documentation. */ +uint64_t CommandBuffer::bufferTransfer( + const std::string& transferType, + int64_t byteCount +) { + uint64_t tagID = Tracker::LCSWorkload::assignTagID(); + stats.incBufferTransferCount(); + + // Add a workload to the command buffer + auto workload = std::make_shared<LCSBufferTransfer>( + tagID, transferType, byteCount); + workloads.push_back(workload); + + // Add a command to the layer-side command stream + auto instr = std::make_pair(LCSOpcode::BUFFER_TRANSFER, workload); + workloadCommandStream.push_back(instr); + + return tagID; +} + /* See header for documentation.
*/ void CommandBuffer::executeCommands( CommandBuffer& secondary diff --git a/source_common/trackers/command_buffer.hpp b/source_common/trackers/command_buffer.hpp index c4b4828..ea474c0 100644 --- a/source_common/trackers/command_buffer.hpp +++ b/source_common/trackers/command_buffer.hpp @@ -128,6 +128,44 @@ class CommandBuffer int64_t yGroups, int64_t zGroups); + /** + * @brief Capture a trace rays dispatch. + * + * @param xItems Number of work items in X dimension, or -1 if unknown. + * @param yItems Number of work items in Y dimension, or -1 if unknown. + * @param zItems Number of work items in Z dimension, or -1 if unknown. + * + * @return Returns the tagID assigned to this workload. + */ + uint64_t traceRays( + int64_t xItems, + int64_t yItems, + int64_t zItems); + + /** + * @brief Capture a transfer where the destination is an image. + * + * @param transferType The type of the transfer. + * @param pixelCount The number of pixels written, or -1 if unknown. + * + * @return Returns the tagID assigned to this workload. + */ + uint64_t imageTransfer( + const std::string& transferType, + int64_t pixelCount); + + /** + * @brief Capture a transfer where the destination is a buffer. + * + * @param transferType The type of the transfer. + * @param byteCount The number of bytes written, or -1 if unknown. + * + * @return Returns the tagID assigned to this workload. + */ + uint64_t bufferTransfer( + const std::string& transferType, + int64_t byteCount); + /** * @brief Begin a user debug marker range. */ diff --git a/source_common/trackers/layer_command_stream.cpp b/source_common/trackers/layer_command_stream.cpp index 2b29d07..90c8c72 100644 --- a/source_common/trackers/layer_command_stream.cpp +++ b/source_common/trackers/layer_command_stream.cpp @@ -222,5 +222,115 @@ std::string LCSDispatch::getMetadata( return metadata.dump(); } +/* See header for details.
*/ +LCSTraceRays::LCSTraceRays( + uint64_t _tagID, + int64_t _xItems, + int64_t _yItems, + int64_t _zItems) : + LCSWorkload(_tagID), + xItems(_xItems), + yItems(_yItems), + zItems(_zItems) +{ + +} + +/* See header for details. */ +std::string LCSTraceRays::getMetadata( + const std::string* debugLabel, + uint64_t tagIDContinuation, + uint64_t submitID +) const { + UNUSED(tagIDContinuation); + UNUSED(submitID); + + json metadata = { + { "type", "tracerays" }, + { "tid", tagID }, + { "xItems", xItems }, + { "yItems", yItems }, + { "zItems", zItems } + }; + + if (debugLabel && debugLabel->size()) + { + metadata["label"] = *debugLabel; + } + + return metadata.dump(); +} + +/* See header for details. */ +LCSImageTransfer::LCSImageTransfer( + uint64_t _tagID, + const std::string& _transferType, + int64_t _pixelCount): + LCSWorkload(_tagID), + transferType(_transferType), + pixelCount(_pixelCount) +{ + +} + +/* See header for details. */ +std::string LCSImageTransfer::getMetadata( + const std::string* debugLabel, + uint64_t tagIDContinuation, + uint64_t submitID +) const { + UNUSED(tagIDContinuation); + UNUSED(submitID); + + json metadata = { + { "type", "imagetransfer" }, + { "tid", tagID }, + { "subtype", transferType }, + { "pixels", pixelCount } + }; + + if (debugLabel && debugLabel->size()) + { + metadata["label"] = *debugLabel; + } + + return metadata.dump(); +} + +/* See header for details. */ +LCSBufferTransfer::LCSBufferTransfer( + uint64_t _tagID, + const std::string& _transferType, + int64_t _byteCount): + LCSWorkload(_tagID), + transferType(_transferType), + byteCount(_byteCount) +{ + +} + +/* See header for details. 
*/ +std::string LCSBufferTransfer::getMetadata( + const std::string* debugLabel, + uint64_t tagIDContinuation, + uint64_t submitID +) const { + UNUSED(tagIDContinuation); + UNUSED(submitID); + + json metadata = { + { "type", "buffertransfer" }, + { "tid", tagID }, + { "subtype", transferType }, + { "bytes", byteCount } + }; + + if (debugLabel && debugLabel->size()) + { + metadata["label"] = *debugLabel; + } + + return metadata.dump(); +} } diff --git a/source_common/trackers/layer_command_stream.hpp b/source_common/trackers/layer_command_stream.hpp index 246558d..29acdd6 100644 --- a/source_common/trackers/layer_command_stream.hpp +++ b/source_common/trackers/layer_command_stream.hpp @@ -204,6 +204,77 @@ class LCSDispatch : public LCSWorkload int64_t zGroups; }; +/** + * @brief Class representing a trace rays workload in the command stream. + */ +class LCSTraceRays : public LCSWorkload +{ +public: + LCSTraceRays( + uint64_t tagID, + int64_t xItems, + int64_t yItems, + int64_t zItems); + + virtual ~LCSTraceRays() = default; + + virtual std::string getMetadata( + const std::string* debugLabel=nullptr, + uint64_t tagIDContinuation=0, + uint64_t submitID=0) const; + +private: + int64_t xItems; + int64_t yItems; + int64_t zItems; +}; + +/** + * @brief Class representing an image transfer workload in the command stream. + */ +class LCSImageTransfer : public LCSWorkload +{ +public: + LCSImageTransfer( + uint64_t tagID, + const std::string& transferType, + int64_t pixelCount); + + virtual ~LCSImageTransfer() = default; + + virtual std::string getMetadata( + const std::string* debugLabel=nullptr, + uint64_t tagIDContinuation=0, + uint64_t submitID=0) const; + +private: + std::string transferType; + int64_t pixelCount; +}; + +/** + * @brief Class representing a buffer transfer workload in the command stream.
+ */ +class LCSBufferTransfer : public LCSWorkload +{ +public: + LCSBufferTransfer( + uint64_t tagID, + const std::string& transferType, + int64_t byteCount); + + virtual ~LCSBufferTransfer() = default; + + virtual std::string getMetadata( + const std::string* debugLabel=nullptr, + uint64_t tagIDContinuation=0, + uint64_t submitID=0) const; + +private: + std::string transferType; + int64_t byteCount; +}; + /** * @brief Baseclass representing a GPU workload in the command stream. */ diff --git a/source_common/trackers/queue.cpp b/source_common/trackers/queue.cpp index 756ea07..7b79606 100644 --- a/source_common/trackers/queue.cpp +++ b/source_common/trackers/queue.cpp @@ -86,7 +86,10 @@ void Queue::runSubmitCommandStream( } } } - else if (opCode == LCSOpcode::DISPATCH) + else if ((opCode == LCSOpcode::DISPATCH) || + (opCode == LCSOpcode::TRACE_RAYS) || + (opCode == LCSOpcode::IMAGE_TRANSFER) || + (opCode == LCSOpcode::BUFFER_TRANSFER)) { uint64_t tagID = opData->getTagID(); std::string log = joinString(debugStack, "|"); From 9ac3bc1cd97f2cdc24e8db1cd7d43863617c65a3 Mon Sep 17 00:00:00 2001 From: Peter Harris Date: Thu, 12 Dec 2024 21:43:37 +0000 Subject: [PATCH 7/8] Code style cleanup --- generator/vk_codegen/device_defs.txt | 2 - generator/vk_codegen/instance_defs.txt | 2 - generator/vk_layer/source/device.cpp | 24 +- generator/vk_layer/source/device.hpp | 18 +- generator/vk_layer/source/instance.cpp | 14 +- generator/vk_layer/source/instance.hpp | 13 +- generator/vk_layer/source/version.hpp.in | 4 +- layer_gpu_timeline/README_LAYER.md | 225 +++++++----------- layer_gpu_timeline/android_build.sh | 2 +- .../docs/command_buffer_model.md | 155 ------------ layer_gpu_timeline/source/device.cpp | 24 +- layer_gpu_timeline/source/device.hpp | 44 ++-- layer_gpu_timeline/source/device_utils.hpp | 56 +++++ layer_gpu_timeline/source/instance.cpp | 14 +- layer_gpu_timeline/source/instance.hpp | 13 +- .../source/layer_device_functions.hpp | 2 + 
.../layer_device_functions_command_buffer.cpp | 2 - .../layer_device_functions_command_pool.cpp | 2 - .../source/layer_device_functions_debug.cpp | 2 - .../layer_device_functions_dispatch.cpp | 31 +-- .../layer_device_functions_draw_call.cpp | 6 + .../source/layer_device_functions_queue.cpp | 4 +- .../layer_device_functions_render_pass.cpp | 68 +----- .../layer_device_functions_trace_rays.cpp | 29 +-- .../layer_device_functions_transfer.cpp | 38 +-- layer_gpu_timeline/source/timeline_comms.cpp | 13 +- layer_gpu_timeline/source/timeline_comms.hpp | 31 ++- layer_gpu_timeline/source/version.hpp.in | 4 +- source_common/comms/comms_message.cpp | 3 +- source_common/comms/comms_module.cpp | 22 +- source_common/comms/comms_receiver.cpp | 21 +- source_common/comms/comms_transmitter.cpp | 15 +- .../comms/test/comms_test_server.cpp | 14 +- source_common/framework/device_functions.cpp | 2 - .../framework/instance_functions.cpp | 2 - source_common/trackers/command_buffer.hpp | 11 +- source_common/trackers/device.cpp | 10 +- .../trackers/layer_command_stream.cpp | 26 +- source_common/trackers/queue.hpp | 7 +- source_common/trackers/render_pass.cpp | 44 +++- source_common/trackers/render_pass.hpp | 93 +++++--- source_common/utils/misc.hpp | 11 +- source_common/utils/queue.hpp | 7 +- 43 files changed, 512 insertions(+), 618 deletions(-) delete mode 100644 layer_gpu_timeline/docs/command_buffer_model.md create mode 100644 layer_gpu_timeline/source/device_utils.hpp diff --git a/generator/vk_codegen/device_defs.txt b/generator/vk_codegen/device_defs.txt index 3b87173..b217a1d 100644 --- a/generator/vk_codegen/device_defs.txt +++ b/generator/vk_codegen/device_defs.txt @@ -1,6 +1,4 @@ -#include #include -#include // Include from per-layer code #include "utils.hpp" diff --git a/generator/vk_codegen/instance_defs.txt b/generator/vk_codegen/instance_defs.txt index d4891fb..400263c 100644 --- a/generator/vk_codegen/instance_defs.txt +++ b/generator/vk_codegen/instance_defs.txt @@ -1,6 +1,4 
@@ -#include #include -#include // Include from per-layer code #include "device.hpp" diff --git a/generator/vk_layer/source/device.cpp b/generator/vk_layer/source/device.cpp index 580e339..3371cff 100644 --- a/generator/vk_layer/source/device.cpp +++ b/generator/vk_layer/source/device.cpp @@ -34,6 +34,9 @@ #include "device.hpp" #include "instance.hpp" +/** + * @brief The dispatch lookup for all of the created Vulkan devices. + */ static std::unordered_map> g_devices; /* See header for documentation. */ @@ -47,8 +50,8 @@ void Device::store( /* See header for documentation. */ Device* Device::retrieve( - VkDevice handle) -{ + VkDevice handle +) { void* key = getDispatchKey(handle); assert(isInMap(key, g_devices)); return g_devices.at(key).get(); @@ -56,8 +59,8 @@ Device* Device::retrieve( /* See header for documentation. */ Device* Device::retrieve( - VkQueue handle) -{ + VkQueue handle +) { void* key = getDispatchKey(handle); assert(isInMap(key, g_devices)); return g_devices.at(key).get(); @@ -65,8 +68,8 @@ Device* Device::retrieve( /* See header for documentation. */ Device* Device::retrieve( - VkCommandBuffer handle) -{ + VkCommandBuffer handle +) { void* key = getDispatchKey(handle); assert(isInMap(key, g_devices)); return g_devices.at(key).get(); @@ -85,15 +88,10 @@ Device::Device( VkPhysicalDevice _physicalDevice, VkDevice _device, PFN_vkGetDeviceProcAddr nlayerGetProcAddress -): instance(_instance), +): + instance(_instance), physicalDevice(_physicalDevice), device(_device) { initDriverDeviceDispatchTable(device, nlayerGetProcAddress, driver); } - -/* See header for documentation. */ -Device::~Device() -{ - -} diff --git a/generator/vk_layer/source/device.hpp b/generator/vk_layer/source/device.hpp index d6ecad7..c0e1f0a 100644 --- a/generator/vk_layer/source/device.hpp +++ b/generator/vk_layer/source/device.hpp @@ -24,8 +24,7 @@ */ /** - * @file - * Declares the root class for layer management of VkDevice objects.
+ * @file Declares the root class for layer management of VkDevice objects. * * Role summary * ============ @@ -41,10 +40,9 @@ * Key properties * ============== * - * Unlike EGL contexts, Vulkan devices are designed to be used concurrently by - * multiple application threads. An application can have multiple concurrent - * devices (although this is less common than with OpenGL ES applications), and - * use each device from multiple threads. + * Vulkan devices are designed to be used concurrently by multiple application + * threads. An application can have multiple concurrent devices, and use each + * device from multiple threads. * * Access to the layer driver structures must therefore be kept thread-safe. * For sake of simplicity, we generally implement this by: @@ -80,6 +78,8 @@ class Device * @brief Fetch a device from the global store of dispatchable devices. * * @param handle The dispatchable device handle to use as an indirect lookup. + * + * @return The layer device context. */ static Device* retrieve( VkDevice handle); @@ -88,6 +88,8 @@ class Device * @brief Fetch a device from the global store of dispatchable devices. * * @param handle The dispatchable queue handle to use as an indirect lookup. + * + * @return The layer device context. */ static Device* retrieve( VkQueue handle); @@ -96,6 +98,8 @@ class Device * @brief Fetch a device from the global store of dispatchable devices. * * @param handle The dispatchable command buffer handle to use as an indirect lookup. + * + * @return The layer device context. */ static Device* retrieve( VkCommandBuffer handle); @@ -125,7 +129,7 @@ class Device /** * @brief Destroy this layer device object. 
*/ - ~Device(); + ~Device() = default; public: /** diff --git a/generator/vk_layer/source/instance.cpp b/generator/vk_layer/source/instance.cpp index 6ac278e..0b62857 100644 --- a/generator/vk_layer/source/instance.cpp +++ b/generator/vk_layer/source/instance.cpp @@ -29,6 +29,9 @@ #include "instance.hpp" +/** + * @brief The dispatch lookup for all of the created Vulkan instances. + */ static std::unordered_map> g_instances; /* See header for documentation. */ @@ -42,8 +45,8 @@ void Instance::store( /* See header for documentation. */ Instance* Instance::retrieve( - VkInstance handle) -{ + VkInstance handle +) { void* key = getDispatchKey(handle); assert(isInMap(key, g_instances)); return g_instances.at(key).get(); @@ -51,8 +54,8 @@ Instance* Instance::retrieve( /* See header for documentation. */ Instance* Instance::retrieve( - VkPhysicalDevice handle) -{ + VkPhysicalDevice handle +) { void* key = getDispatchKey(handle); assert(isInMap(key, g_instances)); return g_instances.at(key).get(); @@ -68,7 +71,8 @@ void Instance::destroy( /* See header for documentation. */ Instance::Instance( VkInstance _instance, - PFN_vkGetInstanceProcAddr _nlayerGetProcAddress) : + PFN_vkGetInstanceProcAddr _nlayerGetProcAddress +) : instance(_instance), nlayerGetProcAddress(_nlayerGetProcAddress) { diff --git a/generator/vk_layer/source/instance.hpp b/generator/vk_layer/source/instance.hpp index cfda54e..fc6af6b 100644 --- a/generator/vk_layer/source/instance.hpp +++ b/generator/vk_layer/source/instance.hpp @@ -42,9 +42,8 @@ * Key properties * ============== * - * Unlike EGL contexts, Vulkan instances are designed to be used concurrently - * by multiple application threads. An application can have multiple concurrent - * instances (although this is less common than with OpenGL ES applications), + * Vulkan instances are designed to be used concurrently by multiple + * application threads. 
An application can have multiple concurrent instances, * and use each instance from multiple threads. * * Access to the layer driver structures must therefore be kept thread-safe. @@ -65,10 +64,6 @@ /** * @brief This class implements the layer state tracker for a single instance. - * - * These objects are relatively light-weight, as they are rarely used once a VkDevice has been - * created, but we need to track the chain-of-ownership as the instance is the root object that - * the application creates when initializing a rendering context. */ class Instance { @@ -87,6 +82,8 @@ class Instance * @brief Fetch an instance from the global store of dispatchable instances. * * @param handle The dispatchable instance handle to use as an indirect lookup. + * + * @return The layer instance context. */ static Instance* retrieve( VkInstance handle); @@ -95,6 +92,8 @@ class Instance * @brief Fetch an instance from the global store of dispatchable instances. * * @param handle The dispatchable physical device handle to use as an indirect lookup. + * + * @return The layer instance context. */ static Instance* retrieve( VkPhysicalDevice handle); diff --git a/generator/vk_layer/source/version.hpp.in b/generator/vk_layer/source/version.hpp.in index 50c30b9..5fcb9c3 100644 --- a/generator/vk_layer/source/version.hpp.in +++ b/generator/vk_layer/source/version.hpp.in @@ -24,9 +24,7 @@ */ /** - * @file - * This header implements placeholder templates that are populated by CMake - * during configure. + * @file Placeholder templates that are populated by CMake during configure. */ #pragma once diff --git a/layer_gpu_timeline/README_LAYER.md b/layer_gpu_timeline/README_LAYER.md index 8f8a56c..a3b0fee 100644 --- a/layer_gpu_timeline/README_LAYER.md +++ b/layer_gpu_timeline/README_LAYER.md @@ -1,163 +1,100 @@ # Layer: GPU Timeline This layer is used with Arm GPUs for tracking submitted schedulable workloads -and emitting semantic information about them. 
This data can be combined with -the raw workload execution timing information captured using the Android -Perfetto service, providing developers with a richer debug visualization. +and emitting useful metadata that can be used in tooling visualizations. This +data can be combined with raw workload execution timing information captured +by the Android Perfetto service, providing developers with more useful +information about how their application is scheduled on to the Arm GPU. -## What devices? +## What devices are supported? The Arm GPU driver integration with the Perfetto render stages scheduler event trace is supported at production quality since the r47p0 driver version. -However, associating semantics from this layer relies on a further integration -with debug labels which requires an r51p0 or later driver version. +However, associating additional metadata from this layer relies on additional +functionality which requires an r51p0 or later driver version. -## What workloads? +## What workloads are supported? -A schedulable workload is the smallest workload that the Arm GPU command stream -scheduler will issue to the GPU hardware work queues. This includes the -following workload types: +The Arm GPU scheduler event trace can generate timing events for each +atomically schedulable workload submitted to the GPU scheduler. -* Render passes, split into: - * Vertex or Binning phase - * Fragment or Main phase +Most workloads submitted to a Vulkan queue by the application are a single +schedulable entity, for example a compute dispatch or transfer is a single +workload. + +The exception to this is the render pass workload. Arm GPUs are tile-based, so +each group of merged subpasses from a render pass is processed as two +schedulable phases. The first phase - the vertex or binning phase - determines +which primitives contribute to which screen-space tiles. 
The second phase - the +fragment or main phase - reads the binning information and completes fragment +shading tile-by-tile. + +This layer tracks the following workloads: + +* Render passes * Compute dispatches -* Trace rays +* Trace rays dispatches * Transfers to a buffer * Transfers to an image -Most workloads are dispatched using a single API call, and are trivial to -manage in the layer. However, render passes are more complex and need extra -handling. In particular: - -* Render passes are issued using multiple API calls. -* Useful render pass properties, such as draw count, are not known until the - render pass recording has ended. -* Dynamic render passes using `vkCmdBeginRendering()` and `vkCmdEndRendering()` - can be suspended and resumed across command buffer boundaries. Properties - such as draw count are not defined by the scope of a single command buffer. - ## Tracking workloads -This layer tracks workloads encoded in command buffers, and emits semantic -metadata for each workload via a communications side-channel. A host tool -combines the semantic data stream with the Perfetto data stream, using debug -label tags injected by the layer as a common cross-reference to link across -the streams. - -### Workload labelling - -Command stream labelling is implemented using `vkCmdDebugMarkerBeginEXT()` -and `vkCmdDebugMarkerEndEXT()`, wrapping one layer-owned `tagID` label around -each semantic workload. This `tagID` can unambiguously refer to this workload -encoding, and metadata that we do not expect to change per submit will be -emitted using the matching `tagID` as the sole identifier. - -_**TODO:** Dynamic `submitID` tracking is not yet implemented._ - -The `tagID` label is encoded into the recorded command buffer which means, for -reusable command buffers, it is not an unambiguous identifier of a specific -running workload. 
To allow us to disambiguate specific workload instances, the -layer can optionally add an outer wrapper of `submitID` labels around each -submitted command buffer. This wrapper is only generated if the submit contains -any command buffers that require the generation of a per-submit annex (see the -following section for when this is needed). - -The `submitID.tagID` pair of IDs uniquely identifies a specific running -workload, and can be used to attach an instance-specific metadata annex to a -specific submitted workload rather than to the shared recorded command buffer. - -### Workload metadata for split render passes - -_**TODO:** Split render pass tracking is not yet implemented._ - -Dynamic render passes can be split across multiple Begin/End pairs, including -being split across command buffer boundaries. If these splits occur within a -single primary command buffer, or its secondaries, it is handled transparently -by the layer and it appears as a single message as if no splits occurred. If -these splits occur across primary command buffer boundaries, then some -additional work is required. - -In our design a `tagID` debug marker is only started when the render pass first -starts (not on resume), and stopped at the end of the render pass (not on -suspend). The same `tagID` is used to refer to all parts of the render pass, -no matter how many times it was suspended and resumed. - -If a render pass splits across command buffers, we cannot precompute metrics -based on `tagID` alone, even if the command buffers are one-time use. This is -because we do not know what combination of submitted command buffers will be -used, and so we cannot know what the render pass contains until submit time. -Split render passes will emit a `submitID.tagID` metadata annex containing -the parameters that can only be known at submit time. 
- -### Workload metadata for compute dispatches - -_**TODO:** Compute workgroup parsing from the SPIR-V is not yet implemented._ - -Compute workload dispatch is simple to track, but one of the metadata items we -want to export is the total size of the work space (work_group_count * -work_group_size). - -The work group count is defined by the API call, but may be an indirect -parameter (see indirect tracking above). - -The work group size is defined by the program pipeline, and is defined in the -SPIR-V via a literal or a build-time specialization constant. To support this -use case we will need to parse the SPIR-V when the pipeline is built, if -SPIR-V is available. - -### Workload metadata for indirect calls - -_**TODO:** Indirect parameter tracking is not yet implemented._ - -One of the valuable pieces of metadata that we want to present is the size of -each workload. For render passes this is captured at API call time, but for -other workloads the size can be an indirect parameter that is not known when -the triggering API call is made. - -To capture indirect parameters we insert a transfer that copies the indirect -parameters into a layer-owned buffer. To ensure exclusive use of the buffer and -avoid data corruption, each buffer region used is unique to a specific `tagID`. -Attempting to submit the same command buffer multiple times will result in -the workload being serialized to avoid racy access to the buffer. Once the -buffer has been retrieved by the layer, a metadata annex containing the -indirect parameters will be emitted using the `submitID.tagID` pair. This may -be some time later than the original submit. - -### Workload metadata for user-defined labels - -The workload metadata captures user-defined labels that the application -provides using `vkCmdDebugMarkerBeginEXT()` and `vkCmdDebugMarkerEndEXT()`. -These are a stack-based debug mechanism where `Begin` pushes a new entry on to -to the stack, and `End` pops the the most recent level off the stack. 
- -Workloads are labelled with the stack values that existed when the workload -was started. For render passes this is the value on the stack when, e.g., -`vkCmdBeginRenderPass()` was called. We do not capture any labels that exist -inside the render pass. - -The debug label stack belongs to the queue, not to the command buffer, so the -value of the label stack is not known until submit time. The debug information -for a specific `submitID.tagID` pair is therefore provided as an annex at -submit time once the stack can be resolved. - -## Message protocol - -For each workload in a command buffer, or part-workload in the case of a -suspended render pass, we record a JSON metadata blob containing the payload -we want to send. - -The low level protocol message contains: - -* Message type `uint8_t` -* Sequence ID `uint64_t` (optional, implied by message type) -* Tag ID `uint64_t` -* JSON length `uint32_t` -* JSON payload `uint8_t[]` - -Each workload will read whatever properties it can from the `tagID` metadata -and will then merge in all fields from any subsequent `sequenceID.tagID` -metadata that matches. +The latest Arm driver integration with the Perfetto profiler propagates +application debug labels into the GPU Render Stages scheduler events. The debug +labels are the label stack created using either of these Vulkan methods: + +* `vkCmdBegin/EndDebugUtilsLabelEXT()` +* `vkCmdDebugMarkerBegin/EndEXT()` + +This layer utilizes this mechanism to wrap each submitted workload in a command +buffer with a unique `tagID` which identifies that recorded workload. A +metadata side-channel provides the metadata for each workload, annotating each +metadata record with the matching `tagID` to allow them to be cross-referenced +later. + +### Limitation: Indirect dispatches and trace rays + +The current implementation captures the metadata parameters when the command +buffer is recorded. The layer does not currently support asynchronous capture +of indirect parameter buffers. 
Indirect dispatch and trace rays are still +captured and reported, but with unknown workload dimensions. + +### Limitation: Compute dispatch sizes + +The current implementation reports the size of a compute workload as the +number of work groups, because this is the parameter used by the API. We +eventually want to report this as the number of work items, but the parsing +of the SPIR-V and pipeline parameters has not yet been implemented. + +### Limitation: Dynamic render passes split over multiple command buffers + +The label containing the `tagID` is recorded into the application command +buffer when the command buffer is recorded. The workload-to-metadata mapping +requires that every use of a `tagID` has the same properties, or we will +be unable to associate the correct metadata with its matching workload. + +Content that splits a render pass over multiple command buffers that +are not one-time-submit violates this requirement. Multiple submits of a render +pass with a single `tagID` may have different numbers of draw calls, depending +on the number of draws that occur in the later command buffers that resume the +render pass. When the layer detects a suspended render pass in a multi-submit +command buffer, it will still capture and report the workload, but with an +unknown draw call count. + +## Command stream modelling + +Most properties we track are a property of the command buffer recording in +isolation. However, the user debug label stack is a property of the queue and +persists across submits. We can therefore only determine the debug label +associated with a workload in the command stream at submit time, and must +resolve it per workload inside the command buffer. + +To support this we implement a software command stream that contains simple +bytecode actions that represent the sequence of debug label and workload +commands inside each command buffer.
This "command stream" can be played to +update the queue state at submit time, triggering metadata submission +for each workload that can snapshot the current state of the user debug label +stack at that point in the command stream. - - - diff --git a/layer_gpu_timeline/android_build.sh b/layer_gpu_timeline/android_build.sh index 5bdbff5..960b2b0 100644 --- a/layer_gpu_timeline/android_build.sh +++ b/layer_gpu_timeline/android_build.sh @@ -67,7 +67,7 @@ cmake \ -DCMAKE_TOOLCHAIN_FILE="${ANDROID_NDK_HOME}/build/cmake/android.toolchain.cmake" \ .. -make -j8 +make -j1 popd diff --git a/layer_gpu_timeline/docs/command_buffer_model.md b/layer_gpu_timeline/docs/command_buffer_model.md deleted file mode 100644 index f317b51..0000000 --- a/layer_gpu_timeline/docs/command_buffer_model.md +++ /dev/null @@ -1,155 +0,0 @@ -# Layer: GPU Timeline - Command Buffer Modelling - -One of the main challenges of this layer driver is modelling behavior in queues -and command buffers that is not known until submit time, and then taking -appropriate actions based on the combination of both the head state of the -queue and the content of the pre-recorded command buffers. - -Our design to solve this is a lightweight software command stream which is -recorded when a command buffer is recorded, and then executed when the -command buffer is submitted to the queue. Just like a real hardware command -stream these commands can update state or trigger some other action we need -performed. - -## Layer commands - -**MARKER_BEGIN(const std::string\*):** - -* Push a new marker into the queue debug label stack. - -**MARKER_END():** - -* Pop the latest marker from the queue debug label stack. - -**RENDER_PASS(const json\*):** - -* Set the current workload to a new render pass with the passed metadata. - -**RENDERPASS_RESUME(const json\*):** - -* Update the current workload, which must be a render pass, with extra - draw count metadata.
- -**COMPUTE_DISPATCH_BEGIN(const json\*):** - -* Set the current workload to a new compute dispatch with the passed metadata. - -**TRACE_RAYS_BEGIN(const json\*):** - -* Set the current workload to a new trace rays with the passed metadata. - -**BUFFER_TRANSFER_BEGIN(const json\*):** - -* Set the current workload to a new a buffer transfer. - -**IMAGE_TRANSFER(const json\*):** - -* Set the current workload to a new image transfer. - -**WORKLOAD_END():** - -* Mark the current workload as complete, and emit a built metadata entry for - it. - -## Layer command recording - -Command buffer recording is effectively building two separate state -structures for the layer. - -The first is a per-workload or per-restart JSON structure that contains the -metadata we need for that workload. For partial workloads - e.g. a dynamic -render pass begin that has been suspended - this metadata will be partial and -rely on later restart metadata to complete it. - -The second is the layer "command stream" that contains the bytecode commands -to execute when the command buffer is submitted to the queue. These commands -are very simple, consisting of a list of command+pointer pairs, where the -pointer value may be unused by some commands. Commands are stored in a -std::vector, but we reserve enough memory to store 256 commands without -reallocating which is enough for the majority of command buffers we see in -real applications. - -The command stream for a secondary command buffer is inlined into the primary -command buffer during recording. - -### Recording sequence - -When application records a new workload: - - * A `tagID` is assigned and recorded using `vkCmdMarkerBegin()` label in the - Vulkan command stream _before_ the new workload is written to the command - stream. - * If workload is using indirect parameters, then a transfer job to copy - indirect parameters into a layer-owned buffer is emitted _before_ the new - workload. 
No additional barrier is needed because application barriers must - have already ensured that the indirect parameter buffer is valid. - * A proxy workload object is created in the layer storing the assigned - `tagID` and all settings that are known at command recording time. - * A layer command stream command is recorded into the submit time stream - indicating `_BEGIN` with a pointer to the proxy workload. Note that - this JSON may be modified later for some workloads. - * If workload is using indirect parameters, a layer command stream command is - recorded into the resolve time stream, which will handle cleanup and - emitting the `submitID.tagID` annex message for the indirect data. - * If the command buffer is not ONE_TIME_SUBMIT, if any workload is using - indirect parameters, or contains incomplete render passes, the command - buffer is marked as needing a `submitID` wrapper. - * The user command is written to the Vulkan command stream. - -When application resumes a render pass workload: - - * A `tagID` of zero is assigned, but not emitted to the command stream. - * A layer command stream command is recorded into the submit time stream - indicating `_RESUME` with a pointer to the proxy workload. Note that - this JSON may be modified later for some workloads. - * The user command is written to the Vulkan command stream. - -When application ends a workload: - - * For render pass workloads, any statistics accumulated since the last begin - are rolled up into the proxy workload object. - * For render pass workloads, the user command is written to the Vulkan - command stream. - * The command steam label scope is closed using `vkCmdMarkerEnd()`. - -## Layer command playback - -The persistent state for command playback belongs to the queues the command -buffers are submitted to. 
The command stream bytecode is run by a bytecode -interpreter associated with the state of the current queue, giving the -interpreter access to the current `submitID` and queue debug label stack. - -### Submitting sequence - -For each command buffer in the user submit: - -* If the command buffer needs a `submitID` we allocate a unique `submitID` and - create two new command buffers that will wrap the user command buffer with an - additional stack layer of debug label containing the `s` string. We will - inject a layer command stream async command to handle freeing the command - buffers. -* The tool will process the submit-time layer commands, executing each command - to either update some state or emit -* If there are any async layer commands, either recorded in the command buffer - or from the wrapping command buffers, we will need to add an async handler. - This cannot safely use the user fence or depend on any user object lifetime, - so we will add a layer-owned timeline semaphore to the submit which we can - wait on to determine when it is safe trigger the async work. - -## Future: Async commands - -One of our longer-term goals is to be able to capture indirect parameters, -which will be available after-the-fact once the GPU has processed the command -buffer. Once we have the data we can emit an annex message containing -parameters for each indirect `submitID.tagID` pair in the command buffer. - -We need to be able to emit the metadata after the commands are complete, -and correctly synchronize use of the indirect capture staging buffer -if command buffers are reissued. My current thinking is that we would -implement this using additional layer commands that are processed on submit, -including support for async commands that run in a separate thread and -wait on the command buffer completion fence before running. 
- -- - - - -_Copyright © 2024, Arm Limited and contributors._ diff --git a/layer_gpu_timeline/source/device.cpp b/layer_gpu_timeline/source/device.cpp index 271aab3..1e140ff 100644 --- a/layer_gpu_timeline/source/device.cpp +++ b/layer_gpu_timeline/source/device.cpp @@ -35,6 +35,9 @@ #include "device.hpp" #include "instance.hpp" +/** + * @brief The dispatch lookup for all of the created Vulkan devices. + */ static std::unordered_map> g_devices; /* See header for documentation. */ @@ -54,8 +57,8 @@ void Device::store( /* See header for documentation. */ Device* Device::retrieve( - VkDevice handle) -{ + VkDevice handle +) { void* key = getDispatchKey(handle); assert(isInMap(key, g_devices)); return g_devices.at(key).get(); @@ -63,8 +66,8 @@ Device* Device::retrieve( /* See header for documentation. */ Device* Device::retrieve( - VkQueue handle) -{ + VkQueue handle +) { void* key = getDispatchKey(handle); assert(isInMap(key, g_devices)); return g_devices.at(key).get(); @@ -72,8 +75,8 @@ Device* Device::retrieve( /* See header for documentation. */ Device* Device::retrieve( - VkCommandBuffer handle) -{ + VkCommandBuffer handle +) { void* key = getDispatchKey(handle); assert(isInMap(key, g_devices)); return g_devices.at(key).get(); @@ -92,7 +95,8 @@ Device::Device( VkPhysicalDevice _physicalDevice, VkDevice _device, PFN_vkGetDeviceProcAddr nlayerGetProcAddress -): instance(_instance), +): + instance(_instance), physicalDevice(_physicalDevice), device(_device) { @@ -105,9 +109,3 @@ Device::Device( commsWrapper = std::make_unique(*commsModule); } } - -/* See header for documentation. */ -Device::~Device() -{ - -} diff --git a/layer_gpu_timeline/source/device.hpp b/layer_gpu_timeline/source/device.hpp index 2d5460d..b56de83 100644 --- a/layer_gpu_timeline/source/device.hpp +++ b/layer_gpu_timeline/source/device.hpp @@ -24,8 +24,7 @@ */ /** - * @file - * Declares the root class for layer management of VkDevice objects. 
+ * @file Declares the root class for layer management of VkDevice objects. * * Role summary * ============ @@ -41,10 +40,9 @@ * Key properties * ============== * - * Unlike EGL contexts, Vulkan devices are designed to be used concurrently by - * multiple application threads. An application can have multiple concurrent - * devices (although this is less common than with OpenGL ES applications), and - * use each device from multiple threads. + * Vulkan devices are designed to be used concurrently by multiple application + * threads. An application can have multiple concurrent devices, and use each + * device from multiple threads. * * Access to the layer driver structures must therefore be kept thread-safe. * For sake of simplicity, we generally implement this by: @@ -83,6 +81,8 @@ class Device * @brief Fetch a device from the global store of dispatchable devices. * * @param handle The dispatchable device handle to use as an indirect lookup. + * + * @return The layer device context. */ static Device* retrieve( VkDevice handle); @@ -91,6 +91,8 @@ class Device * @brief Fetch a device from the global store of dispatchable devices. * * @param handle The dispatchable queue handle to use as an indirect lookup. + * + * @return The layer device context. */ static Device* retrieve( VkQueue handle); @@ -99,6 +101,8 @@ class Device * @brief Fetch a device from the global store of dispatchable devices. * * @param handle The dispatchable command buffer handle to use as an indirect lookup. + * + * @return The layer device context. */ static Device* retrieve( VkCommandBuffer handle); @@ -117,7 +121,7 @@ class Device * @param instance The layer instance object this device is created with. * @param physicalDevice The physical device this logical device is for. * @param device The device handle this device is created with. - * @param nlayerGetProcAddress The vkGetProcAddress function in the driver/next layer down. 
+ * @param nlayerGetProcAddress The vkGetDeviceProcAddr function for the driver. */ Device( Instance* instance, @@ -128,21 +132,27 @@ class Device /** * @brief Destroy this layer device object. */ - ~Device(); + ~Device() = default; /** - * @brief Callback for sending messages + * @brief Callback for sending messages on frame boundary. + * + * @param message The message to send. */ - void onFrame(const std::string& message) - { + void onFrame( + const std::string& message + ) { commsWrapper->txMessage(message); } /** - * @brief Callback for sending messages + * @brief Callback for sending messages on workload submit to a queue. + * + * @param message The message to send. */ - void onWorkloadSubmit(const std::string& message) - { + void onWorkloadSubmit( + const std::string& message + ) { commsWrapper->txMessage(message); } @@ -177,17 +187,17 @@ class Device const VkDevice device; /** - * @brief State tracking for this device; + * @brief State tracker for this device. */ Tracker::Device stateTracker; /** - * @brief Communications module. + * @brief Shared network communications module. */ static std::unique_ptr commsModule; /** - * @brief Communications module message encoder. + * @brief Shared network communications message encoder.
*/ static std::unique_ptr commsWrapper; }; diff --git a/layer_gpu_timeline/source/device_utils.hpp b/layer_gpu_timeline/source/device_utils.hpp new file mode 100644 index 0000000..eddf193 --- /dev/null +++ b/layer_gpu_timeline/source/device_utils.hpp @@ -0,0 +1,56 @@ +/* + * SPDX-License-Identifier: MIT + * ---------------------------------------------------------------------------- + * Copyright (c) 2024 Arm Limited + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * ---------------------------------------------------------------------------- + */ + +#pragma once + +#include + +#include "framework/utils.hpp" + +#include "device.hpp" + +/** + * @brief Emit a start tag via a driver debug utils label. + * + * @param layer The layer context for the device. + * @param commandBuffer The command buffer we are recording. + * @param tagID The tagID to emit into the label. 
+ */ +[[maybe_unused]] static void emitStartTag( + Device* layer, + VkCommandBuffer commandBuffer, + uint64_t tagID +) { + // Emit the unique workload tag into the command stream + std::string tagLabel = formatString("t%" PRIu64, tagID); + VkDebugUtilsLabelEXT tagInfo { + .sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT, + .pNext = nullptr, + .pLabelName = tagLabel.c_str(), + .color = { 0.0f, 0.0f, 0.0f, 0.0f } + }; + + layer->driver.vkCmdBeginDebugUtilsLabelEXT(commandBuffer, &tagInfo); +} diff --git a/layer_gpu_timeline/source/instance.cpp b/layer_gpu_timeline/source/instance.cpp index 6ac278e..0b62857 100644 --- a/layer_gpu_timeline/source/instance.cpp +++ b/layer_gpu_timeline/source/instance.cpp @@ -29,6 +29,9 @@ #include "instance.hpp" +/** + * @brief The dispatch lookup for all of the created Vulkan instances. + */ static std::unordered_map> g_instances; /* See header for documentation. */ @@ -42,8 +45,8 @@ void Instance::store( /* See header for documentation. */ Instance* Instance::retrieve( - VkInstance handle) -{ + VkInstance handle +) { void* key = getDispatchKey(handle); assert(isInMap(key, g_instances)); return g_instances.at(key).get(); @@ -51,8 +54,8 @@ Instance* Instance::retrieve( /* See header for documentation. */ Instance* Instance::retrieve( - VkPhysicalDevice handle) -{ + VkPhysicalDevice handle +) { void* key = getDispatchKey(handle); assert(isInMap(key, g_instances)); return g_instances.at(key).get(); @@ -68,7 +71,8 @@ void Instance::destroy( /* See header for documentation. 
*/ Instance::Instance( VkInstance _instance, - PFN_vkGetInstanceProcAddr _nlayerGetProcAddress) : + PFN_vkGetInstanceProcAddr _nlayerGetProcAddress +) : instance(_instance), nlayerGetProcAddress(_nlayerGetProcAddress) { diff --git a/layer_gpu_timeline/source/instance.hpp b/layer_gpu_timeline/source/instance.hpp index cfda54e..fc6af6b 100644 --- a/layer_gpu_timeline/source/instance.hpp +++ b/layer_gpu_timeline/source/instance.hpp @@ -42,9 +42,8 @@ * Key properties * ============== * - * Unlike EGL contexts, Vulkan instances are designed to be used concurrently - * by multiple application threads. An application can have multiple concurrent - * instances (although this is less common than with OpenGL ES applications), + * Vulkan instances are designed to be used concurrently by multiple + * application threads. An application can have multiple concurrent instances, * and use each instance from multiple threads. * * Access to the layer driver structures must therefore be kept thread-safe. @@ -65,10 +64,6 @@ /** * @brief This class implements the layer state tracker for a single instance. - * - * These objects are relatively light-weight, as they are rarely used once a VkDevice has been - * created, but we need to track the chain-of-ownership as the instance is the root object that - * the application creates when initializing a rendering context. */ class Instance { @@ -87,6 +82,8 @@ class Instance * @brief Fetch an instance from the global store of dispatchable instances. * * @param handle The dispatchable instance handle to use as an indirect lookup. + * + * @return The layer instance context. */ static Instance* retrieve( VkInstance handle); @@ -95,6 +92,8 @@ class Instance * @brief Fetch an instance from the global store of dispatchable instances. * * @param handle The dispatchable physical device handle to use as an indirect lookup. + * + * @return The layer instance context. 
*/ static Instance* retrieve( VkPhysicalDevice handle); diff --git a/layer_gpu_timeline/source/layer_device_functions.hpp b/layer_gpu_timeline/source/layer_device_functions.hpp index 3806398..8c2f8b5 100644 --- a/layer_gpu_timeline/source/layer_device_functions.hpp +++ b/layer_gpu_timeline/source/layer_device_functions.hpp @@ -23,6 +23,8 @@ * ---------------------------------------------------------------------------- */ +#pragma once + #include #include "framework/utils.hpp" diff --git a/layer_gpu_timeline/source/layer_device_functions_command_buffer.cpp b/layer_gpu_timeline/source/layer_device_functions_command_buffer.cpp index f0cc338..ef8e920 100644 --- a/layer_gpu_timeline/source/layer_device_functions_command_buffer.cpp +++ b/layer_gpu_timeline/source/layer_device_functions_command_buffer.cpp @@ -23,9 +23,7 @@ * ---------------------------------------------------------------------------- */ -#include #include -#include #include "device.hpp" #include "layer_device_functions.hpp" diff --git a/layer_gpu_timeline/source/layer_device_functions_command_pool.cpp b/layer_gpu_timeline/source/layer_device_functions_command_pool.cpp index 31bc1b4..a640a90 100644 --- a/layer_gpu_timeline/source/layer_device_functions_command_pool.cpp +++ b/layer_gpu_timeline/source/layer_device_functions_command_pool.cpp @@ -23,9 +23,7 @@ * ---------------------------------------------------------------------------- */ -#include #include -#include #include "device.hpp" #include "layer_device_functions.hpp" diff --git a/layer_gpu_timeline/source/layer_device_functions_debug.cpp b/layer_gpu_timeline/source/layer_device_functions_debug.cpp index 4c1e1d9..1905193 100644 --- a/layer_gpu_timeline/source/layer_device_functions_debug.cpp +++ b/layer_gpu_timeline/source/layer_device_functions_debug.cpp @@ -23,9 +23,7 @@ * ---------------------------------------------------------------------------- */ -#include #include -#include #include "device.hpp" #include "layer_device_functions.hpp" diff 
--git a/layer_gpu_timeline/source/layer_device_functions_dispatch.cpp b/layer_gpu_timeline/source/layer_device_functions_dispatch.cpp index 7555501..de5ee10 100644 --- a/layer_gpu_timeline/source/layer_device_functions_dispatch.cpp +++ b/layer_gpu_timeline/source/layer_device_functions_dispatch.cpp @@ -23,15 +23,25 @@ * ---------------------------------------------------------------------------- */ -#include #include -#include #include "device.hpp" +#include "device_utils.hpp" #include "layer_device_functions.hpp" extern std::mutex g_vulkanLock; +/** + * @brief Register a compute dispatch with the tracker. + * + * @param layer The layer context for the device. + * @param commandBuffer The command buffer we are recording. + * @param groupX The X size of the dispatch in groups. + * @param groupY The Y size of the dispatch in groups. + * @param groupZ The Z size of the dispatch in groups. + * + * @return The assigned tagID for the workload. + */ static uint64_t registerDispatch( Device* layer, VkCommandBuffer commandBuffer, @@ -44,23 +54,6 @@ static uint64_t registerDispatch( return cb.dispatch(groupX, groupY, groupZ); } -static void emitStartTag( - Device* layer, - VkCommandBuffer commandBuffer, - uint64_t tagID -) { - // Emit the unique workload tag into the command stream - std::string tagLabel = formatString("t%" PRIu64, tagID); - VkDebugUtilsLabelEXT tagInfo { - .sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT, - .pNext = nullptr, - .pLabelName = tagLabel.c_str(), - .color = { 0.0f, 0.0f, 0.0f, 0.0f } - }; - - layer->driver.vkCmdBeginDebugUtilsLabelEXT(commandBuffer, &tagInfo); -} - /* See Vulkan API for documentation. 
*/ template <> VKAPI_ATTR void VKAPI_CALL layer_vkCmdDispatch( diff --git a/layer_gpu_timeline/source/layer_device_functions_draw_call.cpp b/layer_gpu_timeline/source/layer_device_functions_draw_call.cpp index 49cf669..42350d0 100644 --- a/layer_gpu_timeline/source/layer_device_functions_draw_call.cpp +++ b/layer_gpu_timeline/source/layer_device_functions_draw_call.cpp @@ -32,6 +32,12 @@ extern std::mutex g_vulkanLock; +/** + * @brief Register a draw call with the tracker. + * + * @param layer The layer context for the device. + * @param commandBuffer The command buffer we are recording. + */ static void registerDrawCall( Device* layer, VkCommandBuffer commandBuffer diff --git a/layer_gpu_timeline/source/layer_device_functions_queue.cpp b/layer_gpu_timeline/source/layer_device_functions_queue.cpp index 30ca611..a5c92e2 100644 --- a/layer_gpu_timeline/source/layer_device_functions_queue.cpp +++ b/layer_gpu_timeline/source/layer_device_functions_queue.cpp @@ -23,12 +23,10 @@ * ---------------------------------------------------------------------------- */ -#include #include -#include +#include #include "utils/misc.hpp" -#include "nlohmann/json.hpp" #include "device.hpp" #include "layer_device_functions.hpp" diff --git a/layer_gpu_timeline/source/layer_device_functions_render_pass.cpp b/layer_gpu_timeline/source/layer_device_functions_render_pass.cpp index b59f305..5d16880 100644 --- a/layer_gpu_timeline/source/layer_device_functions_render_pass.cpp +++ b/layer_gpu_timeline/source/layer_device_functions_render_pass.cpp @@ -23,16 +23,15 @@ * ---------------------------------------------------------------------------- */ -#include #include -#include - -#include "device.hpp" -#include "layer_device_functions.hpp" #include "framework/utils.hpp" #include "trackers/render_pass.hpp" +#include "device.hpp" +#include "device_utils.hpp" +#include "layer_device_functions.hpp" + extern std::mutex g_vulkanLock; /* See Vulkan API for documentation. 
*/ @@ -166,18 +165,9 @@ VKAPI_ATTR void VKAPI_CALL layer_vkCmdBeginRenderPass( // Notify the command buffer we are starting a new render pass uint64_t tagID = cb.renderPassBegin(rp, width, height); - // Emit the unique workload tag into the command stream - std::string tagLabel = formatString("t%" PRIu64, tagID); - VkDebugUtilsLabelEXT tagInfo { - .sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT, - .pNext = nullptr, - .pLabelName = tagLabel.c_str(), - .color = { 0.0f, 0.0f, 0.0f, 0.0f } - }; - // Release the lock to call into the driver lock.unlock(); - layer->driver.vkCmdBeginDebugUtilsLabelEXT(commandBuffer, &tagInfo); + emitStartTag(layer, commandBuffer, tagID); layer->driver.vkCmdBeginRenderPass(commandBuffer, pRenderPassBegin, contents); } @@ -204,18 +194,9 @@ VKAPI_ATTR void VKAPI_CALL layer_vkCmdBeginRenderPass2( // Notify the command buffer we are starting a new render pass uint64_t tagID = cb.renderPassBegin(rp, width, height); - // Emit the unique workload tag into the command stream - std::string tagLabel = formatString("t%" PRIu64, tagID); - VkDebugUtilsLabelEXT tagInfo { - .sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT, - .pNext = nullptr, - .pLabelName = tagLabel.c_str(), - .color = { 0.0f, 0.0f, 0.0f, 0.0f } - }; - // Release the lock to call into the driver lock.unlock(); - layer->driver.vkCmdBeginDebugUtilsLabelEXT(commandBuffer, &tagInfo); + emitStartTag(layer, commandBuffer, tagID); layer->driver.vkCmdBeginRenderPass2(commandBuffer, pRenderPassBegin, pSubpassBeginInfo); } @@ -242,18 +223,9 @@ VKAPI_ATTR void VKAPI_CALL layer_vkCmdBeginRenderPass2KHR( // Notify the command buffer we are starting a new render pass uint64_t tagID = cb.renderPassBegin(rp, width, height); - // Emit the unique workload tag into the command stream - std::string tagLabel = formatString("t%" PRIu64, tagID); - VkDebugUtilsLabelEXT tagInfo { - .sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT, - .pNext = nullptr, - .pLabelName = tagLabel.c_str(), - .color = { 0.0f, 0.0f, 
0.0f, 0.0f } - }; - // Release the lock to call into the driver lock.unlock(); - layer->driver.vkCmdBeginDebugUtilsLabelEXT(commandBuffer, &tagInfo); + emitStartTag(layer, commandBuffer, tagID); layer->driver.vkCmdBeginRenderPass2KHR(commandBuffer, pRenderPassBegin, pSubpassBeginInfo); } @@ -286,22 +258,11 @@ VKAPI_ATTR void VKAPI_CALL layer_vkCmdBeginRendering( // Release the lock to call into the driver lock.unlock(); - // Emit the label only for new render passes if (!resuming) { - // Emit the unique workload tag into the command stream - std::string tagLabel = formatString("t%" PRIu64, tagID); - VkDebugUtilsLabelEXT tagInfo { - .sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT, - .pNext = nullptr, - .pLabelName = tagLabel.c_str(), - .color = { 0.0f, 0.0f, 0.0f, 0.0f } - }; - - layer->driver.vkCmdBeginDebugUtilsLabelEXT(commandBuffer, &tagInfo); + emitStartTag(layer, commandBuffer, tagID); } - layer->driver.vkCmdBeginRendering(commandBuffer, pRenderingInfo); } @@ -334,22 +295,11 @@ VKAPI_ATTR void VKAPI_CALL layer_vkCmdBeginRenderingKHR( // Release the lock to call into the driver lock.unlock(); - // Emit the label only for new render passes if (!resuming) { - // Emit the unique workload tag into the command stream - std::string tagLabel = formatString("t%" PRIu64, tagID); - VkDebugUtilsLabelEXT tagInfo { - .sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT, - .pNext = nullptr, - .pLabelName = tagLabel.c_str(), - .color = { 0.0f, 0.0f, 0.0f, 0.0f } - }; - - layer->driver.vkCmdBeginDebugUtilsLabelEXT(commandBuffer, &tagInfo); + emitStartTag(layer, commandBuffer, tagID); } - layer->driver.vkCmdBeginRenderingKHR(commandBuffer, pRenderingInfo); } diff --git a/layer_gpu_timeline/source/layer_device_functions_trace_rays.cpp b/layer_gpu_timeline/source/layer_device_functions_trace_rays.cpp index 2d99a3b..d453a29 100644 --- a/layer_gpu_timeline/source/layer_device_functions_trace_rays.cpp +++ b/layer_gpu_timeline/source/layer_device_functions_trace_rays.cpp @@ -28,10 
+28,22 @@ #include #include "device.hpp" +#include "device_utils.hpp" #include "layer_device_functions.hpp" extern std::mutex g_vulkanLock; +/** + * @brief Register a trace rays dispatch with the tracker. + * + * @param layer The layer context for the device. + * @param commandBuffer The command buffer we are recording. + * @param itemsX The X size of the dispatch in work items. + * @param itemsY The Y size of the dispatch in work items. + * @param itemsZ The Z size of the dispatch in work items. + * + * @return The assigned tagID for the workload. + */ static uint64_t registerTraceRays( Device* layer, VkCommandBuffer commandBuffer, @@ -44,23 +56,6 @@ static uint64_t registerTraceRays( return cb.traceRays(itemsX, itemsY, itemsZ); } -static void emitStartTag( - Device* layer, - VkCommandBuffer commandBuffer, - uint64_t tagID -) { - // Emit the unique workload tag into the command stream - std::string tagLabel = formatString("t%" PRIu64, tagID); - VkDebugUtilsLabelEXT tagInfo { - .sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT, - .pNext = nullptr, - .pLabelName = tagLabel.c_str(), - .color = { 0.0f, 0.0f, 0.0f, 0.0f } - }; - - layer->driver.vkCmdBeginDebugUtilsLabelEXT(commandBuffer, &tagInfo); -} - /* See Vulkan API for documentation. */ template <> VKAPI_ATTR void VKAPI_CALL layer_vkCmdTraceRaysIndirect2KHR( diff --git a/layer_gpu_timeline/source/layer_device_functions_transfer.cpp b/layer_gpu_timeline/source/layer_device_functions_transfer.cpp index 066c23f..3aca1a7 100644 --- a/layer_gpu_timeline/source/layer_device_functions_transfer.cpp +++ b/layer_gpu_timeline/source/layer_device_functions_transfer.cpp @@ -28,10 +28,21 @@ #include #include "device.hpp" +#include "device_utils.hpp" #include "layer_device_functions.hpp" extern std::mutex g_vulkanLock; +/** + * @brief Register a transfer to a buffer with the tracker. + * + * @param layer The layer context for the device. + * @param commandBuffer The command buffer we are recording. 
+ * @param transferType The type of transfer being performed. + * @param byteCount The number of bytes transferred. + * + * @return The assigned tagID for the workload. + */ static uint64_t registerBufferTransfer( Device* layer, VkCommandBuffer commandBuffer, @@ -43,6 +54,16 @@ static uint64_t registerBufferTransfer( return cb.bufferTransfer(transferType, byteCount); } +/** + * @brief Register a transfer to an image with the tracker. + * + * @param layer The layer context for the device. + * @param commandBuffer The command buffer we are recording. + * @param transferType The type of transfer being performed. + * @param pixelCount The number of pixels transferred. + * + * @return The assigned tagID for the workload. + */ static uint64_t registerImageTransfer( Device* layer, VkCommandBuffer commandBuffer, @@ -54,23 +75,6 @@ static uint64_t registerImageTransfer( return cb.imageTransfer(transferType, pixelCount); } -static void emitStartTag( - Device* layer, - VkCommandBuffer commandBuffer, - uint64_t tagID -) { - // Emit the unique workload tag into the command stream - std::string tagLabel = formatString("t%" PRIu64, tagID); - VkDebugUtilsLabelEXT tagInfo { - .sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT, - .pNext = nullptr, - .pLabelName = tagLabel.c_str(), - .color = { 0.0f, 0.0f, 0.0f, 0.0f } - }; - - layer->driver.vkCmdBeginDebugUtilsLabelEXT(commandBuffer, &tagInfo); -} - // Commands for transfers /* See Vulkan API for documentation. */ diff --git a/layer_gpu_timeline/source/timeline_comms.cpp b/layer_gpu_timeline/source/timeline_comms.cpp index fbb496a..125acef 100644 --- a/layer_gpu_timeline/source/timeline_comms.cpp +++ b/layer_gpu_timeline/source/timeline_comms.cpp @@ -27,16 +27,19 @@ #include "timeline_comms.hpp" +/* See header for documentation. 
*/ TimelineComms::TimelineComms( - Comms::CommsInterface& commsIf) + Comms::CommsInterface& _comms +): + comms(_comms) { - comms = &commsIf; - if (comms->isConnected()) + if (comms.isConnected()) { - endpoint = comms->getEndpointID("GPUTimeline"); + endpoint = comms.getEndpointID("GPUTimeline"); } } +/* See header for documentation. */ void TimelineComms::txMessage( const std::string& message) { @@ -47,5 +50,5 @@ void TimelineComms::txMessage( } auto data = std::make_unique(message.begin(), message.end()); - comms->txAsync(endpoint, std::move(data)); + comms.txAsync(endpoint, std::move(data)); } diff --git a/layer_gpu_timeline/source/timeline_comms.hpp b/layer_gpu_timeline/source/timeline_comms.hpp index 435f37a..84a3c8a 100644 --- a/layer_gpu_timeline/source/timeline_comms.hpp +++ b/layer_gpu_timeline/source/timeline_comms.hpp @@ -23,20 +23,49 @@ * ---------------------------------------------------------------------------- */ +/** + * @file Declares a simple comms encoder for the timeline layer. + */ + #pragma once #include "comms/comms_interface.hpp" +/** + * @brief A simple message encoder for the timeline comms endpoint. + * + * TODO: This is currently a very simple implementation because we are simply + * passing JSON strings around. This is not the most efficient way of doing + * this and in future this module will be used to implement binary encoders + * for each specific message type that needs sending. + */ class TimelineComms { public: + /** + * @brief Construct a new encoder. + * + * @param comms The common comms module used by all services. + */ TimelineComms( Comms::CommsInterface& comms); + /** + * @brief Send a message to the GPU timeline endpoint service. + * + * @param message The message to send. + */ void txMessage( const std::string& message); private: + /** + * @brief The endpoint ID of the service, or 0 if not found. 
+ */ Comms::EndpointID endpoint { 0 }; - Comms::CommsInterface* comms { nullptr }; + + /** + * @brief The common module for network messaging. + */ + Comms::CommsInterface& comms; }; diff --git a/layer_gpu_timeline/source/version.hpp.in b/layer_gpu_timeline/source/version.hpp.in index 50c30b9..5fcb9c3 100644 --- a/layer_gpu_timeline/source/version.hpp.in +++ b/layer_gpu_timeline/source/version.hpp.in @@ -24,9 +24,7 @@ */ /** - * @file - * This header implements placeholder templates that are populated by CMake - * during configure. + * @file Placeholder templates that are populated by CMake during configure. */ #pragma once diff --git a/source_common/comms/comms_message.cpp b/source_common/comms/comms_message.cpp index 0e3df99..03f0e9f 100644 --- a/source_common/comms/comms_message.cpp +++ b/source_common/comms/comms_message.cpp @@ -37,7 +37,8 @@ Message::Message( EndpointID _endpointID, MessageType _messageType, MessageID _messageID, - std::unique_ptr _transmitData) : + std::unique_ptr _transmitData +) : endpointID(_endpointID), messageType(_messageType), messageID(_messageID), diff --git a/source_common/comms/comms_module.cpp b/source_common/comms/comms_module.cpp index 42c815e..335b79f 100644 --- a/source_common/comms/comms_module.cpp +++ b/source_common/comms/comms_module.cpp @@ -42,7 +42,7 @@ namespace Comms { -/** See header for documentation. */ +/* See header for documentation. */ CommsModule::CommsModule( const std::string& domainAddress ) { @@ -76,7 +76,7 @@ CommsModule::CommsModule( receiver = std::make_unique(*this); } -/** See header for documentation. */ +/* See header for documentation. */ CommsModule::CommsModule( const std::string& hostAddress, int port @@ -109,7 +109,7 @@ CommsModule::CommsModule( receiver = std::make_unique(*this); } -/** See header for documentation. */ +/* See header for documentation. 
*/ CommsModule::~CommsModule() { // Stop async worker threads before closing the socket @@ -132,13 +132,13 @@ CommsModule::~CommsModule() } } -/** See header for documentation. */ +/* See header for documentation. */ bool CommsModule::isConnected() { return sockfd >= 0; } -/** See header for documentation. */ +/* See header for documentation. */ EndpointID CommsModule::getEndpointID( const std::string& name ) { @@ -192,7 +192,7 @@ EndpointID CommsModule::getEndpointID( } } -/** See header for documentation. */ +/* See header for documentation. */ void CommsModule::txAsync( EndpointID endpoint, std::unique_ptr data @@ -206,7 +206,7 @@ void CommsModule::txAsync( enqueueMessage(std::move(message)); } -/** See header for documentation. */ +/* See header for documentation. */ void CommsModule::tx( EndpointID endpoint, std::unique_ptr data @@ -221,7 +221,7 @@ void CommsModule::tx( message->wait(); } -/** See header for documentation. */ +/* See header for documentation. */ std::unique_ptr CommsModule::txRx( EndpointID endpoint, std::unique_ptr data @@ -238,20 +238,20 @@ std::unique_ptr CommsModule::txRx( return std::move(message->responseData); } -/** See header for documentation. */ +/* See header for documentation. */ MessageID CommsModule::assignMessageID() { return nextMessageID.fetch_add(1, std::memory_order_relaxed); } -/** See header for documentation. */ +/* See header for documentation. */ void CommsModule::enqueueMessage( std::shared_ptr message ) { messageQueue.put(std::move(message)); } -/** See header for documentation. */ +/* See header for documentation. 
*/ std::shared_ptr CommsModule::dequeueMessage() { return messageQueue.get(); diff --git a/source_common/comms/comms_receiver.cpp b/source_common/comms/comms_receiver.cpp index 6c3e40d..d27b868 100644 --- a/source_common/comms/comms_receiver.cpp +++ b/source_common/comms/comms_receiver.cpp @@ -37,13 +37,15 @@ #include "comms/comms_receiver.hpp" #include "comms/comms_module.hpp" #include "framework/utils.hpp" +#include "utils/misc.hpp" namespace Comms { -/** See header for documentation. */ +/* See header for documentation. */ Receiver::Receiver( CommsModule& _parent -) : parent(_parent) +) : + parent(_parent) { int pipe_err = pipe(stopRequestPipe); if (pipe_err) @@ -55,7 +57,7 @@ Receiver::Receiver( worker = std::thread(&Receiver::runReceiver, this); } -/** See header for documentation. */ +/* See header for documentation. */ Receiver::~Receiver() { // Stop the worker thread if it's not stopped already @@ -69,7 +71,7 @@ Receiver::~Receiver() close(stopRequestPipe[1]); } -/** See header for documentation. */ +/* See header for documentation. */ void Receiver::stop() { // Mark the engine as stopping @@ -77,13 +79,14 @@ void Receiver::stop() // Poke the pipe to wake the worker thread if it is blocked on a read int data = 0xdead; - [[maybe_unused]] int _ = write(stopRequestPipe[1], &data, sizeof(int)); + int ret = write(stopRequestPipe[1], &data, sizeof(int)); + UNUSED(ret); // Join on the worker thread worker.join(); } -/** See header for documentation. */ +/* See header for documentation. */ void Receiver::parkMessage( std::shared_ptr message ) { @@ -91,7 +94,7 @@ void Receiver::parkMessage( parkingBuffer.insert({ message->messageID, std::move(message) }); } -/** See header for documentation. */ +/* See header for documentation. */ void Receiver::runReceiver() { while (!stopRequested) @@ -119,7 +122,7 @@ void Receiver::runReceiver() } } -/** See header for documentation. */ +/* See header for documentation. 
*/ void Receiver::wakeMessage( MessageID messageID, std::unique_ptr data @@ -142,7 +145,7 @@ void Receiver::wakeMessage( message->notify(); } -/** See header for documentation. */ +/* See header for documentation. */ bool Receiver::receiveData( uint8_t* data, size_t dataSize diff --git a/source_common/comms/comms_transmitter.cpp b/source_common/comms/comms_transmitter.cpp index dddd290..3088754 100644 --- a/source_common/comms/comms_transmitter.cpp +++ b/source_common/comms/comms_transmitter.cpp @@ -37,16 +37,17 @@ namespace Comms { -/** See header for documentation. */ +/* See header for documentation. */ Transmitter::Transmitter( CommsModule& _parent -) : parent(_parent) +) : + parent(_parent) { // Create and start a worker thread worker = std::thread(&Transmitter::runTransmitter, this); } -/** See header for documentation. */ +/* See header for documentation. */ Transmitter::~Transmitter() { // Stop the worker thread if it's not stopped already @@ -56,7 +57,7 @@ Transmitter::~Transmitter() } } -/** See header for documentation. */ +/* See header for documentation. */ void Transmitter::runTransmitter() { // Keep looping until we are told to stop and message queue is empty @@ -87,7 +88,7 @@ void Transmitter::runTransmitter() } } -/** See header for documentation. */ +/* See header for documentation. */ void Transmitter::stop() { // Mark the engine as stopping @@ -103,7 +104,7 @@ void Transmitter::stop() worker.join(); } -/** See header for documentation. */ +/* See header for documentation. */ void Transmitter::sendMessage( const Message& message ) { @@ -124,7 +125,7 @@ void Transmitter::sendMessage( sendData(data, dataSize); } -/** See header for documentation. */ +/* See header for documentation. 
*/ void Transmitter::sendData( uint8_t* data, size_t dataSize diff --git a/source_common/comms/test/comms_test_server.cpp b/source_common/comms/test/comms_test_server.cpp index de64196..116cfb2 100644 --- a/source_common/comms/test/comms_test_server.cpp +++ b/source_common/comms/test/comms_test_server.cpp @@ -40,7 +40,7 @@ namespace CommsTest { -/** See header for documentation. */ +/* See header for documentation. */ CommsTestServer::CommsTestServer( const std::string& domainAddress ) { @@ -94,7 +94,7 @@ CommsTestServer::CommsTestServer( worker = std::thread(&CommsTestServer::runServer, this); } -/** See header for documentation. */ +/* See header for documentation. */ CommsTestServer::CommsTestServer( int port ) { @@ -152,7 +152,7 @@ CommsTestServer::CommsTestServer( worker = std::thread(&CommsTestServer::runServer, this); } -/** See header for documentation. */ +/* See header for documentation. */ CommsTestServer::~CommsTestServer() { // Stop the worker thread if it's not stopped already @@ -172,7 +172,7 @@ CommsTestServer::~CommsTestServer() close(stopRequestPipe[1]); } -/** See header for documentation. */ +/* See header for documentation. */ void CommsTestServer::stop() { // Mark the engine as stopping @@ -186,7 +186,7 @@ void CommsTestServer::stop() worker.join(); } -/** See header for documentation. */ +/* See header for documentation. */ void CommsTestServer::runServer() { int dataSockfd = accept(listenSockfd, NULL, NULL); @@ -251,7 +251,7 @@ void CommsTestServer::runServer() close(dataSockfd); } -/** See header for documentation. */ +/* See header for documentation. */ bool CommsTestServer::receiveData( int sockfd, uint8_t* data, @@ -297,7 +297,7 @@ bool CommsTestServer::receiveData( return true; } -/** See header for documentation. */ +/* See header for documentation. 
*/ void CommsTestServer::send_data( int sockfd, uint8_t* data, diff --git a/source_common/framework/device_functions.cpp b/source_common/framework/device_functions.cpp index 52c2dae..805aefb 100644 --- a/source_common/framework/device_functions.cpp +++ b/source_common/framework/device_functions.cpp @@ -23,9 +23,7 @@ * ---------------------------------------------------------------------------- */ -#include #include -#include // Include from per-layer code #include "utils.hpp" diff --git a/source_common/framework/instance_functions.cpp b/source_common/framework/instance_functions.cpp index 3c613f3..02ecf28 100644 --- a/source_common/framework/instance_functions.cpp +++ b/source_common/framework/instance_functions.cpp @@ -23,9 +23,7 @@ * ---------------------------------------------------------------------------- */ -#include #include -#include // Include from per-layer code #include "device.hpp" diff --git a/source_common/trackers/command_buffer.hpp b/source_common/trackers/command_buffer.hpp index ea474c0..3962f48 100644 --- a/source_common/trackers/command_buffer.hpp +++ b/source_common/trackers/command_buffer.hpp @@ -43,7 +43,6 @@ #pragma once -#include #include #include #include @@ -168,6 +167,8 @@ class CommandBuffer /** * @brief Begin a user debug marker range. + * + * @param marker The marker label. */ void debugMarkerBegin( std::string marker); @@ -191,7 +192,7 @@ class CommandBuffer /** * @brief Begin recording back into the @a Recording state. * - * @param oneTimeSubmit Is this a one-time submit recording. + * @param oneTimeSubmit Is this a one-time submit recording? */ void begin( bool oneTimeSubmit); @@ -254,14 +255,16 @@ class CommandPool * * \return The layer wrapper object for the command buffer. */ - CommandBuffer& allocateCommandBuffer(VkCommandBuffer commandBuffer); + CommandBuffer& allocateCommandBuffer( + VkCommandBuffer commandBuffer); /** * @brief Free the command buffer in the pool with the given handle. 
* * @param commandBuffer The Vulkan handle of the command buffer to free. */ - void freeCommandBuffer(VkCommandBuffer commandBuffer); + void freeCommandBuffer( + VkCommandBuffer commandBuffer); /** * @brief Reset all allocated command buffers into the @a Initial state. diff --git a/source_common/trackers/device.cpp b/source_common/trackers/device.cpp index a1f0687..af5fee7 100644 --- a/source_common/trackers/device.cpp +++ b/source_common/trackers/device.cpp @@ -23,12 +23,6 @@ * ---------------------------------------------------------------------------- */ -#include -#include -#include -#include -#include - #include "trackers/device.hpp" #include "utils/misc.hpp" @@ -70,12 +64,10 @@ void Device::allocateCommandBuffer( auto& buffer = pool.allocateCommandBuffer(commandBuffer); // Insert into the tracker lookup map - [[maybe_unused]] auto result = commandBuffers.insert({ + commandBuffers.insert({ commandBuffer, buffer }); - - assert(result.second); } /* See header for documentation. */ diff --git a/source_common/trackers/layer_command_stream.cpp b/source_common/trackers/layer_command_stream.cpp index 90c8c72..e3df7d8 100644 --- a/source_common/trackers/layer_command_stream.cpp +++ b/source_common/trackers/layer_command_stream.cpp @@ -24,8 +24,7 @@ */ #include - -#include "nlohmann/json.hpp" +#include #include "trackers/layer_command_stream.hpp" @@ -37,7 +36,8 @@ namespace Tracker std::atomic LCSWorkload::nextTagID { 1 }; LCSWorkload::LCSWorkload( - uint64_t _tagID): + uint64_t _tagID +): tagID(_tagID) { @@ -45,7 +45,8 @@ LCSWorkload::LCSWorkload( /* See header for details. 
*/ LCSMarker::LCSMarker( - const std::string& _label) : + const std::string& _label +) : LCSWorkload(0), label(_label) { @@ -59,14 +60,15 @@ LCSRenderPass::LCSRenderPass( uint32_t _width, uint32_t _height, bool _suspending, - bool _oneTimeSubmit) : + bool _oneTimeSubmit +) : LCSWorkload(_tagID), width(_width), height(_height), suspending(_suspending), oneTimeSubmit(_oneTimeSubmit) { - // Copy these as the renderpass object may be transient. + // Copy these as the render pass object may be transient. subpassCount = renderPass.getSubpassCount(); attachments = renderPass.getAttachments(); } @@ -188,7 +190,8 @@ LCSDispatch::LCSDispatch( uint64_t _tagID, int64_t _xGroups, int64_t _yGroups, - int64_t _zGroups) : + int64_t _zGroups +) : LCSWorkload(_tagID), xGroups(_xGroups), yGroups(_yGroups), @@ -227,7 +230,8 @@ LCSTraceRays::LCSTraceRays( uint64_t _tagID, int64_t _xItems, int64_t _yItems, - int64_t _zItems) : + int64_t _zItems +) : LCSWorkload(_tagID), xItems(_xItems), yItems(_yItems), @@ -265,7 +269,8 @@ std::string LCSTraceRays::getMetadata( LCSImageTransfer::LCSImageTransfer( uint64_t _tagID, const std::string& _transferType, - int64_t _pixelCount): + int64_t _pixelCount +): LCSWorkload(_tagID), transferType(_transferType), pixelCount(_pixelCount) @@ -301,7 +306,8 @@ std::string LCSImageTransfer::getMetadata( LCSBufferTransfer::LCSBufferTransfer( uint64_t _tagID, const std::string& _transferType, - int64_t _byteCount): + int64_t _byteCount +): LCSWorkload(_tagID), transferType(_transferType), byteCount(_byteCount) diff --git a/source_common/trackers/queue.hpp b/source_common/trackers/queue.hpp index ff62b87..b57a07b 100644 --- a/source_common/trackers/queue.hpp +++ b/source_common/trackers/queue.hpp @@ -64,6 +64,9 @@ class Queue /** * @brief Execute a layer command stream. + * + * @param stream The layer command stream to execute. + * @param callback The callback to pass submitted workloads to. 
*/ void runSubmitCommandStream( const std::vector& stream, @@ -91,7 +94,7 @@ class Queue std::vector debugStack; /** - * @brief The last non-zero renderpass tagID submitted. + * @brief The last non-zero render pass tagID submitted. */ uint64_t lastRenderPassTagID { 0 }; @@ -99,8 +102,6 @@ class Queue * @brief The command buffer submitID allocator. */ static std::atomic nextSubmitID; - - }; } diff --git a/source_common/trackers/render_pass.cpp b/source_common/trackers/render_pass.cpp index 181cf53..8a6da5e 100644 --- a/source_common/trackers/render_pass.cpp +++ b/source_common/trackers/render_pass.cpp @@ -35,7 +35,8 @@ RenderPassAttachment::RenderPassAttachment( RenderPassAttachName _name, VkAttachmentLoadOp _loadOp, VkAttachmentStoreOp _storeOp, - bool _resolve) : + bool _resolve +) : name(_name), loadOp(_loadOp), storeOp(_storeOp), @@ -44,10 +45,43 @@ RenderPassAttachment::RenderPassAttachment( } +/* See header for details. */ +std::string RenderPassAttachment::getAttachmentStr() const +{ + switch(name) + { + case RenderPassAttachName::COLOR0: + return "C0"; + case RenderPassAttachName::COLOR1: + return "C1"; + case RenderPassAttachName::COLOR2: + return "C2"; + case RenderPassAttachName::COLOR3: + return "C3"; + case RenderPassAttachName::COLOR4: + return "C4"; + case RenderPassAttachName::COLOR5: + return "C5"; + case RenderPassAttachName::COLOR6: + return "C6"; + case RenderPassAttachName::COLOR7: + return "C7"; + case RenderPassAttachName::DEPTH: + return "D"; + case RenderPassAttachName::STENCIL: + return "S"; + default: + assert(false); + } + + return "U"; +} + /* See header for details. */ RenderPass::RenderPass( VkRenderPass _handle, - const VkRenderPassCreateInfo& createInfo) : + const VkRenderPassCreateInfo& createInfo +) : handle(_handle) { subpassCount = createInfo.subpassCount; @@ -155,7 +189,8 @@ RenderPass::RenderPass( /* See header for details. 
*/ RenderPass::RenderPass( VkRenderPass _handle, - const VkRenderPassCreateInfo2& createInfo) : + const VkRenderPassCreateInfo2& createInfo +) : handle(_handle) { subpassCount = createInfo.subpassCount; @@ -262,7 +297,8 @@ RenderPass::RenderPass( /* See header for details. */ RenderPass::RenderPass( - const VkRenderingInfo& createInfo) : + const VkRenderingInfo& createInfo +) : handle(VK_NULL_HANDLE) { // No subpasses in dynamic rendering diff --git a/source_common/trackers/render_pass.hpp b/source_common/trackers/render_pass.hpp index fc5044e..92d964d 100644 --- a/source_common/trackers/render_pass.hpp +++ b/source_common/trackers/render_pass.hpp @@ -61,58 +61,61 @@ enum class RenderPassAttachName }; /** - * @brief The state tracker for a render pass. + * @brief The state tracker for a single render pass attachment. */ class RenderPassAttachment { public: + /** + * @brief Construct a new render pass attachment tracker. + * + * @param name The name of the attachment point. + * @param loadOp The render pass loadOp for this attachment. + * @param storeOp The render pass storeOp for this attachment. + * @param resolve Is this a resolve attachment or the main attachment? 
+ */ RenderPassAttachment( RenderPassAttachName name, VkAttachmentLoadOp loadOp, VkAttachmentStoreOp storeOp, bool resolve); - std::string getAttachmentStr() const - { - switch(name) - { - case RenderPassAttachName::COLOR0: - return "C0"; - case RenderPassAttachName::COLOR1: - return "C1"; - case RenderPassAttachName::COLOR2: - return "C2"; - case RenderPassAttachName::COLOR3: - return "C3"; - case RenderPassAttachName::COLOR4: - return "C4"; - case RenderPassAttachName::COLOR5: - return "C5"; - case RenderPassAttachName::COLOR6: - return "C6"; - case RenderPassAttachName::COLOR7: - return "C7"; - case RenderPassAttachName::DEPTH: - return "D"; - case RenderPassAttachName::STENCIL: - return "S"; - default: - assert(false); - } - - return "U"; - } + /** + * @brief Get a string form of the attachment point name. + * + * @return The attachment point name. + */ + std::string getAttachmentStr() const; + /** + * @brief Is this attachment loaded at the start of the render pass? + * + * @return @c true if loaded from memory. + */ bool isLoaded() const { return loadOp == VK_ATTACHMENT_LOAD_OP_LOAD; } + /** + * @brief Is this attachment stored at the end of the render pass? + * + * @return @c true if stored to memory. + */ bool isStored() const { - return storeOp == VK_ATTACHMENT_STORE_OP_STORE; + return storeOp == VK_ATTACHMENT_STORE_OP_STORE; } + + /** + * @brief Is this attachment a resolve attachment? + * + * Note that in dynamic rendering resolve attachments are implicitly not + * loaded and stored. There are no explicit settings for this. + * + * @return @c true if this is a resolve attachment. + */ bool isResolved() const { return resolve; @@ -146,22 +149,48 @@ class RenderPassAttachment class RenderPass { public: + /** + * @brief Construct a new render pass from Vulkan 1.0-style render passes. + * + * @param handle The driver handle of the render pass. + * @param createInfo The API context creating the render pass. 
+ */ RenderPass( VkRenderPass handle, const VkRenderPassCreateInfo& createInfo); + /** + * @brief Construct a new render pass from Vulkan 1.2-style render passes. + * + * @param handle The driver handle of the render pass. + * @param createInfo The API context creating the render pass. + */ RenderPass( VkRenderPass handle, const VkRenderPassCreateInfo2& createInfo); + /** + * @brief Construct a new render pass from Vulkan 1.3 dynamic rendering. + * + * @param createInfo The API context starting the render pass. + */ RenderPass( const VkRenderingInfo& createInfo); + /** + * @brief Get the number of subpasses in the render pass. + * + * @return The number of subpasses. Always returns 1 for dynamic render + * passes which no longer use subpasses. + */ uint32_t getSubpassCount() const { return subpassCount; }; + /** + * @brief Get the attachment list for the render pass. + */ const std::vector& getAttachments() const { return attachments; diff --git a/source_common/utils/misc.hpp b/source_common/utils/misc.hpp index e38e0b7..8709e2e 100644 --- a/source_common/utils/misc.hpp +++ b/source_common/utils/misc.hpp @@ -130,8 +130,8 @@ bool isInMap( /** * @brief Append all values in one vector to the back of another. * - * @param src The destination vector to append to. - * @param dst The source vector; must not be src vector. + * @param dst The destination vector to append to; must not be source vector. + * @param src The source vector to append. */ template void vecAppend( @@ -149,7 +149,12 @@ void vecAppend( /** * @brief Get a displayable pointer. * - * On 64-bit Arm systems this strips the MTE tag in the top byte. + * On 64-bit Arm systems this strips the MTE tag in the top byte, which means + * that the pointer cannot be converted back into a usable pointer without + * triggering an MTE tag violation, so the returned value is for cosmetic use + * only. + * + * @param pointer The pointer to display. * * @return The displayable pointer. 
*/ diff --git a/source_common/utils/queue.hpp b/source_common/utils/queue.hpp index 9702bed..4434837 100644 --- a/source_common/utils/queue.hpp +++ b/source_common/utils/queue.hpp @@ -36,7 +36,7 @@ #include /** - * @brief Baseclass for a task. + * @brief Base class for a task. */ class Task { @@ -98,8 +98,9 @@ class TaskQueue * * @param task The new task to append to the queue. */ - void put(T task) - { + void put( + T task + ) { std::lock_guard lock(store_lock); store.push_back(task); condition.notify_one(); From bc8c3c98d7dd987c9c56a086291cdb4ec7588dfa Mon Sep 17 00:00:00 2001 From: Peter Harris Date: Thu, 12 Dec 2024 22:45:12 +0000 Subject: [PATCH 8/8] Update example to generator version --- layer_example/source/device.cpp | 24 +++++++++++------------- layer_example/source/device.hpp | 18 +++++++++++------- layer_example/source/instance.cpp | 14 +++++++++----- layer_example/source/instance.hpp | 13 ++++++------- layer_example/source/version.hpp.in | 4 +--- 5 files changed, 38 insertions(+), 35 deletions(-) diff --git a/layer_example/source/device.cpp b/layer_example/source/device.cpp index 580e339..3371cff 100644 --- a/layer_example/source/device.cpp +++ b/layer_example/source/device.cpp @@ -34,6 +34,9 @@ #include "device.hpp" #include "instance.hpp" +/** + * @brief The dispatch lookup for all of the created Vulkan devices. + */ static std::unordered_map> g_devices; /* See header for documentation. */ @@ -47,8 +50,8 @@ void Device::store( /* See header for documentation. */ Device* Device::retrieve( - VkDevice handle) -{ + VkDevice handle +) { void* key = getDispatchKey(handle); assert(isInMap(key, g_devices)); return g_devices.at(key).get(); @@ -56,8 +59,8 @@ Device* Device::retrieve( /* See header for documentation.
*/ Device* Device::retrieve( - VkQueue handle) -{ + VkQueue handle +) { void* key = getDispatchKey(handle); assert(isInMap(key, g_devices)); return g_devices.at(key).get(); @@ -65,8 +68,8 @@ Device* Device::retrieve( /* See header for documentation. */ Device* Device::retrieve( - VkCommandBuffer handle) -{ + VkCommandBuffer handle +) { void* key = getDispatchKey(handle); assert(isInMap(key, g_devices)); return g_devices.at(key).get(); @@ -85,15 +88,10 @@ Device::Device( VkPhysicalDevice _physicalDevice, VkDevice _device, PFN_vkGetDeviceProcAddr nlayerGetProcAddress -): instance(_instance), +): + instance(_instance), physicalDevice(_physicalDevice), device(_device) { initDriverDeviceDispatchTable(device, nlayerGetProcAddress, driver); } - -/* See header for documentation. */ -Device::~Device() -{ - -} diff --git a/layer_example/source/device.hpp b/layer_example/source/device.hpp index d6ecad7..c0e1f0a 100644 --- a/layer_example/source/device.hpp +++ b/layer_example/source/device.hpp @@ -24,8 +24,7 @@ */ /** - * @file - * Declares the root class for layer management of VkDevice objects. + * @file Declares the root class for layer management of VkDevice objects. * * Role summary * ============ @@ -41,10 +40,9 @@ * Key properties * ============== * - * Unlike EGL contexts, Vulkan devices are designed to be used concurrently by - * multiple application threads. An application can have multiple concurrent - * devices (although this is less common than with OpenGL ES applications), and - * use each device from multiple threads. + * Vulkan devices are designed to be used concurrently by multiple application + * threads. An application can have multiple concurrent devices, and use each + * device from multiple threads. * * Access to the layer driver structures must therefore be kept thread-safe. * For sake of simplicity, we generally implement this by: @@ -80,6 +78,8 @@ class Device * @brief Fetch a device from the global store of dispatchable devices. 
* * @param handle The dispatchable device handle to use as an indirect lookup. + * + * @return The layer device context. */ static Device* retrieve( VkDevice handle); @@ -88,6 +88,8 @@ class Device * @brief Fetch a device from the global store of dispatchable devices. * * @param handle The dispatchable queue handle to use as an indirect lookup. + * + * @return The layer device context. */ static Device* retrieve( VkQueue handle); @@ -96,6 +98,8 @@ class Device * @brief Fetch a device from the global store of dispatchable devices. * * @param handle The dispatchable command buffer handle to use as an indirect lookup. + * + * @return The layer device context. */ static Device* retrieve( VkCommandBuffer handle); @@ -125,7 +129,7 @@ class Device /** * @brief Destroy this layer device object. */ - ~Device(); + ~Device() = default; public: /** diff --git a/layer_example/source/instance.cpp b/layer_example/source/instance.cpp index 6ac278e..0b62857 100644 --- a/layer_example/source/instance.cpp +++ b/layer_example/source/instance.cpp @@ -29,6 +29,9 @@ #include "instance.hpp" +/** + * @brief The dispatch lookup for all of the created Vulkan instances. + */ static std::unordered_map> g_instances; /* See header for documentation. */ @@ -42,8 +45,8 @@ void Instance::store( /* See header for documentation. */ Instance* Instance::retrieve( - VkInstance handle) -{ + VkInstance handle +) { void* key = getDispatchKey(handle); assert(isInMap(key, g_instances)); return g_instances.at(key).get(); @@ -51,8 +54,8 @@ Instance* Instance::retrieve( /* See header for documentation. */ Instance* Instance::retrieve( - VkPhysicalDevice handle) -{ + VkPhysicalDevice handle +) { void* key = getDispatchKey(handle); assert(isInMap(key, g_instances)); return g_instances.at(key).get(); @@ -68,7 +71,8 @@ void Instance::destroy( /* See header for documentation. 
*/ Instance::Instance( VkInstance _instance, - PFN_vkGetInstanceProcAddr _nlayerGetProcAddress) : + PFN_vkGetInstanceProcAddr _nlayerGetProcAddress +) : instance(_instance), nlayerGetProcAddress(_nlayerGetProcAddress) { diff --git a/layer_example/source/instance.hpp b/layer_example/source/instance.hpp index cfda54e..fc6af6b 100644 --- a/layer_example/source/instance.hpp +++ b/layer_example/source/instance.hpp @@ -42,9 +42,8 @@ * Key properties * ============== * - * Unlike EGL contexts, Vulkan instances are designed to be used concurrently - * by multiple application threads. An application can have multiple concurrent - * instances (although this is less common than with OpenGL ES applications), + * Vulkan instances are designed to be used concurrently by multiple + * application threads. An application can have multiple concurrent instances, * and use each instance from multiple threads. * * Access to the layer driver structures must therefore be kept thread-safe. @@ -65,10 +64,6 @@ /** * @brief This class implements the layer state tracker for a single instance. - * - * These objects are relatively light-weight, as they are rarely used once a VkDevice has been - * created, but we need to track the chain-of-ownership as the instance is the root object that - * the application creates when initializing a rendering context. */ class Instance { @@ -87,6 +82,8 @@ class Instance * @brief Fetch an instance from the global store of dispatchable instances. * * @param handle The dispatchable instance handle to use as an indirect lookup. + * + * @return The layer instance context. */ static Instance* retrieve( VkInstance handle); @@ -95,6 +92,8 @@ class Instance * @brief Fetch an instance from the global store of dispatchable instances. * * @param handle The dispatchable physical device handle to use as an indirect lookup. + * + * @return The layer instance context. 
*/ static Instance* retrieve( VkPhysicalDevice handle); diff --git a/layer_example/source/version.hpp.in b/layer_example/source/version.hpp.in index 50c30b9..5fcb9c3 100644 --- a/layer_example/source/version.hpp.in +++ b/layer_example/source/version.hpp.in @@ -24,9 +24,7 @@ */ /** - * @file - * This header implements placeholder templates that are populated by CMake - * during configure. + * @file Placeholder templates that are populated by CMake during configure. */ #pragma once