From 6eb6fb63054adf9497b27895909307d6711e43be Mon Sep 17 00:00:00 2001 From: Peter Harris Date: Fri, 19 Dec 2025 11:47:58 +0000 Subject: [PATCH 1/3] Remove isEmulatingExtFrameBoundary --- layer_gpu_timeline/source/device.hpp | 8 ----- .../source/layer_device_functions_queue.cpp | 18 ++++------ .../source/layer_instance_functions.cpp | 33 ------------------- .../source/layer_instance_functions.hpp | 8 ----- 4 files changed, 6 insertions(+), 61 deletions(-) diff --git a/layer_gpu_timeline/source/device.hpp b/layer_gpu_timeline/source/device.hpp index e3bb6b8..937b007 100644 --- a/layer_gpu_timeline/source/device.hpp +++ b/layer_gpu_timeline/source/device.hpp @@ -185,14 +185,6 @@ class Device */ static const std::vector createInfoPatches; - /** - * @brief Is this layer emulating VK_EXT_frame_boundary? - * - * Set to @c true if layer is emulating on top of a driver that doesn't - * support it, @c false if layer knows driver supports it. - */ - bool isEmulatingExtFrameBoundary { false }; - private: /** * @brief State tracker for this device. diff --git a/layer_gpu_timeline/source/layer_device_functions_queue.cpp b/layer_gpu_timeline/source/layer_device_functions_queue.cpp index a4ba1a0..4159ae8 100644 --- a/layer_gpu_timeline/source/layer_device_functions_queue.cpp +++ b/layer_gpu_timeline/source/layer_device_functions_queue.cpp @@ -107,7 +107,7 @@ static void emitCommandBufferMetadata(Device& layer, * @param workloadVisitor Visitor for the protobuf encoder. */ static void checkManualFrameBoundary( - Device* layer, + Device& layer, VkQueue queue, const void* pNext, bool isLastSubmit, @@ -118,10 +118,10 @@ static void checkManualFrameBoundary( if (ext && (ext->flags & VK_FRAME_BOUNDARY_FRAME_END_BIT_EXT)) { // Emulate a queue present to indicate end of frame - auto& tracker = layer->getStateTracker(); + auto& tracker = layer.getStateTracker(); tracker.queuePresent(); - TimelineProtobufEncoder::emitFrame(*layer, tracker.totalStats.getFrameCount(), getClockMonotonicRaw()); + TimelineProtobufEncoder::emitFrame(layer, tracker.totalStats.getFrameCount(), getClockMonotonicRaw()); // Emulate a new queue submit if work remains to submit if (!isLastSubmit) @@ -148,12 +148,6 @@ VKAPI_ATTR VkResult VKAPI_CALL layer_vkQueuePresentKHR(VkQueue queue, vku::safe_VkPresentInfoKHR safePresentInfo(pPresentInfo); auto* newPresentInfo = reinterpret_cast(&safePresentInfo); - // Remove emulated frame boundaries - if (layer->isEmulatingExtFrameBoundary) - { - vku::RemoveFromPnext(safePresentInfo, VK_STRUCTURE_TYPE_FRAME_BOUNDARY_EXT); - } - // Note that we assume QueuePresent is _always_ the end of a frame. // This is run with the lock held to ensure that all queue submit messages // are sent sequentially to the host tool @@ -194,7 +188,7 @@ VKAPI_ATTR VkResult VKAPI_CALL // Check for end of frame boundary bool isLast = i == submitCount - 1; - checkManualFrameBoundary(layer, queue, submit.pNext, isLast, workloadVisitor); + checkManualFrameBoundary(*layer, queue, submit.pNext, isLast, workloadVisitor); } // Release the lock to call into the driver @@ -232,7 +226,7 @@ VKAPI_ATTR VkResult VKAPI_CALL // Check for end of frame boundary bool isLast = i == submitCount - 1; - checkManualFrameBoundary(layer, queue, submit.pNext, isLast, workloadVisitor); + checkManualFrameBoundary(*layer, queue, submit.pNext, isLast, workloadVisitor); } // Release the lock to call into the driver @@ -270,7 +264,7 @@ VKAPI_ATTR VkResult VKAPI_CALL // Check for end of frame boundary bool isLast = i == submitCount - 1; - checkManualFrameBoundary(layer, queue, submit.pNext, isLast, workloadVisitor); + checkManualFrameBoundary(*layer, queue, submit.pNext, isLast, workloadVisitor); } // Release the lock to call into the driver diff --git a/layer_gpu_timeline/source/layer_instance_functions.cpp b/layer_gpu_timeline/source/layer_instance_functions.cpp index 0d36dfd..f152da2 100644 --- a/layer_gpu_timeline/source/layer_instance_functions.cpp +++ b/layer_gpu_timeline/source/layer_instance_functions.cpp @@ -31,39 +31,6 @@ extern std::mutex g_vulkanLock; -/* See header for documentation. */ -template <> -VKAPI_ATTR VkResult VKAPI_CALL layer_vkCreateDevice( - VkPhysicalDevice physicalDevice, - const VkDeviceCreateInfo* pCreateInfo, - const VkAllocationCallbacks* pAllocator, - VkDevice* pDevice -) { - LAYER_TRACE(__func__); - - // Use the default function for the heavy-lifting - auto res = layer_vkCreateDevice(physicalDevice, pCreateInfo, pAllocator, pDevice); - if (res != VK_SUCCESS) - { - return res; - } - - // Cache flags indicating extension emulation - std::unique_lock lock {g_vulkanLock}; - auto* layer = Device::retrieve(*pDevice); - - static const std::string target { VK_EXT_FRAME_BOUNDARY_EXTENSION_NAME }; - for (auto& ext : layer->instance->injectedDeviceExtensions) - { - if (ext.first == target) - { - layer->isEmulatingExtFrameBoundary = true; - } - } - - return res; -} - /* See Vulkan API for documentation. */ template <> VKAPI_ATTR void VKAPI_CALL layer_vkGetPhysicalDeviceFeatures2( diff --git a/layer_gpu_timeline/source/layer_instance_functions.hpp b/layer_gpu_timeline/source/layer_instance_functions.hpp index c2e6f1d..3bab9ec 100644 --- a/layer_gpu_timeline/source/layer_instance_functions.hpp +++ b/layer_gpu_timeline/source/layer_instance_functions.hpp @@ -29,14 +29,6 @@ // Functions for devices -/* See Vulkan API for documentation. */ -template <> -VKAPI_ATTR VkResult VKAPI_CALL layer_vkCreateDevice( - VkPhysicalDevice physicalDevice, - const VkDeviceCreateInfo* pCreateInfo, - const VkAllocationCallbacks* pAllocator, - VkDevice* pDevice); - /* See Vulkan API for documentation. */ template <> VKAPI_ATTR void VKAPI_CALL layer_vkGetPhysicalDeviceFeatures2( From f12377969b3adc2970ceb0fdeeb9e0a6b9534e00 Mon Sep 17 00:00:00 2001 From: Peter Harris Date: Fri, 19 Dec 2025 11:49:15 +0000 Subject: [PATCH 2/3] Profile layer: Add per frame support --- layer_gpu_profile/README_LAYER.md | 33 +- layer_gpu_profile/android_build.sh | 2 +- layer_gpu_profile/layer_config.json | 3 +- layer_gpu_profile/source/CMakeLists.txt | 1 + layer_gpu_profile/source/device_utils.hpp | 3 +- layer_gpu_profile/source/instance.cpp | 4 +- layer_gpu_profile/source/layer_config.cpp | 77 +++-- layer_gpu_profile/source/layer_config.hpp | 53 +++- .../source/layer_device_functions.hpp | 7 + .../source/layer_device_functions_queue.cpp | 285 +++++++++++++++--- .../source/layer_instance_functions.cpp | 80 +++++ .../source/layer_instance_functions.hpp | 42 +++ lglpy/comms/service_gpu_profile.py | 122 ++++++-- 13 files changed, 616 insertions(+), 96 deletions(-) create mode 100644 layer_gpu_profile/source/layer_instance_functions.cpp create mode 100644 layer_gpu_profile/source/layer_instance_functions.hpp diff --git a/layer_gpu_profile/README_LAYER.md b/layer_gpu_profile/README_LAYER.md index 760fe3b..91f1499 100644 --- a/layer_gpu_profile/README_LAYER.md +++ b/layer_gpu_profile/README_LAYER.md @@ -113,19 +113,32 @@ application under test and the capture process. For full instructions see the ## Layer configuration -The current layer supports two `sampling_mode` values: +### Setting frame selection mode -* `periodic_frame`: Sample every N frames. -* `frame_list`: Sample specific frames. +The current layer supports the following ways to select frames to profile using +the `frame_mode` config option: -When `mode` is `periodic_frame` the integer value of the `periodic_frame` key -defines the frame sampling period. The integer value of the -`periodic_min_frame` key defines the first possible frame that could be -profiled, allowing profiles to skip over any loading frames. By default frame 0 -is ignored. +* `disabled`: Sampling is disabled. +* `periodic`: Sample every N frames. +* `list`: Sample specific frames. -When `mode` is `frame_list` the value of the `frame_list` key defines a list -of integers giving the specific frames to capture. +When frame selection mode is `periodic` the integer value of the +`periodic_frame` key defines the frame sampling period. The integer value of +the `periodic_min_frame` key defines the first possible frame that could be +profiled, allowing profiles to skip over any loading frames. By default frame +0 is ignored. + +When frame selection mode is `list` the value of the `frame_list` key defines +a list of integers giving the specific frames to capture. + +### Setting counter sampling mode + +The current layer supports the following ways to select how to sample counters +to profile using the `sample_mode` config option: + +* `disabled`: Sampling is disabled. +* `workload`: Sample every workload in each frame of interest. +* `frame`: Sample at the end of each frame of interest. ## Layer counters diff --git a/layer_gpu_profile/android_build.sh b/layer_gpu_profile/android_build.sh index 51489da..64e47ac 100755 --- a/layer_gpu_profile/android_build.sh +++ b/layer_gpu_profile/android_build.sh @@ -68,7 +68,7 @@ cmake \ -DCMAKE_WARN_DEPRECATED=OFF \ .. -cmake --build . -j4 +cmake --build . popd diff --git a/layer_gpu_profile/layer_config.json b/layer_gpu_profile/layer_config.json index c24a31a..6fa5d47 100644 --- a/layer_gpu_profile/layer_config.json +++ b/layer_gpu_profile/layer_config.json @@ -1,6 +1,7 @@ { "layer": "VK_LAYER_LGL_gpu_profile", - "sample_mode": "periodic_frame", + "frame_mode": "periodic", + "sample_mode": "frame", "periodic_min_frame": 1, "periodic_frame": 600, "frame_list": [] diff --git a/layer_gpu_profile/source/CMakeLists.txt b/layer_gpu_profile/source/CMakeLists.txt index 0b1bea8..51810ed 100644 --- a/layer_gpu_profile/source/CMakeLists.txt +++ b/layer_gpu_profile/source/CMakeLists.txt @@ -55,6 +55,7 @@ add_library( layer_device_functions_render_pass.cpp layer_device_functions_trace_rays.cpp layer_device_functions_transfer.cpp + layer_instance_functions.cpp submit_visitor.cpp) target_include_directories( diff --git a/layer_gpu_profile/source/device_utils.hpp b/layer_gpu_profile/source/device_utils.hpp index f0b3415..530cf50 100644 --- a/layer_gpu_profile/source/device_utils.hpp +++ b/layer_gpu_profile/source/device_utils.hpp @@ -58,7 +58,8 @@ VkCommandBuffer commandBuffer ) { // Don't instrument outside of active frame of interest - if(!layer.isFrameOfInterest) + bool isEnabled = layer.instance->config.isSamplingWorkloads(); + if(!layer.isFrameOfInterest || !isEnabled) { return; } diff --git a/layer_gpu_profile/source/instance.cpp b/layer_gpu_profile/source/instance.cpp index df4f55a..0aaf9b1 100644 --- a/layer_gpu_profile/source/instance.cpp +++ b/layer_gpu_profile/source/instance.cpp @@ -46,7 +46,9 @@ const std::vector Instance::requiredDriverExtensions { const std::vector> Instance::injectedInstanceExtensions {}; /* See header for documentation. */ -std::vector> Instance::injectedDeviceExtensions {}; +std::vector> Instance::injectedDeviceExtensions { + {VK_EXT_FRAME_BOUNDARY_EXTENSION_NAME, VK_EXT_FRAME_BOUNDARY_SPEC_VERSION} +}; /* See header for documentation. */ void Instance::store(VkInstance handle, std::unique_ptr& instance) diff --git a/layer_gpu_profile/source/layer_config.cpp b/layer_gpu_profile/source/layer_config.cpp index 1154401..ee4b365 100644 --- a/layer_gpu_profile/source/layer_config.cpp +++ b/layer_gpu_profile/source/layer_config.cpp @@ -43,45 +43,70 @@ /* See header for documentation. */ void LayerConfig::parseSamplingOptions(const json& config) { - // Decode top level options - std::string rawMode = config.at("sample_mode"); + // Decode frame selection mode + std::string rawFrameMode = config.at("frame_mode"); - if (rawMode == "disabled") + if (rawFrameMode == "disabled") { - mode = MODE_DISABLED; + frameMode = FRAME_SELECTION_DISABLED; } - else if (rawMode == "periodic_frame") + else if (rawFrameMode == "periodic") { - mode = MODE_PERIODIC_FRAME; + frameMode = FRAME_SELECTION_PERIODIC; periodicFrame = config.at("periodic_frame"); periodicMinFrame = config.at("periodic_min_frame"); } - else if (rawMode == "frame_list") + else if (rawFrameMode == "list") { - mode = MODE_FRAME_LIST; + frameMode = FRAME_SELECTION_LIST; specificFrames = config.at("frame_list").get>(); } else { - LAYER_ERR("Unknown counter sample_mode: %s", rawMode.c_str()); - rawMode = "disabled"; + LAYER_ERR("Unknown frame_mode: %s", rawFrameMode.c_str()); + frameMode = FRAME_SELECTION_DISABLED; + rawFrameMode = "disabled"; + } + + // Decode counter sampling mode + std::string rawSampleMode = config.at("sample_mode"); + + if (rawSampleMode == "disabled") + { + samplingMode = COUNTER_SAMPLING_DISABLED; + } + else if (rawSampleMode == "frame") + { + samplingMode = COUNTER_SAMPLING_FRAMES; + } + else if (rawSampleMode == "workload") + { + samplingMode = COUNTER_SAMPLING_WORKLOADS; + } + else + { + LAYER_ERR("Unknown sample_mode: %s", rawSampleMode.c_str()); + samplingMode = COUNTER_SAMPLING_DISABLED; + rawSampleMode = "disabled"; } LAYER_LOG("Layer sampling configuration"); LAYER_LOG("============================"); - LAYER_LOG(" - Sample mode: %s", rawMode.c_str()); + LAYER_LOG(" - Frame selection mode: %s", rawFrameMode.c_str()); - if (mode == MODE_PERIODIC_FRAME) + if (frameMode == FRAME_SELECTION_PERIODIC) { LAYER_LOG(" - Frame period: %" PRIu64, periodicFrame); LAYER_LOG(" - Minimum frame: %" PRIu64, periodicMinFrame); } - else if (mode == MODE_FRAME_LIST) + else if (frameMode == FRAME_SELECTION_LIST) { std::stringstream result; std::copy(specificFrames.begin(), specificFrames.end(), std::ostream_iterator(result, " ")); LAYER_LOG(" - Frames: %s", result.str().c_str()); } + + LAYER_LOG(" - Counter sampling mode: %s", rawSampleMode.c_str()); } /* See header for documentation. */ @@ -131,14 +156,14 @@ LayerConfig::LayerConfig() bool LayerConfig::isFrameOfInterest( uint64_t frameID ) const { - switch(mode) + switch(frameMode) { - case MODE_DISABLED: + case FRAME_SELECTION_DISABLED: return false; - case MODE_PERIODIC_FRAME: + case FRAME_SELECTION_PERIODIC: return (frameID >= periodicMinFrame) && ((frameID % periodicFrame) == 0); - case MODE_FRAME_LIST: + case FRAME_SELECTION_LIST: return isIn(frameID, specificFrames); } @@ -146,3 +171,21 @@ bool LayerConfig::isFrameOfInterest( return false; } +/* See header for documentation. */ +bool LayerConfig::isSamplingWorkloads() const { + return frameMode != FRAME_SELECTION_DISABLED && + samplingMode == COUNTER_SAMPLING_WORKLOADS; +} + +/* See header for documentation. */ +bool LayerConfig::isSamplingFrames() const { + return frameMode != FRAME_SELECTION_DISABLED && + samplingMode == COUNTER_SAMPLING_FRAMES; +} + +/* See header for documentation. */ +bool LayerConfig::isSamplingAny() const { + return frameMode != FRAME_SELECTION_DISABLED && + samplingMode != COUNTER_SAMPLING_DISABLED; +} + diff --git a/layer_gpu_profile/source/layer_config.hpp b/layer_gpu_profile/source/layer_config.hpp index 2c54cb3..17d2492 100644 --- a/layer_gpu_profile/source/layer_config.hpp +++ b/layer_gpu_profile/source/layer_config.hpp @@ -54,19 +54,50 @@ class LayerConfig * * @param frameID The index of the next frame. * - * @return True if profiling should be enabled, False otherwise. + * @return @c true if profiling should be enabled, @c false otherwise. */ bool isFrameOfInterest(uint64_t frameID) const; + /** + * @brief Test if we are sampling workloads. + * + * @return @c true if profiling workloads, @c false otherwise. + */ + bool isSamplingWorkloads() const; + + /** + * @brief Test if we are sampling frames. + * + * @return @c true if profiling frames, @c false otherwise. + */ + bool isSamplingFrames() const; + + /** + * @brief Test if any kind of sampling is active. + * + * @return @c true if profiling, @c false otherwise. + */ + bool isSamplingAny() const; + private: /** - * @brief Supported sampling modes. + * @brief Supported frame selection modes. + */ + enum FrameSelectionMode + { + FRAME_SELECTION_DISABLED, + FRAME_SELECTION_LIST, + FRAME_SELECTION_PERIODIC + }; + + /** + * @brief Supported counter sampling modes. */ - enum SamplingMode + enum CounterSamplingMode { - MODE_DISABLED, - MODE_FRAME_LIST, - MODE_PERIODIC_FRAME + COUNTER_SAMPLING_DISABLED, + COUNTER_SAMPLING_WORKLOADS, + COUNTER_SAMPLING_FRAMES }; /** @@ -79,9 +110,15 @@ class LayerConfig void parseSamplingOptions(const json& config); /** - * @brief The sampling mode. + * @brief The frame selection mode. */ - SamplingMode mode {MODE_DISABLED}; + FrameSelectionMode frameMode {FRAME_SELECTION_DISABLED}; + + /** + * @brief The counter sampling mode. + */ + CounterSamplingMode samplingMode {COUNTER_SAMPLING_DISABLED}; + /** * @brief The sampling period in frames, or 0 if disabled. diff --git a/layer_gpu_profile/source/layer_device_functions.hpp b/layer_gpu_profile/source/layer_device_functions.hpp index e0e6b10..d0eb6a3 100644 --- a/layer_gpu_profile/source/layer_device_functions.hpp +++ b/layer_gpu_profile/source/layer_device_functions.hpp @@ -419,3 +419,10 @@ VKAPI_ATTR VkResult VKAPI_CALL layer_vkQueueSubmit2KHR(VkQueue queue, uint32_t submitCount, const VkSubmitInfo2* pSubmits, VkFence fence); + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR VkResult VKAPI_CALL layer_vkQueueBindSparse(VkQueue queue, + uint32_t bindInfoCount, + const VkBindSparseInfo* pBindInfo, + VkFence fence); diff --git a/layer_gpu_profile/source/layer_device_functions_queue.cpp b/layer_gpu_profile/source/layer_device_functions_queue.cpp index 57c722d..fd7ba69 100644 --- a/layer_gpu_profile/source/layer_device_functions_queue.cpp +++ b/layer_gpu_profile/source/layer_device_functions_queue.cpp @@ -31,9 +31,9 @@ #include "trackers/queue.hpp" #include - #include #include +#include using json = nlohmann::json; @@ -50,6 +50,12 @@ static void processLayerCommandStream(Device& layer, VkQueue queue, VkCommandBuffer commandBuffer) { + // Skip doing this if we are not doing per-workload profiling + if (!layer.instance->config.isSamplingWorkloads()) + { + return; + } + // Fetch layer proxies for this workload auto& tracker = layer.getStateTracker(); auto& trackQueue = tracker.getQueue(queue); @@ -62,66 +68,174 @@ static void processLayerCommandStream(Device& layer, trackQueue.runSubmitCommandStream(cbLCS, workloadVisitor); } -/* See Vulkan API for documentation. */ -template<> -VKAPI_ATTR VkResult VKAPI_CALL layer_vkQueuePresentKHR(VkQueue queue, const VkPresentInfoKHR* pPresentInfo) -{ - LAYER_TRACE(__func__); - - // Hold the lock to access layer-wide global store - std::unique_lock lock {g_vulkanLock}; - auto* layer = Device::retrieve(queue); - - auto& tracker = layer->getStateTracker(); +/** + * @brief Process frame boundaries + * + * @param layer The layer context. + */ +static void processFrameBoundaryPreSubmit( + Device& layer +) { + const auto& config = layer.instance->config; + auto& tracker = layer.getStateTracker(); tracker.queuePresent(); // End the previous frame if it was "of interest" - if (layer->isFrameOfInterest) + if (layer.isFrameOfInterest && config.isSamplingWorkloads()) { json endFrameMessage { { "type", "end_frame" } }; - layer->txMessage(endFrameMessage.dump()); + layer.txMessage(endFrameMessage.dump()); } uint64_t frameID = tracker.totalStats.getFrameCount(); - layer->isFrameOfInterest = layer->instance->config.isFrameOfInterest(frameID); + layer.isFrameOfInterest = layer.instance->config.isFrameOfInterest(frameID); // Start the next frame if it is "of interest" - if (layer->isFrameOfInterest) + if (layer.isFrameOfInterest && config.isSamplingWorkloads()) { json startFrameMessage { { "type", "start_frame" }, { "frame", frameID }, }; - layer->txMessage(startFrameMessage.dump()); + layer.txMessage(startFrameMessage.dump()); } +} - // If a "normal" frame then release the lock before calling in to the - // driver, otherwise keep the lock to stop other threads using Vulkan - // while we sync and reset the counter stream - if (!layer->isFrameOfInterest) +/** + * @brief Check a pNext chain for a manual frame boundary marker. + * + * Emits the necessary metadata to emulate a vkQueuePresent. + * + * @param layer The layer context. + * @param pNext The submit pNext pointer. + * + * @returns @c true if end of frame detected, @c false otherwise. + */ +static bool processManualFrameBoundaryPreSubmit( + Device& layer, + const void* pNext +) { + // Check for end of frame boundary + auto* ext = vku::FindStructInPNextChain(pNext); + if (ext && (ext->flags & VK_FRAME_BOUNDARY_FRAME_END_BIT_EXT)) { - lock.unlock(); + processFrameBoundaryPreSubmit(layer); + return true; } - auto ret = layer->driver.vkQueuePresentKHR(queue, pPresentInfo); + return false; +} +/** + * @brief Process frame boundaries + * + * @param layer The layer context. + * @param frameSample @c true to emit frame sample, @c false just to reset. + */ +static void processFrameBoundaryPostSubmit( + Device& layer, + bool frameSample +) { // If we are measuring performance ensure the previous frame has finished // and then take an initial sample to reset the counters - if (layer->isFrameOfInterest) + layer.driver.vkDeviceWaitIdle(layer.device); + workaroundDelay(); + auto ec = layer.lgcSampler->sample_now(); + if (ec) { - layer->driver.vkDeviceWaitIdle(layer->device); - workaroundDelay(); - auto ec = layer->lgcSampler->sample_now(); + LAYER_ERR("Failed to make libGPUCounters GPU counter sample"); + } + + // No sample data needed - just use it to reset the counters + if (!frameSample) + { + return; + } + + // Otherwise emit a frame sample data packet + auto& tracker = layer.getStateTracker(); + + // Frame count has already been incremented for next frame so decrement + // for reporting purposes + uint64_t frameID = tracker.totalStats.getFrameCount() - 1; + + json message { + { "type", "frame" }, + { "frame", frameID }, + { "counters", json::array() } + }; + + for (const auto& pair : layer.lgcActiveCounters) + { + hwcpipe::counter_sample sample; + ec = layer.lgcSampler->get_counter_value(pair.first, sample); if (ec) { - LAYER_ERR("Failed to make libGPUCounters GPU counter sample"); + LAYER_ERR("Failed to get libGPUCounters GPU counter value"); + continue; + } + + if (sample.type == hwcpipe::counter_sample::type::uint64) + { + json counter { + { pair.second, sample.value.uint64 }, + }; + + message["counters"].push_back(counter); + + } + else + { + json counter { + { pair.second, sample.value.float64 }, + }; + + message["counters"].push_back(counter); + } } + layer.txMessage(message.dump()); +} + +/* See Vulkan API for documentation. */ +template<> +VKAPI_ATTR VkResult VKAPI_CALL layer_vkQueuePresentKHR(VkQueue queue, const VkPresentInfoKHR* pPresentInfo) +{ + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock {g_vulkanLock}; + auto* layer = Device::retrieve(queue); + const auto& config = layer->instance->config; + + bool sampleLastFrame = layer->isFrameOfInterest && config.isSamplingFrames(); + + // Process frame boundary pre-flight handling for metadata + processFrameBoundaryPreSubmit(*layer); + + bool resetThisFrame = layer->isFrameOfInterest && config.isSamplingAny(); + + // If a "normal" frame then release the lock before calling in to the + // driver, otherwise keep the lock to stop other threads using Vulkan + // while we sync and reset the counter stream + if (!sampleLastFrame && !resetThisFrame) + { + lock.unlock(); + } + + auto ret = layer->driver.vkQueuePresentKHR(queue, pPresentInfo); + + // Process frame boundary post-flight isolation of frames + if (sampleLastFrame || resetThisFrame) + { + processFrameBoundaryPostSubmit(*layer, sampleLastFrame); + } + return ret; } @@ -135,11 +249,21 @@ VKAPI_ATTR VkResult VKAPI_CALL // Hold the lock to access layer-wide global store std::unique_lock lock {g_vulkanLock}; auto* layer = Device::retrieve(queue); + const auto& config = layer->instance->config; + + bool sampleLastFrame = layer->isFrameOfInterest && config.isSamplingFrames(); + + // TODO: Handling here does not split submits if a submit before the last + // indicates that it's the end of a frame, but I've never seen it done + bool isFrameEnd = processManualFrameBoundaryPreSubmit(*layer, pSubmits->pNext); + + sampleLastFrame = isFrameEnd && sampleLastFrame; + bool resetThisFrame = isFrameEnd && layer->isFrameOfInterest && config.isSamplingAny(); // If a "normal" frame then release the lock before calling in to the // driver, otherwise keep the lock to stop other threads using Vulkan // while we sync and reset the counter stream - if (!layer->isFrameOfInterest) + if (!sampleLastFrame && !resetThisFrame) { lock.unlock(); } @@ -152,7 +276,7 @@ VKAPI_ATTR VkResult VKAPI_CALL // If we are measuring performance then run the layer command stream with // the lock held to stop other submits perturbing the counter data - if (layer->isFrameOfInterest) + if (layer->isFrameOfInterest && config.isSamplingWorkloads()) { for (uint32_t i = 0; i < submitCount; i++) { @@ -165,6 +289,11 @@ VKAPI_ATTR VkResult VKAPI_CALL } } + if (sampleLastFrame || resetThisFrame) + { + processFrameBoundaryPostSubmit(*layer, sampleLastFrame); + } + return res; } @@ -178,11 +307,21 @@ VKAPI_ATTR VkResult VKAPI_CALL // Hold the lock to access layer-wide global store std::unique_lock lock {g_vulkanLock}; auto* layer = Device::retrieve(queue); + const auto& config = layer->instance->config; + + bool sampleLastFrame = layer->isFrameOfInterest && config.isSamplingFrames(); + + // TODO: Handling here does not split submits if a submit before the last + // indicates that it's the end of a frame, but I've never seen it done + bool isFrameEnd = processManualFrameBoundaryPreSubmit(*layer, pSubmits->pNext); + + sampleLastFrame = isFrameEnd && sampleLastFrame; + bool resetThisFrame = isFrameEnd && layer->isFrameOfInterest && config.isSamplingAny(); // If a "normal" frame then release the lock before calling in to the // driver, otherwise keep the lock to stop other threads using Vulkan // while we sync and reset the counter stream - if (!layer->isFrameOfInterest) + if (!sampleLastFrame && !resetThisFrame) { lock.unlock(); } @@ -195,7 +334,7 @@ VKAPI_ATTR VkResult VKAPI_CALL // If we are measuring performance then run the layer command stream with // the lock held to stop other submits perturbing the counter data - if (layer->isFrameOfInterest) + if (layer->isFrameOfInterest && config.isSamplingWorkloads()) { for (uint32_t i = 0; i < submitCount; i++) { @@ -208,6 +347,11 @@ VKAPI_ATTR VkResult VKAPI_CALL } } + if (sampleLastFrame || resetThisFrame) + { + processFrameBoundaryPostSubmit(*layer, sampleLastFrame); + } + return res; } @@ -221,24 +365,34 @@ VKAPI_ATTR VkResult VKAPI_CALL // Hold the lock to access layer-wide global store std::unique_lock lock {g_vulkanLock}; auto* layer = Device::retrieve(queue); + const auto& config = layer->instance->config; + + bool sampleLastFrame = layer->isFrameOfInterest && config.isSamplingFrames(); + + // TODO: Handling here does not split submits if a submit before the last + // indicates that it's the end of a frame, but I've never seen it done + bool isFrameEnd = processManualFrameBoundaryPreSubmit(*layer, pSubmits->pNext); + + sampleLastFrame = isFrameEnd && sampleLastFrame; + bool resetThisFrame = isFrameEnd && layer->isFrameOfInterest && config.isSamplingAny(); // If a "normal" frame then release the lock before calling in to the // driver, otherwise keep the lock to stop other threads using Vulkan // while we sync and reset the counter stream - if (!layer->isFrameOfInterest) + if (!sampleLastFrame && !resetThisFrame) { lock.unlock(); } auto res = layer->driver.vkQueueSubmit2KHR(queue, submitCount, pSubmits, fence); - if (res != VK_SUCCESS || !layer->isFrameOfInterest) + if (res != VK_SUCCESS) { return res; } // If we are measuring performance then run the layer command stream with // the lock held to stop other submits perturbing the counter data - if (layer->isFrameOfInterest) + if (layer->isFrameOfInterest && config.isSamplingWorkloads()) { for (uint32_t i = 0; i < submitCount; i++) { @@ -251,5 +405,66 @@ VKAPI_ATTR VkResult VKAPI_CALL } } + if (sampleLastFrame || resetThisFrame) + { + processFrameBoundaryPostSubmit(*layer, sampleLastFrame); + } + + return res; +} + +/** + * See Vulkan API for documentation. + * + * Note: Modelling of this function is only implemented to support manual frame + * boundaries. There is no reporting of the workload associated with bind + * sparse submissions in the Mali timeline driver data model. + */ +template <> +VKAPI_ATTR VkResult VKAPI_CALL layer_vkQueueBindSparse( + VkQueue queue, + uint32_t bindInfoCount, + const VkBindSparseInfo* pBindInfo, + VkFence fence +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock {g_vulkanLock}; + auto* layer = Device::retrieve(queue); + const auto& config = layer->instance->config; + + bool sampleLastFrame = layer->isFrameOfInterest && config.isSamplingFrames(); + + // Scan infos for frame boundaries + bool isFrameEnd { false }; + for (uint32_t i = 0; i < bindInfoCount; i++) + { + const auto& info = pBindInfo[i]; + isFrameEnd |= processManualFrameBoundaryPreSubmit(*layer, info.pNext); + } + + sampleLastFrame = isFrameEnd && sampleLastFrame; + bool resetThisFrame = isFrameEnd && layer->isFrameOfInterest && config.isSamplingAny(); + + // If a "normal" frame then release the lock before calling in to the + // driver, otherwise keep the lock to stop other threads using Vulkan + // while we sync and reset the counter stream + if (!sampleLastFrame && !resetThisFrame) + { + lock.unlock(); + } + + auto res = layer->driver.vkQueueBindSparse(queue, bindInfoCount, pBindInfo, fence); + if (res != VK_SUCCESS) + { + return res; + } + + if (sampleLastFrame || resetThisFrame) + { + processFrameBoundaryPostSubmit(*layer, sampleLastFrame); + } + return res; } diff --git a/layer_gpu_profile/source/layer_instance_functions.cpp b/layer_gpu_profile/source/layer_instance_functions.cpp new file mode 100644 index 0000000..f152da2 --- /dev/null +++ b/layer_gpu_profile/source/layer_instance_functions.cpp @@ -0,0 +1,80 @@ +/* + * SPDX-License-Identifier: MIT + * ---------------------------------------------------------------------------- + * Copyright (c) 2025 Arm Limited + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * ---------------------------------------------------------------------------- + */ + +#include "instance.hpp" +#include "device.hpp" + +#include +#include + +extern std::mutex g_vulkanLock; + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkGetPhysicalDeviceFeatures2( + VkPhysicalDevice physicalDevice, + VkPhysicalDeviceFeatures2* pFeatures +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Instance::retrieve(physicalDevice); + + // Release the lock to call into the driver + lock.unlock(); + layer->driver.vkGetPhysicalDeviceFeatures2(physicalDevice, pFeatures); + + // Patch the query response to show that it is supported + auto* ext = vku::FindStructInPNextChain(pFeatures->pNext); + if (ext) + { + ext->frameBoundary = VK_TRUE; + } +} + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkGetPhysicalDeviceFeatures2KHR( + VkPhysicalDevice physicalDevice, + VkPhysicalDeviceFeatures2* pFeatures +) { + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock { g_vulkanLock }; + auto* layer = Instance::retrieve(physicalDevice); + + // Release the lock to call into the driver + lock.unlock(); + layer->driver.vkGetPhysicalDeviceFeatures2KHR(physicalDevice, pFeatures); + + // Patch the query response to show that it is supported + auto* ext = vku::FindStructInPNextChain(pFeatures->pNext); + if (ext) + { + ext->frameBoundary = VK_TRUE; + } +} diff --git a/layer_gpu_profile/source/layer_instance_functions.hpp b/layer_gpu_profile/source/layer_instance_functions.hpp new file mode 100644 index 0000000..3bab9ec --- /dev/null +++ b/layer_gpu_profile/source/layer_instance_functions.hpp @@ -0,0 +1,42 @@ +/* + * SPDX-License-Identifier: MIT + * ---------------------------------------------------------------------------- + * Copyright (c) 2025 Arm Limited + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * ---------------------------------------------------------------------------- + */ + +#pragma once + +#include + +// Functions for devices + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkGetPhysicalDeviceFeatures2( + VkPhysicalDevice physicalDevice, + VkPhysicalDeviceFeatures2* pFeatures); + +/* See Vulkan API for documentation. */ +template <> +VKAPI_ATTR void VKAPI_CALL layer_vkGetPhysicalDeviceFeatures2KHR( + VkPhysicalDevice physicalDevice, + VkPhysicalDeviceFeatures2* pFeatures); diff --git a/lglpy/comms/service_gpu_profile.py b/lglpy/comms/service_gpu_profile.py index 7c30e5e..adea4c3 100644 --- a/lglpy/comms/service_gpu_profile.py +++ b/lglpy/comms/service_gpu_profile.py @@ -27,6 +27,7 @@ ''' import csv +import enum import json import os from typing import Any, Optional, TypedDict, Union @@ -51,13 +52,30 @@ class EndFrameMessage(TypedDict): class WorkloadMessage(TypedDict): ''' - Type information for any workload JSON message. + Type information for any per-workload sample JSON message. ''' type: str counters: list[dict[str, Union[int, float]]] labels: list[str] +class FrameMessage(TypedDict): + ''' + Type information for a per-frame sample JSON message. + ''' + type: str + counters: list[dict[str, Union[int, float]]] + frame: int + + +class SampleMode(enum.Enum): + ''' + Type of sampling detected. + ''' + PER_WORKLOAD = 0 + PER_FRAME = 1 + + class GPUProfileService: ''' A service for handling network comms from the layer_gpu_profile layer. @@ -73,9 +91,13 @@ def __init__(self, dir_path: str, verbose: bool = False): ''' self.base_dir = dir_path + # Sample mode is detected on the fly when we get our first data + self.sample_mode = SampleMode.PER_WORKLOAD + self.frame_id: Optional[int] = None - self.frame_header: Optional[list[str]] = None - self.frame_data: Optional[list[list[str]]] = None + + self.table_header: Optional[list[str]] = None + self.table_data: list[list[str]] = [] os.makedirs(dir_path, exist_ok=True) @@ -96,8 +118,8 @@ def handle_start_frame(self, message: StartFrameMessage): message: The decoded JSON. ''' self.frame_id = message["frame"] - self.frame_header = None - self.frame_data = [] + self.table_header = None + self.table_data.clear() def handle_end_frame(self, message: EndFrameMessage): ''' @@ -110,25 +132,24 @@ def handle_end_frame(self, message: EndFrameMessage): del message assert self.frame_id is not None - assert self.frame_header is not None - assert self.frame_data is not None + assert self.table_header is not None # Emit the CSV file print(f'Generating CSV for frame {self.frame_id}') path = os.path.join(self.base_dir, f'frame_{self.frame_id:05d}.csv') with open(path, 'w', newline='') as handle: writer = csv.writer(handle) - writer.writerow(self.frame_header) - writer.writerows(self.frame_data) + writer.writerow(self.table_header) + writer.writerows(self.table_data) # Reset the state self.frame_id = None - self.frame_header = None - self.frame_data = None + self.table_header = None + self.table_data.clear() def create_workload_header(self, message: WorkloadMessage): ''' - Create a table header row from a workload. + Create a table header row from a workload sample. Args: message: The decoded JSON. @@ -142,22 +163,21 @@ def create_workload_header(self, message: WorkloadMessage): columns.append(key) columns.append('Label') - self.frame_header = columns + self.table_header = columns def create_workload_data(self, message: WorkloadMessage): ''' - Create a table data row from a workload. + Create a table data row from a workload sample. Args: message: The decoded JSON. ''' assert self.frame_id is not None - assert self.frame_header is not None - assert self.frame_data is not None + assert self.table_header is not None columns: list[str] = [] - columns.append(str(len(self.frame_data))) + columns.append(str(len(self.table_data))) columns.append(message['type']) for counter in message['counters']: @@ -165,20 +185,76 @@ def create_workload_data(self, message: WorkloadMessage): columns.append(f'{value:0.2f}') columns.append('|'.join(message['labels'])) - self.frame_data.append(columns) + self.table_data.append(columns) - def handle_workload(self, message: WorkloadMessage): + def handle_workload_sample(self, message: WorkloadMessage): ''' - Handle a workload message. + Handle a workload sample message. Args: message: The decoded JSON. ''' - if not self.frame_header: + if not self.table_header: self.create_workload_header(message) self.create_workload_data(message) + def create_frame_header(self, message: FrameMessage): + ''' + Create a table header row from a frame sample. + Args: + message: The decoded JSON. + ''' + columns = [] + + columns.append('Frame ID') + for counter in message['counters']: + key = list(counter.keys())[0] + columns.append(key) + + self.table_header = columns + + def create_frame_data(self, message: FrameMessage): + ''' + Create a table data row from a frame sample. + + Args: + message: The decoded JSON. + ''' + assert self.table_header is not None + + columns: list[str] = [] + + columns.append(f'{self.frame_id}') + + for counter in message['counters']: + value = list(counter.values())[0] + columns.append(f'{value:0.2f}') + + self.table_data.append(columns) + + def handle_frame_sample(self, message: FrameMessage): + ''' + Handle a frame message. + + Args: + message: The decoded JSON. + ''' + self.frame_id = message['frame'] + + if not self.table_header: + self.create_frame_header(message) + + assert self.table_header is not None + self.create_frame_data(message) + + print(f'Updating CSV for frame {self.frame_id}') + path = os.path.join(self.base_dir, f'capture.csv') + with open(path, 'w', newline='') as handle: + writer = csv.writer(handle) + writer.writerow(self.table_header) + writer.writerows(self.table_data) + def handle_message(self, message: Message) -> None: ''' Handle a service request from a layer. @@ -193,5 +269,7 @@ def handle_message(self, message: Message) -> None: self.handle_start_frame(payload) elif payload['type'] == 'end_frame': self.handle_end_frame(payload) + elif payload['type'] == 'frame': + self.handle_frame_sample(payload) else: - self.handle_workload(payload) + self.handle_workload_sample(payload) From 3f2e89bfc850af02d218f8b8348f9bd96b360b53 Mon Sep 17 00:00:00 2001 From: Peter Harris Date: Fri, 19 Dec 2025 16:20:29 +0000 Subject: [PATCH 3/3] Profile layer: Add frame serialization config option --- .github/workflows/native_test.yaml | 2 ++ .github/workflows/python_test.yaml | 2 ++ layer_gpu_profile/README_LAYER.md | 8 +++++++ layer_gpu_profile/layer_config.json | 3 ++- layer_gpu_profile/source/layer_config.cpp | 23 ++++++++++++++++--- layer_gpu_profile/source/layer_config.hpp | 11 +++++++++ .../source/layer_device_functions_queue.cpp | 10 ++++++-- 7 files changed, 53 insertions(+), 6 deletions(-) diff --git a/.github/workflows/native_test.yaml b/.github/workflows/native_test.yaml index 1e4e3f2..8ed1c3e 100644 --- a/.github/workflows/native_test.yaml +++ b/.github/workflows/native_test.yaml @@ -11,12 +11,14 @@ on: paths-ignore: - 'lglpy/**' - '**/*.md' + - '**/*.json' pull_request: branches: - main paths-ignore: - 'lglpy/**' - '**/*.md' + - '**/*.json' env: CMAKE_BUILD_PARALLEL_LEVEL: '8' diff --git a/.github/workflows/python_test.yaml b/.github/workflows/python_test.yaml index d700acc..67b3a8f 100644 --- a/.github/workflows/python_test.yaml +++ b/.github/workflows/python_test.yaml @@ -10,11 +10,13 @@ on: - '*' paths-ignore: - '**/*.md' + - '**/*.json' pull_request: branches: - main paths-ignore: - '**/*.md' + - '**/*.json' jobs: python-test: diff --git a/layer_gpu_profile/README_LAYER.md b/layer_gpu_profile/README_LAYER.md index 91f1499..de4d3bc 100644 --- a/layer_gpu_profile/README_LAYER.md +++ b/layer_gpu_profile/README_LAYER.md @@ -140,6 +140,14 @@ to profile using the `sample_mode` config option: * `workload`: Sample every workload in each frame of interest. * `frame`: Sample at the end of each frame of interest. +By default per-frame samples are isolated from other frames by inserting a +`vkDeviceWaitIdle()` before and after the frame to ensure that workload +in the sampled region does not overlap neighboring frames. Setting the +`frame_serialization` config option to `false` will allow frames to overlap +without serialization, but can add noise to the returned counter values. This +option has no effect for per-workload sampling, which must always use +serialization. + ## Layer counters The current layer uses a hard-coded set of performance counters defined in the diff --git a/layer_gpu_profile/layer_config.json b/layer_gpu_profile/layer_config.json index 6fa5d47..cd114c5 100644 --- a/layer_gpu_profile/layer_config.json +++ b/layer_gpu_profile/layer_config.json @@ -4,5 +4,6 @@ "sample_mode": "frame", "periodic_min_frame": 1, "periodic_frame": 600, - "frame_list": [] + "frame_list": [], + "frame_serialization": true } diff --git a/layer_gpu_profile/source/layer_config.cpp b/layer_gpu_profile/source/layer_config.cpp index ee4b365..ab90cb8 100644 --- a/layer_gpu_profile/source/layer_config.cpp +++ b/layer_gpu_profile/source/layer_config.cpp @@ -90,6 +90,9 @@ void LayerConfig::parseSamplingOptions(const json& config) rawSampleMode = "disabled"; } + // Decode frame serialization mode + frameSerialization = config.at("frame_serialization"); + LAYER_LOG("Layer sampling configuration"); LAYER_LOG("============================"); LAYER_LOG(" - Frame selection mode: %s", rawFrameMode.c_str()); @@ -107,6 +110,11 @@ void LayerConfig::parseSamplingOptions(const json& config) } LAYER_LOG(" - Counter sampling mode: %s", rawSampleMode.c_str()); + + if (samplingMode == COUNTER_SAMPLING_FRAMES) + { + LAYER_LOG(" - Frame serialization: %u", frameSerialization); + } } /* See header for documentation. */ @@ -172,20 +180,29 @@ bool LayerConfig::isFrameOfInterest( } /* See header for documentation. */ -bool LayerConfig::isSamplingWorkloads() const { +bool LayerConfig::isSamplingWorkloads() const +{ return frameMode != FRAME_SELECTION_DISABLED && samplingMode == COUNTER_SAMPLING_WORKLOADS; } /* See header for documentation. */ -bool LayerConfig::isSamplingFrames() const { +bool LayerConfig::isSamplingFrames() const +{ return frameMode != FRAME_SELECTION_DISABLED && samplingMode == COUNTER_SAMPLING_FRAMES; } /* See header for documentation. */ -bool LayerConfig::isSamplingAny() const { +bool LayerConfig::isSamplingAny() const +{ return frameMode != FRAME_SELECTION_DISABLED && samplingMode != COUNTER_SAMPLING_DISABLED; } +/* See header for documentation. */ +bool LayerConfig::isSerializingFrames() const +{ + return isSamplingWorkloads() || + (isSamplingFrames() && frameSerialization); +}; diff --git a/layer_gpu_profile/source/layer_config.hpp b/layer_gpu_profile/source/layer_config.hpp index 17d2492..9c88d4b 100644 --- a/layer_gpu_profile/source/layer_config.hpp +++ b/layer_gpu_profile/source/layer_config.hpp @@ -79,6 +79,13 @@ class LayerConfig */ bool isSamplingAny() const; + /** + * @brief Test if we are serializing frames. + * + * @return @c true if serializing, @c false otherwise. + */ + bool isSerializingFrames() const; + private: /** * @brief Supported frame selection modes. @@ -119,6 +126,10 @@ class LayerConfig */ CounterSamplingMode samplingMode {COUNTER_SAMPLING_DISABLED}; + /** + * @brief The frame sample serialization mode. + */ + bool frameSerialization {true}; /** * @brief The sampling period in frames, or 0 if disabled. diff --git a/layer_gpu_profile/source/layer_device_functions_queue.cpp b/layer_gpu_profile/source/layer_device_functions_queue.cpp index fd7ba69..6a8b104 100644 --- a/layer_gpu_profile/source/layer_device_functions_queue.cpp +++ b/layer_gpu_profile/source/layer_device_functions_queue.cpp @@ -140,10 +140,16 @@ static void processFrameBoundaryPostSubmit( Device& layer, bool frameSample ) { + const auto& config = layer.instance->config; + // If we are measuring performance ensure the previous frame has finished // and then take an initial sample to reset the counters - layer.driver.vkDeviceWaitIdle(layer.device); - workaroundDelay(); + if (config.isSerializingFrames()) + { + layer.driver.vkDeviceWaitIdle(layer.device); + workaroundDelay(); + } + auto ec = layer.lgcSampler->sample_now(); if (ec) {