From 232f3cf33eaf50f0683a5c73d8cd907716d525bc Mon Sep 17 00:00:00 2001
From: Peter Harris <peter.harris@arm.com>
Date: Mon, 2 Dec 2024 15:03:28 +0000
Subject: [PATCH 1/8] Add preliminary gpu_timeline layer

---
 .../source/layer_device_functions.hpp         |   2 +
 layer_gpu_timeline/CMakeLists.txt             |   4 +-
 layer_gpu_timeline/README_LAYER.md            | 164 +++++++
 .../docs/command_buffer_model.md              | 155 +++++++
 layer_gpu_timeline/source/CMakeLists.txt      |  13 +-
 layer_gpu_timeline/source/device.cpp          |  14 +
 layer_gpu_timeline/source/device.hpp          |  39 +-
 .../source/layer_device_functions.hpp         | 388 ++++++++++++++++
 .../layer_device_functions_command_buffer.cpp | 161 +++++++
 .../layer_device_functions_command_pool.cpp   | 105 +++++
 .../source/layer_device_functions_debug.cpp   | 123 +++++
 .../layer_device_functions_dispatch.cpp       | 131 ++++++
 .../layer_device_functions_draw_call.cpp      | 251 ++++++++++
 .../source/layer_device_functions_queue.cpp   | 129 ++++++
 .../layer_device_functions_render_pass.cpp    | 433 ++++++++++++++++++
 .../layer_device_functions_trace_rays.cpp     | 109 +++++
 layer_gpu_timeline/source/timeline_comms.cpp  |  51 +++
 layer_gpu_timeline/source/timeline_comms.hpp  |  42 ++
 lgl_host_server.py                            |  18 +-
 lglpy/service_gpu_timeline.py                 |  42 ++
 source_common/CMakeLists.txt                  |   1 +
 source_common/comms/comms_module.cpp          |   7 +
 source_common/trackers/CMakeLists.txt         |  46 ++
 source_common/trackers/command_buffer.cpp     | 185 ++++++++
 source_common/trackers/command_buffer.hpp     | 218 +++++++++
 source_common/trackers/device.cpp             | 175 +++++++
 source_common/trackers/device.hpp             | 208 +++++++++
 .../trackers/layer_command_stream.cpp         | 161 +++++++
 .../trackers/layer_command_stream.hpp         | 196 ++++++++
 source_common/trackers/queue.cpp              |  67 +++
 source_common/trackers/queue.hpp              |  99 ++++
 source_common/trackers/render_pass.cpp        | 386 ++++++++++++++++
 source_common/trackers/render_pass.hpp        | 194 ++++++++
 source_common/trackers/stats.hpp              | 227 +++++++++
 source_common/utils/misc.hpp                  |  61 ++-
 35 files changed, 4588 insertions(+), 17 deletions(-)
 create mode 100644 layer_gpu_timeline/README_LAYER.md
 create mode 100644 layer_gpu_timeline/docs/command_buffer_model.md
 create mode 100644 layer_gpu_timeline/source/layer_device_functions.hpp
 create mode 100644 layer_gpu_timeline/source/layer_device_functions_command_buffer.cpp
 create mode 100644 layer_gpu_timeline/source/layer_device_functions_command_pool.cpp
 create mode 100644 layer_gpu_timeline/source/layer_device_functions_debug.cpp
 create mode 100644 layer_gpu_timeline/source/layer_device_functions_dispatch.cpp
 create mode 100644 layer_gpu_timeline/source/layer_device_functions_draw_call.cpp
 create mode 100644 layer_gpu_timeline/source/layer_device_functions_queue.cpp
 create mode 100644 layer_gpu_timeline/source/layer_device_functions_render_pass.cpp
 create mode 100644 layer_gpu_timeline/source/layer_device_functions_trace_rays.cpp
 create mode 100644 layer_gpu_timeline/source/timeline_comms.cpp
 create mode 100644 layer_gpu_timeline/source/timeline_comms.hpp
 create mode 100644 lglpy/service_gpu_timeline.py
 create mode 100644 source_common/trackers/CMakeLists.txt
 create mode 100644 source_common/trackers/command_buffer.cpp
 create mode 100644 source_common/trackers/command_buffer.hpp
 create mode 100644 source_common/trackers/device.cpp
 create mode 100644 source_common/trackers/device.hpp
 create mode 100644 source_common/trackers/layer_command_stream.cpp
 create mode 100644 source_common/trackers/layer_command_stream.hpp
 create mode 100644 source_common/trackers/queue.cpp
 create mode 100644 source_common/trackers/queue.hpp
 create mode 100644 source_common/trackers/render_pass.cpp
 create mode 100644 source_common/trackers/render_pass.hpp
 create mode 100644 source_common/trackers/stats.hpp

diff --git a/layer_example/source/layer_device_functions.hpp b/layer_example/source/layer_device_functions.hpp
index df321c2..f3403c7 100644
--- a/layer_example/source/layer_device_functions.hpp
+++ b/layer_example/source/layer_device_functions.hpp
@@ -23,6 +23,8 @@
  * ----------------------------------------------------------------------------
  */
 
+#include <vulkan/vulkan.h>
+
 #include "framework/utils.hpp"
 
 /* See Vulkan API for documentation. */
diff --git a/layer_gpu_timeline/CMakeLists.txt b/layer_gpu_timeline/CMakeLists.txt
index b36ba8c..f3d8cd2 100644
--- a/layer_gpu_timeline/CMakeLists.txt
+++ b/layer_gpu_timeline/CMakeLists.txt
@@ -35,5 +35,7 @@ set(LGL_CONFIG_LOG 1)
 include(../source_common/compiler_helper.cmake)
 
 # Build steps
-add_subdirectory(source)
+add_subdirectory(../source_common/comms source_common/comms)
 add_subdirectory(../source_common/framework source_common/framework)
+add_subdirectory(../source_common/trackers source_common/trackers)
+add_subdirectory(source)
diff --git a/layer_gpu_timeline/README_LAYER.md b/layer_gpu_timeline/README_LAYER.md
new file mode 100644
index 0000000..8f8a56c
--- /dev/null
+++ b/layer_gpu_timeline/README_LAYER.md
@@ -0,0 +1,164 @@
+# Layer: GPU Timeline
+
+This layer is used with Arm GPUs for tracking submitted schedulable workloads
+and emitting semantic information about them. This data can be combined with
+the raw workload execution timing information captured using the Android
+Perfetto service, providing developers with a richer debug visualization.
+
+## What devices?
+
+The Arm GPU driver integration with the Perfetto render stages scheduler event
+trace is supported at production quality since the r47p0 driver version.
+However, associating semantics from this layer relies on a further integration
+with debug labels which requires an r51p0 or later driver version.
+
+## What workloads?
+
+A schedulable workload is the smallest workload that the Arm GPU command stream
+scheduler will issue to the GPU hardware work queues. This includes the
+following workload types:
+
+* Render passes, split into:
+  * Vertex or Binning phase
+  * Fragment or Main phase
+* Compute dispatches
+* Trace rays
+* Transfers to a buffer
+* Transfers to an image
+
+Most workloads are dispatched using a single API call, and are trivial to
+manage in the layer. However, render passes are more complex and need extra
+handling. In particular:
+
+* Render passes are issued using multiple API calls.
+* Useful render pass properties, such as draw count, are not known until the
+  render pass recording has ended.
+* Dynamic render passes using `vkCmdBeginRendering()` and `vkCmdEndRendering()`
+  can be suspended and resumed across command buffer boundaries. Properties
+  such as draw count are not defined by the scope of a single command buffer.
+
+## Tracking workloads
+
+This layer tracks workloads encoded in command buffers, and emits semantic
+metadata for each workload via a communications side-channel. A host tool
+combines the semantic data stream with the Perfetto data stream, using debug
+label tags injected by the layer as a common cross-reference to link across
+the streams.
+
+### Workload labelling
+
+Command stream labelling is implemented using `vkCmdDebugMarkerBeginEXT()`
+and `vkCmdDebugMarkerEndEXT()`, wrapping one layer-owned `tagID` label around
+each semantic workload. This `tagID` can unambiguously refer to this workload
+encoding, and metadata that we do not expect to change per submit will be
+emitted using the matching `tagID` as the sole identifier.
+
+_**TODO:** Dynamic `submitID` tracking is not yet implemented._
+
+The `tagID` label is encoded into the recorded command buffer which means, for
+reusable command buffers, it is not an unambiguous identifier of a specific
+running workload. To allow us to disambiguate specific workload instances, the
+layer can optionally add an outer wrapper of `submitID` labels around each
+submitted command buffer. This wrapper is only generated if the submit contains
+any command buffers that require the generation of a per-submit annex (see the
+following section for when this is needed).
+
+The `submitID.tagID` pair of IDs uniquely identifies a specific running
+workload, and can be used to attach an instance-specific metadata annex to a
+specific submitted workload rather than to the shared recorded command buffer.
+
+### Workload metadata for split render passes
+
+_**TODO:** Split render pass tracking is not yet implemented._
+
+Dynamic render passes can be split across multiple Begin/End pairs, including
+being split across command buffer boundaries. If these splits occur within a
+single primary command buffer, or its secondaries, it is handled transparently
+by the layer and it appears as a single message as if no splits occurred. If
+these splits occur across primary command buffer boundaries, then some
+additional work is required.
+
+In our design a `tagID` debug marker is only started when the render pass first
+starts (not on resume), and stopped at the end of the render pass (not on
+suspend). The same `tagID` is used to refer to all parts of the render pass,
+no matter how many times it was suspended and resumed.
+
+If a render pass splits across command buffers, we cannot precompute metrics
+based on `tagID` alone, even if the command buffers are one-time use. This is
+because we do not know what combination of submitted command buffers will be
+used, and so we cannot know what the render pass contains until submit time.
+Split render passes will emit a `submitID.tagID` metadata annex containing
+the parameters that can only be known at submit time.
+
+### Workload metadata for compute dispatches
+
+_**TODO:** Compute workgroup parsing from the SPIR-V is not yet implemented._
+
+Compute workload dispatch is simple to track, but one of the metadata items we
+want to export is the total size of the work space (work_group_count *
+work_group_size).
+
+The work group count is defined by the API call, but may be an indirect
+parameter (see indirect tracking below).
+
+The work group size is defined by the program pipeline, and is defined in the
+SPIR-V via a literal or a build-time specialization constant. To support this
+use case we will need to parse the SPIR-V when the pipeline is built, if
+SPIR-V is available.
+
+### Workload metadata for indirect calls
+
+_**TODO:** Indirect parameter tracking is not yet implemented._
+
+One of the valuable pieces of metadata that we want to present is the size of
+each workload. For render passes this is captured at API call time, but for
+other workloads the size can be an indirect parameter that is not known when
+the triggering API call is made.
+
+To capture indirect parameters we insert a transfer that copies the indirect
+parameters into a layer-owned buffer. To ensure exclusive use of the buffer and
+avoid data corruption, each buffer region used is unique to a specific `tagID`.
+Attempting to submit the same command buffer multiple times will result in
+the workload being serialized to avoid racy access to the buffer. Once the
+buffer has been retrieved by the layer, a metadata annex containing the
+indirect parameters will be emitted using the `submitID.tagID` pair. This may
+be some time later than the original submit.
+
+### Workload metadata for user-defined labels
+
+The workload metadata captures user-defined labels that the application
+provides using `vkCmdDebugMarkerBeginEXT()` and `vkCmdDebugMarkerEndEXT()`.
+These are a stack-based debug mechanism where `Begin` pushes a new entry on
+to the stack, and `End` pops the most recent level off the stack.
+
+Workloads are labelled with the stack values that existed when the workload
+was started. For render passes this is the value on the stack when, e.g.,
+`vkCmdBeginRenderPass()` was called. We do not capture any labels that exist
+inside the render pass.
+
+The debug label stack belongs to the queue, not to the command buffer, so the
+value of the label stack is not known until submit time. The debug information
+for a specific `submitID.tagID` pair is therefore provided as an annex at
+submit time once the stack can be resolved.
+
+## Message protocol
+
+For each workload in a command buffer, or part-workload in the case of a
+suspended render pass, we record a JSON metadata blob containing the payload
+we want to send.
+
+The low level protocol message contains:
+
+* Message type `uint8_t`
+* Sequence ID `uint64_t` (optional, implied by message type)
+* Tag ID `uint64_t`
+* JSON length `uint32_t`
+* JSON payload `uint8_t[]`
+
+Each workload will read whatever properties it can from the `tagID` metadata
+and will then merge in all fields from any subsequent `sequenceID.tagID`
+metadata that matches.
+
+- - -
+
+_Copyright © 2024, Arm Limited and contributors._
diff --git a/layer_gpu_timeline/docs/command_buffer_model.md b/layer_gpu_timeline/docs/command_buffer_model.md
new file mode 100644
index 0000000..e7422d7
--- /dev/null
+++ b/layer_gpu_timeline/docs/command_buffer_model.md
@@ -0,0 +1,155 @@
+# Layer: GPU Timeline - Command Buffer Modelling
+
+One of the main challenges of this layer driver is modelling behavior in queues
+and command buffers that is not known until submit time, and then taking
+appropriate actions based on the combination of both the head state of the
+queue and the content of the pre-recorded command buffers.
+
+Our design to solve this is a lightweight software command stream which is
+recorded when a command buffer is recorded, and then executed when the
+command buffer is submitted to the queue. Just like a real hardware command
+stream these commands can update state or trigger some other action we need
+performed.
+
+## Layer commands
+
+**MARKER_BEGIN(const std::string\*):**
+
+* Push a new marker into the queue debug label stack.
+
+**MARKER_END():**
+
+* Pop the latest marker from the queue debug label stack.
+
+**RENDERPASS_BEGIN(const json\*):**
+
+* Set the current workload to a new render pass with the passed metadata.
+
+**RENDERPASS_RESUME(const json\*):**
+
+* Update the current workload, which must be a render pass, with extra
+  draw count metadata.
+
+**COMPUTE_DISPATCH_BEGIN(const json\*):**
+
+* Set the current workload to a new compute dispatch with the passed metadata.
+
+**TRACE_RAYS_BEGIN(const json\*):**
+
+* Set the current workload to a new trace rays with the passed metadata.
+
+**BUFFER_TRANSFER_BEGIN(const json\*):**
+
+* Set the current workload to a new buffer transfer.
+
+**IMAGE_TRANSFER(const json\*):**
+
+* Set the current workload to a new image transfer.
+
+**WORKLOAD_END():**
+
+* Mark the current workload as complete, and emit a built metadata entry for
+  it.
+
+## Layer command recording
+
+Command buffer recording is effectively building two separate state
+structures for the layer.
+
+The first is a per-workload or per-restart JSON structure that contains the
+metadata we need for that workload. For partial workloads - e.g. a dynamic
+render pass begin that has been suspended - this metadata will be partial and
+rely on later restart metadata to complete it.
+
+The second is the layer "command stream" that contains the bytecode commands
+to execute when the command buffer is submitted to the queue. These commands
+are very simple, consisting of a list of command+pointer pairs, where the
+pointer value may be unused by some commands. Commands are stored in a
+std::vector, but we reserve enough memory to store 256 commands without
+reallocating which is enough for the majority of command buffers we see in
+real applications.
+
+The command stream for a secondary command buffer is inlined into the primary
+command buffer during recording.
+
+###  Recording sequence
+
+When application records a new workload:
+
+  * A `tagID` is assigned and recorded using `vkCmdMarkerBegin()` label in the
+    Vulkan command stream _before_ the new workload is written to the command
+    stream.
+  * If workload is using indirect parameters, then a transfer job to copy
+    indirect parameters into a layer-owned buffer is emitted _before_ the new
+    workload. No additional barrier is needed because application barriers must
+    have already ensured that the indirect parameter buffer is valid.
+  * A proxy workload object is created in the layer storing the assigned
+    `tagID` and all settings that are known at command recording time.
+  * A layer command stream command is recorded into the submit time stream
+    indicating `<TYPE>_BEGIN` with a pointer to the proxy workload. Note that
+    this JSON may be modified later for some workloads.
+  * If workload is using indirect parameters, a layer command stream command is
+    recorded into the resolve time stream, which will handle cleanup and
+    emitting the `submitID.tagID` annex message for the indirect data.
+  * If the command buffer is not ONE_TIME_SUBMIT, if any workload is using
+    indirect parameters, or contains incomplete render passes, the command
+    buffer is marked as needing a `submitID` wrapper.
+  * The user command is written to the Vulkan command stream.
+
+When application resumes a render pass workload:
+
+  * A `tagID` of zero is assigned, but not emitted to the command stream.
+  * A layer command stream command is recorded into the submit time stream
+    indicating `<TYPE>_RESUME` with a pointer to the proxy workload. Note that
+    this JSON may be modified later for some workloads.
+  * The user command is written to the Vulkan command stream.
+
+When application ends a workload:
+
+  * For render pass workloads, any statistics accumulated since the last begin
+    are rolled up into the proxy workload object.
+  * For render pass workloads, the user command is written to the Vulkan
+    command stream.
+  * The command stream label scope is closed using `vkCmdMarkerEnd()`.
+
+## Layer command playback
+
+The persistent state for command playback belongs to the queues the command
+buffers are submitted to. The command stream bytecode is run by a bytecode
+interpreter associated with the state of the current queue, giving the
+interpreter access to the current `submitID` and queue debug label stack.
+
+###  Submitting sequence
+
+For each command buffer in the user submit:
+
+* If the command buffer needs a `submitID` we allocate a unique `submitID` and
+  create two new command buffers that will wrap the user command buffer with an
+  additional stack layer of debug label containing the `s<ID>` string. We will
+  inject a layer command stream async command to handle freeing the command
+  buffers.
+* The tool will process the submit-time layer commands, executing each command
+  to either update some state or emit a workload metadata message.
+* If there are any async layer commands, either recorded in the command buffer
+  or from the wrapping command buffers, we will need to add an async handler.
+  This cannot safely use the user fence or depend on any user object lifetime,
+  so we will add a layer-owned timeline semaphore to the submit which we can
+  wait on to determine when it is safe to trigger the async work.
+
+## Future: Async commands
+
+One of our longer-term goals is to be able to capture indirect parameters,
+which will be available after-the-fact once the GPU has processed the command
+buffer. Once we have the data we can emit an annex message containing
+parameters for each indirect `submitID.tagID` pair in the command buffer.
+
+We need to be able to emit the metadata after the commands are complete,
+and correctly synchronize use of the indirect capture staging buffer
+if command buffers are reissued. My current thinking is that we would
+implement this using additional layer commands that are processed on submit,
+including support for async commands that run in a separate thread and
+wait on the command buffer completion fence before running.
+
+- - -
+
+_Copyright © 2024, Arm Limited and contributors._
diff --git a/layer_gpu_timeline/source/CMakeLists.txt b/layer_gpu_timeline/source/CMakeLists.txt
index d267712..45e55c3 100644
--- a/layer_gpu_timeline/source/CMakeLists.txt
+++ b/layer_gpu_timeline/source/CMakeLists.txt
@@ -43,7 +43,16 @@ add_library(
     ${VK_LAYER} SHARED
         ${PROJECT_SOURCE_DIR}/../source_common/framework/entry.cpp
         device.cpp
-        instance.cpp)
+        instance.cpp
+        layer_device_functions_command_buffer.cpp
+        layer_device_functions_command_pool.cpp
+        layer_device_functions_debug.cpp
+        layer_device_functions_dispatch.cpp
+        layer_device_functions_draw_call.cpp
+        layer_device_functions_queue.cpp
+        layer_device_functions_render_pass.cpp
+        layer_device_functions_trace_rays.cpp
+        timeline_comms.cpp)
 
 target_include_directories(
     ${VK_LAYER} PRIVATE
@@ -59,7 +68,9 @@ lgl_set_build_options(${VK_LAYER})
 
 target_link_libraries(
     ${VK_LAYER}
+        lib_layer_comms
         lib_layer_framework
+        lib_layer_trackers
         $<$<PLATFORM_ID:Android>:log>)
 
 if (CMAKE_BUILD_TYPE STREQUAL "Release")
diff --git a/layer_gpu_timeline/source/device.cpp b/layer_gpu_timeline/source/device.cpp
index 580e339..271aab3 100644
--- a/layer_gpu_timeline/source/device.cpp
+++ b/layer_gpu_timeline/source/device.cpp
@@ -29,6 +29,7 @@
 #include <sys/stat.h>
 #include <vector>
 
+#include "comms/comms_module.hpp"
 #include "framework/utils.hpp"
 
 #include "device.hpp"
@@ -36,6 +37,12 @@
 
 static std::unordered_map<void*, std::unique_ptr<Device>> g_devices;
 
+/* See header for documentation. */
+std::unique_ptr<Comms::CommsModule> Device::commsModule;
+
+/* See header for documentation. */
+std::unique_ptr<TimelineComms> Device::commsWrapper;
+
 /* See header for documentation. */
 void Device::store(
     VkDevice handle,
@@ -90,6 +97,13 @@ Device::Device(
     device(_device)
 {
     initDriverDeviceDispatchTable(device, nlayerGetProcAddress, driver);
+
+    // Init the shared comms module for the first device built
+    if (!commsModule)
+    {
+        commsModule = std::make_unique<Comms::CommsModule>("lglcomms");
+        commsWrapper = std::make_unique<TimelineComms>(*commsModule);
+    }
 }
 
 /* See header for documentation. */
diff --git a/layer_gpu_timeline/source/device.hpp b/layer_gpu_timeline/source/device.hpp
index d6ecad7..b04ace1 100644
--- a/layer_gpu_timeline/source/device.hpp
+++ b/layer_gpu_timeline/source/device.hpp
@@ -56,9 +56,12 @@
 
 #include <vulkan/vk_layer.h>
 
+#include "comms/comms_module.hpp"
 #include "framework/device_dispatch_table.hpp"
+#include "trackers/device.hpp"
 
 #include "instance.hpp"
+#include "timeline_comms.hpp"
 
 /**
  * @brief This class implements the layer state tracker for a single device.
@@ -127,7 +130,29 @@ class Device
      */
     ~Device();
 
+    /**
+     * @brief Callback for sending messages
+     */
+    void onWorkloadSubmit(const std::string& message)
+    {
+        commsWrapper->txMessage(message);
+    }
+
+    /**
+     * @brief Get the state tracker for this device.
+     */
+    Tracker::Device& getStateTracker()
+    {
+        return stateTracker;
+    }
+
 public:
+    /**
+     * @brief The driver function dispatch table.
+     */
+    DeviceDispatchTable driver {};
+
+private:
     /**
      * @brief The instance this device is created with.
      */
@@ -144,7 +169,17 @@ class Device
     const VkDevice device;
 
     /**
-     * @brief The driver function dispatch table.
+     * @brief State tracking for this device.
      */
-    DeviceDispatchTable driver {};
+    Tracker::Device stateTracker;
+
+    /**
+     * @brief Communications module.
+     */
+    static std::unique_ptr<Comms::CommsModule> commsModule;
+
+    /**
+     * @brief Communications module message encoder.
+     */
+    static std::unique_ptr<TimelineComms> commsWrapper;
 };
diff --git a/layer_gpu_timeline/source/layer_device_functions.hpp b/layer_gpu_timeline/source/layer_device_functions.hpp
new file mode 100644
index 0000000..129f1e6
--- /dev/null
+++ b/layer_gpu_timeline/source/layer_device_functions.hpp
@@ -0,0 +1,388 @@
+/*
+ * SPDX-License-Identifier: MIT
+ * ----------------------------------------------------------------------------
+ * Copyright (c) 2024 Arm Limited
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ * ----------------------------------------------------------------------------
+ */
+
+#include <vulkan/vulkan.h>
+
+#include "framework/utils.hpp"
+
+// Functions for command pools
+
+/* See Vulkan API for documentation. */
+template <>
+VKAPI_ATTR VkResult VKAPI_CALL layer_vkCreateCommandPool<user_tag>(
+    VkDevice device,
+    const VkCommandPoolCreateInfo* pCreateInfo,
+    const VkAllocationCallbacks* pAllocator,
+    VkCommandPool* pCommandPool);
+
+/* See Vulkan API for documentation. */
+template <>
+VKAPI_ATTR VkResult VKAPI_CALL layer_vkResetCommandPool<user_tag>(
+    VkDevice device,
+    VkCommandPool commandPool,
+    VkCommandPoolResetFlags flags);
+
+/* See Vulkan API for documentation. */
+template <>
+VKAPI_ATTR void VKAPI_CALL layer_vkDestroyCommandPool<user_tag>(
+    VkDevice device,
+    VkCommandPool commandPool,
+    const VkAllocationCallbacks* pAllocator);
+
+// Functions for command buffers
+
+/* See Vulkan API for documentation. */
+template <>
+VKAPI_ATTR VkResult VKAPI_CALL layer_vkAllocateCommandBuffers<user_tag>(
+    VkDevice device,
+    const VkCommandBufferAllocateInfo* pAllocateInfo,
+    VkCommandBuffer* pCommandBuffers);
+
+/* See Vulkan API for documentation. */
+template <>
+VKAPI_ATTR VkResult VKAPI_CALL layer_vkBeginCommandBuffer<user_tag>(
+    VkCommandBuffer commandBuffer,
+    const VkCommandBufferBeginInfo* pBeginInfo);
+
+/* See Vulkan API for documentation. */
+template <>
+VKAPI_ATTR void VKAPI_CALL layer_vkCmdExecuteCommands<user_tag>(
+    VkCommandBuffer commandBuffer,
+    uint32_t commandBufferCount,
+    const VkCommandBuffer* pCommandBuffers);
+
+/* See Vulkan API for documentation. */
+template <>
+VKAPI_ATTR VkResult VKAPI_CALL layer_vkResetCommandBuffer<user_tag>(
+    VkCommandBuffer commandBuffer,
+    VkCommandBufferResetFlags flags);
+
+/* See Vulkan API for documentation. */
+template <>
+VKAPI_ATTR void VKAPI_CALL layer_vkFreeCommandBuffers<user_tag>(
+    VkDevice device,
+    VkCommandPool commandPool,
+    uint32_t commandBufferCount,
+    const VkCommandBuffer* pCommandBuffers);
+
+// Functions for render passes
+
+/* See Vulkan API for documentation. */
+template <>
+VKAPI_ATTR VkResult VKAPI_CALL layer_vkCreateRenderPass<user_tag>(
+    VkDevice device,
+    const VkRenderPassCreateInfo* pCreateInfo,
+    const VkAllocationCallbacks* pAllocator,
+    VkRenderPass* pRenderPass);
+
+/* See Vulkan API for documentation. */
+template <>
+VKAPI_ATTR VkResult VKAPI_CALL layer_vkCreateRenderPass2<user_tag>(
+    VkDevice device,
+    const VkRenderPassCreateInfo2* pCreateInfo,
+    const VkAllocationCallbacks* pAllocator,
+    VkRenderPass* pRenderPass);
+
+/* See Vulkan API for documentation. */
+template <>
+VKAPI_ATTR VkResult VKAPI_CALL layer_vkCreateRenderPass2KHR<user_tag>(
+    VkDevice device,
+    const VkRenderPassCreateInfo2* pCreateInfo,
+    const VkAllocationCallbacks* pAllocator,
+    VkRenderPass* pRenderPass);
+
+/* See Vulkan API for documentation. */
+template <>
+VKAPI_ATTR void VKAPI_CALL layer_vkDestroyRenderPass<user_tag>(
+    VkDevice device,
+    VkRenderPass renderPass,
+    const VkAllocationCallbacks* pAllocator);
+
+/* See Vulkan API for documentation. */
+template <>
+VKAPI_ATTR void VKAPI_CALL layer_vkCmdBeginRenderPass<user_tag>(
+    VkCommandBuffer commandBuffer,
+    const VkRenderPassBeginInfo* pRenderPassBegin,
+    VkSubpassContents contents);
+
+/* See Vulkan API for documentation. */
+template <>
+VKAPI_ATTR void VKAPI_CALL layer_vkCmdBeginRenderPass2<user_tag>(
+    VkCommandBuffer commandBuffer,
+    const VkRenderPassBeginInfo* pRenderPassBegin,
+    const VkSubpassBeginInfo* pSubpassBeginInfo);
+
+/* See Vulkan API for documentation. */
+template <>
+VKAPI_ATTR void VKAPI_CALL layer_vkCmdBeginRenderPass2KHR<user_tag>(
+    VkCommandBuffer commandBuffer,
+    const VkRenderPassBeginInfo* pRenderPassBegin,
+    const VkSubpassBeginInfo* pSubpassBeginInfo);
+
+/* See Vulkan API for documentation. */
+template <>
+VKAPI_ATTR void VKAPI_CALL layer_vkCmdBeginRendering<user_tag>(
+    VkCommandBuffer commandBuffer,
+    const VkRenderingInfo* pRenderingInfo);
+
+/* See Vulkan API for documentation. */
+template <>
+VKAPI_ATTR void VKAPI_CALL layer_vkCmdBeginRenderingKHR<user_tag>(
+    VkCommandBuffer commandBuffer,
+    const VkRenderingInfo* pRenderingInfo);
+
+/* See Vulkan API for documentation. */
+template <>
+VKAPI_ATTR void VKAPI_CALL layer_vkCmdEndRenderPass<user_tag>(
+    VkCommandBuffer commandBuffer);
+
+/* See Vulkan API for documentation. */
+template <>
+VKAPI_ATTR void VKAPI_CALL  layer_vkCmdEndRendering<user_tag>(
+    VkCommandBuffer commandBuffer);
+
+/* See Vulkan API for documentation. */
+template <>
+VKAPI_ATTR void VKAPI_CALL  layer_vkCmdEndRenderingKHR<user_tag>(
+    VkCommandBuffer commandBuffer);
+
+// Functions for draw calls (user_tag specializations defined in layer_device_functions_draw_call.cpp)
+
+/* See Vulkan API for documentation. */
+template <>
+VKAPI_ATTR void VKAPI_CALL layer_vkCmdDraw<user_tag>(
+    VkCommandBuffer commandBuffer,
+    uint32_t vertexCount,
+    uint32_t instanceCount,
+    uint32_t firstVertex,
+    uint32_t firstInstance);
+
+/* See Vulkan API for documentation. */
+template <>
+VKAPI_ATTR void VKAPI_CALL layer_vkCmdDrawIndexed<user_tag>(
+    VkCommandBuffer commandBuffer,
+    uint32_t indexCount,
+    uint32_t instanceCount,
+    uint32_t firstIndex,
+    int32_t vertexOffset,
+    uint32_t firstInstance);
+
+/* See Vulkan API for documentation. */
+template <>
+VKAPI_ATTR void VKAPI_CALL layer_vkCmdDrawIndexedIndirect<user_tag>(
+    VkCommandBuffer commandBuffer,
+    VkBuffer buffer,
+    VkDeviceSize offset,
+    uint32_t drawCount,
+    uint32_t stride);
+
+/* See Vulkan API for documentation. */
+template <>
+VKAPI_ATTR void VKAPI_CALL layer_vkCmdDrawIndexedIndirectCount<user_tag>(
+    VkCommandBuffer commandBuffer,
+    VkBuffer buffer,
+    VkDeviceSize offset,
+    VkBuffer countBuffer,
+    VkDeviceSize countBufferOffset,
+    uint32_t maxDrawCount,
+    uint32_t stride);
+
+/* See Vulkan API for documentation; same parameter list as the declaration above. */
+template <>
+VKAPI_ATTR void VKAPI_CALL layer_vkCmdDrawIndexedIndirectCountKHR<user_tag>(
+    VkCommandBuffer commandBuffer,
+    VkBuffer buffer,
+    VkDeviceSize offset,
+    VkBuffer countBuffer,
+    VkDeviceSize countBufferOffset,
+    uint32_t maxDrawCount,
+    uint32_t stride);
+
+/* See Vulkan API for documentation. */
+template <>
+VKAPI_ATTR void VKAPI_CALL layer_vkCmdDrawIndirect<user_tag>(
+    VkCommandBuffer commandBuffer,
+    VkBuffer buffer,
+    VkDeviceSize offset,
+    uint32_t drawCount,
+    uint32_t stride);
+
+/* See Vulkan API for documentation. */
+template <>
+VKAPI_ATTR void VKAPI_CALL layer_vkCmdDrawIndirectByteCountEXT<user_tag>(
+    VkCommandBuffer commandBuffer,
+    uint32_t instanceCount,
+    uint32_t firstInstance,
+    VkBuffer counterBuffer,
+    VkDeviceSize counterBufferOffset,
+    uint32_t counterOffset,
+    uint32_t vertexStride);
+
+/* See Vulkan API for documentation. */
+template <>
+VKAPI_ATTR void VKAPI_CALL layer_vkCmdDrawIndirectCount<user_tag>(
+    VkCommandBuffer commandBuffer,
+    VkBuffer buffer,
+    VkDeviceSize offset,
+    VkBuffer countBuffer,
+    VkDeviceSize countBufferOffset,
+    uint32_t maxDrawCount,
+    uint32_t stride);
+
+/* See Vulkan API for documentation; same parameter list as the declaration above. */
+template <>
+VKAPI_ATTR void VKAPI_CALL layer_vkCmdDrawIndirectCountKHR<user_tag>(
+    VkCommandBuffer commandBuffer,
+    VkBuffer buffer,
+    VkDeviceSize offset,
+    VkBuffer countBuffer,
+    VkDeviceSize countBufferOffset,
+    uint32_t maxDrawCount,
+    uint32_t stride);
+
+// Functions for compute dispatches (user_tag specializations defined in layer_device_functions_dispatch.cpp)
+
+/* See Vulkan API for documentation. */
+template <>
+VKAPI_ATTR void VKAPI_CALL layer_vkCmdDispatch<user_tag>(
+    VkCommandBuffer commandBuffer,
+    uint32_t groupCountX,
+    uint32_t groupCountY,
+    uint32_t groupCountZ);
+
+/* See Vulkan API for documentation. */
+template <>
+VKAPI_ATTR void VKAPI_CALL layer_vkCmdDispatchBase<user_tag>(
+    VkCommandBuffer commandBuffer,
+    uint32_t baseGroupX,
+    uint32_t baseGroupY,
+    uint32_t baseGroupZ,
+    uint32_t groupCountX,
+    uint32_t groupCountY,
+    uint32_t groupCountZ);
+
+/* See Vulkan API for documentation; same parameter list as the declaration above. */
+template <>
+VKAPI_ATTR void VKAPI_CALL layer_vkCmdDispatchBaseKHR<user_tag>(
+    VkCommandBuffer commandBuffer,
+    uint32_t baseGroupX,
+    uint32_t baseGroupY,
+    uint32_t baseGroupZ,
+    uint32_t groupCountX,
+    uint32_t groupCountY,
+    uint32_t groupCountZ);
+
+/* See Vulkan API for documentation. */
+template <>
+VKAPI_ATTR void VKAPI_CALL layer_vkCmdDispatchIndirect<user_tag>(
+    VkCommandBuffer commandBuffer,
+    VkBuffer buffer,
+    VkDeviceSize offset);
+
+// Functions for trace rays (definitions presumably in layer_device_functions_trace_rays.cpp - confirm)
+
+/* See Vulkan API for documentation. */
+template <>
+VKAPI_ATTR void VKAPI_CALL layer_vkCmdTraceRaysIndirect2KHR<user_tag>(
+    VkCommandBuffer commandBuffer,
+    VkDeviceAddress indirectDeviceAddress);
+
+/* See Vulkan API for documentation. */
+template <>
+VKAPI_ATTR void VKAPI_CALL layer_vkCmdTraceRaysIndirectKHR<user_tag>(
+    VkCommandBuffer commandBuffer,
+    const VkStridedDeviceAddressRegionKHR* pRaygenShaderBindingTable,
+    const VkStridedDeviceAddressRegionKHR* pMissShaderBindingTable,
+    const VkStridedDeviceAddressRegionKHR* pHitShaderBindingTable,
+    const VkStridedDeviceAddressRegionKHR* pCallableShaderBindingTable,
+    VkDeviceAddress indirectDeviceAddress);
+
+/* See Vulkan API for documentation. */
+template <>
+VKAPI_ATTR void VKAPI_CALL layer_vkCmdTraceRaysKHR<user_tag>(
+    VkCommandBuffer commandBuffer,
+    const VkStridedDeviceAddressRegionKHR* pRaygenShaderBindingTable,
+    const VkStridedDeviceAddressRegionKHR* pMissShaderBindingTable,
+    const VkStridedDeviceAddressRegionKHR* pHitShaderBindingTable,
+    const VkStridedDeviceAddressRegionKHR* pCallableShaderBindingTable,
+    uint32_t width,
+    uint32_t height,
+    uint32_t depth);
+
+// Functions for debug markers and labels (defined in layer_device_functions_debug.cpp, where they update the layer's tracker and do not call the driver)
+
+/* See Vulkan API for documentation. */
+template<>
+VKAPI_ATTR void VKAPI_CALL layer_vkCmdDebugMarkerBeginEXT<user_tag>(
+    VkCommandBuffer commandBuffer,
+    const VkDebugMarkerMarkerInfoEXT* pMarkerInfo);
+
+/* See Vulkan API for documentation. */
+template<>
+VKAPI_ATTR void VKAPI_CALL layer_vkCmdDebugMarkerEndEXT<user_tag>(
+    VkCommandBuffer commandBuffer);
+
+/* See Vulkan API for documentation. */
+template<>
+VKAPI_ATTR void VKAPI_CALL layer_vkCmdBeginDebugUtilsLabelEXT<user_tag>(
+    VkCommandBuffer commandBuffer,
+    const VkDebugUtilsLabelEXT* pLabelInfo);
+
+/* See Vulkan API for documentation. */
+template <>
+VKAPI_ATTR void VKAPI_CALL layer_vkCmdEndDebugUtilsLabelEXT<user_tag>(
+    VkCommandBuffer commandBuffer);
+
+// Functions for queues (see layer_device_functions_queue.cpp for the user_tag specializations)
+
+/* See Vulkan API for documentation. */
+template<>
+VKAPI_ATTR VkResult VKAPI_CALL layer_vkQueuePresentKHR<user_tag>(
+    VkQueue queue,
+    const VkPresentInfoKHR* pPresentInfo);
+
+/* See Vulkan API for documentation. */
+template<>
+VKAPI_ATTR VkResult VKAPI_CALL layer_vkQueueSubmit<user_tag>(
+    VkQueue queue,
+    uint32_t submitCount,
+    const VkSubmitInfo* pSubmits,
+    VkFence fence);
+
+/* See Vulkan API for documentation. */
+template<>
+VKAPI_ATTR VkResult VKAPI_CALL layer_vkQueueSubmit2<user_tag>(
+    VkQueue queue,
+    uint32_t submitCount,
+    const VkSubmitInfo2* pSubmits,
+    VkFence fence);
+
+/* See Vulkan API for documentation; same parameter list as the declaration above. */
+template<>
+VKAPI_ATTR VkResult VKAPI_CALL layer_vkQueueSubmit2KHR<user_tag>(
+    VkQueue queue,
+    uint32_t submitCount,
+    const VkSubmitInfo2* pSubmits,
+    VkFence fence);
diff --git a/layer_gpu_timeline/source/layer_device_functions_command_buffer.cpp b/layer_gpu_timeline/source/layer_device_functions_command_buffer.cpp
new file mode 100644
index 0000000..75fb36b
--- /dev/null
+++ b/layer_gpu_timeline/source/layer_device_functions_command_buffer.cpp
@@ -0,0 +1,161 @@
+/*
+ * SPDX-License-Identifier: MIT
+ * ----------------------------------------------------------------------------
+ * Copyright (c) 2024 Arm Limited
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ * ----------------------------------------------------------------------------
+ */
+
+#include <memory>
+#include <mutex>
+#include <thread>
+
+#include "device.hpp"
+#include "layer_device_functions.hpp"
+
+extern std::mutex g_vulkanLock;
+
+/* Forwards vkAllocateCommandBuffers to the driver and, on success, tracks each new handle. */
+template <>
+VKAPI_ATTR VkResult VKAPI_CALL layer_vkAllocateCommandBuffers<user_tag>(
+    VkDevice device,
+    const VkCommandBufferAllocateInfo* pAllocateInfo,
+    VkCommandBuffer* pCommandBuffers
+) {
+    LAYER_TRACE(__func__);
+
+    // The layer-wide store is only consulted while the global lock is held
+    std::unique_lock<std::mutex> guard { g_vulkanLock };
+    auto* const ctx = Device::retrieve(device);
+    guard.unlock();
+
+    // The driver call is made with the lock dropped
+    const VkResult status = ctx->driver.vkAllocateCommandBuffers(
+        device, pAllocateInfo, pCommandBuffers);
+    if (status == VK_SUCCESS)
+    {
+        // Reacquire the lock to register the handles the driver returned
+        guard.lock();
+        auto& stateTracker = ctx->getStateTracker();
+        const VkCommandPool pool = pAllocateInfo->commandPool;
+        const uint32_t count = pAllocateInfo->commandBufferCount;
+
+        for (uint32_t idx = 0; idx < count; ++idx)
+        {
+            stateTracker.allocateCommandBuffer(pool, pCommandBuffers[idx]);
+        }
+    }
+
+    return status;
+}
+
+/* Resets the tracker's record of the command buffer, then forwards vkBeginCommandBuffer. */
+template <>
+VKAPI_ATTR VkResult VKAPI_CALL layer_vkBeginCommandBuffer<user_tag>(
+    VkCommandBuffer commandBuffer,
+    const VkCommandBufferBeginInfo* pBeginInfo
+) {
+    LAYER_TRACE(__func__);
+
+    // Hold the lock to access layer-wide global store
+    std::unique_lock<std::mutex> lock { g_vulkanLock };
+    auto* layer = Device::retrieve(commandBuffer);
+    // Reset the tracked command buffer before the driver starts recording
+    layer->getStateTracker().getCommandBuffer(commandBuffer).reset();
+
+    // Release the lock to call into the driver
+    lock.unlock();
+    return layer->driver.vkBeginCommandBuffer(commandBuffer, pBeginInfo);
+}
+
+/* Resets the tracker's record of the command buffer, then forwards vkResetCommandBuffer. */
+template <>
+VKAPI_ATTR VkResult VKAPI_CALL layer_vkResetCommandBuffer<user_tag>(
+    VkCommandBuffer commandBuffer,
+    VkCommandBufferResetFlags flags
+) {
+    LAYER_TRACE(__func__);
+
+    // Tracker state lives in the layer-wide store, so update it under the global lock
+    std::unique_lock<std::mutex> guard { g_vulkanLock };
+    auto* const ctx = Device::retrieve(commandBuffer);
+    ctx->getStateTracker().getCommandBuffer(commandBuffer).reset();
+    guard.unlock();
+
+    // The driver call is made with the lock dropped
+    const VkResult status = ctx->driver.vkResetCommandBuffer(
+        commandBuffer, flags);
+
+    return status;
+}
+
+/* Stops tracking each listed command buffer, then forwards vkFreeCommandBuffers to the driver. */
+template <>
+VKAPI_ATTR void VKAPI_CALL layer_vkFreeCommandBuffers<user_tag>(
+    VkDevice device,
+    VkCommandPool commandPool,
+    uint32_t commandBufferCount,
+    const VkCommandBuffer* pCommandBuffers
+) {
+    LAYER_TRACE(__func__);
+
+    // Tracker bookkeeping happens under the global layer lock
+    std::unique_lock<std::mutex> guard { g_vulkanLock };
+    auto* const ctx = Device::retrieve(device);
+
+    auto& stateTracker = ctx->getStateTracker();
+    for (uint32_t idx = 0; idx < commandBufferCount; ++idx)
+    {
+        stateTracker.freeCommandBuffer(commandPool, pCommandBuffers[idx]);
+    }
+    guard.unlock();
+
+    // The driver call is made with the lock dropped
+    ctx->driver.vkFreeCommandBuffers(
+        device, commandPool, commandBufferCount, pCommandBuffers);
+}
+
+/* Records the executed secondaries against the primary, then forwards vkCmdExecuteCommands. */
+template <>
+VKAPI_ATTR void VKAPI_CALL layer_vkCmdExecuteCommands<user_tag>(
+    VkCommandBuffer commandBuffer,
+    uint32_t commandBufferCount,
+    const VkCommandBuffer* pCommandBuffers
+) {
+    LAYER_TRACE(__func__);
+
+    // One lock guards both the layer-wide store and the device-wide tracker
+    std::unique_lock<std::mutex> guard { g_vulkanLock };
+    auto* const ctx = Device::retrieve(commandBuffer);
+    auto& stateTracker = ctx->getStateTracker();
+
+    // Pass each secondary, in array order, to the primary being recorded
+    auto& target = stateTracker.getCommandBuffer(commandBuffer);
+    for (uint32_t idx = 0; idx < commandBufferCount; ++idx)
+    {
+        auto& child = stateTracker.getCommandBuffer(pCommandBuffers[idx]);
+        target.executeCommands(child);
+    }
+    guard.unlock();
+
+    // Forward the real call with the lock released
+    ctx->driver.vkCmdExecuteCommands(
+        commandBuffer, commandBufferCount, pCommandBuffers);
+}
diff --git a/layer_gpu_timeline/source/layer_device_functions_command_pool.cpp b/layer_gpu_timeline/source/layer_device_functions_command_pool.cpp
new file mode 100644
index 0000000..31bc1b4
--- /dev/null
+++ b/layer_gpu_timeline/source/layer_device_functions_command_pool.cpp
@@ -0,0 +1,105 @@
+/*
+ * SPDX-License-Identifier: MIT
+ * ----------------------------------------------------------------------------
+ * Copyright (c) 2024 Arm Limited
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ * ----------------------------------------------------------------------------
+ */
+
+#include <memory>
+#include <mutex>
+#include <thread>
+
+#include "device.hpp"
+#include "layer_device_functions.hpp"
+
+extern std::mutex g_vulkanLock;
+
+/* Forwards vkCreateCommandPool to the driver and, on success, tracks the new pool. */
+template <>
+VKAPI_ATTR VkResult VKAPI_CALL layer_vkCreateCommandPool<user_tag>(
+    VkDevice device,
+    const VkCommandPoolCreateInfo* pCreateInfo,
+    const VkAllocationCallbacks* pAllocator,
+    VkCommandPool* pCommandPool
+) {
+    LAYER_TRACE(__func__);
+
+    // The layer-wide store is only consulted while the global lock is held
+    std::unique_lock<std::mutex> guard { g_vulkanLock };
+    auto* const ctx = Device::retrieve(device);
+    guard.unlock();
+
+    // The driver call is made with the lock dropped
+    const VkResult status = ctx->driver.vkCreateCommandPool(
+        device, pCreateInfo, pAllocator, pCommandPool);
+    if (status == VK_SUCCESS)
+    {
+        // Reacquire the lock to register the pool the driver just created
+        guard.lock();
+        auto& stateTracker = ctx->getStateTracker();
+        stateTracker.createCommandPool(*pCommandPool);
+    }
+
+    // If the lock was retaken above it is released when guard leaves scope
+    return status;
+}
+
+/* Resets the tracker's record of the pool, then forwards vkResetCommandPool to the driver. */
+template <>
+VKAPI_ATTR VkResult VKAPI_CALL layer_vkResetCommandPool<user_tag>(
+    VkDevice device,
+    VkCommandPool commandPool,
+    VkCommandPoolResetFlags flags
+) {
+    LAYER_TRACE(__func__);
+
+    // Tracker state lives in the layer-wide store, so update it under the global lock
+    std::unique_lock<std::mutex> guard { g_vulkanLock };
+    auto* const ctx = Device::retrieve(device);
+    ctx->getStateTracker().getCommandPool(commandPool).reset();
+    guard.unlock();
+
+    // The driver call is made with the lock dropped
+    const VkResult status = ctx->driver.vkResetCommandPool(
+        device, commandPool, flags);
+    return status;
+}
+
+/* Stops tracking the pool, then forwards vkDestroyCommandPool to the driver. */
+template <>
+VKAPI_ATTR void VKAPI_CALL layer_vkDestroyCommandPool<user_tag>(
+    VkDevice device,
+    VkCommandPool commandPool,
+    const VkAllocationCallbacks* pAllocator
+) {
+    LAYER_TRACE(__func__);
+
+    // Tracker bookkeeping happens under the global layer lock
+    std::unique_lock<std::mutex> guard { g_vulkanLock };
+    auto* const ctx = Device::retrieve(device);
+
+    ctx->getStateTracker().destroyCommandPool(commandPool);
+    guard.unlock();
+
+    // The driver call is made with the lock dropped
+    ctx->driver.vkDestroyCommandPool(
+        device, commandPool, pAllocator);
+}
diff --git a/layer_gpu_timeline/source/layer_device_functions_debug.cpp b/layer_gpu_timeline/source/layer_device_functions_debug.cpp
new file mode 100644
index 0000000..4c1e1d9
--- /dev/null
+++ b/layer_gpu_timeline/source/layer_device_functions_debug.cpp
@@ -0,0 +1,123 @@
+/*
+ * SPDX-License-Identifier: MIT
+ * ----------------------------------------------------------------------------
+ * Copyright (c) 2024 Arm Limited
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ * ----------------------------------------------------------------------------
+ */
+
+#include <memory>
+#include <mutex>
+#include <thread>
+
+#include "device.hpp"
+#include "layer_device_functions.hpp"
+
+extern std::mutex g_vulkanLock;
+
+/* Passes the user's marker name to the tracked command buffer; the driver is not called. */
+template<>
+VKAPI_ATTR void VKAPI_CALL layer_vkCmdDebugMarkerBeginEXT<user_tag>(
+    VkCommandBuffer commandBuffer,
+    const VkDebugMarkerMarkerInfoEXT* pMarkerInfo
+) {
+    LAYER_TRACE(__func__);
+
+    // Only layer state is touched, so the global lock is held until return
+    std::lock_guard<std::mutex> guard { g_vulkanLock };
+    auto* const ctx = Device::retrieve(commandBuffer);
+
+    auto& stateTracker = ctx->getStateTracker();
+    auto& recording = stateTracker.getCommandBuffer(commandBuffer);
+
+    // Begin a debug marker on the tracked command buffer using the user's name
+    recording.debugMarkerBegin(pMarkerInfo->pMarkerName);
+
+    // The driver is deliberately not called for user labels: they are
+    // emitted via the comms side-channel for each workload, which avoids
+    // polluting the layer's own use of driver labels for tagging
+}
+
+/* Ends the current debug marker on the tracked command buffer; the driver is not called. */
+template<>
+VKAPI_ATTR void VKAPI_CALL layer_vkCmdDebugMarkerEndEXT<user_tag>(
+    VkCommandBuffer commandBuffer
+) {
+    LAYER_TRACE(__func__);
+
+    // Only layer state is touched, so the global lock is held until return
+    std::lock_guard<std::mutex> guard { g_vulkanLock };
+    auto* const ctx = Device::retrieve(commandBuffer);
+
+    auto& stateTracker = ctx->getStateTracker();
+    auto& recording = stateTracker.getCommandBuffer(commandBuffer);
+
+    // End the debug marker on the tracked command buffer
+    recording.debugMarkerEnd();
+
+    // The driver is deliberately not called for user labels: they are
+    // emitted via the comms side-channel for each workload, which avoids
+    // polluting the layer's own use of driver labels for tagging
+}
+
+/* Passes the user's label name to the tracked command buffer; the driver is not called. */
+template<>
+VKAPI_ATTR void VKAPI_CALL layer_vkCmdBeginDebugUtilsLabelEXT<user_tag>(
+    VkCommandBuffer commandBuffer,
+    const VkDebugUtilsLabelEXT* pLabelInfo
+) {
+    LAYER_TRACE(__func__);
+
+    // Only layer state is touched, so the global lock is held until return
+    std::lock_guard<std::mutex> guard { g_vulkanLock };
+    auto* const ctx = Device::retrieve(commandBuffer);
+
+    auto& stateTracker = ctx->getStateTracker();
+    auto& recording = stateTracker.getCommandBuffer(commandBuffer);
+
+    // Labels share the tracker's debug marker path with the marker extension
+    recording.debugMarkerBegin(pLabelInfo->pLabelName);
+
+    // The driver is deliberately not called for user labels: they are
+    // emitted via the comms side-channel for each workload, which avoids
+    // polluting the layer's own use of driver labels for tagging
+}
+
+/* Ends the current debug label on the tracked command buffer; the driver is not called. */
+template<>
+VKAPI_ATTR void VKAPI_CALL layer_vkCmdEndDebugUtilsLabelEXT<user_tag>(
+    VkCommandBuffer commandBuffer
+) {
+    LAYER_TRACE(__func__);
+
+    // Only layer state is touched, so the global lock is held until return
+    std::lock_guard<std::mutex> guard { g_vulkanLock };
+    auto* const ctx = Device::retrieve(commandBuffer);
+
+    auto& stateTracker = ctx->getStateTracker();
+    auto& recording = stateTracker.getCommandBuffer(commandBuffer);
+
+    // Labels share the tracker's debug marker path with the marker extension
+    recording.debugMarkerEnd();
+
+    // The driver is deliberately not called for user labels: they are
+    // emitted via the comms side-channel for each workload, which avoids
+    // polluting the layer's own use of driver labels for tagging
+}
diff --git a/layer_gpu_timeline/source/layer_device_functions_dispatch.cpp b/layer_gpu_timeline/source/layer_device_functions_dispatch.cpp
new file mode 100644
index 0000000..88bf568
--- /dev/null
+++ b/layer_gpu_timeline/source/layer_device_functions_dispatch.cpp
@@ -0,0 +1,131 @@
+/*
+ * SPDX-License-Identifier: MIT
+ * ----------------------------------------------------------------------------
+ * Copyright (c) 2024 Arm Limited
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ * ----------------------------------------------------------------------------
+ */
+
+#include <memory>
+#include <mutex>
+#include <thread>
+
+#include "device.hpp"
+#include "layer_device_functions.hpp"
+
+extern std::mutex g_vulkanLock;
+
+static void registerDispatch(
+    Device* layer,
+    VkCommandBuffer commandBuffer
+) {
+    // Bump the dispatch count in the stats tracked for this command buffer
+    auto& tracker = layer->getStateTracker();
+    tracker.getCommandBuffer(commandBuffer).getStats().incDispatchCount();
+}
+
+/* Counts the dispatch in the tracker, then forwards vkCmdDispatch to the driver. */
+template <>
+VKAPI_ATTR void VKAPI_CALL layer_vkCmdDispatch<user_tag>(
+    VkCommandBuffer commandBuffer,
+    uint32_t groupCountX,
+    uint32_t groupCountY,
+    uint32_t groupCountZ
+) {
+    LAYER_TRACE(__func__);
+
+    // Layer state is only read and updated while the global lock is held
+    std::unique_lock<std::mutex> guard { g_vulkanLock };
+    auto* const ctx = Device::retrieve(commandBuffer);
+    registerDispatch(ctx, commandBuffer);
+    guard.unlock();
+
+    // The driver call is made with the lock dropped
+    ctx->driver.vkCmdDispatch(
+        commandBuffer, groupCountX, groupCountY, groupCountZ);
+}
+
+/* Counts the dispatch in the tracker, then forwards vkCmdDispatchBase to the driver. */
+template <>
+VKAPI_ATTR void VKAPI_CALL layer_vkCmdDispatchBase<user_tag>(
+    VkCommandBuffer commandBuffer,
+    uint32_t baseGroupX,
+    uint32_t baseGroupY,
+    uint32_t baseGroupZ,
+    uint32_t groupCountX,
+    uint32_t groupCountY,
+    uint32_t groupCountZ
+) {
+    LAYER_TRACE(__func__);
+
+    // Only the tracker update runs under the global lock; the driver call does not
+    std::unique_lock<std::mutex> guard { g_vulkanLock };
+    auto* const ctx = Device::retrieve(commandBuffer);
+    registerDispatch(ctx, commandBuffer);
+    guard.unlock();
+
+    ctx->driver.vkCmdDispatchBase(
+        commandBuffer, baseGroupX, baseGroupY, baseGroupZ,
+        groupCountX, groupCountY, groupCountZ);
+}
+
+/* Counts the dispatch in the tracker, then forwards vkCmdDispatchBaseKHR to the driver. */
+template <>
+VKAPI_ATTR void VKAPI_CALL layer_vkCmdDispatchBaseKHR<user_tag>(
+    VkCommandBuffer commandBuffer,
+    uint32_t baseGroupX,
+    uint32_t baseGroupY,
+    uint32_t baseGroupZ,
+    uint32_t groupCountX,
+    uint32_t groupCountY,
+    uint32_t groupCountZ
+) {
+    LAYER_TRACE(__func__);
+
+    // Only the tracker update runs under the global lock; the driver call does not
+    std::unique_lock<std::mutex> guard { g_vulkanLock };
+    auto* const ctx = Device::retrieve(commandBuffer);
+    registerDispatch(ctx, commandBuffer);
+    guard.unlock();
+
+    ctx->driver.vkCmdDispatchBaseKHR(
+        commandBuffer, baseGroupX, baseGroupY, baseGroupZ,
+        groupCountX, groupCountY, groupCountZ);
+}
+
+/* Counts the dispatch in the tracker, then forwards vkCmdDispatchIndirect to the driver. */
+template <>
+VKAPI_ATTR void VKAPI_CALL layer_vkCmdDispatchIndirect<user_tag>(
+    VkCommandBuffer commandBuffer,
+    VkBuffer buffer,
+    VkDeviceSize offset
+) {
+    LAYER_TRACE(__func__);
+
+    // Layer state is only read and updated while the global lock is held
+    std::unique_lock<std::mutex> guard { g_vulkanLock };
+    auto* const ctx = Device::retrieve(commandBuffer);
+    registerDispatch(ctx, commandBuffer);
+    guard.unlock();
+
+    // The driver call is made with the lock dropped
+    ctx->driver.vkCmdDispatchIndirect(
+        commandBuffer, buffer, offset);
+}
diff --git a/layer_gpu_timeline/source/layer_device_functions_draw_call.cpp b/layer_gpu_timeline/source/layer_device_functions_draw_call.cpp
new file mode 100644
index 0000000..49cf669
--- /dev/null
+++ b/layer_gpu_timeline/source/layer_device_functions_draw_call.cpp
@@ -0,0 +1,251 @@
+/*
+ * SPDX-License-Identifier: MIT
+ * ----------------------------------------------------------------------------
+ * Copyright (c) 2024 Arm Limited
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ * ----------------------------------------------------------------------------
+ */
+
+#include <memory>
+#include <mutex>
+#include <thread>
+
+#include "device.hpp"
+#include "layer_device_functions.hpp"
+
+extern std::mutex g_vulkanLock;
+
+static void registerDrawCall(
+    Device* layer,
+    VkCommandBuffer commandBuffer
+) {
+    // Bump the draw call count in the stats tracked for this command buffer
+    auto& tracker = layer->getStateTracker();
+    tracker.getCommandBuffer(commandBuffer).getStats().incDrawCallCount();
+}
+
+/* Counts the draw call in the tracker, then forwards vkCmdDraw to the driver. */
+template <>
+VKAPI_ATTR void VKAPI_CALL layer_vkCmdDraw<user_tag>(
+    VkCommandBuffer commandBuffer,
+    uint32_t vertexCount,
+    uint32_t instanceCount,
+    uint32_t firstVertex,
+    uint32_t firstInstance
+) {
+    LAYER_TRACE(__func__);
+
+    // Layer state is only read and updated while the global lock is held
+    std::unique_lock<std::mutex> guard { g_vulkanLock };
+    auto* const ctx = Device::retrieve(commandBuffer);
+    registerDrawCall(ctx, commandBuffer);
+    guard.unlock();
+
+    // The driver call is made with the lock dropped
+    ctx->driver.vkCmdDraw(
+        commandBuffer, vertexCount, instanceCount, firstVertex, firstInstance);
+}
+
+/* Counts the draw call in the tracker, then forwards vkCmdDrawIndexed to the driver. */
+template <>
+VKAPI_ATTR void VKAPI_CALL layer_vkCmdDrawIndexed<user_tag>(
+    VkCommandBuffer commandBuffer,
+    uint32_t indexCount,
+    uint32_t instanceCount,
+    uint32_t firstIndex,
+    int32_t vertexOffset,
+    uint32_t firstInstance
+) {
+    LAYER_TRACE(__func__);
+
+    // Only the tracker update runs under the global lock; the driver call does not
+    std::unique_lock<std::mutex> guard { g_vulkanLock };
+    auto* const ctx = Device::retrieve(commandBuffer);
+    registerDrawCall(ctx, commandBuffer);
+    guard.unlock();
+
+    ctx->driver.vkCmdDrawIndexed(
+        commandBuffer, indexCount, instanceCount,
+        firstIndex, vertexOffset, firstInstance);
+}
+
+/* Counts the draw call in the tracker, then forwards vkCmdDrawIndexedIndirect to the driver. */
+template <>
+VKAPI_ATTR void VKAPI_CALL layer_vkCmdDrawIndexedIndirect<user_tag>(
+    VkCommandBuffer commandBuffer,
+    VkBuffer buffer,
+    VkDeviceSize offset,
+    uint32_t drawCount,
+    uint32_t stride
+) {
+    LAYER_TRACE(__func__);
+
+    // Layer state is only read and updated while the global lock is held
+    std::unique_lock<std::mutex> guard { g_vulkanLock };
+    auto* const ctx = Device::retrieve(commandBuffer);
+    registerDrawCall(ctx, commandBuffer);
+    guard.unlock();
+
+    // The driver call is made with the lock dropped
+    ctx->driver.vkCmdDrawIndexedIndirect(
+        commandBuffer, buffer, offset, drawCount, stride);
+}
+
+/* Counts the draw call in the tracker, then forwards vkCmdDrawIndexedIndirectCount. */
+template <>
+VKAPI_ATTR void VKAPI_CALL layer_vkCmdDrawIndexedIndirectCount<user_tag>(
+    VkCommandBuffer commandBuffer,
+    VkBuffer buffer,
+    VkDeviceSize offset,
+    VkBuffer countBuffer,
+    VkDeviceSize countBufferOffset,
+    uint32_t maxDrawCount,
+    uint32_t stride
+) {
+    LAYER_TRACE(__func__);
+
+    // Only the tracker update runs under the global lock; the driver call does not
+    std::unique_lock<std::mutex> guard { g_vulkanLock };
+    auto* const ctx = Device::retrieve(commandBuffer);
+    registerDrawCall(ctx, commandBuffer);
+    guard.unlock();
+
+    ctx->driver.vkCmdDrawIndexedIndirectCount(
+        commandBuffer, buffer, offset,
+        countBuffer, countBufferOffset, maxDrawCount, stride);
+}
+
+/* Counts the draw call in the tracker, then forwards vkCmdDrawIndexedIndirectCountKHR. */
+template <>
+VKAPI_ATTR void VKAPI_CALL layer_vkCmdDrawIndexedIndirectCountKHR<user_tag>(
+    VkCommandBuffer commandBuffer,
+    VkBuffer buffer,
+    VkDeviceSize offset,
+    VkBuffer countBuffer,
+    VkDeviceSize countBufferOffset,
+    uint32_t maxDrawCount,
+    uint32_t stride
+) {
+    LAYER_TRACE(__func__);
+
+    // Only the tracker update runs under the global lock; the driver call does not
+    std::unique_lock<std::mutex> guard { g_vulkanLock };
+    auto* const ctx = Device::retrieve(commandBuffer);
+    registerDrawCall(ctx, commandBuffer);
+    guard.unlock();
+
+    ctx->driver.vkCmdDrawIndexedIndirectCountKHR(
+        commandBuffer, buffer, offset,
+        countBuffer, countBufferOffset, maxDrawCount, stride);
+}
+
+/* Counts the draw call in the tracker, then forwards vkCmdDrawIndirect to the driver. */
+template <>
+VKAPI_ATTR void VKAPI_CALL layer_vkCmdDrawIndirect<user_tag>(
+    VkCommandBuffer commandBuffer,
+    VkBuffer buffer,
+    VkDeviceSize offset,
+    uint32_t drawCount,
+    uint32_t stride
+) {
+    LAYER_TRACE(__func__);
+
+    // Layer state is only read and updated while the global lock is held
+    std::unique_lock<std::mutex> guard { g_vulkanLock };
+    auto* const ctx = Device::retrieve(commandBuffer);
+    registerDrawCall(ctx, commandBuffer);
+    guard.unlock();
+
+    // The driver call is made with the lock dropped
+    ctx->driver.vkCmdDrawIndirect(
+        commandBuffer, buffer, offset, drawCount, stride);
+}
+
+/* Counts the draw call in the tracker, then forwards vkCmdDrawIndirectByteCountEXT. */
+template <>
+VKAPI_ATTR void VKAPI_CALL layer_vkCmdDrawIndirectByteCountEXT<user_tag>(
+    VkCommandBuffer commandBuffer,
+    uint32_t instanceCount,
+    uint32_t firstInstance,
+    VkBuffer counterBuffer,
+    VkDeviceSize counterBufferOffset,
+    uint32_t counterOffset,
+    uint32_t vertexStride
+) {
+    LAYER_TRACE(__func__);
+
+    // Only the tracker update runs under the global lock; the driver call does not
+    std::unique_lock<std::mutex> guard { g_vulkanLock };
+    auto* const ctx = Device::retrieve(commandBuffer);
+    registerDrawCall(ctx, commandBuffer);
+    guard.unlock();
+
+    ctx->driver.vkCmdDrawIndirectByteCountEXT(
+        commandBuffer, instanceCount, firstInstance,
+        counterBuffer, counterBufferOffset, counterOffset, vertexStride);
+}
+
+/* Counts the draw call in the tracker, then forwards vkCmdDrawIndirectCount to the driver. */
+template <>
+VKAPI_ATTR void VKAPI_CALL layer_vkCmdDrawIndirectCount<user_tag>(
+    VkCommandBuffer commandBuffer,
+    VkBuffer buffer,
+    VkDeviceSize offset,
+    VkBuffer countBuffer,
+    VkDeviceSize countBufferOffset,
+    uint32_t maxDrawCount,
+    uint32_t stride
+) {
+    LAYER_TRACE(__func__);
+
+    // Only the tracker update runs under the global lock; the driver call does not
+    std::unique_lock<std::mutex> guard { g_vulkanLock };
+    auto* const ctx = Device::retrieve(commandBuffer);
+    registerDrawCall(ctx, commandBuffer);
+    guard.unlock();
+
+    ctx->driver.vkCmdDrawIndirectCount(
+        commandBuffer, buffer, offset,
+        countBuffer, countBufferOffset, maxDrawCount, stride);
+}
+
+/* Counts the draw call in the tracker, then forwards vkCmdDrawIndirectCountKHR to the driver. */
+template <>
+VKAPI_ATTR void VKAPI_CALL layer_vkCmdDrawIndirectCountKHR<user_tag>(
+    VkCommandBuffer commandBuffer,
+    VkBuffer buffer,
+    VkDeviceSize offset,
+    VkBuffer countBuffer,
+    VkDeviceSize countBufferOffset,
+    uint32_t maxDrawCount,
+    uint32_t stride
+) {
+    LAYER_TRACE(__func__);
+
+    // Only the tracker update runs under the global lock; the driver call does not
+    std::unique_lock<std::mutex> guard { g_vulkanLock };
+    auto* const ctx = Device::retrieve(commandBuffer);
+    registerDrawCall(ctx, commandBuffer);
+    guard.unlock();
+
+    ctx->driver.vkCmdDrawIndirectCountKHR(
+        commandBuffer, buffer, offset,
+        countBuffer, countBufferOffset, maxDrawCount, stride);
+}
diff --git a/layer_gpu_timeline/source/layer_device_functions_queue.cpp b/layer_gpu_timeline/source/layer_device_functions_queue.cpp
new file mode 100644
index 0000000..906a39e
--- /dev/null
+++ b/layer_gpu_timeline/source/layer_device_functions_queue.cpp
@@ -0,0 +1,173 @@
+/*
+ * SPDX-License-Identifier: MIT
+ * ----------------------------------------------------------------------------
+ * Copyright (c) 2024 Arm Limited
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ * ----------------------------------------------------------------------------
+ */
+
+#include <functional>
+#include <memory>
+#include <mutex>
+#include <thread>
+
+#include "device.hpp"
+#include "layer_device_functions.hpp"
+#include "utils/misc.hpp"
+
+extern std::mutex g_vulkanLock;
+
+/* See Vulkan API for documentation. */
+template<>
+VKAPI_ATTR VkResult VKAPI_CALL layer_vkQueuePresentKHR<user_tag>(
+    VkQueue queue,
+    const VkPresentInfoKHR* pPresentInfo
+) {
+    LAYER_TRACE(__func__);
+
+    // Hold the lock to access layer-wide global store
+    std::unique_lock<std::mutex> lock { g_vulkanLock };
+    auto* layer = Device::retrieve(queue);
+
+    // Notify the tracker of the present before forwarding it
+    auto& tracker = layer->getStateTracker();
+    tracker.queuePresent();
+
+    // Release the lock to call into the driver
+    lock.unlock();
+    return layer->driver.vkQueuePresentKHR(queue, pPresentInfo);
+}
+
+/* See Vulkan API for documentation. */
+template<>
+VKAPI_ATTR VkResult VKAPI_CALL layer_vkQueueSubmit<user_tag>(
+    VkQueue queue,
+    uint32_t submitCount,
+    const VkSubmitInfo* pSubmits,
+    VkFence fence
+) {
+    LAYER_TRACE(__func__);
+
+    // Hold the lock to access layer-wide global store
+    std::unique_lock<std::mutex> lock { g_vulkanLock };
+    auto* layer = Device::retrieve(queue);
+
+    // Callback handed to the queue tracker when running a command stream
+    using namespace std::placeholders;
+    auto onSubmit = std::bind(&Device::onWorkloadSubmit, layer, _1);
+
+    auto& tracker = layer->getStateTracker();
+    auto& trackQueue = tracker.getQueue(queue);
+
+    // Run the submit-time command stream of every submitted command buffer
+    for (uint32_t i = 0; i < submitCount; i++)
+    {
+        const auto& submit = pSubmits[i];
+        for (uint32_t j = 0; j < submit.commandBufferCount; j++)
+        {
+            auto& trackCB = tracker.getCommandBuffer(submit.pCommandBuffers[j]);
+            const auto& LCS = trackCB.getSubmitCommandStream();
+
+            trackQueue.runSubmitCommandStream(LCS, onSubmit);
+        }
+    }
+
+    // Release the lock to call into the driver
+    lock.unlock();
+    return layer->driver.vkQueueSubmit(queue, submitCount, pSubmits, fence);
+}
+
+/* See Vulkan API for documentation. */
+template<>
+VKAPI_ATTR VkResult VKAPI_CALL layer_vkQueueSubmit2<user_tag>(
+    VkQueue queue,
+    uint32_t submitCount,
+    const VkSubmitInfo2* pSubmits,
+    VkFence fence
+) {
+    LAYER_TRACE(__func__);
+
+    // Hold the lock to access layer-wide global store
+    std::unique_lock<std::mutex> lock { g_vulkanLock };
+    auto* layer = Device::retrieve(queue);
+
+    // Callback handed to the queue tracker when running a command stream
+    using namespace std::placeholders;
+    auto onSubmit = std::bind(&Device::onWorkloadSubmit, layer, _1);
+
+    auto& tracker = layer->getStateTracker();
+    auto& trackQueue = tracker.getQueue(queue);
+
+    // VkSubmitInfo2 lists its command buffers via pCommandBufferInfos entries
+    for (uint32_t i = 0; i < submitCount; i++)
+    {
+        const auto& submit = pSubmits[i];
+        for (uint32_t j = 0; j < submit.commandBufferInfoCount; j++)
+        {
+            auto& trackCB = tracker.getCommandBuffer(submit.pCommandBufferInfos[j].commandBuffer);
+            const auto& LCS = trackCB.getSubmitCommandStream();
+
+            trackQueue.runSubmitCommandStream(LCS, onSubmit);
+        }
+    }
+
+    // Release the lock to call into the driver
+    lock.unlock();
+    return layer->driver.vkQueueSubmit2(queue, submitCount, pSubmits, fence);
+}
+
+/* See Vulkan API for documentation. */
+template<>
+VKAPI_ATTR VkResult VKAPI_CALL layer_vkQueueSubmit2KHR<user_tag>(
+    VkQueue queue,
+    uint32_t submitCount,
+    const VkSubmitInfo2* pSubmits,
+    VkFence fence
+) {
+    LAYER_TRACE(__func__);
+
+    // Hold the lock to access layer-wide global store
+    std::unique_lock<std::mutex> lock { g_vulkanLock };
+    auto* layer = Device::retrieve(queue);
+
+    // Callback handed to the queue tracker when running a command stream
+    using namespace std::placeholders;
+    auto onSubmit = std::bind(&Device::onWorkloadSubmit, layer, _1);
+
+    auto& tracker = layer->getStateTracker();
+    auto& trackQueue = tracker.getQueue(queue);
+
+    // VkSubmitInfo2 lists its command buffers via pCommandBufferInfos entries
+    for (uint32_t i = 0; i < submitCount; i++)
+    {
+        const auto& submit = pSubmits[i];
+        for (uint32_t j = 0; j < submit.commandBufferInfoCount; j++)
+        {
+            auto& trackCB = tracker.getCommandBuffer(submit.pCommandBufferInfos[j].commandBuffer);
+            const auto& LCS = trackCB.getSubmitCommandStream();
+
+            trackQueue.runSubmitCommandStream(LCS, onSubmit);
+        }
+    }
+
+    // Release the lock to call into the driver
+    lock.unlock();
+    return layer->driver.vkQueueSubmit2KHR(queue, submitCount, pSubmits, fence);
+}
diff --git a/layer_gpu_timeline/source/layer_device_functions_render_pass.cpp b/layer_gpu_timeline/source/layer_device_functions_render_pass.cpp
new file mode 100644
index 0000000..3486da3
--- /dev/null
+++ b/layer_gpu_timeline/source/layer_device_functions_render_pass.cpp
@@ -0,0 +1,433 @@
+/*
+ * SPDX-License-Identifier: MIT
+ * ----------------------------------------------------------------------------
+ * Copyright (c) 2024 Arm Limited
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ * ----------------------------------------------------------------------------
+ */
+
+#include <memory>
+#include <mutex>
+#include <thread>
+
+#include "device.hpp"
+#include "layer_device_functions.hpp"
+
+#include "framework/utils.hpp"
+#include "trackers/render_pass.hpp"
+
+extern std::mutex g_vulkanLock;
+
+/* Layer intercept: creates the render pass, then registers it for tracking. */
+template <>
+VKAPI_ATTR VkResult VKAPI_CALL layer_vkCreateRenderPass<user_tag>(
+    VkDevice device,
+    const VkRenderPassCreateInfo* pCreateInfo,
+    const VkAllocationCallbacks* pAllocator,
+    VkRenderPass* pRenderPass
+) {
+    LAYER_TRACE(__func__);
+
+    // Layer-wide state may only be touched while the global lock is held
+    std::unique_lock<std::mutex> guard(g_vulkanLock);
+    auto* layer = Device::retrieve(device);
+
+    // Drop the lock before calling down into the driver
+    guard.unlock();
+    const VkResult result = layer->driver.vkCreateRenderPass(device, pCreateInfo, pAllocator, pRenderPass);
+
+    if (result == VK_SUCCESS)
+    {
+        // Creation succeeded, so retake the lock and track the new handle
+        guard.lock();
+        auto& tracker = layer->getStateTracker();
+        tracker.createRenderPass(*pRenderPass, *pCreateInfo);
+    }
+
+    return result;
+}
+
+/* Layer intercept: creates the render pass, then registers it for tracking. */
+template <>
+VKAPI_ATTR VkResult VKAPI_CALL layer_vkCreateRenderPass2<user_tag>(
+    VkDevice device,
+    const VkRenderPassCreateInfo2* pCreateInfo,
+    const VkAllocationCallbacks* pAllocator,
+    VkRenderPass* pRenderPass
+) {
+    LAYER_TRACE(__func__);
+
+    // Layer-wide state may only be touched while the global lock is held
+    std::unique_lock<std::mutex> guard(g_vulkanLock);
+    auto* layer = Device::retrieve(device);
+
+    // Drop the lock before calling down into the driver
+    guard.unlock();
+    const VkResult result = layer->driver.vkCreateRenderPass2(device, pCreateInfo, pAllocator, pRenderPass);
+
+    if (result == VK_SUCCESS)
+    {
+        // Creation succeeded, so retake the lock and track the new handle
+        guard.lock();
+        auto& tracker = layer->getStateTracker();
+        tracker.createRenderPass(*pRenderPass, *pCreateInfo);
+    }
+
+    return result;
+}
+
+/* Layer intercept: creates the render pass, then registers it for tracking. */
+template <>
+VKAPI_ATTR VkResult VKAPI_CALL layer_vkCreateRenderPass2KHR<user_tag>(
+    VkDevice device,
+    const VkRenderPassCreateInfo2* pCreateInfo,
+    const VkAllocationCallbacks* pAllocator,
+    VkRenderPass* pRenderPass
+) {
+    LAYER_TRACE(__func__);
+
+    // Layer-wide state may only be touched while the global lock is held
+    std::unique_lock<std::mutex> guard(g_vulkanLock);
+    auto* layer = Device::retrieve(device);
+
+    // Drop the lock before calling down into the driver
+    guard.unlock();
+    const VkResult result = layer->driver.vkCreateRenderPass2KHR(device, pCreateInfo, pAllocator, pRenderPass);
+
+    if (result == VK_SUCCESS)
+    {
+        // Creation succeeded, so retake the lock and track the new handle
+        guard.lock();
+        auto& tracker = layer->getStateTracker();
+        tracker.createRenderPass(*pRenderPass, *pCreateInfo);
+    }
+
+    return result;
+}
+
+/* Layer intercept: notifies the tracker, then destroys the render pass. */
+template <>
+VKAPI_ATTR void VKAPI_CALL layer_vkDestroyRenderPass<user_tag>(
+    VkDevice device,
+    VkRenderPass renderPass,
+    const VkAllocationCallbacks* pAllocator
+) {
+    LAYER_TRACE(__func__);
+
+    // Layer-wide state may only be touched while the global lock is held
+    std::unique_lock<std::mutex> guard(g_vulkanLock);
+    auto* layer = Device::retrieve(device);
+
+    // Tell the tracker about the destroy before the driver sees it
+    layer->getStateTracker().destroyRenderPass(renderPass);
+    guard.unlock();
+
+    // The driver is called with the lock released
+    layer->driver.vkDestroyRenderPass(device, renderPass, pAllocator);
+}
+
+/**
+ * Layer intercept for vkCmdBeginRenderPass; see Vulkan API for parameters.
+ *
+ * Registers the render pass start with the command buffer tracker, then
+ * wraps the pass in a debug label carrying the returned tag ("t<tagID>").
+ * The label is begun here before the driver's vkCmdBeginRenderPass call.
+ */
+template <>
+VKAPI_ATTR void VKAPI_CALL layer_vkCmdBeginRenderPass<user_tag>(
+    VkCommandBuffer commandBuffer,
+    const VkRenderPassBeginInfo* pRenderPassBegin,
+    VkSubpassContents contents
+) {
+    LAYER_TRACE(__func__);
+
+    // Hold the lock to access layer-wide global store
+    std::unique_lock<std::mutex> lock { g_vulkanLock };
+    auto* layer = Device::retrieve(commandBuffer);
+
+    auto& tracker = layer->getStateTracker();
+    auto& cb = tracker.getCommandBuffer(commandBuffer);
+
+    auto& rp = tracker.getRenderPass(pRenderPassBegin->renderPass);
+    uint32_t width = pRenderPassBegin->renderArea.extent.width;
+    uint32_t height = pRenderPassBegin->renderArea.extent.height;
+
+    // Notify the command buffer we are starting a new render pass
+    uint64_t tagID = cb.renderPassBegin(rp, width, height);
+
+    // Emit the unique workload tag into the command stream
+    std::string tagLabel = formatString("t%" PRIu64, tagID);
+    VkDebugUtilsLabelEXT tagInfo {
+        .sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT,
+        .pNext = nullptr,
+        .pLabelName = tagLabel.c_str(),
+        .color = { 0.0f, 0.0f, 0.0f, 0.0f }
+    };
+
+    // Release the lock to call into the driver
+    lock.unlock();
+    layer->driver.vkCmdBeginDebugUtilsLabelEXT(commandBuffer, &tagInfo);
+    layer->driver.vkCmdBeginRenderPass(commandBuffer, pRenderPassBegin, contents);
+}
+
+/* Layer intercept: wraps the render pass in a tag label, then forwards. */
+template <>
+VKAPI_ATTR void VKAPI_CALL layer_vkCmdBeginRenderPass2<user_tag>(
+    VkCommandBuffer commandBuffer,
+    const VkRenderPassBeginInfo* pRenderPassBegin,
+    const VkSubpassBeginInfo* pSubpassBeginInfo
+) {
+    LAYER_TRACE(__func__);
+
+    // Layer-wide state may only be touched while the global lock is held
+    std::unique_lock<std::mutex> guard(g_vulkanLock);
+    auto* layer = Device::retrieve(commandBuffer);
+
+    auto& tracker = layer->getStateTracker();
+    auto& trackCB = tracker.getCommandBuffer(commandBuffer);
+    auto& trackRP = tracker.getRenderPass(pRenderPassBegin->renderPass);
+
+    // Register the pass start; the returned tag identifies this workload
+    const auto& extent = pRenderPassBegin->renderArea.extent;
+    uint64_t tagID = trackCB.renderPassBegin(
+        trackRP, extent.width, extent.height);
+
+    // Tracker access is finished, so drop the lock before further work
+    guard.unlock();
+
+    // Label text is "t<tagID>"; it marks the pass in the command stream
+    std::string tagLabel = formatString("t%" PRIu64, tagID);
+    VkDebugUtilsLabelEXT tagInfo {
+        .sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT,
+        .pNext = nullptr,
+        .pLabelName = tagLabel.c_str(),
+        .color = { 0.0f, 0.0f, 0.0f, 0.0f }
+    };
+
+    layer->driver.vkCmdBeginDebugUtilsLabelEXT(commandBuffer, &tagInfo);
+    layer->driver.vkCmdBeginRenderPass2(commandBuffer, pRenderPassBegin, pSubpassBeginInfo);
+}
+
+/* Layer intercept: wraps the render pass in a tag label, then forwards. */
+template <>
+VKAPI_ATTR void VKAPI_CALL layer_vkCmdBeginRenderPass2KHR<user_tag>(
+    VkCommandBuffer commandBuffer,
+    const VkRenderPassBeginInfo* pRenderPassBegin,
+    const VkSubpassBeginInfo* pSubpassBeginInfo
+) {
+    LAYER_TRACE(__func__);
+
+    // Layer-wide state may only be touched while the global lock is held
+    std::unique_lock<std::mutex> guard(g_vulkanLock);
+    auto* layer = Device::retrieve(commandBuffer);
+
+    auto& tracker = layer->getStateTracker();
+    auto& trackCB = tracker.getCommandBuffer(commandBuffer);
+    auto& trackRP = tracker.getRenderPass(pRenderPassBegin->renderPass);
+
+    // Register the pass start; the returned tag identifies this workload
+    const auto& extent = pRenderPassBegin->renderArea.extent;
+    uint64_t tagID = trackCB.renderPassBegin(
+        trackRP, extent.width, extent.height);
+
+    // Tracker access is finished, so drop the lock before further work
+    guard.unlock();
+
+    // Label text is "t<tagID>"; it marks the pass in the command stream
+    std::string tagLabel = formatString("t%" PRIu64, tagID);
+    VkDebugUtilsLabelEXT tagInfo {
+        .sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT,
+        .pNext = nullptr,
+        .pLabelName = tagLabel.c_str(),
+        .color = { 0.0f, 0.0f, 0.0f, 0.0f }
+    };
+
+    layer->driver.vkCmdBeginDebugUtilsLabelEXT(commandBuffer, &tagInfo);
+    layer->driver.vkCmdBeginRenderPass2KHR(commandBuffer, pRenderPassBegin, pSubpassBeginInfo);
+}
+
+/* Layer intercept: labels a new (non-resumed) render pass, then forwards. */
+template <>
+VKAPI_ATTR void VKAPI_CALL layer_vkCmdBeginRendering<user_tag>(
+    VkCommandBuffer commandBuffer,
+    const VkRenderingInfo* pRenderingInfo
+) {
+    LAYER_TRACE(__func__);
+
+    // Layer-wide state may only be touched while the global lock is held
+    std::unique_lock<std::mutex> guard(g_vulkanLock);
+    auto* layer = Device::retrieve(commandBuffer);
+
+    auto& tracker = layer->getStateTracker();
+    auto& trackCB = tracker.getCommandBuffer(commandBuffer);
+
+    // A pass may resume an earlier one and/or be suspended for a later one
+    const bool resuming = (pRenderingInfo->flags & VK_RENDERING_RESUMING_BIT) != 0;
+    const bool suspending = (pRenderingInfo->flags & VK_RENDERING_SUSPENDING_BIT) != 0;
+
+    // Build the tracker's render pass description from the rendering info
+    Tracker::RenderPass trackRP(*pRenderingInfo);
+    const auto& extent = pRenderingInfo->renderArea.extent;
+
+    // Register the pass start; the returned tag identifies this workload
+    uint64_t tagID = trackCB.renderPassBegin(
+        trackRP, extent.width, extent.height, resuming, suspending);
+
+    // Tracker access is finished, so drop the lock before driver calls
+    guard.unlock();
+
+    // Only a new render pass gets a label; a resumed pass does not begin one
+    if (!resuming)
+    {
+        // Label text is "t<tagID>"; it marks the pass in the command stream
+        std::string tagLabel = formatString("t%" PRIu64, tagID);
+        VkDebugUtilsLabelEXT tagInfo {
+            .sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT,
+            .pNext = nullptr,
+            .pLabelName = tagLabel.c_str(),
+            .color = { 0.0f, 0.0f, 0.0f, 0.0f }
+        };
+
+        layer->driver.vkCmdBeginDebugUtilsLabelEXT(commandBuffer, &tagInfo);
+    }
+
+    layer->driver.vkCmdBeginRendering(commandBuffer, pRenderingInfo);
+}
+
+/* Layer intercept: labels a new (non-resumed) render pass, then forwards. */
+template <>
+VKAPI_ATTR void VKAPI_CALL layer_vkCmdBeginRenderingKHR<user_tag>(
+    VkCommandBuffer commandBuffer,
+    const VkRenderingInfo* pRenderingInfo
+) {
+    LAYER_TRACE(__func__);
+
+    // Layer-wide state may only be touched while the global lock is held
+    std::unique_lock<std::mutex> guard(g_vulkanLock);
+    auto* layer = Device::retrieve(commandBuffer);
+
+    auto& tracker = layer->getStateTracker();
+    auto& trackCB = tracker.getCommandBuffer(commandBuffer);
+
+    // A pass may resume an earlier one and/or be suspended for a later one
+    const bool resuming = (pRenderingInfo->flags & VK_RENDERING_RESUMING_BIT) != 0;
+    const bool suspending = (pRenderingInfo->flags & VK_RENDERING_SUSPENDING_BIT) != 0;
+
+    // Build the tracker's render pass description from the rendering info
+    Tracker::RenderPass trackRP(*pRenderingInfo);
+    const auto& extent = pRenderingInfo->renderArea.extent;
+
+    // Register the pass start; the returned tag identifies this workload
+    uint64_t tagID = trackCB.renderPassBegin(
+        trackRP, extent.width, extent.height, resuming, suspending);
+
+    // Tracker access is finished, so drop the lock before driver calls
+    guard.unlock();
+
+    // Only a new render pass gets a label; a resumed pass does not begin one
+    if (!resuming)
+    {
+        // Label text is "t<tagID>"; it marks the pass in the command stream
+        std::string tagLabel = formatString("t%" PRIu64, tagID);
+        VkDebugUtilsLabelEXT tagInfo {
+            .sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT,
+            .pNext = nullptr,
+            .pLabelName = tagLabel.c_str(),
+            .color = { 0.0f, 0.0f, 0.0f, 0.0f }
+        };
+
+        layer->driver.vkCmdBeginDebugUtilsLabelEXT(commandBuffer, &tagInfo);
+    }
+
+    layer->driver.vkCmdBeginRenderingKHR(commandBuffer, pRenderingInfo);
+}
+
+/* Layer intercept: ends the tracked render pass and closes its debug label. */
+template <>
+VKAPI_ATTR void VKAPI_CALL layer_vkCmdEndRenderPass<user_tag>(
+    VkCommandBuffer commandBuffer
+) {
+    LAYER_TRACE(__func__);
+
+    // Layer-wide state may only be touched while the global lock is held
+    std::unique_lock<std::mutex> guard(g_vulkanLock);
+    auto* layer = Device::retrieve(commandBuffer);
+
+    // Record the end of the pass in this command buffer's tracker
+    auto& tracker = layer->getStateTracker();
+    LAYER_LOG(" - Command buffer: %p", (void*)commandBuffer);
+    auto& trackCB = tracker.getCommandBuffer(commandBuffer);
+    trackCB.renderPassEnd();
+    guard.unlock();
+
+    // With the lock released, end the pass and then the label begun with it
+    layer->driver.vkCmdEndRenderPass(commandBuffer);
+    layer->driver.vkCmdEndDebugUtilsLabelEXT(commandBuffer);
+}
+
+/* Layer intercept: ends rendering; closes the label unless suspending. */
+template <>
+VKAPI_ATTR void VKAPI_CALL  layer_vkCmdEndRendering<user_tag>(
+    VkCommandBuffer commandBuffer
+) {
+    LAYER_TRACE(__func__);
+
+    // Layer-wide state may only be touched while the global lock is held
+    std::unique_lock<std::mutex> guard(g_vulkanLock);
+    auto* layer = Device::retrieve(commandBuffer);
+
+    // Record the end of the pass; the result says whether it is suspending
+    auto& trackCB = layer->getStateTracker().getCommandBuffer(commandBuffer);
+    const bool suspending = trackCB.renderPassEnd();
+    guard.unlock();
+
+    // With the lock released, end rendering in the driver
+    layer->driver.vkCmdEndRendering(commandBuffer);
+    // A suspending pass leaves its label open; otherwise close it now
+    if (!suspending)
+    {
+        layer->driver.vkCmdEndDebugUtilsLabelEXT(commandBuffer);
+    }
+}
+
+/* Layer intercept: ends rendering; closes the label unless suspending. */
+template <>
+VKAPI_ATTR void VKAPI_CALL  layer_vkCmdEndRenderingKHR<user_tag>(
+    VkCommandBuffer commandBuffer
+) {
+    LAYER_TRACE(__func__);
+
+    // Layer-wide state may only be touched while the global lock is held
+    std::unique_lock<std::mutex> guard(g_vulkanLock);
+    auto* layer = Device::retrieve(commandBuffer);
+
+    // Record the end of the pass; the result says whether it is suspending
+    auto& trackCB = layer->getStateTracker().getCommandBuffer(commandBuffer);
+    const bool suspending = trackCB.renderPassEnd();
+    guard.unlock();
+
+    // With the lock released, end rendering in the driver
+    layer->driver.vkCmdEndRenderingKHR(commandBuffer);
+    // A suspending pass leaves its label open; otherwise close it now
+    if (!suspending)
+    {
+        layer->driver.vkCmdEndDebugUtilsLabelEXT(commandBuffer);
+    }
+}
diff --git a/layer_gpu_timeline/source/layer_device_functions_trace_rays.cpp b/layer_gpu_timeline/source/layer_device_functions_trace_rays.cpp
new file mode 100644
index 0000000..5373747
--- /dev/null
+++ b/layer_gpu_timeline/source/layer_device_functions_trace_rays.cpp
@@ -0,0 +1,109 @@
+/*
+ * SPDX-License-Identifier: MIT
+ * ----------------------------------------------------------------------------
+ * Copyright (c) 2024 Arm Limited
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ * ----------------------------------------------------------------------------
+ */
+
+#include <memory>
+#include <mutex>
+#include <thread>
+
+#include "device.hpp"
+#include "layer_device_functions.hpp"
+
+extern std::mutex g_vulkanLock;
+
+// Bump the trace-rays counter in the stats of the given command buffer.
+static void registerTraceRays(
+    Device* layer,
+    VkCommandBuffer commandBuffer
+) {
+    auto& trackCB = layer->getStateTracker().getCommandBuffer(commandBuffer);
+    trackCB.getStats().incTraceRaysCount();
+}
+
+/* Layer intercept: counts the trace rays command, then forwards it. */
+template <>
+VKAPI_ATTR void VKAPI_CALL layer_vkCmdTraceRaysIndirect2KHR<user_tag>(
+    VkCommandBuffer commandBuffer,
+    VkDeviceAddress indirectDeviceAddress
+) {
+    LAYER_TRACE(__func__);
+
+    Device* layer { nullptr };
+    {
+        // Layer-wide state may only be touched while the global lock is held
+        std::lock_guard<std::mutex> guard { g_vulkanLock };
+        layer = Device::retrieve(commandBuffer);
+        registerTraceRays(layer, commandBuffer);
+    }
+    // The guard has released the lock before the driver is called
+    layer->driver.vkCmdTraceRaysIndirect2KHR(commandBuffer, indirectDeviceAddress);
+}
+
+/* Layer intercept: counts the trace rays command, then forwards it. */
+template <>
+VKAPI_ATTR void VKAPI_CALL layer_vkCmdTraceRaysIndirectKHR<user_tag>(
+    VkCommandBuffer commandBuffer,
+    const VkStridedDeviceAddressRegionKHR* pRaygenShaderBindingTable,
+    const VkStridedDeviceAddressRegionKHR* pMissShaderBindingTable,
+    const VkStridedDeviceAddressRegionKHR* pHitShaderBindingTable,
+    const VkStridedDeviceAddressRegionKHR* pCallableShaderBindingTable,
+    VkDeviceAddress indirectDeviceAddress
+) {
+    LAYER_TRACE(__func__);
+
+    Device* layer { nullptr };
+    {
+        // Layer-wide state may only be touched while the global lock is held
+        std::lock_guard<std::mutex> guard { g_vulkanLock };
+        layer = Device::retrieve(commandBuffer);
+        registerTraceRays(layer, commandBuffer);
+    }
+    // The guard has released the lock before the driver is called
+    layer->driver.vkCmdTraceRaysIndirectKHR(commandBuffer, pRaygenShaderBindingTable, pMissShaderBindingTable, pHitShaderBindingTable, pCallableShaderBindingTable, indirectDeviceAddress);
+}
+
+/* Layer intercept: counts the trace rays command, then forwards it. */
+template <>
+VKAPI_ATTR void VKAPI_CALL layer_vkCmdTraceRaysKHR<user_tag>(
+    VkCommandBuffer commandBuffer,
+    const VkStridedDeviceAddressRegionKHR* pRaygenShaderBindingTable,
+    const VkStridedDeviceAddressRegionKHR* pMissShaderBindingTable,
+    const VkStridedDeviceAddressRegionKHR* pHitShaderBindingTable,
+    const VkStridedDeviceAddressRegionKHR* pCallableShaderBindingTable,
+    uint32_t width,
+    uint32_t height,
+    uint32_t depth
+) {
+    LAYER_TRACE(__func__);
+
+    Device* layer { nullptr };
+    {
+        // Layer-wide state may only be touched while the global lock is held
+        std::lock_guard<std::mutex> guard { g_vulkanLock };
+        layer = Device::retrieve(commandBuffer);
+        registerTraceRays(layer, commandBuffer);
+    }
+    // The guard has released the lock before the driver is called
+    layer->driver.vkCmdTraceRaysKHR(commandBuffer, pRaygenShaderBindingTable, pMissShaderBindingTable, pHitShaderBindingTable, pCallableShaderBindingTable, width, height, depth);
+}
\ No newline at end of file
diff --git a/layer_gpu_timeline/source/timeline_comms.cpp b/layer_gpu_timeline/source/timeline_comms.cpp
new file mode 100644
index 0000000..fbb496a
--- /dev/null
+++ b/layer_gpu_timeline/source/timeline_comms.cpp
@@ -0,0 +1,51 @@
+/*
+ * SPDX-License-Identifier: MIT
+ * ----------------------------------------------------------------------------
+ * Copyright (c) 2024 Arm Limited
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ * ----------------------------------------------------------------------------
+ */
+
+#include <memory>
+
+#include "timeline_comms.hpp"
+
+// Bind to the comms link; the endpoint stays 0 unless already connected.
+TimelineComms::TimelineComms(Comms::CommsInterface& commsIf)
+    : comms(&commsIf)
+{
+    if (commsIf.isConnected())
+    {
+        endpoint = commsIf.getEndpointID("GPUTimeline");
+    }
+}
+
+// Pass a message to txAsync; does nothing when no endpoint is set.
+void TimelineComms::txMessage(const std::string& message)
+{
+    // A zero endpoint means the message endpoint is not available
+    if (endpoint != 0)
+    {
+        // Copy the text into a buffer whose ownership moves to txAsync
+        auto payload = std::make_unique<Comms::MessageData>(
+            message.begin(), message.end());
+        comms->txAsync(endpoint, std::move(payload));
+    }
+}
diff --git a/layer_gpu_timeline/source/timeline_comms.hpp b/layer_gpu_timeline/source/timeline_comms.hpp
new file mode 100644
index 0000000..435f37a
--- /dev/null
+++ b/layer_gpu_timeline/source/timeline_comms.hpp
@@ -0,0 +1,42 @@
+/*
+ * SPDX-License-Identifier: MIT
+ * ----------------------------------------------------------------------------
+ * Copyright (c) 2024 Arm Limited
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ * ----------------------------------------------------------------------------
+ */
+
+#pragma once
+
+#include "comms/comms_interface.hpp"
+
+/** Sends GPU timeline text messages over a comms interface. */
+class TimelineComms
+{
+public:
+    /** Wrap @p comms; looks up the "GPUTimeline" endpoint if connected. */
+    TimelineComms(Comms::CommsInterface& comms);
+
+    /** Transmit @p message, or do nothing when no endpoint is set. */
+    void txMessage(const std::string& message);
+private:
+    Comms::EndpointID endpoint { 0 };          // 0 means no endpoint available
+    Comms::CommsInterface* comms { nullptr };  // not deleted by this class
+};
diff --git a/lgl_host_server.py b/lgl_host_server.py
index c7c1382..14d5941 100644
--- a/lgl_host_server.py
+++ b/lgl_host_server.py
@@ -32,6 +32,7 @@
 import threading
 
 import lglpy.server
+import lglpy.service_gpu_timeline
 import lglpy.service_test
 import lglpy.service_log
 
@@ -41,13 +42,18 @@ def main():
 
     # Register all the services with it
     print(f'Registering host services:')
-    test_service = lglpy.service_test.TestService()
-    endpoint_id = server.register_endpoint(test_service)
-    print(f'  - [{endpoint_id}] = {test_service.get_service_name()}')
+    service = lglpy.service_test.TestService()
+    endpoint_id = server.register_endpoint(service)
+    print(f'  - [{endpoint_id}] = {service.get_service_name()}')
+
+    service = lglpy.service_log.LogService()
+    endpoint_id = server.register_endpoint(service)
+    print(f'  - [{endpoint_id}] = {service.get_service_name()}')
+
+    service = lglpy.service_gpu_timeline.GPUTimelineService()
+    endpoint_id = server.register_endpoint(service)
+    print(f'  - [{endpoint_id}] = {service.get_service_name()}')
 
-    log_service = lglpy.service_log.LogService()
-    endpoint_id = server.register_endpoint(log_service)
-    print(f'  - [{endpoint_id}] = {log_service.get_service_name()}')
     print()
 
     # Start it running
diff --git a/lglpy/service_gpu_timeline.py b/lglpy/service_gpu_timeline.py
new file mode 100644
index 0000000..69ac031
--- /dev/null
+++ b/lglpy/service_gpu_timeline.py
@@ -0,0 +1,42 @@
+# SPDX-License-Identifier: MIT
+# -----------------------------------------------------------------------------
+# Copyright (c) 2024 Arm Limited
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to
+# deal in the Software without restriction, including without limitation the
+# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+# sell copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+# -----------------------------------------------------------------------------
+
+# This module implements the server-side communications module service that
+# receives messages from the GPU timeline layer and prints them to the console.
+
+from lglpy.server import Message
+
+class GPUTimelineService:
+    '''Host-side service endpoint that prints received timeline messages.'''
+
+    def __init__(self):
+        pass
+
+    def get_service_name(self) -> str:
+        return 'GPUTimeline'
+
+    def handle_message(self, message: Message):
+        '''Print the message type, decoded text, and raw payload size.'''
+        size = len(message.payload)  # bytes received, not decoded characters
+        text = message.payload.decode('utf-8')
+        print(f'{message.message_type.name}: {text} ({size} bytes)')
diff --git a/source_common/CMakeLists.txt b/source_common/CMakeLists.txt
index 1f37093..a408717 100644
--- a/source_common/CMakeLists.txt
+++ b/source_common/CMakeLists.txt
@@ -26,3 +26,4 @@
 # Device classes which get specialized for each use case.
 
 add_subdirectory(comms)
+add_subdirectory(trackers)
diff --git a/source_common/comms/comms_module.cpp b/source_common/comms/comms_module.cpp
index 42c815e..5ef695e 100644
--- a/source_common/comms/comms_module.cpp
+++ b/source_common/comms/comms_module.cpp
@@ -46,6 +46,7 @@ namespace Comms
 CommsModule::CommsModule(
     const std::string& domainAddress
 ) {
+    LAYER_LOG("Client UDS socket create");
     sockfd = socket(AF_UNIX, SOCK_STREAM, 0);
     if (sockfd < 0)
     {
@@ -60,6 +61,7 @@ CommsModule::CommsModule(
     std::strcpy(servAddr.sun_path + 1, domainAddress.c_str());
     servAddr.sun_path[0] = '\0';
 
+    LAYER_LOG("Client UDS connect");
     int conn = connect(
         sockfd,
         reinterpret_cast<const struct sockaddr*>(&servAddr),
@@ -72,8 +74,13 @@ CommsModule::CommsModule(
         return;
     }
 
+    LAYER_LOG("Client make transmitter");
     transmitter = std::make_unique<Transmitter>(*this);
+
+    LAYER_LOG("Client make receiver");
     receiver = std::make_unique<Receiver>(*this);
+
+    LAYER_LOG("Client make complete");
 }
 
 /** See header for documentation. */
diff --git a/source_common/trackers/CMakeLists.txt b/source_common/trackers/CMakeLists.txt
new file mode 100644
index 0000000..8318d8c
--- /dev/null
+++ b/source_common/trackers/CMakeLists.txt
@@ -0,0 +1,46 @@
+# SPDX-License-Identifier: MIT
+# -----------------------------------------------------------------------------
+# Copyright (c) 2024 Arm Limited
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to
+# deal in the Software without restriction, including without limitation the
+# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+# sell copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+# -----------------------------------------------------------------------------
+
+set(LIB_BINARY lib_layer_trackers)
+
+add_library(
+    ${LIB_BINARY} STATIC
+        command_buffer.cpp
+        device.cpp
+        layer_command_stream.cpp
+        queue.cpp
+        render_pass.cpp)
+
+target_include_directories(
+    ${LIB_BINARY} PRIVATE
+        ../../khronos/vulkan/include
+        ../../source_third_party
+        ../)
+
+lgl_set_build_options(${LIB_BINARY})
+
+# No unit tests for this module yet; enable once a test subdirectory exists
+#if(${LGL_UNITTEST})
+#    add_subdirectory(test)
+#endif()
+
diff --git a/source_common/trackers/command_buffer.cpp b/source_common/trackers/command_buffer.cpp
new file mode 100644
index 0000000..0e4cdd8
--- /dev/null
+++ b/source_common/trackers/command_buffer.cpp
@@ -0,0 +1,185 @@
+/*
+ * SPDX-License-Identifier: MIT
+ * ----------------------------------------------------------------------------
+ * Copyright (c) 2022-2024 Arm Limited
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ * ----------------------------------------------------------------------------
+ */
+
+#include <cassert>
+
+#include "trackers/command_buffer.hpp"
+#include "framework/utils.hpp"
+#include "utils/misc.hpp"
+
+namespace Tracker
+{
+
+/* See header for documentation. */
+CommandBuffer::CommandBuffer(
+    VkCommandBuffer _handle) :
+    handle(_handle)
+{
+    // All other members rely on their default initialization
+}
+
+/* See header for documentation. */
+void CommandBuffer::reset()
+{
+    stats.reset();
+    workloads.clear();
+    workloadCommandStream.clear();
+
+    // A reset can happen while a render pass is still being recorded, so drop
+    // the in-flight state too; otherwise the next renderPassBegin() would see
+    // a stale render pass and trip its "no render pass active" assert
+    currentRenderPass.reset();
+    renderPassStartDrawCount = 0;
+}
+
+/**
+ * @brief Begin a user debug marker range.
+ */
+void CommandBuffer::debugMarkerBegin(
+    std::string marker
+) {
+    // Record the marker as a workload that the command stream can reference
+    auto markerWorkload = std::make_shared<LCSMarker>(marker);
+    workloads.push_back(markerWorkload);
+
+    // Add command to update queue debug stack on submit
+    workloadCommandStream.push_back(
+        std::make_pair(LCSOpcode::MARKER_BEGIN, markerWorkload));
+}
+
+/**
+ * @brief End a user debug marker range.
+ */
+void CommandBuffer::debugMarkerEnd()
+{
+    // An end marker has no payload, so pair the opcode with a null workload
+    std::shared_ptr<LCSWorkload> noWorkload;
+    workloadCommandStream.push_back(
+        std::make_pair(LCSOpcode::MARKER_END, noWorkload));
+}
+
+/**
+ * @brief Begin a user render pass.
+ */
+uint64_t CommandBuffer::renderPassBegin(
+    const RenderPass& renderPass,
+    uint32_t width,
+    uint32_t height,
+    bool resuming,
+    bool suspending
+) {
+    // No render pass recording may already be open in this command buffer
+    assert(!currentRenderPass);
+
+    // A resumed render pass keeps tag 0 and is not counted a second time
+    uint64_t passTagID { 0 };
+    if (!resuming)
+    {
+        passTagID = Tracker::LCSWorkload::getTagID();
+        stats.incRenderPassCount();
+    }
+
+    // Snapshot the draw count so renderPassEnd() can compute the delta
+    renderPassStartDrawCount = stats.getDrawCallCount();
+
+    auto passWorkload = std::make_shared<LCSRenderPass>(
+        passTagID, renderPass, width, height, suspending);
+
+    currentRenderPass = passWorkload;
+    workloads.push_back(passWorkload);
+
+    // Add the begin instruction to the layer-side command stream
+    workloadCommandStream.push_back(
+        std::make_pair(LCSOpcode::RENDERPASS_BEGIN, passWorkload));
+
+    return passTagID;
+}
+
+/**
+ * @brief End a user render pass.
+ */
+bool CommandBuffer::renderPassEnd()
+{
+    assert(currentRenderPass);
+
+    // Attribute the draws recorded since renderPassBegin() to this pass
+    const uint64_t drawsInPass =
+        stats.getDrawCallCount() - renderPassStartDrawCount;
+    currentRenderPass->setDrawCallCount(drawsInPass);
+
+    // Read the suspend flag before releasing our reference to the pass
+    const bool isSuspending = currentRenderPass->isSuspending();
+    currentRenderPass.reset();
+
+    return isSuspending;
+}
+
+/* See header for documentation. */
+void CommandBuffer::executeCommands(
+    CommandBuffer& secondary
+) {
+    // Fold the secondary's counters into this command buffer's totals
+    stats.mergeCounts(secondary.stats);
+
+    // Append the secondary's workloads and layer commands to our own
+    vecAppend(workloads, secondary.workloads);
+    vecAppend(workloadCommandStream, secondary.workloadCommandStream);
+}
+
+
+CommandPool::CommandPool(
+    VkCommandPool _handle) :
+    handle(_handle)
+{
+    // The pool starts out empty; trackers are added as buffers are allocated
+}
+
+/* See header for documentation. */
+CommandBuffer& CommandPool::allocateCommandBuffer(
+    VkCommandBuffer commandBuffer
+) {
+    // Create the tracker; the map entry owns its storage
+    auto inserted = commandBuffers.emplace(
+        commandBuffer,
+        std::make_unique<CommandBuffer>(commandBuffer));
+
+    // The handle must not already be tracked by this pool
+    assert(inserted.second);
+
+    // Hand back the tracker stored in the map
+    return *inserted.first->second;
+}
+
+/* See header for documentation. Erasing the entry destroys the tracker. */
+void CommandPool::freeCommandBuffer(
+    VkCommandBuffer commandBuffer
+) {
+    commandBuffers.erase(commandBuffer);
+}
+
+/* See header for documentation. Buffers are reset, not freed. */
+void CommandPool::reset()
+{
+    for (auto& entry : commandBuffers)
+    {
+        entry.second->reset();
+    }
+}
+
+}
diff --git a/source_common/trackers/command_buffer.hpp b/source_common/trackers/command_buffer.hpp
new file mode 100644
index 0000000..fcf5c34
--- /dev/null
+++ b/source_common/trackers/command_buffer.hpp
@@ -0,0 +1,218 @@
+/*
+ * SPDX-License-Identifier: MIT
+ * ----------------------------------------------------------------------------
+ * Copyright (c) 2022-2024 Arm Limited
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ * ----------------------------------------------------------------------------
+ */
+
+/**
+ * \file
+ * The declaration of Vulkan command pool and command buffer use trackers.
+ *
+ * Role summary
+ * ============
+ *
+ * These trackers are used to monitor the use of command buffers in a frame,
+ * allowing us to monitor command buffer payloads submitted to a queue.
+ *
+ * Key properties
+ * ==============
+ *
+ * Command pools and Command buffers are both lock-free from a single app
+ * thread, relying on external synchronization above the API if multi-threaded
+ * use is required.
+ */
+
+#pragma once
+
+#include <atomic>
+#include <memory>
+#include <string>
+#include <unordered_map>
+#include <utility>
+#include <vector>
+#include <vulkan/vulkan.h>
+
+#include "trackers/stats.hpp"
+#include "trackers/layer_command_stream.hpp"
+
+namespace Tracker
+{
+
+/**
+ * @brief The state tracker for a single command buffer.
+ */
+class CommandBuffer
+{
+public:
+    /**
+     * @brief Construct a new command buffer wrapping a Vulkan allocation.
+     *
+     * @param handle   The Vulkan command buffer handle we are tracking.
+     */
+    CommandBuffer(
+        VkCommandBuffer handle);
+
+    /**
+     * @brief Get the stats object for this command buffer.
+     */
+    Stats& getStats()
+    {
+        return stats;
+    }
+
+    /**
+     * @brief Get the layer submit-time command stream for this command buffer.
+     */
+    const std::vector<LCSInstruction>& getSubmitCommandStream() const
+    {
+        return workloadCommandStream;
+    }
+
+    /**
+     * @brief Begin recording a render pass.
+     *
+     * @param renderPass   Render pass creation info.
+     * @param width        Render pass extent width in pixels.
+     * @param height       Render pass extent height in pixels.
+     * @param resuming     If @c true this recording starts with a resume.
+     * @param suspending   If @c true this recording ends with a suspend.
+     *
+     * @return Returns the tagID assigned to this workload. Always returns 0
+     *         if @c resuming an existing workload.
+     */
+    uint64_t renderPassBegin(
+        const RenderPass& renderPass,
+        uint32_t width,
+        uint32_t height,
+        bool resuming=false,
+        bool suspending=false);
+
+    /**
+     * @brief End the current render pass workload recording.
+     *
+     * @return Returns @c true if this is a suspending dynamic render pass or
+     *         @c false otherwise.
+     */
+    bool renderPassEnd();
+
+    /**
+     * @brief Begin a user debug marker range labelled with @c marker.
+     */
+    void debugMarkerBegin(
+        std::string marker);
+
+    /**
+     * @brief End a user debug marker range.
+     */
+    void debugMarkerEnd();
+
+    /**
+     * @brief Execute a secondary command buffer, merging in its content.
+     */
+    void executeCommands(
+        CommandBuffer& secondary);
+
+    /**
+     * @brief Reset the command buffer back into the @a Initial state.
+     */
+    void reset();
+
+private:
+    /**
+     * @brief The Vulkan API handle of this command buffer.
+     */
+    const VkCommandBuffer handle;
+
+    /**
+     * @brief The command buffer draw count at the start of the render pass.
+     */
+    uint64_t renderPassStartDrawCount { 0 };
+
+    /**
+     * @brief The cumulative stats of the commands in this command buffer.
+     */
+    Stats stats;
+
+    /**
+     * @brief The current render pass if we are in one, or null otherwise.
+     */
+    std::shared_ptr<LCSRenderPass> currentRenderPass;
+
+    /**
+     * @brief The recorded workloads, held until the next reset.
+     */
+    std::vector<std::shared_ptr<LCSWorkload>> workloads;
+
+    /**
+     * @brief The recorded layer commands making up the submit-time stream.
+     */
+    std::vector<LCSInstruction> workloadCommandStream;
+};
+
+/**
+ * @brief The state tracker for a single command pool.
+ */
+class CommandPool
+{
+public:
+    /**
+     * @brief Construct a new command pool wrapping a Vulkan allocation.
+     *
+     * @param handle       The Vulkan command pool handle we are wrapping.
+     */
+    CommandPool(
+        VkCommandPool handle);
+
+    /**
+     * @brief Allocate a command buffer in the pool with the given handle.
+     *
+     * @param commandBuffer   The Vulkan handle of the allocated command buffer.
+     *
+     * @return The layer wrapper object for the command buffer.
+     */
+    CommandBuffer& allocateCommandBuffer(VkCommandBuffer commandBuffer);
+
+    /**
+     * @brief Free the command buffer in the pool with the given handle.
+     *
+     * @param commandBuffer   The Vulkan handle of the command buffer to free.
+     */
+    void freeCommandBuffer(VkCommandBuffer commandBuffer);
+
+    /**
+     * @brief Reset all allocated command buffers into the @a Initial state.
+     */
+    void reset();
+
+private:
+    /**
+     * @brief The Vulkan API handle of this command pool.
+     */
+    const VkCommandPool handle;
+
+    /**
+     * @brief The command buffers currently allocated in this command pool.
+     */
+    std::unordered_map<VkCommandBuffer, std::unique_ptr<CommandBuffer>> commandBuffers;
+};
+
+}
diff --git a/source_common/trackers/device.cpp b/source_common/trackers/device.cpp
new file mode 100644
index 0000000..a1f0687
--- /dev/null
+++ b/source_common/trackers/device.cpp
@@ -0,0 +1,175 @@
+/*
+ * SPDX-License-Identifier: MIT
+ * ----------------------------------------------------------------------------
+ * Copyright (c) 2022-2024 Arm Limited
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ * ----------------------------------------------------------------------------
+ */
+
+#include <array>
+#include <cassert>
+#include <fstream>
+#include <iostream>
+#include <memory>
+#include <sys/stat.h>
+#include <vector>
+
+#include "trackers/device.hpp"
+#include "utils/misc.hpp"
+
+namespace Tracker
+{
+
+/* See header for documentation. */
+void Device::createCommandPool(
+    VkCommandPool commandPool
+) {
+    // Note that a handle which is already tracked keeps its existing tracker
+    commandPools.emplace(
+        commandPool,
+        std::make_unique<CommandPool>(commandPool));
+}
+
+/* See header for documentation. The handle must already be tracked. */
+CommandPool& Device::getCommandPool(
+    VkCommandPool commandPool
+) {
+    assert(isInMap(commandPool, commandPools));
+    return *commandPools.at(commandPool);
+}
+
+/* See header. NOTE(review): commandBuffers entries are not purged here. */
+void Device::destroyCommandPool(
+    VkCommandPool commandPool
+) {
+    commandPools.erase(commandPool);
+}
+
+/* See header for documentation. */
+void Device::allocateCommandBuffer(
+    VkCommandPool commandPool,
+    VkCommandBuffer commandBuffer
+) {
+    // The parent pool creates and stores the tracker object
+    CommandBuffer& tracker =
+        getCommandPool(commandPool).allocateCommandBuffer(commandBuffer);
+
+    // The device map only holds a reference, for lookup by handle alone
+    [[maybe_unused]] auto inserted = commandBuffers.emplace(
+        commandBuffer,
+        tracker);
+
+    // The handle must not already be present in the lookup map
+    assert(inserted.second);
+}
+
+/* See header for documentation. */
+void Device::freeCommandBuffer(
+    VkCommandPool commandPool,
+    VkCommandBuffer commandBuffer
+) {
+    // Drop the lookup reference first so it never outlives the tracker
+    commandBuffers.erase(commandBuffer);
+
+    // Then release the tracker itself from the parent pool
+    CommandPool& parentPool = getCommandPool(commandPool);
+    parentPool.freeCommandBuffer(commandBuffer);
+}
+
+/* See header for documentation. The handle must already be tracked. */
+CommandBuffer& Device::getCommandBuffer(
+    VkCommandBuffer commandBuffer
+) {
+    assert(isInMap(commandBuffer, commandBuffers));
+    return commandBuffers.at(commandBuffer);
+}
+
+/* See header for documentation. */
+void Device::createRenderPass(
+    VkRenderPass renderPass,
+    const VkRenderPassCreateInfo& createInfo
+) {
+    // Note that a handle which is already tracked keeps its existing tracker
+    renderPasses.emplace(
+        renderPass,
+        std::make_unique<RenderPass>(renderPass, createInfo));
+}
+
+/* See header for documentation. */
+void Device::createRenderPass(
+    VkRenderPass renderPass,
+    const VkRenderPassCreateInfo2& createInfo
+) {
+    // Note that a handle which is already tracked keeps its existing tracker
+    renderPasses.emplace(
+        renderPass,
+        std::make_unique<RenderPass>(renderPass, createInfo));
+}
+
+/* See header for documentation. The handle must already be tracked. */
+RenderPass& Device::getRenderPass(
+    VkRenderPass renderPass
+) {
+    assert(isInMap(renderPass, renderPasses));
+    return *renderPasses.at(renderPass);
+}
+
+/* See header for documentation. Erasing the entry destroys the tracker. */
+void Device::destroyRenderPass(
+    VkRenderPass renderPass
+) {
+    renderPasses.erase(renderPass);
+}
+
+/* See header for documentation. */
+Queue& Device::getQueue(
+    VkQueue queue
+) {
+    // Lazily create the tracker the first time a queue handle is seen
+    auto entry = queues.find(queue);
+    if (entry == queues.end())
+    {
+        entry = queues.emplace(
+            queue,
+            std::make_unique<Queue>(queue)).first;
+    }
+
+    return *entry->second;
+}
+
+/* See header for documentation. */
+void Device::queueSubmit(
+    VkCommandBuffer commandBuffer
+) {
+    // Accumulate the submitted command buffer's counters into this frame
+    frameStats.mergeCounts(getCommandBuffer(commandBuffer).getStats());
+}
+
+
+/* See header for documentation. */
+void Device::queuePresent()
+{
+    // Count this present as one frame and roll its counts into the totals
+    totalStats.incFrameCount();
+    totalStats.mergeCounts(frameStats);
+
+    // Reset the frame statistics ready for the next frame
+    frameStats.reset();
+}
+
+}
diff --git a/source_common/trackers/device.hpp b/source_common/trackers/device.hpp
new file mode 100644
index 0000000..4507707
--- /dev/null
+++ b/source_common/trackers/device.hpp
@@ -0,0 +1,208 @@
+/*
+ * SPDX-License-Identifier: MIT
+ * ----------------------------------------------------------------------------
+ * Copyright (c) 2022-2024 Arm Limited
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ * ----------------------------------------------------------------------------
+ */
+
+/**
+ * \file
+ * This module implements basic tracking of Vulkan devices.
+ *
+ * Role summary
+ * ============
+ *
+ * Trackers are used to monitor the use of a device and the various resources
+ * that exist in the scope of a device. Primarily we use them to implement
+ * counters that layers can use either to emit statistics or to trigger other
+ * layer behavior when a specific count is reached.
+ */
+
+#pragma once
+
+#include <unordered_map>
+#include <vulkan/vulkan.h>
+
+#include "trackers/command_buffer.hpp"
+#include "trackers/queue.hpp"
+#include "trackers/render_pass.hpp"
+
+namespace Tracker
+{
+
+/**
+ * @brief The state tracker for a single device.
+ */
+class Device
+{
+public:
+    /**
+     * @brief Create a new command pool tracker within this device.
+     *
+     * @param commandPool   The native handle to track.
+     */
+    void createCommandPool(
+        VkCommandPool commandPool);
+
+    /**
+     * @brief Get the tracker for a native command pool.
+     *
+     * @param commandPool   The native handle we are tracking.
+     */
+    CommandPool& getCommandPool(
+        VkCommandPool commandPool);
+
+    /**
+     * @brief Create a new command buffer in a pool within this device.
+     *
+     * @param commandPool     The native parent command pool handle.
+     * @param commandBuffer   The native handle to track.
+     */
+    void allocateCommandBuffer(
+        VkCommandPool commandPool,
+        VkCommandBuffer commandBuffer);
+
+    /**
+     * @brief Free a command buffer in a pool within this device.
+     *
+     * @param commandPool     The native parent command pool handle.
+     * @param commandBuffer   The native handle to stop tracking.
+     */
+    void freeCommandBuffer(
+        VkCommandPool commandPool,
+        VkCommandBuffer commandBuffer);
+
+    /**
+     * @brief Get the tracker for a native command buffer.
+     *
+     * @param commandBuffer   The native handle we are tracking.
+     */
+    CommandBuffer& getCommandBuffer(
+        VkCommandBuffer commandBuffer);
+
+    /**
+     * @brief Destroy a command pool within this device.
+     *
+     * @param commandPool   The native handle to stop tracking.
+     */
+    void destroyCommandPool(
+        VkCommandPool commandPool);
+
+    /**
+     * @brief Get the tracker for a native queue.
+     *
+     * Note that queue trackers are created on the fly when a native queue is
+     * first used. We don't track queue creation as a distinct step.
+     *
+     * @param queue   The native handle we are tracking.
+     */
+    Queue& getQueue(
+        VkQueue queue);
+
+    /**
+     * @brief Create a new render pass tracker within this device.
+     *
+     * @param renderPass   The native handle to track.
+     * @param createInfo   The render pass configuration information.
+     */
+    void createRenderPass(
+        VkRenderPass renderPass,
+        const VkRenderPassCreateInfo& createInfo);
+
+    /**
+     * @brief Create a new render pass tracker within this device.
+     *
+     * @param renderPass   The native handle to track.
+     * @param createInfo   The render pass configuration information.
+     */
+    void createRenderPass(
+        VkRenderPass renderPass,
+        const VkRenderPassCreateInfo2& createInfo);
+
+    /**
+     * @brief Get the tracker for a native render pass.
+     *
+     * @param renderPass   The native handle we are tracking.
+     */
+    RenderPass& getRenderPass(
+        VkRenderPass renderPass);
+
+    /**
+     * @brief Destroy a render pass within this device.
+     *
+     * @param renderPass   The native handle to stop tracking.
+     */
+    void destroyRenderPass(
+        VkRenderPass renderPass);
+
+    /**
+     * @brief Submit a command buffer to a queue within this device.
+     *
+     * @param commandBuffer   The native command buffer we are tracking.
+     */
+    void queueSubmit(
+        VkCommandBuffer commandBuffer);
+
+    /**
+     * @brief Submit a display present command to a queue within this device.
+     */
+    void queuePresent();
+
+public:
+    /**
+     * @brief The set of all queues allocated in this device.
+     */
+    std::unordered_map<VkQueue, std::unique_ptr<Queue>> queues;
+
+    /**
+     * @brief The set of all command pools allocated in this device.
+     */
+    std::unordered_map<VkCommandPool, std::unique_ptr<CommandPool>> commandPools;
+
+    /**
+     * @brief The set of all command buffers allocated in this device.
+     *
+     * Note - the trackers are owned by their CommandPool, so references in
+     * this map must be removed before destroying the pool that owns them.
+     */
+    std::unordered_map<VkCommandBuffer, CommandBuffer&> commandBuffers;
+
+    /**
+     * @brief The set of all render passes allocated in this device.
+     */
+    std::unordered_map<VkRenderPass, std::unique_ptr<RenderPass>> renderPasses;
+
+    /**
+     * @brief The cumulative statistics for this device.
+     *
+     * Only updated when a present is submitted to a queue.
+     */
+    Stats totalStats;
+
+    /**
+     * @brief The current frame statistics for this device.
+     *
+     * Accumulated on each queue submit and reset on each present.
+     */
+    Stats frameStats;
+};
+
+}
diff --git a/source_common/trackers/layer_command_stream.cpp b/source_common/trackers/layer_command_stream.cpp
new file mode 100644
index 0000000..56b1039
--- /dev/null
+++ b/source_common/trackers/layer_command_stream.cpp
@@ -0,0 +1,161 @@
+/*
+ * SPDX-License-Identifier: MIT
+ * ----------------------------------------------------------------------------
+ * Copyright (c) 2024 Arm Limited
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ * ----------------------------------------------------------------------------
+ */
+
+#include <cassert>
+
+#include "nlohmann/json.hpp"
+
+#include "trackers/layer_command_stream.hpp"
+
+using json = nlohmann::json;
+
+namespace Tracker
+{
+/* See header for details. Tag IDs start at 1 as 0 is used for untagged. */
+std::atomic<uint64_t> LCSWorkload::nextTagID { 1 };
+
+LCSWorkload::LCSWorkload(
+    uint64_t _tagID):
+    tagID(_tagID)
+{
+    // Nothing to do beyond storing the tag ID
+}
+
+/* See header for details. */
+LCSMarker::LCSMarker(
+    const std::string& _label) :
+    LCSWorkload(0),
+    label(_label)
+{
+    // Markers are not tagged, so the base class is given tag ID zero
+}
+
+/* See header for details. */
+LCSRenderPass::LCSRenderPass(
+    uint64_t _tagID,
+    const RenderPass& renderPass,
+    uint32_t _width,
+    uint32_t _height,
+    bool _suspending) :
+    LCSWorkload(_tagID),
+    width(_width),
+    height(_height),
+    suspending(_suspending)
+{
+    // Copy by value rather than referencing, as the renderpass may be transient
+    subpassCount = renderPass.getSubpassCount();
+    attachments = renderPass.getAttachments();
+}
+
+/* See header for details. Optional fields at their default are omitted. */
+std::string LCSRenderPass::getBeginMetadata(
+    uint64_t submitID) const
+{
+    json metadata = {
+        { "type", "renderpass" },
+        { "tid", tagID },
+        { "width", width },
+        { "height", height },
+        { "drawCallCount", drawCallCount }
+    };
+
+    if (submitID != 0)
+    {
+        metadata["sid"] = submitID;
+    }
+
+    // Default is 1, so only store if we need it
+    if (subpassCount != 1)
+    {
+        metadata["subpassCount"] = subpassCount;
+    }
+
+    json attachPoints = json::array();
+    for (const auto& attachment : attachments)
+    {
+        json attachPoint {
+            { "binding", attachment.getAttachmentStr() },
+        };
+
+        // Default is false, so only store if we need it
+        if (attachment.isLoaded())
+        {
+            attachPoint["load"] = true;
+        }
+
+        // Default is true, so only store if we need it
+        if (!attachment.isStored())
+        {
+            attachPoint["store"] = false;
+        }
+
+        // Default is false, so only store if we need it
+        if (attachment.isResolved())
+        {
+            attachPoint["resolve"] = true;
+        }
+
+        attachPoints.push_back(attachPoint);
+    }
+
+    metadata["attachments"] = attachPoints;
+    return metadata.dump();
+}
+
+/* See header for details. Omits the static render pass configuration. */
+std::string LCSRenderPass::getContinuationMetadata(
+    uint64_t tagIDContinuation,
+    uint64_t submitID) const
+{
+    json metadata = {
+        { "type", "renderpass" },
+        { "tid", tagIDContinuation },
+        { "drawCallCount", drawCallCount }
+    };
+
+    if (submitID != 0)
+    {
+        metadata["sid"] = submitID;
+    }
+
+    return metadata.dump();
+}
+
+/* See header for details. */
+std::string LCSRenderPass::getMetadata(
+    uint64_t tagIDContinuation,
+    uint64_t submitID) const
+{
+    // A zero tag means this workload resumes a render pass begun elsewhere
+    if (tagID == 0)
+    {
+        assert(tagIDContinuation != 0);
+        return getContinuationMetadata(tagIDContinuation, submitID);
+    }
+    assert(tagIDContinuation == 0);
+    return getBeginMetadata(submitID);
+}
+
+}
diff --git a/source_common/trackers/layer_command_stream.hpp b/source_common/trackers/layer_command_stream.hpp
new file mode 100644
index 0000000..a144683
--- /dev/null
+++ b/source_common/trackers/layer_command_stream.hpp
@@ -0,0 +1,196 @@
+/*
+ * SPDX-License-Identifier: MIT
+ * ----------------------------------------------------------------------------
+ * Copyright (c) 2022-2024 Arm Limited
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ * ----------------------------------------------------------------------------
+ */
+
+/**
+ * \file
+ * The declaration of the layer command stream (LCS) opcodes and workloads.
+ *
+ * Role summary
+ * ============
+ *
+ * These trackers are used to monitor the use of command buffers in a frame,
+ * allowing us to monitor command buffer payloads submitted to a queue.
+ *
+ * Key properties
+ * ==============
+ *
+ * Command pools and Command buffers are both lock-free from a single app
+ * thread, relying on external synchronization above the API if multi-threaded
+ * use is required.
+ */
+
+#pragma once
+
+#include <atomic>
+#include <memory>
+#include <string>
+#include <unordered_map>
+#include <utility>
+#include <vector>
+#include <vulkan/vulkan.h>
+
+#include "trackers/render_pass.hpp"
+#include "trackers/stats.hpp"
+#include "utils/misc.hpp"
+
+namespace Tracker
+{
+
+/**
+ * @brief Enumeration of layer command stream opcodes.
+ */
+enum class LCSOpcode
+{
+    MARKER_BEGIN,     // Queue::runSubmitCommandStream pushes the workload metadata on the debug stack
+    MARKER_END,       // Queue::runSubmitCommandStream pops the most recent debug stack entry
+    RENDERPASS_BEGIN, // Queue::runSubmitCommandStream passes the render pass metadata to the callback
+    RENDERPASS_END, // TODO: Does this need to be an opcode?
+    DISPATCH,
+    TRACE_RAYS,
+    BUFFER_TRANSFER,
+    IMAGE_TRANSFER
+};
+
+/**
+ * @brief Baseclass representing a GPU workload in the command stream.
+ */
+class LCSWorkload
+{
+public:
+    LCSWorkload(
+        uint64_t tagID); // presumably stored in the tagID member; definition is not in this header
+
+    virtual ~LCSWorkload() = default;
+
+    virtual std::string getMetadata(     // Returns this workload's metadata as a string
+        uint64_t tagIDContinuation=0,    // Defaults to zero; interpretation is left to the subclass
+        uint64_t submitID=0) const = 0;  // Defaults to zero; interpretation is left to the subclass
+
+    /**
+     * @brief Get a unique tagID to label a workload in a command buffer.
+     *
+     * @return The assigned ID.
+     */
+    static uint64_t getTagID()
+    {
+        // Allocation is a relaxed atomic increment of the counter shared by all workloads
+        return nextTagID.fetch_add(1, std::memory_order_relaxed);
+    }
+
+protected:
+    /**
+     * @brief The assigned tagID for this workload.
+     *
+     * Render pass continuations are assigned tagID of zero.
+     */
+    uint64_t tagID;
+
+private:
+    /**
+     * @brief The workload tagID allocator.
+     */
+    static std::atomic<uint64_t> nextTagID;
+};
+
+/**
+ * @brief Workload class representing a render pass in the command stream.
+ */
+class LCSRenderPass : public LCSWorkload
+{
+public:
+    LCSRenderPass(
+        uint64_t tagID,
+        const RenderPass& renderPass,
+        uint32_t width,
+        uint32_t height,
+        bool suspending);
+
+    virtual ~LCSRenderPass() = default;
+
+    bool isSuspending() const
+    {
+        return suspending;
+    }
+
+    void setDrawCallCount(uint64_t count)
+    {
+        drawCallCount = count;
+    }
+
+    std::string getMetadata(  // Begin metadata if tagID is non-zero, else continuation metadata
+        uint64_t tagIDContinuation=0,
+        uint64_t submitID=0) const override;
+
+private:
+    std::string getBeginMetadata(
+        uint64_t submitID=0) const;
+
+    std::string getContinuationMetadata(
+        uint64_t tagIDContinuation,
+        uint64_t submitID=0) const;
+
+    uint32_t width;  // presumably the render area width from the constructor - TODO confirm
+
+    uint32_t height;  // presumably the render area height from the constructor - TODO confirm
+
+    bool suspending;  // Reported by isSuspending()
+
+    uint32_t subpassCount;  // Serialized into the metadata only when it is not 1
+
+    uint64_t drawCallCount { 0 };  // Updated by setDrawCallCount()
+
+    std::vector<RenderPassAttachment> attachments;  // Serialized as the "attachments" array
+};
+
+/**
+ * @brief Workload class representing a debug label marker in the command stream.
+ */
+class LCSMarker : public LCSWorkload
+{
+public:
+    LCSMarker(
+        const std::string& label);
+
+    virtual ~LCSMarker() = default;
+
+    std::string getMetadata(  // Returns the label string itself; both IDs are ignored
+        uint64_t tagIDContinuation=0,
+        uint64_t submitID=0) const override
+    {
+        UNUSED(tagIDContinuation);
+        UNUSED(submitID);
+        return label;
+    }
+
+private:
+    std::string label;
+};
+
+/**
+ * @brief Instructions are an opcode with a data pointer.
+ *
+ * Data pointers may be null for some opcodes.
+ */
+using LCSInstruction = std::pair<LCSOpcode, std::shared_ptr<LCSWorkload>>;
+}
diff --git a/source_common/trackers/queue.cpp b/source_common/trackers/queue.cpp
new file mode 100644
index 0000000..cfd6590
--- /dev/null
+++ b/source_common/trackers/queue.cpp
@@ -0,0 +1,67 @@
+/*
+ * SPDX-License-Identifier: MIT
+ * ----------------------------------------------------------------------------
+ * Copyright (c) 2022-2024 Arm Limited
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ * ----------------------------------------------------------------------------
+ */
+
+#include <cassert>
+
+#include "trackers/queue.hpp"
+
+namespace Tracker
+{
+/* See header for details. IDs start at 1; the metadata writers omit a submit ID of 0. */
+std::atomic<uint64_t> Queue::nextSubmitID { 1 };
+
+/* See header for details. */
+Queue::Queue(
+    VkQueue _handle)
+    : handle(_handle) {}
+
+/* See header for details. */
+void Queue::runSubmitCommandStream(
+    const std::vector<LCSInstruction>& stream,
+    std::function<void(const std::string&)> callback
+) {
+    for (const auto& instr: stream)
+    {
+        const LCSOpcode opCode = instr.first;
+        const LCSWorkload* opData = instr.second.get();
+        if (opCode == LCSOpcode::MARKER_BEGIN)
+        {
+            debugStack.push_back(opData->getMetadata());
+        }
+        // Ignore an unmatched end: pop_back() on an empty vector is undefined behavior
+        else if (opCode == LCSOpcode::MARKER_END && !debugStack.empty())
+        {
+            debugStack.pop_back();
+        }
+        else if (opCode == LCSOpcode::RENDERPASS_BEGIN)
+        {
+            auto* workload = dynamic_cast<const LCSRenderPass*>(opData); // null if not a render pass
+            assert(workload != nullptr);
+            if (workload != nullptr) { callback(workload->getMetadata()); }
+        }
+    }
+}
+
+}
diff --git a/source_common/trackers/queue.hpp b/source_common/trackers/queue.hpp
new file mode 100644
index 0000000..26fc416
--- /dev/null
+++ b/source_common/trackers/queue.hpp
@@ -0,0 +1,99 @@
+/*
+ * SPDX-License-Identifier: MIT
+ * ----------------------------------------------------------------------------
+ * Copyright (c) 2022-2024 Arm Limited
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ * ----------------------------------------------------------------------------
+ */
+
+/**
+ * \file
+ * The declaration of Vulkan queue use trackers.
+ *
+ * Role summary
+ * ============
+ *
+ * These trackers are used to monitor the use of a queue.
+ *
+ * Key properties
+ * ==============
+ *
+ * Queues are lock-free from a single app thread, relying on external
+ * synchronization above the API if multi-threaded use is required.
+ */
+
+#pragma once
+
+#include <atomic>
+#include <functional>
+#include <string>
+#include <vector>
+#include <vulkan/vulkan.h>
+
+#include "framework/utils.hpp"
+#include "trackers/layer_command_stream.hpp"
+
+namespace Tracker
+{
+
+/**
+ * @brief The state tracker for a queue.
+ */
+class Queue
+{
+public:
+    Queue(
+        VkQueue handle); // The native queue handle to wrap
+
+    /**
+     * @brief Execute a layer command stream, reporting render passes via callback.
+     */
+    void runSubmitCommandStream(
+        const std::vector<LCSInstruction>& stream,           // Instructions, processed in order
+        std::function<void(const std::string&)> callback);   // Receives each render pass metadata string
+
+    /**
+     * @brief Get a unique submitID to label a command buffer submit.
+     *
+     * @return The assigned ID.
+     */
+    static uint64_t getSubmitID()
+    {
+        // Allocation is a relaxed atomic increment of the counter shared by all queues
+        return nextSubmitID.fetch_add(1, std::memory_order_relaxed);
+    }
+
+private:
+    /**
+     * @brief The handle of the native queue we are wrapping.
+     */
+    VkQueue handle;
+
+    /**
+     * @brief The stack of open debug labels, updated by command stream markers.
+     */
+    std::vector<std::string> debugStack;
+
+    /**
+     * @brief The command buffer submitID allocator.
+     */
+    static std::atomic<uint64_t> nextSubmitID;
+};
+
+}
diff --git a/source_common/trackers/render_pass.cpp b/source_common/trackers/render_pass.cpp
new file mode 100644
index 0000000..3560612
--- /dev/null
+++ b/source_common/trackers/render_pass.cpp
@@ -0,0 +1,386 @@
+/*
+ * SPDX-License-Identifier: MIT
+ * ----------------------------------------------------------------------------
+ * Copyright (c) 2022-2024 Arm Limited
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ * ----------------------------------------------------------------------------
+ */
+
+#include <cassert>
+
+#include "trackers/render_pass.hpp"
+
+namespace Tracker
+{
+
+/* See header for details. */
+RenderPassAttachment::RenderPassAttachment(
+    RenderPassAttachName _name,
+    VkAttachmentLoadOp _loadOp,
+    VkAttachmentStoreOp _storeOp,
+    bool _resolve)
+    : name(_name)
+    , loadOp(_loadOp)
+    , storeOp(_storeOp)
+    , resolve(_resolve)
+{
+    // Nothing else to do: every member is copied straight from its argument
+}
+
+/* See header for details. */
+RenderPass::RenderPass(
+    VkRenderPass _handle,
+    const VkRenderPassCreateInfo& createInfo) :
+    handle(_handle)
+{
+    subpassCount = createInfo.subpassCount;
+    assert(subpassCount > 0);
+
+    // Only the final subpass is inspected; its attachments are recorded as
+    // the attachments of the whole render pass
+    auto& lastSubpass = createInfo.pSubpasses[subpassCount - 1];
+
+    // Color attachments
+    for(uint32_t i = 0; i < lastSubpass.colorAttachmentCount; i++)
+    {
+        auto& attachRef = lastSubpass.pColorAttachments[i];
+        if (attachRef.attachment == VK_ATTACHMENT_UNUSED)
+        {
+            continue;
+        }
+
+        auto& attachDesc = createInfo.pAttachments[attachRef.attachment];
+        attachments.emplace_back(
+            static_cast<RenderPassAttachName>(i),
+            attachDesc.loadOp,
+            attachDesc.storeOp,
+            false);
+    }
+
+    // Color resolve attachments; the array is optional, so iterate zero
+    // entries when the application did not provide one
+    uint32_t resolveCount = lastSubpass.pResolveAttachments ?
+                            lastSubpass.colorAttachmentCount : 0;
+    for(uint32_t i = 0; i < resolveCount; i++)
+    {
+        auto& attachRef = lastSubpass.pResolveAttachments[i];
+        if (attachRef.attachment == VK_ATTACHMENT_UNUSED)
+        {
+            continue;
+        }
+
+        auto& attachDesc = createInfo.pAttachments[attachRef.attachment];
+        attachments.emplace_back(
+            static_cast<RenderPassAttachName>(i),
+            attachDesc.loadOp,
+            attachDesc.storeOp,
+            true);
+    }
+
+    // Depth+Stencil attachments
+    // TODO: Determine if this is depth/stencil/both from image format
+    if (lastSubpass.pDepthStencilAttachment)
+    {
+        auto& attachRef = *lastSubpass.pDepthStencilAttachment;
+        if (attachRef.attachment != VK_ATTACHMENT_UNUSED)
+        {
+            auto& attachDesc = createInfo.pAttachments[attachRef.attachment];
+
+            // Canonicalize read-only attachments as storeOp=NONE
+            VkAttachmentStoreOp depthStoreOp = attachDesc.storeOp;
+            switch(attachRef.layout)
+            {
+                case VK_IMAGE_LAYOUT_READ_ONLY_OPTIMAL:
+                case VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL:
+                case VK_IMAGE_LAYOUT_DEPTH_READ_ONLY_STENCIL_ATTACHMENT_OPTIMAL:
+                    if (attachDesc.storeOp == VK_ATTACHMENT_STORE_OP_STORE)
+                    {
+                        depthStoreOp = VK_ATTACHMENT_STORE_OP_NONE;
+                    }
+                    break;
+                default:
+                    // Writable layout: keep the store op as specified
+                    break;
+            }
+
+            attachments.emplace_back(
+                RenderPassAttachName::DEPTH,
+                attachDesc.loadOp,
+                depthStoreOp,
+                false);
+
+            // Canonicalize read-only attachments as storeOp=NONE
+            VkAttachmentStoreOp stencilStoreOp = attachDesc.stencilStoreOp;
+            switch(attachRef.layout)
+            {
+                case VK_IMAGE_LAYOUT_READ_ONLY_OPTIMAL:
+                case VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL:
+                case VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_STENCIL_READ_ONLY_OPTIMAL:
+                    if (attachDesc.stencilStoreOp == VK_ATTACHMENT_STORE_OP_STORE)
+                    {
+                        stencilStoreOp = VK_ATTACHMENT_STORE_OP_NONE;
+                    }
+                    break;
+                default:
+                    // Writable layout: keep the store op as specified
+                    break;
+            }
+
+            attachments.emplace_back(
+                RenderPassAttachName::STENCIL,
+                attachDesc.stencilLoadOp,
+                stencilStoreOp,
+                false);
+        }
+    }
+}
+
+/* See header for details. */
+RenderPass::RenderPass(
+    VkRenderPass _handle,
+    const VkRenderPassCreateInfo2& createInfo) :
+    handle(_handle)
+{
+    subpassCount = createInfo.subpassCount;
+    assert(subpassCount > 0);
+
+    // Only the final subpass is inspected; its attachments are recorded as
+    // the attachments of the whole render pass
+    auto& lastSubpass = createInfo.pSubpasses[subpassCount - 1];
+
+    // Color attachments
+    for(uint32_t i = 0; i < lastSubpass.colorAttachmentCount; i++)
+    {
+        auto& attachRef = lastSubpass.pColorAttachments[i];
+        if (attachRef.attachment == VK_ATTACHMENT_UNUSED)
+        {
+            continue;
+        }
+
+        auto& attachDesc = createInfo.pAttachments[attachRef.attachment];
+        attachments.emplace_back(
+            static_cast<RenderPassAttachName>(i),
+            attachDesc.loadOp,
+            attachDesc.storeOp,
+            false);
+    }
+
+    // Color resolve attachments; the array is optional, so iterate zero
+    // entries when the application did not provide one
+    uint32_t resolveCount = lastSubpass.pResolveAttachments ?
+                            lastSubpass.colorAttachmentCount : 0;
+    for(uint32_t i = 0; i < resolveCount; i++)
+    {
+        auto& attachRef = lastSubpass.pResolveAttachments[i];
+        if (attachRef.attachment == VK_ATTACHMENT_UNUSED)
+        {
+            continue;
+        }
+
+        auto& attachDesc = createInfo.pAttachments[attachRef.attachment];
+        attachments.emplace_back(
+            static_cast<RenderPassAttachName>(i),
+            attachDesc.loadOp,
+            attachDesc.storeOp,
+            true);
+    }
+
+    // Depth+Stencil attachments
+    // TODO: Determine if this is depth/stencil/both from image format
+    if (lastSubpass.pDepthStencilAttachment)
+    {
+        auto& attachRef = *lastSubpass.pDepthStencilAttachment;
+        if (attachRef.attachment != VK_ATTACHMENT_UNUSED)
+        {
+            auto& attachDesc = createInfo.pAttachments[attachRef.attachment];
+
+            // Canonicalize read-only attachments as storeOp=NONE
+            VkAttachmentStoreOp depthStoreOp = attachDesc.storeOp;
+            switch(attachRef.layout)
+            {
+                case VK_IMAGE_LAYOUT_READ_ONLY_OPTIMAL:
+                case VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL:
+                case VK_IMAGE_LAYOUT_DEPTH_READ_ONLY_STENCIL_ATTACHMENT_OPTIMAL:
+                    if (attachDesc.storeOp == VK_ATTACHMENT_STORE_OP_STORE)
+                    {
+                        depthStoreOp = VK_ATTACHMENT_STORE_OP_NONE;
+                    }
+                    break;
+                default:
+                    // Writable layout: keep the store op as specified
+                    break;
+            }
+
+            attachments.emplace_back(
+                RenderPassAttachName::DEPTH,
+                attachDesc.loadOp,
+                depthStoreOp,
+                false);
+
+            // Canonicalize read-only attachments as storeOp=NONE
+            VkAttachmentStoreOp stencilStoreOp = attachDesc.stencilStoreOp;
+            switch(attachRef.layout)
+            {
+                case VK_IMAGE_LAYOUT_READ_ONLY_OPTIMAL:
+                case VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL:
+                case VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_STENCIL_READ_ONLY_OPTIMAL:
+                    if (attachDesc.stencilStoreOp == VK_ATTACHMENT_STORE_OP_STORE)
+                    {
+                        stencilStoreOp = VK_ATTACHMENT_STORE_OP_NONE;
+                    }
+                    break;
+                default:
+                    // Writable layout: keep the store op as specified
+                    break;
+            }
+
+            attachments.emplace_back(
+                RenderPassAttachName::STENCIL,
+                attachDesc.stencilLoadOp,
+                stencilStoreOp,
+                false);
+        }
+    }
+}
+
+/* See header for details. */
+RenderPass::RenderPass(
+    const VkRenderingInfo& createInfo) :
+    handle(VK_NULL_HANDLE)
+{
+    // No subpasses in dynamic rendering
+    subpassCount = 1;
+
+    // Color attachments
+    for(uint32_t i = 0; i < createInfo.colorAttachmentCount; i++)
+    {
+        auto& attachRef = createInfo.pColorAttachments[i];
+        if (attachRef.imageView == VK_NULL_HANDLE)
+        {
+            continue;
+        }
+
+        attachments.emplace_back(
+            static_cast<RenderPassAttachName>(i),
+            attachRef.loadOp,
+            attachRef.storeOp,
+            false);
+    }
+
+    // Color resolve attachments
+    for(uint32_t i = 0; i < createInfo.colorAttachmentCount; i++)
+    {
+        auto& attachRef = createInfo.pColorAttachments[i];
+        if ((attachRef.imageView == VK_NULL_HANDLE) ||
+            (attachRef.resolveMode == VK_RESOLVE_MODE_NONE))
+        {
+            continue;
+        }
+
+        attachments.emplace_back(
+            static_cast<RenderPassAttachName>(i),
+            VK_ATTACHMENT_LOAD_OP_DONT_CARE,
+            VK_ATTACHMENT_STORE_OP_STORE,
+            true);
+    }
+
+    // Depth attachment; skipped, like color, when it has no image view
+    if (createInfo.pDepthAttachment &&
+        (createInfo.pDepthAttachment->imageView != VK_NULL_HANDLE))
+    {
+        auto& attachRef = *createInfo.pDepthAttachment;
+
+        // Canonicalize read-only attachments as storeOp=NONE
+        VkAttachmentStoreOp depthStoreOp = attachRef.storeOp;
+        switch(attachRef.imageLayout)
+        {
+            case VK_IMAGE_LAYOUT_READ_ONLY_OPTIMAL:
+            case VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL:
+            case VK_IMAGE_LAYOUT_DEPTH_READ_ONLY_STENCIL_ATTACHMENT_OPTIMAL:
+                if (attachRef.storeOp == VK_ATTACHMENT_STORE_OP_STORE)
+                {
+                    depthStoreOp = VK_ATTACHMENT_STORE_OP_NONE;
+                }
+                break;
+            default:
+                // Writable layout: keep the store op as specified
+                break;
+        }
+
+        attachments.emplace_back(
+            RenderPassAttachName::DEPTH,
+            attachRef.loadOp,
+            depthStoreOp,
+            false);
+
+        // Depth resolve attachment
+        if (attachRef.resolveMode != VK_RESOLVE_MODE_NONE)
+        {
+            attachments.emplace_back(
+                RenderPassAttachName::DEPTH,
+                VK_ATTACHMENT_LOAD_OP_DONT_CARE,
+                VK_ATTACHMENT_STORE_OP_STORE,
+                true);
+        }
+    }
+
+
+    // Stencil attachment; skipped, like color, when it has no image view
+    if (createInfo.pStencilAttachment &&
+        (createInfo.pStencilAttachment->imageView != VK_NULL_HANDLE))
+    {
+        auto& attachRef = *createInfo.pStencilAttachment;
+
+        // Canonicalize read-only attachments as storeOp=NONE
+        VkAttachmentStoreOp stencilStoreOp = attachRef.storeOp;
+        switch(attachRef.imageLayout)
+        {
+            case VK_IMAGE_LAYOUT_READ_ONLY_OPTIMAL:
+            case VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL:
+            case VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_STENCIL_READ_ONLY_OPTIMAL:
+                if (attachRef.storeOp == VK_ATTACHMENT_STORE_OP_STORE)
+                {
+                    stencilStoreOp = VK_ATTACHMENT_STORE_OP_NONE;
+                }
+                break;
+            default:
+                // Writable layout: keep the store op as specified
+                break;
+        }
+
+        attachments.emplace_back(
+            RenderPassAttachName::STENCIL,
+            attachRef.loadOp,
+            stencilStoreOp,
+            false);
+
+        // Stencil resolve attachment
+        if (attachRef.resolveMode != VK_RESOLVE_MODE_NONE)
+        {
+            attachments.emplace_back(
+                RenderPassAttachName::STENCIL,
+                VK_ATTACHMENT_LOAD_OP_DONT_CARE,
+                VK_ATTACHMENT_STORE_OP_STORE,
+                true);
+        }
+    }
+}
+
+}
\ No newline at end of file
diff --git a/source_common/trackers/render_pass.hpp b/source_common/trackers/render_pass.hpp
new file mode 100644
index 0000000..fc5044e
--- /dev/null
+++ b/source_common/trackers/render_pass.hpp
@@ -0,0 +1,194 @@
+/*
+ * SPDX-License-Identifier: MIT
+ * ----------------------------------------------------------------------------
+ * Copyright (c) 2022-2024 Arm Limited
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ * ----------------------------------------------------------------------------
+ */
+
+/**
+ * \file
+ * The declaration of Vulkan render pass use trackers.
+ *
+ * Role summary
+ * ============
+ *
+ * These trackers are used to monitor the use of a render pass.
+ */
+
+#pragma once
+
+#include <cassert>
+#include <string>
+#include <vector>
+#include <vulkan/vulkan.h>
+
+namespace Tracker
+{
+
+/**
+ * @brief Symbolic names of render pass attachments.
+ */
+enum class RenderPassAttachName
+{
+    COLOR0 = 0, // COLORn must equal n: RenderPass casts the color attachment index to this type
+    COLOR1 = 1,
+    COLOR2 = 2,
+    COLOR3 = 3,
+    COLOR4 = 4,
+    COLOR5 = 5,
+    COLOR6 = 6,
+    COLOR7 = 7,
+    DEPTH = 100,
+    STENCIL = 200
+};
+
+/**
+ * @brief The state tracker for a single render pass attachment.
+ */
+class RenderPassAttachment
+{
+public:
+    RenderPassAttachment(
+        RenderPassAttachName name,
+        VkAttachmentLoadOp loadOp,
+        VkAttachmentStoreOp storeOp,
+        bool resolve);
+
+    std::string getAttachmentStr() const // Short name: "C0".."C7", "D" or "S"
+    {
+        switch(name)
+        {
+        case RenderPassAttachName::COLOR0:
+            return "C0";
+        case RenderPassAttachName::COLOR1:
+            return "C1";
+        case RenderPassAttachName::COLOR2:
+            return "C2";
+        case RenderPassAttachName::COLOR3:
+            return "C3";
+        case RenderPassAttachName::COLOR4:
+            return "C4";
+        case RenderPassAttachName::COLOR5:
+            return "C5";
+        case RenderPassAttachName::COLOR6:
+            return "C6";
+        case RenderPassAttachName::COLOR7:
+            return "C7";
+        case RenderPassAttachName::DEPTH:
+            return "D";
+        case RenderPassAttachName::STENCIL:
+            return "S";
+        default:
+            assert(false);
+        }
+
+        return "U"; // Unrecognized name; only reached when the assert is compiled out
+    }
+
+    bool isLoaded() const // True only for LOAD_OP_LOAD
+    {
+        return loadOp == VK_ATTACHMENT_LOAD_OP_LOAD;
+    }
+
+    bool isStored() const // True only for STORE_OP_STORE
+    {
+        return storeOp ==  VK_ATTACHMENT_STORE_OP_STORE;
+    }
+
+    bool isResolved() const
+    {
+        return resolve;
+    }
+
+private:
+    /**
+     * @brief The attachment point name.
+     */
+    RenderPassAttachName name;
+
+    /**
+     * @brief The attachment load operation.
+     */
+    VkAttachmentLoadOp loadOp;
+
+    /**
+     * @brief The attachment store operation.
+     */
+    VkAttachmentStoreOp storeOp;
+
+    /**
+     * @brief Is this attachment a resolve attachment?
+     */
+    bool resolve;
+};
+
+/**
+ * @brief The state tracker for a render pass.
+ */
+class RenderPass
+{
+public:
+    RenderPass( // Build from a VkRenderPassCreateInfo description
+        VkRenderPass handle,
+        const VkRenderPassCreateInfo& createInfo);
+
+    RenderPass( // Build from a VkRenderPassCreateInfo2 description
+        VkRenderPass handle,
+        const VkRenderPassCreateInfo2& createInfo);
+
+    RenderPass( // Build from dynamic rendering info; handle is VK_NULL_HANDLE and subpassCount is 1
+        const VkRenderingInfo& createInfo);
+
+    uint32_t getSubpassCount() const
+    {
+        return subpassCount;
+    };
+
+    const std::vector<RenderPassAttachment>& getAttachments() const
+    {
+        return attachments;
+    };
+
+private:
+    /**
+     * @brief The handle of the native render pass we represent.
+     */
+    VkRenderPass handle;
+
+    /**
+     * @brief The render pass subpass count.
+     */
+    uint32_t subpassCount { 1 };
+
+    /**
+     * @brief The render pass attachments in this render pass.
+     *
+     * For render passes that are using multiple sub-passes this stores the
+     * output attachments present in the final subpass.
+     *
+     * TODO: In future we could store more information here using the subpass
+     * merging feedback extension to work out how many boxes we're going to
+     * end up with on the timeline, and store attachments per merged chunk.
+     */
+    std::vector<RenderPassAttachment> attachments;
+};
+
+}
diff --git a/source_common/trackers/stats.hpp b/source_common/trackers/stats.hpp
new file mode 100644
index 0000000..2d297ed
--- /dev/null
+++ b/source_common/trackers/stats.hpp
@@ -0,0 +1,227 @@
+/*
+ * SPDX-License-Identifier: MIT
+ * ----------------------------------------------------------------------------
+ * Copyright (c) 2022-2024 Arm Limited
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ * ----------------------------------------------------------------------------
+ */
+
+/**
+ * \file
+ * This module implements basic counter tracking of Vulkan workloads.
+ */
+
+#pragma once
+
+#include <cstdint>
+#include <unordered_map>
+#include <vulkan/vulkan.h>
+
+namespace Tracker
+{
+
+/**
+ * @brief Statistics counters for a single device.
+ *
+ * These counters are designed to be used hierarchically, so you can use the
+ * API to aggregate counters into a parent tracker. Not all instances are
+ * required to track all statistics.
+ */
+class Stats
+{
+public:
+    /**
+     * @brief Increment the frame counter.
+     */
+    void incFrameCount()
+    {
+        frameCount += 1;
+    }
+
+    /**
+     * @brief Increment the render pass counter.
+     */
+    void incRenderPassCount()
+    {
+        renderPassCount += 1;
+    }
+
+    /**
+     * @brief Increment the draw call counter.
+     */
+    void incDrawCallCount()
+    {
+        drawCallCount += 1;
+    }
+
+    /**
+     * @brief Increment the compute dispatch counter.
+     */
+    void incDispatchCount()
+    {
+        dispatchCount += 1;
+    }
+
+    /**
+     * @brief Increment the trace rays counter.
+     */
+    void incTraceRaysCount()
+    {
+        traceRaysCount += 1;
+    }
+
+    /**
+     * @brief Increment the buffer transfer counter.
+     */
+    void incBufferTransferCount()
+    {
+        bufferTransferCount += 1;
+    }
+
+    /**
+     * @brief Increment the image transfer counter.
+     */
+    void incImageTransferCount()
+    {
+        imageTransferCount += 1;
+    }
+
+    /**
+     * @brief Add every counter from another stats object on to this one.
+     */
+    void mergeCounts(const Stats& other)
+    {
+        frameCount += other.frameCount;
+        renderPassCount += other.renderPassCount;
+        drawCallCount += other.drawCallCount;
+        dispatchCount += other.dispatchCount;
+        traceRaysCount += other.traceRaysCount;
+        bufferTransferCount += other.bufferTransferCount;
+        imageTransferCount += other.imageTransferCount;
+    }
+
+    /**
+     * @brief Reset all counters to zero.
+     */
+    void reset()
+    {
+        frameCount = 0;
+        renderPassCount = 0;
+        drawCallCount = 0;
+        dispatchCount = 0;
+        traceRaysCount = 0;
+        bufferTransferCount = 0;
+        imageTransferCount = 0;
+    }
+
+    /**
+     * @brief Get the frame counter.
+     */
+    uint64_t getFrameCount() const
+    {
+        return frameCount;
+    }
+
+    /**
+     * @brief Get the render pass counter.
+     */
+    uint64_t getRenderPassCount() const
+    {
+        return renderPassCount;
+    }
+
+    /**
+     * @brief Get the draw call counter.
+     */
+    uint64_t getDrawCallCount() const
+    {
+        return drawCallCount;
+    }
+
+    /**
+     * @brief Get the compute dispatch counter.
+     */
+    uint64_t getDispatchCount() const
+    {
+        return dispatchCount;
+    }
+
+    /**
+     * @brief Get the trace rays counter.
+     */
+    uint64_t getTraceRaysCount() const
+    {
+        return traceRaysCount;
+    }
+
+    /**
+     * @brief Get the buffer transfer counter.
+     */
+    uint64_t getBufferTransferCount() const
+    {
+        return bufferTransferCount;
+    }
+
+    /**
+     * @brief Get the image transfer counter.
+     */
+    uint64_t getImageTransferCount() const
+    {
+        return imageTransferCount;
+    }
+
+private:
+    /**
+     * @brief The number of frames tracked.
+     */
+    uint64_t frameCount { 0 };
+
+    /**
+     * @brief The number of render passes tracked.
+     */
+    uint64_t renderPassCount { 0 };
+
+    /**
+     * @brief The number of draw calls tracked.
+     */
+    uint64_t drawCallCount { 0 };
+
+    /**
+     * @brief The number of compute dispatches tracked.
+     */
+    uint64_t dispatchCount { 0 };
+
+    /**
+     * @brief The number of trace rays calls tracked.
+     */
+    uint64_t traceRaysCount { 0 };
+
+    /**
+     * @brief The number of buffer transfers tracked.
+     */
+    uint64_t bufferTransferCount { 0 };
+
+    /**
+     * @brief The number of image transfers tracked.
+     */
+    uint64_t imageTransferCount { 0 };
+};
+
+}
diff --git a/source_common/utils/misc.hpp b/source_common/utils/misc.hpp
index fb69b7b..e38e0b7 100644
--- a/source_common/utils/misc.hpp
+++ b/source_common/utils/misc.hpp
@@ -32,7 +32,10 @@
 
 #include <algorithm>
 #include <cassert>
+#include <cinttypes>
+#include <sstream>
 #include <string>
+#include <vector>
 
 /**
  * @brief Macro to stringize a value.
@@ -51,7 +54,7 @@
  * @param args     The variadic values used to populate the template.
  */
 template<typename ... Args>
-std::string fmt_string(
+std::string formatString(
     const std::string& format,
     Args ... args
 ) {
@@ -69,6 +72,29 @@ std::string fmt_string(
     return std::string(buf.get(), buf.get() + size - 1);
 }
 
+/**
+ * @brief Join a list of string parts into one delimited string.
+ *
+ * @param parts       The ordered string parts to concatenate.
+ * @param separator   The delimiter written between adjacent parts.
+ */
+[[maybe_unused]] static std::string joinString(
+    const std::vector<std::string>& parts,
+    const std::string& separator
+) {
+    std::stringstream joined;
+    for (auto it = parts.begin(); it != parts.end(); ++it)
+    {
+        if (it != parts.begin())
+        {
+            joined << separator;
+        }
+        joined << *it;
+    }
+
+    return joined.str();
+}
+
 /**
  * @brief Test if an element exists in an iterable container.
  *
@@ -101,22 +127,43 @@ bool isInMap(
     return cont.find(elem) != cont.end();
 }
 
+/**
+ * @brief Append all values in one vector to the back of another.
+ *
+ * @param dst   The destination vector to append to.
+ * @param src   The source vector; must not be the dst vector.
+ */
+template<typename T>
+void vecAppend(
+    std::vector<T>& dst,
+    const std::vector<T>& src
+) {
+    // Reserve enough capacity to hold the combined contents
+    size_t newSize = dst.size() + src.size();
+    dst.reserve(newSize);
+
+    // Copy the source elements on to the end of the destination
+    dst.insert(std::end(dst), std::begin(src), std::end(src));
+}
+
 /**
  * @brief Get a displayable pointer.
  *
- * On 64-bit systems this strips the MTE tag.
+ * On 64-bit Arm systems this strips the MTE tag in the top byte.
  *
- * @return The displayable pointer
+ * @return The displayable pointer.
  */
 static inline uintptr_t getDisplayPointer(
     void* pointer
 ) {
     uintptr_t dispPointer = reinterpret_cast<uintptr_t>(pointer);
 
-    if constexpr(sizeof(uintptr_t) == 8)
-    {
-        dispPointer &= 0x00FFFFFFFFFFFFFFull;
-    }
+    #if defined(__aarch64__)
+        if constexpr(sizeof(uintptr_t) == 8)
+        {
+            dispPointer &= 0x00FFFFFFFFFFFFFFull;
+        }
+    #endif
 
     return dispPointer;
 }

From daf83f9cf484805543cf7eef3514a0542e47c85d Mon Sep 17 00:00:00 2001
From: Peter Harris <peter.harris@arm.com>
Date: Wed, 11 Dec 2024 11:24:44 +0000
Subject: [PATCH 2/8] Remove comms logging

---
 source_common/comms/comms_module.cpp | 7 -------
 1 file changed, 7 deletions(-)

diff --git a/source_common/comms/comms_module.cpp b/source_common/comms/comms_module.cpp
index 5ef695e..42c815e 100644
--- a/source_common/comms/comms_module.cpp
+++ b/source_common/comms/comms_module.cpp
@@ -46,7 +46,6 @@ namespace Comms
 CommsModule::CommsModule(
     const std::string& domainAddress
 ) {
-    LAYER_LOG("Client UDS socket create");
     sockfd = socket(AF_UNIX, SOCK_STREAM, 0);
     if (sockfd < 0)
     {
@@ -61,7 +60,6 @@ CommsModule::CommsModule(
     std::strcpy(servAddr.sun_path + 1, domainAddress.c_str());
     servAddr.sun_path[0] = '\0';
 
-    LAYER_LOG("Client UDS connect");
     int conn = connect(
         sockfd,
         reinterpret_cast<const struct sockaddr*>(&servAddr),
@@ -74,13 +72,8 @@ CommsModule::CommsModule(
         return;
     }
 
-    LAYER_LOG("Client make transmitter");
     transmitter = std::make_unique<Transmitter>(*this);
-
-    LAYER_LOG("Client make receiver");
     receiver = std::make_unique<Receiver>(*this);
-
-    LAYER_LOG("Client make complete");
 }
 
 /** See header for documentation. */

From d6ef99a390ce6dead492c940cfa85756750be910 Mon Sep 17 00:00:00 2001
From: Peter Harris <peter.harris@arm.com>
Date: Wed, 11 Dec 2024 11:36:47 +0000
Subject: [PATCH 3/8] Remove debug logging

---
 .../source/layer_device_functions_render_pass.cpp          | 7 -------
 1 file changed, 7 deletions(-)

diff --git a/layer_gpu_timeline/source/layer_device_functions_render_pass.cpp b/layer_gpu_timeline/source/layer_device_functions_render_pass.cpp
index 3486da3..1a272bb 100644
--- a/layer_gpu_timeline/source/layer_device_functions_render_pass.cpp
+++ b/layer_gpu_timeline/source/layer_device_functions_render_pass.cpp
@@ -156,20 +156,16 @@ VKAPI_ATTR void VKAPI_CALL layer_vkCmdBeginRenderPass<user_tag>(
     std::unique_lock<std::mutex> lock { g_vulkanLock };
     auto* layer = Device::retrieve(commandBuffer);
 
-    LAYER_LOG("A");
     auto& tracker = layer->getStateTracker();
     auto& cb = tracker.getCommandBuffer(commandBuffer);
 
-    LAYER_LOG("B");
     auto& rp = tracker.getRenderPass(pRenderPassBegin->renderPass);
     uint32_t width = pRenderPassBegin->renderArea.extent.width;
     uint32_t height = pRenderPassBegin->renderArea.extent.height;
 
     // Notify the command buffer we are starting a new render pass
-    LAYER_LOG("C");
     uint64_t tagID = cb.renderPassBegin(rp, width, height);
 
-    LAYER_LOG("D");
     // Emit the unique workload tag into the command stream
     std::string tagLabel = formatString("t%" PRIu64, tagID);
     [[maybe_unused]] VkDebugUtilsLabelEXT tagInfo {
@@ -179,11 +175,9 @@ VKAPI_ATTR void VKAPI_CALL layer_vkCmdBeginRenderPass<user_tag>(
         .color = { 0.0f, 0.0f, 0.0f, 0.0f }
     };
 
-    LAYER_LOG("E");
     // Release the lock to call into the driver
     lock.unlock();
     layer->driver.vkCmdBeginDebugUtilsLabelEXT(commandBuffer, &tagInfo);
-    LAYER_LOG("F");
     layer->driver.vkCmdBeginRenderPass(commandBuffer, pRenderPassBegin, contents);
 }
 
@@ -372,7 +366,6 @@ VKAPI_ATTR void VKAPI_CALL layer_vkCmdEndRenderPass<user_tag>(
 
     // Update the layer command stream in the tracker
     auto& tracker = layer->getStateTracker();
-    LAYER_LOG(" - Command buffer: %p", (void*)commandBuffer);
     auto& cb = tracker.getCommandBuffer(commandBuffer);
     cb.renderPassEnd();
 

From 47a71805315e0820084fe57ec4ff2425d2bfabe0 Mon Sep 17 00:00:00 2001
From: Peter Harris <peter.harris@arm.com>
Date: Wed, 11 Dec 2024 15:01:51 +0000
Subject: [PATCH 4/8] Improve renderpass handling

---
 layer_gpu_timeline/android_build.sh           |  2 +-
 .../docs/command_buffer_model.md              |  2 +-
 layer_gpu_timeline/source/CMakeLists.txt      |  1 +
 layer_gpu_timeline/source/device.hpp          |  8 +++
 .../source/layer_device_functions_queue.cpp   | 57 ++++++++++++++++++-
 lgl_host_server.py                            |  7 ++-
 lglpy/server.py                               |  2 +-
 lglpy/service_gpu_timeline.py                 | 55 +++++++++++++++++-
 source_common/trackers/command_buffer.cpp     |  4 +-
 .../trackers/layer_command_stream.cpp         | 17 +++++-
 .../trackers/layer_command_stream.hpp         | 23 ++++++--
 source_common/trackers/queue.cpp              | 29 +++++++++-
 source_common/trackers/queue.hpp              | 11 +++-
 13 files changed, 195 insertions(+), 23 deletions(-)

diff --git a/layer_gpu_timeline/android_build.sh b/layer_gpu_timeline/android_build.sh
index 960b2b0..5bdbff5 100644
--- a/layer_gpu_timeline/android_build.sh
+++ b/layer_gpu_timeline/android_build.sh
@@ -67,7 +67,7 @@ cmake \
     -DCMAKE_TOOLCHAIN_FILE="${ANDROID_NDK_HOME}/build/cmake/android.toolchain.cmake" \
     ..
 
-make -j1
+make -j8
 
 popd
 
diff --git a/layer_gpu_timeline/docs/command_buffer_model.md b/layer_gpu_timeline/docs/command_buffer_model.md
index e7422d7..f317b51 100644
--- a/layer_gpu_timeline/docs/command_buffer_model.md
+++ b/layer_gpu_timeline/docs/command_buffer_model.md
@@ -21,7 +21,7 @@ performed.
 
 * Pop the latest marker from the queue debug label stack.
 
-**RENDERPASS_BEGIN(const json\*):**
+**RENDER_PASS(const json\*):**
 
 * Set the current workload to a new render pass with the passed metadata.
 
diff --git a/layer_gpu_timeline/source/CMakeLists.txt b/layer_gpu_timeline/source/CMakeLists.txt
index 45e55c3..ca4fc25 100644
--- a/layer_gpu_timeline/source/CMakeLists.txt
+++ b/layer_gpu_timeline/source/CMakeLists.txt
@@ -57,6 +57,7 @@ add_library(
 target_include_directories(
     ${VK_LAYER} PRIVATE
         ${PROJECT_SOURCE_DIR}/../source_common
+        ${PROJECT_SOURCE_DIR}/../source_third_party
         ${CMAKE_CURRENT_BINARY_DIR}
         .)
 
diff --git a/layer_gpu_timeline/source/device.hpp b/layer_gpu_timeline/source/device.hpp
index b04ace1..2d5460d 100644
--- a/layer_gpu_timeline/source/device.hpp
+++ b/layer_gpu_timeline/source/device.hpp
@@ -130,6 +130,14 @@ class Device
      */
     ~Device();
 
+    /**
+     * @brief Callback for sending frame boundary messages.
+     */
+    void onFrame(const std::string& message)
+    {
+        commsWrapper->txMessage(message);
+    }
+
     /**
      * @brief Callback for sending messages
      */
diff --git a/layer_gpu_timeline/source/layer_device_functions_queue.cpp b/layer_gpu_timeline/source/layer_device_functions_queue.cpp
index 906a39e..30ca611 100644
--- a/layer_gpu_timeline/source/layer_device_functions_queue.cpp
+++ b/layer_gpu_timeline/source/layer_device_functions_queue.cpp
@@ -27,9 +27,15 @@
 #include <mutex>
 #include <thread>
 
+#include "utils/misc.hpp"
+#include "nlohmann/json.hpp"
+
 #include "device.hpp"
 #include "layer_device_functions.hpp"
-#include "utils/misc.hpp"
+
+using json = nlohmann::json;
+
+using namespace std::placeholders;
 
 extern std::mutex g_vulkanLock;
 
@@ -48,6 +54,15 @@ VKAPI_ATTR VkResult VKAPI_CALL layer_vkQueuePresentKHR<user_tag>(
     auto& tracker = layer->getStateTracker();
     tracker.queuePresent();
 
+    // This is run with the lock held to ensure that all queue submit
+    // messages are sent sequentially to the host tool
+    json frame {
+        { "type", "frame" },
+        { "fid", tracker.totalStats.getFrameCount() }
+    };
+
+    layer->onFrame(frame.dump());
+
     // Release the lock to call into the driver
     lock.unlock();
     return layer->driver.vkQueuePresentKHR(queue, pPresentInfo);
@@ -67,12 +82,13 @@ VKAPI_ATTR VkResult VKAPI_CALL layer_vkQueueSubmit<user_tag>(
     std::unique_lock<std::mutex> lock { g_vulkanLock };
     auto* layer = Device::retrieve(queue);
 
-    using namespace std::placeholders;
     auto onSubmit = std::bind(&Device::onWorkloadSubmit, layer, _1);
 
     auto& tracker = layer->getStateTracker();
     auto& trackQueue = tracker.getQueue(queue);
 
+    // This is run with the lock held to ensure that all queue submit
+    // messages are sent sequentially to the host tool
     for (uint32_t i = 0; i < submitCount; i++)
     {
         const auto& submit = pSubmits[i];
@@ -80,7 +96,6 @@ VKAPI_ATTR VkResult VKAPI_CALL layer_vkQueueSubmit<user_tag>(
         {
             auto& trackCB = tracker.getCommandBuffer(submit.pCommandBuffers[j]);
             const auto& LCS = trackCB.getSubmitCommandStream();
-
             trackQueue.runSubmitCommandStream(LCS, onSubmit);
         }
     }
@@ -104,6 +119,24 @@ VKAPI_ATTR VkResult VKAPI_CALL layer_vkQueueSubmit2<user_tag>(
     std::unique_lock<std::mutex> lock { g_vulkanLock };
     auto* layer = Device::retrieve(queue);
 
+    auto onSubmit = std::bind(&Device::onWorkloadSubmit, layer, _1);
+
+    auto& tracker = layer->getStateTracker();
+    auto& trackQueue = tracker.getQueue(queue);
+
+    // This is run with the lock held to ensure that all queue submit
+    // messages are sent sequentially to the host tool
+    for (uint32_t i = 0; i < submitCount; i++)
+    {
+        const auto& submit = pSubmits[i];
+        for (uint32_t j = 0; j < submit.commandBufferInfoCount; j++)
+        {
+            auto& trackCB = tracker.getCommandBuffer(submit.pCommandBufferInfos[j].commandBuffer);
+            const auto& LCS = trackCB.getSubmitCommandStream();
+            trackQueue.runSubmitCommandStream(LCS, onSubmit);
+        }
+    }
+
     // Release the lock to call into the driver
     lock.unlock();
     return layer->driver.vkQueueSubmit2(queue, submitCount, pSubmits, fence);
@@ -123,6 +156,24 @@ VKAPI_ATTR VkResult VKAPI_CALL layer_vkQueueSubmit2KHR<user_tag>(
     std::unique_lock<std::mutex> lock { g_vulkanLock };
     auto* layer = Device::retrieve(queue);
 
+    auto onSubmit = std::bind(&Device::onWorkloadSubmit, layer, _1);
+
+    auto& tracker = layer->getStateTracker();
+    auto& trackQueue = tracker.getQueue(queue);
+
+    // This is run with the lock held to ensure that all queue submit
+    // messages are sent sequentially to the host tool
+    for (uint32_t i = 0; i < submitCount; i++)
+    {
+        const auto& submit = pSubmits[i];
+        for (uint32_t j = 0; j < submit.commandBufferInfoCount; j++)
+        {
+            auto& trackCB = tracker.getCommandBuffer(submit.pCommandBufferInfos[j].commandBuffer);
+            const auto& LCS = trackCB.getSubmitCommandStream();
+            trackQueue.runSubmitCommandStream(LCS, onSubmit);
+        }
+    }
+
     // Release the lock to call into the driver
     lock.unlock();
     return layer->driver.vkQueueSubmit2KHR(queue, submitCount, pSubmits, fence);
diff --git a/lgl_host_server.py b/lgl_host_server.py
index 14d5941..3893fec 100644
--- a/lgl_host_server.py
+++ b/lgl_host_server.py
@@ -57,14 +57,15 @@ def main():
     print()
 
     # Start it running
-    serverThread = threading.Thread(target=server.run)
+    serverThread = threading.Thread(target=server.run, daemon=True)
     serverThread.start()
 
     # Press to exit
     try:
-        input("Press any key to exit ...")
+        input("Press any key to exit ...\n\n")
     except KeyboardInterrupt:
-        server.stop()
+        print("Exiting ...")
+        sys.exit(0)
 
     return 0
 
diff --git a/lglpy/server.py b/lglpy/server.py
index e10c58c..127e708 100644
--- a/lglpy/server.py
+++ b/lglpy/server.py
@@ -123,7 +123,7 @@ def run(self):
 
         # Accept connections from outside
         while not self.shutdown:
-            print('Waiting for connection')
+            print('Waiting for client connection')
             try:
                 sockfd, _ = listen_sockfd.accept()
             except OSError:
diff --git a/lglpy/service_gpu_timeline.py b/lglpy/service_gpu_timeline.py
index 69ac031..1c707f2 100644
--- a/lglpy/service_gpu_timeline.py
+++ b/lglpy/service_gpu_timeline.py
@@ -25,18 +25,69 @@
 # implements a basic message endpoint for testing.
 
 from lglpy.server import Message
+import json
+import struct
 
 class GPUTimelineService:
 
     def __init__(self):
-        pass
+        self.frame = {
+            "frame": 0,
+            "workloads": [
+
+            ]
+        }
+
+        # TODO: Make file name configurable
+        self.fileHandle = open('malivision.gputl', 'wb')
 
     def get_service_name(self) -> str:
         return 'GPUTimeline'
 
+    def handle_frame(self, msg):
+        print(json.dumps(self.frame, indent=4))
+
+        # Serialize the completed frame as a length-prefixed packet; the
+        # prefix is the payload byte count packed as a little-endian uint32
+        payload = json.dumps(self.frame).encode('utf-8')
+        header = struct.pack('<I', len(payload))
+        self.fileHandle.write(header)
+        self.fileHandle.write(payload)
+
+        # Start accumulating workloads for the frame named by this message
+        self.frame = {
+            'frame': msg['fid'],
+            'workloads': []
+        }
+
+    def handle_renderpass(self, msg):
+        # Only the most recent workload can be continued by this message
+        workloads = self.frame['workloads']
+        previous = workloads[-1] if workloads else None
+
+        # A continuation carries the tid of the preceding render pass
+        isContinuation = (previous is not None
+                          and previous['type'] == 'renderpass'
+                          and previous['tid'] == msg['tid'])
+
+        if isContinuation:
+            previous['drawCallCount'] += msg['drawCallCount']
+        else:
+            workloads.append(msg)
+
     def handle_message(self, message: Message):
         payload = message.payload.decode('utf-8')
+        parsedPayload = json.loads(payload)
+
+        payloadType = parsedPayload['type']
+
+        if payloadType == 'frame':
+            self.handle_frame(parsedPayload)
+
+        elif payloadType == 'renderpass':
+            self.handle_renderpass(parsedPayload)
 
-        print(f'{message.message_type.name}: {payload} ({len(payload)} bytes)')
+        else:
+            assert False, f'Unknown payload type {payloadType}'
 
         return None
diff --git a/source_common/trackers/command_buffer.cpp b/source_common/trackers/command_buffer.cpp
index 0e4cdd8..b2b6d4f 100644
--- a/source_common/trackers/command_buffer.cpp
+++ b/source_common/trackers/command_buffer.cpp
@@ -91,7 +91,7 @@ uint64_t CommandBuffer::renderPassBegin(
     // Assign ID and update the stats tracker for new render passes only
     if (!resuming)
     {
-        tagID = Tracker::LCSWorkload::getTagID();
+        tagID = Tracker::LCSWorkload::assignTagID();
         stats.incRenderPassCount();
     }
 
@@ -105,7 +105,7 @@ uint64_t CommandBuffer::renderPassBegin(
     workloads.push_back(workload);
 
     // Add a command to the layer-side command stream
-    auto instr = std::make_pair(LCSOpcode::RENDERPASS_BEGIN, workload);
+    auto instr = std::make_pair(LCSOpcode::RENDER_PASS, workload);
     workloadCommandStream.push_back(instr);
 
     return tagID;
diff --git a/source_common/trackers/layer_command_stream.cpp b/source_common/trackers/layer_command_stream.cpp
index 56b1039..a3dfccd 100644
--- a/source_common/trackers/layer_command_stream.cpp
+++ b/source_common/trackers/layer_command_stream.cpp
@@ -71,6 +71,7 @@ LCSRenderPass::LCSRenderPass(
 
 /* See header for details. */
 std::string LCSRenderPass::getBeginMetadata(
+    const std::string* debugLabel,
     uint64_t submitID) const
 {
     json metadata = {
@@ -86,6 +87,11 @@ std::string LCSRenderPass::getBeginMetadata(
         metadata["sid"] = submitID;
     }
 
+    if (debugLabel && debugLabel->size())
+    {
+        metadata["label"] = *debugLabel;
+    }
+
     // Default is 1, so only store if we need it
     if (subpassCount != 1)
     {
@@ -126,6 +132,7 @@ std::string LCSRenderPass::getBeginMetadata(
 
 /* See header for details. */
 std::string LCSRenderPass::getContinuationMetadata(
+    const std::string* debugLabel,
     uint64_t tagIDContinuation,
     uint64_t submitID) const
 {
@@ -135,6 +142,11 @@ std::string LCSRenderPass::getContinuationMetadata(
         { "drawCallCount", drawCallCount }
     };
 
+    if (debugLabel && debugLabel->size())
+    {
+        metadata["label"] = *debugLabel;
+    }
+
     if (submitID != 0)
     {
         metadata["sid"] = submitID;
@@ -145,17 +157,18 @@ std::string LCSRenderPass::getContinuationMetadata(
 
 /* See header for details. */
 std::string LCSRenderPass::getMetadata(
+    const std::string* debugLabel,
     uint64_t tagIDContinuation,
     uint64_t submitID) const
 {
     if (tagID)
     {
         assert(tagIDContinuation == 0);
-        return getBeginMetadata(submitID);
+        return getBeginMetadata(debugLabel, submitID);
     }
 
     assert(tagIDContinuation != 0);
-    return getContinuationMetadata(tagIDContinuation, submitID);
+    return getContinuationMetadata(debugLabel, tagIDContinuation, submitID);
 }
 
 }
diff --git a/source_common/trackers/layer_command_stream.hpp b/source_common/trackers/layer_command_stream.hpp
index a144683..9f3635d 100644
--- a/source_common/trackers/layer_command_stream.hpp
+++ b/source_common/trackers/layer_command_stream.hpp
@@ -65,8 +65,7 @@ enum class LCSOpcode
 {
     MARKER_BEGIN,
     MARKER_END,
-    RENDERPASS_BEGIN,
-    RENDERPASS_END, // TODO: Does this need to be an opcode?
+    RENDER_PASS,
     DISPATCH,
     TRACE_RAYS,
     BUFFER_TRANSFER,
@@ -85,15 +84,26 @@ class LCSWorkload
     virtual ~LCSWorkload() = default;
 
     virtual std::string getMetadata(
+        const std::string* debugLabel=nullptr,
         uint64_t tagIDContinuation=0,
         uint64_t submitID=0) const = 0;
 
+    /**
+     * @brief Get this workload's tagID.
+     *
+     * @return The assigned ID.
+     */
+    uint64_t getTagID() const
+    {
+        return tagID;
+    }
+
     /**
      * @brief Get a unique tagID to label a workload in a command buffer.
      *
      * @return The assigned ID.
      */
-    static uint64_t getTagID()
+    static uint64_t assignTagID()
     {
         return nextTagID.fetch_add(1, std::memory_order_relaxed);
     }
@@ -139,15 +149,18 @@ class LCSRenderPass : public LCSWorkload
     };
 
     virtual std::string getMetadata(
+        const std::string* debugLabel=nullptr,
         uint64_t tagIDContinuation=0,
         uint64_t submitID=0) const;
 
 private:
     std::string getBeginMetadata(
+        const std::string* debugLabel=nullptr,
         uint64_t submitID=0) const;
 
     std::string getContinuationMetadata(
-        uint64_t tagIDContinuation,
+        const std::string* debugLabel=nullptr,
+        uint64_t tagIDContinuation=0,
         uint64_t submitID=0) const;
 
     uint32_t width;
@@ -175,9 +188,11 @@ class LCSMarker : public LCSWorkload
     virtual ~LCSMarker() = default;
 
     virtual std::string getMetadata(
+        const std::string* debugLabel=nullptr,
         uint64_t tagIDContinuation=0,
         uint64_t submitID=0) const
     {
+        UNUSED(debugLabel);
         UNUSED(tagIDContinuation);
         UNUSED(submitID);
         return label;
diff --git a/source_common/trackers/queue.cpp b/source_common/trackers/queue.cpp
index cfd6590..358d71a 100644
--- a/source_common/trackers/queue.cpp
+++ b/source_common/trackers/queue.cpp
@@ -55,11 +55,36 @@ void Queue::runSubmitCommandStream(
         {
             debugStack.pop_back();
         }
-        else if (opCode == LCSOpcode::RENDERPASS_BEGIN)
+        else if (opCode == LCSOpcode::RENDER_PASS)
         {
             auto* workload = dynamic_cast<const LCSRenderPass*>(opData);
-            callback(workload->getMetadata());
+            uint64_t tagID = workload->getTagID();
+
+            // Build the debug info
             std::string log = joinString(debugStack, "|");
+
+            // Workload is a new render pass
+            if (tagID > 0)
+            {
+                assert(lastRenderPassTagID == 0);
+                callback(workload->getMetadata(&log));
+
+                lastRenderPassTagID = 0;
+                if (workload->isSuspending())
+                {
+                    lastRenderPassTagID = tagID;
+                }
+            }
+            // Workload is a continuation
+            else
+            {
+                assert(lastRenderPassTagID != 0);
+                callback(workload->getMetadata(nullptr, lastRenderPassTagID));
+                if (!workload->isSuspending())
+                {
+                    lastRenderPassTagID = 0;
+                }
+            }
         }
     }
 }
diff --git a/source_common/trackers/queue.hpp b/source_common/trackers/queue.hpp
index 26fc416..ff62b87 100644
--- a/source_common/trackers/queue.hpp
+++ b/source_common/trackers/queue.hpp
@@ -74,7 +74,7 @@ class Queue
      *
      * @return The assigned ID.
      */
-    static uint64_t getSubmitID()
+    static uint64_t assignSubmitID()
     {
         return nextSubmitID.fetch_add(1, std::memory_order_relaxed);
     }
@@ -86,14 +86,21 @@ class Queue
     VkQueue handle;
 
     /**
-     * @brief The stack of debug labels in the tool.
+     * @brief The stack of user debug labels for this queue.
      */
     std::vector<std::string> debugStack;
 
+    /**
+     * @brief The last non-zero renderpass tagID submitted.
+     */
+    uint64_t lastRenderPassTagID { 0 };
+
     /**
      * @brief The command buffer submitID allocator.
      */
     static std::atomic<uint64_t> nextSubmitID;
+
+
 };
 
 }

From 829a39f09c8bb884f397b5e237771a1197ecee69 Mon Sep 17 00:00:00 2001
From: Peter Harris <peter.harris@arm.com>
Date: Thu, 12 Dec 2024 13:19:55 +0000
Subject: [PATCH 5/8] Add compute

---
 .../layer_device_functions_command_buffer.cpp |  1 +
 .../layer_device_functions_dispatch.cpp       | 80 ++++++++++++++++---
 .../layer_device_functions_render_pass.cpp    |  2 +-
 lglpy/service_gpu_timeline.py                 | 23 ++++--
 source_common/trackers/command_buffer.cpp     | 51 ++++++++----
 source_common/trackers/command_buffer.hpp     | 27 +++++++
 .../trackers/layer_command_stream.cpp         | 64 +++++++++++++--
 .../trackers/layer_command_stream.hpp         | 30 ++++++-
 source_common/trackers/queue.cpp              |  6 ++
 source_common/trackers/render_pass.cpp        |  1 -
 10 files changed, 246 insertions(+), 39 deletions(-)

diff --git a/layer_gpu_timeline/source/layer_device_functions_command_buffer.cpp b/layer_gpu_timeline/source/layer_device_functions_command_buffer.cpp
index 75fb36b..f0cc338 100644
--- a/layer_gpu_timeline/source/layer_device_functions_command_buffer.cpp
+++ b/layer_gpu_timeline/source/layer_device_functions_command_buffer.cpp
@@ -79,6 +79,7 @@ VKAPI_ATTR VkResult layer_vkBeginCommandBuffer<user_tag>(
     auto& tracker = layer->getStateTracker();
     auto& cmdBuffer = tracker.getCommandBuffer(commandBuffer);
     cmdBuffer.reset();
+    cmdBuffer.begin(pBeginInfo->flags & VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT);
 
     // Release the lock to call into the driver
     lock.unlock();
diff --git a/layer_gpu_timeline/source/layer_device_functions_dispatch.cpp b/layer_gpu_timeline/source/layer_device_functions_dispatch.cpp
index 88bf568..0e3774f 100644
--- a/layer_gpu_timeline/source/layer_device_functions_dispatch.cpp
+++ b/layer_gpu_timeline/source/layer_device_functions_dispatch.cpp
@@ -32,13 +32,16 @@
 
 extern std::mutex g_vulkanLock;
 
-static void registerDispatch(
+static uint64_t registerDispatch(
     Device* layer,
-    VkCommandBuffer commandBuffer
+    VkCommandBuffer commandBuffer,
+    int64_t groupX,
+    int64_t groupY,
+    int64_t groupZ
 ) {
-    auto& state = layer->getStateTracker();
-    auto& stats = state.getCommandBuffer(commandBuffer).getStats();
-    stats.incDispatchCount();
+    auto& tracker = layer->getStateTracker();
+    auto& cb = tracker.getCommandBuffer(commandBuffer);
+    return cb.dispatch(groupX, groupY, groupZ);
 }
 
 /* See Vulkan API for documentation. */
@@ -55,11 +58,27 @@ VKAPI_ATTR void VKAPI_CALL layer_vkCmdDispatch<user_tag>(
     std::unique_lock<std::mutex> lock { g_vulkanLock };
     auto* layer = Device::retrieve(commandBuffer);
 
-    registerDispatch(layer, commandBuffer);
+    uint64_t tagID = registerDispatch(
+        layer,
+        commandBuffer,
+        static_cast<int64_t>(groupCountX),
+        static_cast<int64_t>(groupCountY),
+        static_cast<int64_t>(groupCountZ));
+
+    // Emit the unique workload tag into the command stream
+    std::string tagLabel = formatString("t%" PRIu64, tagID);
+    VkDebugUtilsLabelEXT tagInfo {
+        .sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT,
+        .pNext = nullptr,
+        .pLabelName = tagLabel.c_str(),
+        .color = { 0.0f, 0.0f, 0.0f, 0.0f }
+    };
 
     // Release the lock to call into the driver
     lock.unlock();
+    layer->driver.vkCmdBeginDebugUtilsLabelEXT(commandBuffer, &tagInfo);
     layer->driver.vkCmdDispatch(commandBuffer, groupCountX, groupCountY, groupCountZ);
+    layer->driver.vkCmdEndDebugUtilsLabelEXT(commandBuffer);
 }
 
 /* See Vulkan API for documentation. */
@@ -79,11 +98,27 @@ VKAPI_ATTR void VKAPI_CALL layer_vkCmdDispatchBase<user_tag>(
     std::unique_lock<std::mutex> lock { g_vulkanLock };
     auto* layer = Device::retrieve(commandBuffer);
 
-    registerDispatch(layer, commandBuffer);
+    uint64_t tagID = registerDispatch(
+        layer,
+        commandBuffer,
+        static_cast<int64_t>(groupCountX),
+        static_cast<int64_t>(groupCountY),
+        static_cast<int64_t>(groupCountZ));
+
+    // Emit the unique workload tag into the command stream
+    std::string tagLabel = formatString("t%" PRIu64, tagID);
+    VkDebugUtilsLabelEXT tagInfo {
+        .sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT,
+        .pNext = nullptr,
+        .pLabelName = tagLabel.c_str(),
+        .color = { 0.0f, 0.0f, 0.0f, 0.0f }
+    };
 
     // Release the lock to call into the driver
     lock.unlock();
+    layer->driver.vkCmdBeginDebugUtilsLabelEXT(commandBuffer, &tagInfo);
     layer->driver.vkCmdDispatchBase(commandBuffer, baseGroupX, baseGroupY, baseGroupZ, groupCountX, groupCountY, groupCountZ);
+    layer->driver.vkCmdEndDebugUtilsLabelEXT(commandBuffer);
 }
 
 /* See Vulkan API for documentation. */
@@ -103,11 +138,27 @@ VKAPI_ATTR void VKAPI_CALL layer_vkCmdDispatchBaseKHR<user_tag>(
     std::unique_lock<std::mutex> lock { g_vulkanLock };
     auto* layer = Device::retrieve(commandBuffer);
 
-    registerDispatch(layer, commandBuffer);
+    uint64_t tagID = registerDispatch(
+        layer,
+        commandBuffer,
+        static_cast<int64_t>(groupCountX),
+        static_cast<int64_t>(groupCountY),
+        static_cast<int64_t>(groupCountZ));
+
+    // Emit the unique workload tag into the command stream
+    std::string tagLabel = formatString("t%" PRIu64, tagID);
+    VkDebugUtilsLabelEXT tagInfo {
+        .sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT,
+        .pNext = nullptr,
+        .pLabelName = tagLabel.c_str(),
+        .color = { 0.0f, 0.0f, 0.0f, 0.0f }
+    };
 
     // Release the lock to call into the driver
     lock.unlock();
+    layer->driver.vkCmdBeginDebugUtilsLabelEXT(commandBuffer, &tagInfo);
     layer->driver.vkCmdDispatchBaseKHR(commandBuffer, baseGroupX, baseGroupY, baseGroupZ, groupCountX, groupCountY, groupCountZ);
+    layer->driver.vkCmdEndDebugUtilsLabelEXT(commandBuffer);
 }
 
 /* See Vulkan API for documentation. */
@@ -123,9 +174,20 @@ VKAPI_ATTR void VKAPI_CALL layer_vkCmdDispatchIndirect<user_tag>(
     std::unique_lock<std::mutex> lock { g_vulkanLock };
     auto* layer = Device::retrieve(commandBuffer);
 
-    registerDispatch(layer, commandBuffer);
+    uint64_t tagID = registerDispatch(layer, commandBuffer, -1, -1, -1);
+
+    // Emit the unique workload tag into the command stream
+    std::string tagLabel = formatString("t%" PRIu64, tagID);
+    VkDebugUtilsLabelEXT tagInfo {
+        .sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT,
+        .pNext = nullptr,
+        .pLabelName = tagLabel.c_str(),
+        .color = { 0.0f, 0.0f, 0.0f, 0.0f }
+    };
 
     // Release the lock to call into the driver
     lock.unlock();
+    layer->driver.vkCmdBeginDebugUtilsLabelEXT(commandBuffer, &tagInfo);
     layer->driver.vkCmdDispatchIndirect(commandBuffer, buffer, offset);
+    layer->driver.vkCmdEndDebugUtilsLabelEXT(commandBuffer);
 }
diff --git a/layer_gpu_timeline/source/layer_device_functions_render_pass.cpp b/layer_gpu_timeline/source/layer_device_functions_render_pass.cpp
index 1a272bb..b59f305 100644
--- a/layer_gpu_timeline/source/layer_device_functions_render_pass.cpp
+++ b/layer_gpu_timeline/source/layer_device_functions_render_pass.cpp
@@ -168,7 +168,7 @@ VKAPI_ATTR void VKAPI_CALL layer_vkCmdBeginRenderPass<user_tag>(
 
     // Emit the unique workload tag into the command stream
     std::string tagLabel = formatString("t%" PRIu64, tagID);
-    [[maybe_unused]] VkDebugUtilsLabelEXT tagInfo {
+    VkDebugUtilsLabelEXT tagInfo {
         .sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT,
         .pNext = nullptr,
         .pLabelName = tagLabel.c_str(),
diff --git a/lglpy/service_gpu_timeline.py b/lglpy/service_gpu_timeline.py
index 1c707f2..0f619ae 100644
--- a/lglpy/service_gpu_timeline.py
+++ b/lglpy/service_gpu_timeline.py
@@ -33,9 +33,7 @@ class GPUTimelineService:
     def __init__(self):
         self.frame = {
             "frame": 0,
-            "workloads": [
-
-            ]
+            "workloads": []
         }
 
         # TODO: Make file name configurable
@@ -45,8 +43,6 @@ def get_service_name(self) -> str:
         return 'GPUTimeline'
 
     def handle_frame(self, msg):
-        print(json.dumps(self.frame, indent=4))
-
         # Write frame packet to the file
         lastFrame = json.dumps(self.frame).encode('utf-8')
         length = struct.pack('<I', len(lastFrame))
@@ -55,11 +51,15 @@ def handle_frame(self, msg):
         self.fileHandle.write(lastFrame)
 
         # Reset the local frame state for the next frame
+        nextFrame = msg['fid']
         self.frame = {
-            'frame': msg['fid'],
+            'frame': nextFrame,
             'workloads': []
         }
 
+        if nextFrame % 100 == 0:
+            print(f'Starting frame {nextFrame} ...')
+
     def handle_renderpass(self, msg):
         # Find the last workload
         lastRenderPass = None
@@ -70,11 +70,17 @@ def handle_renderpass(self, msg):
 
         # Continuation
         if lastRenderPass and lastRenderPass['tid'] == msg['tid']:
-            lastRenderPass['drawCallCount'] += msg['drawCallCount']
+            # Don't accumulate if the count is flagged unknown (-1), as the tagID is then not a unique metadata tag
+            if lastRenderPass['drawCallCount'] != -1:
+                lastRenderPass['drawCallCount'] += msg['drawCallCount']
         # New render pass
         else:
             self.frame['workloads'].append(msg)
 
+    def handle_dispatch(self, msg):
+        # Each dispatch message is appended to the frame as a new workload
+        self.frame['workloads'].append(msg)
+
     def handle_message(self, message: Message):
         payload = message.payload.decode('utf-8')
         parsedPayload = json.loads(payload)
@@ -87,6 +93,9 @@ def handle_message(self, message: Message):
         elif payloadType == 'renderpass':
             self.handle_renderpass(parsedPayload)
 
+        elif payloadType == 'dispatch':
+            self.handle_dispatch(parsedPayload)
+
         else:
             assert False, f'Unknown payload type {payloadType}'
 
diff --git a/source_common/trackers/command_buffer.cpp b/source_common/trackers/command_buffer.cpp
index b2b6d4f..ef47730 100644
--- a/source_common/trackers/command_buffer.cpp
+++ b/source_common/trackers/command_buffer.cpp
@@ -43,14 +43,21 @@ CommandBuffer::CommandBuffer(
 /* See header for documentation. */
 void CommandBuffer::reset()
 {
+    oneTimeSubmit = false;
     stats.reset();
     workloads.clear();
     workloadCommandStream.clear();
 }
 
-/**
- * @brief Begin a user debug marker range.
- */
+/* See header for documentation. */
+void CommandBuffer::begin(
+    bool _oneTimeSubmit
+)
+{
+    oneTimeSubmit = _oneTimeSubmit;
+}
+
+/* See header for documentation. */
 void CommandBuffer::debugMarkerBegin(
     std::string marker
 ) {
@@ -63,9 +70,7 @@ void CommandBuffer::debugMarkerBegin(
     workloadCommandStream.push_back(instr);
 }
 
-/**
- * @brief End a user debug marker range.
- */
+/* See header for documentation. */
 void CommandBuffer::debugMarkerEnd()
 {
     // Add command with empty workload to update queue debug stack on submit
@@ -74,9 +79,7 @@ void CommandBuffer::debugMarkerEnd()
     workloadCommandStream.push_back(instr);
 }
 
-/**
- * @brief End a user render pass.
- */
+/* See header for documentation. */
 uint64_t CommandBuffer::renderPassBegin(
     const RenderPass& renderPass,
     uint32_t width,
@@ -99,7 +102,7 @@ uint64_t CommandBuffer::renderPassBegin(
     renderPassStartDrawCount = stats.getDrawCallCount();
 
     auto workload = std::make_shared<LCSRenderPass>(
-        tagID, renderPass, width, height, suspending);
+        tagID, renderPass, width, height, suspending, oneTimeSubmit);
 
     currentRenderPass = workload;
     workloads.push_back(workload);
@@ -111,9 +114,7 @@ uint64_t CommandBuffer::renderPassBegin(
     return tagID;
 }
 
-/**
- * @brief End a user render pass.
- */
+/* See header for documentation. */
 bool CommandBuffer::renderPassEnd()
 {
     assert(currentRenderPass);
@@ -130,6 +131,28 @@ bool CommandBuffer::renderPassEnd()
     return suspending;
 }
 
+/* See header for documentation. */
+uint64_t CommandBuffer::dispatch(
+    int64_t xGroups,
+    int64_t yGroups,
+    int64_t zGroups
+) {
+    LAYER_LOG("Creating LCSDispatch workload");
+    uint64_t tagID = Tracker::LCSWorkload::assignTagID();
+    stats.incDispatchCount();
+
+    // Add a workload to the command buffer
+    auto workload = std::make_shared<LCSDispatch>(
+        tagID, xGroups, yGroups, zGroups);
+    workloads.push_back(workload);
+
+    // Add a command to the layer-side command stream
+    auto instr = std::make_pair(LCSOpcode::DISPATCH, workload);
+    workloadCommandStream.push_back(instr);
+
+    return tagID;
+}
+
 /* See header for documentation. */
 void CommandBuffer::executeCommands(
     CommandBuffer& secondary
@@ -142,7 +165,7 @@ void CommandBuffer::executeCommands(
     vecAppend(workloadCommandStream, secondary.workloadCommandStream);
 }
 
-
+/* See header for documentation. */
 CommandPool::CommandPool(
     VkCommandPool _handle) :
     handle(_handle)
diff --git a/source_common/trackers/command_buffer.hpp b/source_common/trackers/command_buffer.hpp
index fcf5c34..c4b4828 100644
--- a/source_common/trackers/command_buffer.hpp
+++ b/source_common/trackers/command_buffer.hpp
@@ -114,6 +114,20 @@ class CommandBuffer
      */
     bool renderPassEnd();
 
+    /**
+     * @brief Capture a compute dispatch.
+     *
+     * @param xGroups   Number of groups in X dimension, or -1 if unknown.
+     * @param yGroups   Number of groups in Y dimension, or -1 if unknown.
+     * @param zGroups   Number of groups in Z dimension, or -1 if unknown.
+     *
+     * @return Returns the tagID assigned to this workload.
+     */
+    uint64_t dispatch(
+        int64_t xGroups,
+        int64_t yGroups,
+        int64_t zGroups);
+
     /**
      * @brief Begin a user debug marker range.
      */
@@ -136,12 +150,25 @@ class CommandBuffer
      */
     void reset();
 
+    /**
+     * @brief Begin recording, putting the command buffer back into the @a Recording state.
+     *
+     * @param oneTimeSubmit   Is this a one-time submit recording.
+     */
+    void begin(
+        bool oneTimeSubmit);
+
 private:
     /**
      * @brief The Vulkan API handle of this command buffer.
      */
     const VkCommandBuffer handle;
 
+    /**
+     * @brief Is this command buffer recording one-time-submit?
+     */
+    bool oneTimeSubmit { false };
+
     /**
      * @brief The command buffer draw count at the start of the render pass.
      */
diff --git a/source_common/trackers/layer_command_stream.cpp b/source_common/trackers/layer_command_stream.cpp
index a3dfccd..2b29d07 100644
--- a/source_common/trackers/layer_command_stream.cpp
+++ b/source_common/trackers/layer_command_stream.cpp
@@ -58,11 +58,13 @@ LCSRenderPass::LCSRenderPass(
     const RenderPass& renderPass,
     uint32_t _width,
     uint32_t _height,
-    bool _suspending) :
+    bool _suspending,
+    bool _oneTimeSubmit) :
     LCSWorkload(_tagID),
     width(_width),
     height(_height),
-    suspending(_suspending)
+    suspending(_suspending),
+    oneTimeSubmit(_oneTimeSubmit)
 {
     // Copy these as the renderpass object may be transient.
     subpassCount = renderPass.getSubpassCount();
@@ -74,12 +76,22 @@ std::string LCSRenderPass::getBeginMetadata(
     const std::string* debugLabel,
     uint64_t submitID) const
 {
+    // Draw count for a multi-submit command buffer cannot be reliably
+    // associated with a single tagID if restartable across command buffer
+    // boundaries because different command buffer submit combinations can
+    // result in different draw counts for the same starting tagID.
+    int64_t drawCount = static_cast<int64_t>(drawCallCount);
+    if (!oneTimeSubmit && suspending)
+    {
+        drawCount = -1;
+    }
+
     json metadata = {
         { "type", "renderpass" },
         { "tid", tagID },
         { "width", width },
         { "height", height },
-        { "drawCallCount", drawCallCount }
+        { "drawCallCount", drawCount }
     };
 
     if (submitID != 0)
@@ -105,19 +117,19 @@ std::string LCSRenderPass::getBeginMetadata(
             { "binding", attachment.getAttachmentStr() },
         };
 
-        // Default is false, so only store if we need it
+        // Default is false, so only serialize if we need it
         if (attachment.isLoaded())
         {
             attachPoint["load"] = true;
         }
 
-        // Default is true, so only store if we need it
+        // Default is true, so only serialize if we need it
         if (!attachment.isStored())
         {
             attachPoint["store"] = false;
         }
 
-        // Default is false, so only store if we need it
+        // Default is false, so only serialize if we need it
         if (attachment.isResolved())
         {
             attachPoint["resolve"] = true;
@@ -171,4 +183,44 @@ std::string LCSRenderPass::getMetadata(
     return getContinuationMetadata(debugLabel, tagIDContinuation, submitID);
 }
 
+/* See header for details. */
+LCSDispatch::LCSDispatch(
+    uint64_t _tagID,
+    int64_t _xGroups,
+    int64_t _yGroups,
+    int64_t _zGroups) :
+    LCSWorkload(_tagID),
+    xGroups(_xGroups),
+    yGroups(_yGroups),
+    zGroups(_zGroups)
+{
+
+}
+
+/* See header for details. */
+std::string LCSDispatch::getMetadata(
+    const std::string* debugLabel,
+    uint64_t tagIDContinuation,
+    uint64_t submitID
+) const {
+    UNUSED(tagIDContinuation);
+    UNUSED(submitID);
+
+    json metadata = {
+        { "type", "dispatch" },
+        { "tid", tagID },
+        { "xGroups", xGroups },
+        { "yGroups", yGroups },
+        { "zGroups", zGroups }
+    };
+
+    if (debugLabel && debugLabel->size())
+    {
+        metadata["label"] = *debugLabel;
+    }
+
+    return metadata.dump();
+}
+
+
 }
diff --git a/source_common/trackers/layer_command_stream.hpp b/source_common/trackers/layer_command_stream.hpp
index 9f3635d..246558d 100644
--- a/source_common/trackers/layer_command_stream.hpp
+++ b/source_common/trackers/layer_command_stream.hpp
@@ -134,7 +134,8 @@ class LCSRenderPass : public LCSWorkload
         const RenderPass& renderPass,
         uint32_t width,
         uint32_t height,
-        bool suspending);
+        bool suspending,
+        bool oneTimeSubmit);
 
     virtual ~LCSRenderPass() = default;
 
@@ -169,6 +170,8 @@ class LCSRenderPass : public LCSWorkload
 
     bool suspending;
 
+    bool oneTimeSubmit;
+
     uint32_t subpassCount;
 
     uint64_t drawCallCount { 0 };
@@ -176,6 +179,31 @@ class LCSRenderPass : public LCSWorkload
     std::vector<RenderPassAttachment> attachments;
 };
 
+/**
+ * @brief Class representing a compute dispatch workload in the command stream.
+ */
+class LCSDispatch : public LCSWorkload
+{
+public:
+    LCSDispatch(
+        uint64_t tagID,
+        int64_t xGroups,
+        int64_t yGroups,
+        int64_t zGroups);
+
+    virtual ~LCSDispatch() = default;
+
+    virtual std::string getMetadata(
+        const std::string* debugLabel=nullptr,
+        uint64_t tagIDContinuation=0,
+        uint64_t submitID=0) const;
+
+private:
+    int64_t xGroups;
+    int64_t yGroups;
+    int64_t zGroups;
+};
+
 /**
  * @brief Baseclass representing a GPU workload in the command stream.
  */
diff --git a/source_common/trackers/queue.cpp b/source_common/trackers/queue.cpp
index 358d71a..756ea07 100644
--- a/source_common/trackers/queue.cpp
+++ b/source_common/trackers/queue.cpp
@@ -86,6 +86,12 @@ void Queue::runSubmitCommandStream(
                 }
             }
         }
+        else if (opCode == LCSOpcode::DISPATCH)
+        {
+            uint64_t tagID = opData->getTagID();
+            std::string log = joinString(debugStack, "|");
+            callback(opData->getMetadata(&log, tagID));
+        }
     }
 }
 
diff --git a/source_common/trackers/render_pass.cpp b/source_common/trackers/render_pass.cpp
index 3560612..181cf53 100644
--- a/source_common/trackers/render_pass.cpp
+++ b/source_common/trackers/render_pass.cpp
@@ -341,7 +341,6 @@ RenderPass::RenderPass(
         }
     }
 
-
     // Stencil attachment
     if (createInfo.pStencilAttachment)
     {

From 9446e54e936f363c5867c8a74363d4bb913e0e09 Mon Sep 17 00:00:00 2001
From: Peter Harris <peter.harris@arm.com>
Date: Thu, 12 Dec 2024 19:59:33 +0000
Subject: [PATCH 6/8] Add transfers

---
 .../source/layer_device_functions.hpp         |   2 -
 layer_gpu_timeline/source/CMakeLists.txt      |   1 +
 .../source/layer_device_functions.hpp         | 120 +++++
 .../layer_device_functions_dispatch.cpp       |  61 +--
 .../layer_device_functions_trace_rays.cpp     |  40 +-
 .../layer_device_functions_transfer.cpp       | 476 ++++++++++++++++++
 lglpy/service_gpu_timeline.py                 |   7 +-
 source_common/trackers/command_buffer.cpp     |  62 ++-
 source_common/trackers/command_buffer.hpp     |  38 ++
 .../trackers/layer_command_stream.cpp         | 110 ++++
 .../trackers/layer_command_stream.hpp         |  71 +++
 source_common/trackers/queue.cpp              |   5 +-
 12 files changed, 937 insertions(+), 56 deletions(-)
 create mode 100644 layer_gpu_timeline/source/layer_device_functions_transfer.cpp

diff --git a/layer_example/source/layer_device_functions.hpp b/layer_example/source/layer_device_functions.hpp
index f3403c7..df321c2 100644
--- a/layer_example/source/layer_device_functions.hpp
+++ b/layer_example/source/layer_device_functions.hpp
@@ -23,8 +23,6 @@
  * ----------------------------------------------------------------------------
  */
 
-#include <vulkan/vulkan.h>
-
 #include "framework/utils.hpp"
 
 /* See Vulkan API for documentation. */
diff --git a/layer_gpu_timeline/source/CMakeLists.txt b/layer_gpu_timeline/source/CMakeLists.txt
index ca4fc25..b8212d1 100644
--- a/layer_gpu_timeline/source/CMakeLists.txt
+++ b/layer_gpu_timeline/source/CMakeLists.txt
@@ -52,6 +52,7 @@ add_library(
         layer_device_functions_queue.cpp
         layer_device_functions_render_pass.cpp
         layer_device_functions_trace_rays.cpp
+        layer_device_functions_transfer.cpp
         timeline_comms.cpp)
 
 target_include_directories(
diff --git a/layer_gpu_timeline/source/layer_device_functions.hpp b/layer_gpu_timeline/source/layer_device_functions.hpp
index 129f1e6..3806398 100644
--- a/layer_gpu_timeline/source/layer_device_functions.hpp
+++ b/layer_gpu_timeline/source/layer_device_functions.hpp
@@ -331,6 +331,126 @@ VKAPI_ATTR void VKAPI_CALL layer_vkCmdTraceRaysKHR<user_tag>(
     uint32_t height,
     uint32_t depth);
 
+
+// Commands for transfers
+
+/* See Vulkan API for documentation. */
+template <>
+VKAPI_ATTR void VKAPI_CALL layer_vkCmdFillBuffer<user_tag>(
+    VkCommandBuffer commandBuffer,
+    VkBuffer dstBuffer,
+    VkDeviceSize dstOffset,
+    VkDeviceSize size,
+    uint32_t data);
+
+/* See Vulkan API for documentation. */
+template <>
+VKAPI_ATTR void VKAPI_CALL layer_vkCmdClearColorImage<user_tag>(
+    VkCommandBuffer commandBuffer,
+    VkImage image,
+    VkImageLayout imageLayout,
+    const VkClearColorValue* pColor,
+    uint32_t rangeCount,
+    const VkImageSubresourceRange* pRanges);
+
+/* See Vulkan API for documentation. */
+template <>
+VKAPI_ATTR void VKAPI_CALL layer_vkCmdClearDepthStencilImage<user_tag>(
+    VkCommandBuffer commandBuffer,
+    VkImage image,
+    VkImageLayout imageLayout,
+    const VkClearDepthStencilValue* pDepthStencil,
+    uint32_t rangeCount,
+    const VkImageSubresourceRange* pRanges);
+
+/* See Vulkan API for documentation. */
+template <>
+VKAPI_ATTR void VKAPI_CALL layer_vkCmdCopyBuffer<user_tag>(
+    VkCommandBuffer commandBuffer,
+    VkBuffer srcBuffer,
+    VkBuffer dstBuffer,
+    uint32_t regionCount,
+    const VkBufferCopy* pRegions);
+
+/* See Vulkan API for documentation. */
+template <>
+VKAPI_ATTR void VKAPI_CALL layer_vkCmdCopyBuffer2<user_tag>(
+    VkCommandBuffer commandBuffer,
+    const VkCopyBufferInfo2* pCopyBufferInfo);
+
+/* See Vulkan API for documentation. */
+template <>
+VKAPI_ATTR void VKAPI_CALL layer_vkCmdCopyBuffer2KHR<user_tag>(
+    VkCommandBuffer commandBuffer,
+    const VkCopyBufferInfo2* pCopyBufferInfo);
+
+/* See Vulkan API for documentation. */
+template <>
+VKAPI_ATTR void VKAPI_CALL layer_vkCmdCopyBufferToImage<user_tag>(
+    VkCommandBuffer commandBuffer,
+    VkBuffer srcBuffer,
+    VkImage dstImage,
+    VkImageLayout dstImageLayout,
+    uint32_t regionCount,
+    const VkBufferImageCopy* pRegions);
+
+/* See Vulkan API for documentation. */
+template <>
+VKAPI_ATTR void VKAPI_CALL layer_vkCmdCopyBufferToImage2<user_tag>(
+    VkCommandBuffer commandBuffer,
+    const VkCopyBufferToImageInfo2* pCopyBufferToImageInfo);
+
+/* See Vulkan API for documentation. */
+template <>
+VKAPI_ATTR void VKAPI_CALL layer_vkCmdCopyBufferToImage2KHR<user_tag>(
+    VkCommandBuffer commandBuffer,
+    const VkCopyBufferToImageInfo2* pCopyBufferToImageInfo);
+
+/* See Vulkan API for documentation. */
+template <>
+VKAPI_ATTR void VKAPI_CALL layer_vkCmdCopyImage<user_tag>(
+    VkCommandBuffer commandBuffer,
+    VkImage srcImage,
+    VkImageLayout srcImageLayout,
+    VkImage dstImage,
+    VkImageLayout dstImageLayout,
+    uint32_t regionCount,
+    const VkImageCopy* pRegions);
+
+/* See Vulkan API for documentation. */
+template <>
+VKAPI_ATTR void VKAPI_CALL layer_vkCmdCopyImage2<user_tag>(
+    VkCommandBuffer commandBuffer,
+    const VkCopyImageInfo2* pCopyImageInfo);
+
+/* See Vulkan API for documentation. */
+template <>
+VKAPI_ATTR void VKAPI_CALL layer_vkCmdCopyImage2KHR<user_tag>(
+    VkCommandBuffer commandBuffer,
+    const VkCopyImageInfo2* pCopyImageInfo);
+
+/* See Vulkan API for documentation. */
+template <>
+VKAPI_ATTR void VKAPI_CALL layer_vkCmdCopyImageToBuffer<user_tag>(
+    VkCommandBuffer commandBuffer,
+    VkImage srcImage,
+    VkImageLayout srcImageLayout,
+    VkBuffer dstBuffer,
+    uint32_t regionCount,
+    const VkBufferImageCopy* pRegions);
+
+/* See Vulkan API for documentation. */
+template <>
+VKAPI_ATTR void VKAPI_CALL layer_vkCmdCopyImageToBuffer2<user_tag>(
+    VkCommandBuffer commandBuffer,
+    const VkCopyImageToBufferInfo2* pCopyImageToBufferInfo);
+
+/* See Vulkan API for documentation. */
+template <>
+VKAPI_ATTR void VKAPI_CALL layer_vkCmdCopyImageToBuffer2KHR<user_tag>(
+    VkCommandBuffer commandBuffer,
+    const VkCopyImageToBufferInfo2* pCopyImageToBufferInfo);
+
 // Functions for debug
 
 /* See Vulkan API for documentation. */
diff --git a/layer_gpu_timeline/source/layer_device_functions_dispatch.cpp b/layer_gpu_timeline/source/layer_device_functions_dispatch.cpp
index 0e3774f..7555501 100644
--- a/layer_gpu_timeline/source/layer_device_functions_dispatch.cpp
+++ b/layer_gpu_timeline/source/layer_device_functions_dispatch.cpp
@@ -44,6 +44,23 @@ static uint64_t registerDispatch(
     return cb.dispatch(groupX, groupY, groupZ);
 }
 
+static void emitStartTag(
+    Device* layer,
+    VkCommandBuffer commandBuffer,
+    uint64_t tagID
+) {
+    // Emit the unique workload tag into the command stream
+    std::string tagLabel = formatString("t%" PRIu64, tagID);
+    VkDebugUtilsLabelEXT tagInfo {
+        .sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT,
+        .pNext = nullptr,
+        .pLabelName = tagLabel.c_str(),
+        .color = { 0.0f, 0.0f, 0.0f, 0.0f }
+    };
+
+    layer->driver.vkCmdBeginDebugUtilsLabelEXT(commandBuffer, &tagInfo);
+}
+
 /* See Vulkan API for documentation. */
 template <>
 VKAPI_ATTR void VKAPI_CALL layer_vkCmdDispatch<user_tag>(
@@ -65,18 +82,9 @@ VKAPI_ATTR void VKAPI_CALL layer_vkCmdDispatch<user_tag>(
         static_cast<int64_t>(groupCountY),
         static_cast<int64_t>(groupCountZ));
 
-    // Emit the unique workload tag into the command stream
-    std::string tagLabel = formatString("t%" PRIu64, tagID);
-    VkDebugUtilsLabelEXT tagInfo {
-        .sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT,
-        .pNext = nullptr,
-        .pLabelName = tagLabel.c_str(),
-        .color = { 0.0f, 0.0f, 0.0f, 0.0f }
-    };
-
     // Release the lock to call into the driver
     lock.unlock();
-    layer->driver.vkCmdBeginDebugUtilsLabelEXT(commandBuffer, &tagInfo);
+    emitStartTag(layer, commandBuffer, tagID);
     layer->driver.vkCmdDispatch(commandBuffer, groupCountX, groupCountY, groupCountZ);
     layer->driver.vkCmdEndDebugUtilsLabelEXT(commandBuffer);
 }
@@ -105,18 +113,9 @@ VKAPI_ATTR void VKAPI_CALL layer_vkCmdDispatchBase<user_tag>(
         static_cast<int64_t>(groupCountY),
         static_cast<int64_t>(groupCountZ));
 
-    // Emit the unique workload tag into the command stream
-    std::string tagLabel = formatString("t%" PRIu64, tagID);
-    VkDebugUtilsLabelEXT tagInfo {
-        .sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT,
-        .pNext = nullptr,
-        .pLabelName = tagLabel.c_str(),
-        .color = { 0.0f, 0.0f, 0.0f, 0.0f }
-    };
-
     // Release the lock to call into the driver
     lock.unlock();
-    layer->driver.vkCmdBeginDebugUtilsLabelEXT(commandBuffer, &tagInfo);
+    emitStartTag(layer, commandBuffer, tagID);
     layer->driver.vkCmdDispatchBase(commandBuffer, baseGroupX, baseGroupY, baseGroupZ, groupCountX, groupCountY, groupCountZ);
     layer->driver.vkCmdEndDebugUtilsLabelEXT(commandBuffer);
 }
@@ -145,18 +144,9 @@ VKAPI_ATTR void VKAPI_CALL layer_vkCmdDispatchBaseKHR<user_tag>(
         static_cast<int64_t>(groupCountY),
         static_cast<int64_t>(groupCountZ));
 
-    // Emit the unique workload tag into the command stream
-    std::string tagLabel = formatString("t%" PRIu64, tagID);
-    VkDebugUtilsLabelEXT tagInfo {
-        .sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT,
-        .pNext = nullptr,
-        .pLabelName = tagLabel.c_str(),
-        .color = { 0.0f, 0.0f, 0.0f, 0.0f }
-    };
-
     // Release the lock to call into the driver
     lock.unlock();
-    layer->driver.vkCmdBeginDebugUtilsLabelEXT(commandBuffer, &tagInfo);
+    emitStartTag(layer, commandBuffer, tagID);
     layer->driver.vkCmdDispatchBaseKHR(commandBuffer, baseGroupX, baseGroupY, baseGroupZ, groupCountX, groupCountY, groupCountZ);
     layer->driver.vkCmdEndDebugUtilsLabelEXT(commandBuffer);
 }
@@ -176,18 +166,9 @@ VKAPI_ATTR void VKAPI_CALL layer_vkCmdDispatchIndirect<user_tag>(
 
     uint64_t tagID = registerDispatch(layer, commandBuffer, -1, -1, -1);
 
-    // Emit the unique workload tag into the command stream
-    std::string tagLabel = formatString("t%" PRIu64, tagID);
-    VkDebugUtilsLabelEXT tagInfo {
-        .sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT,
-        .pNext = nullptr,
-        .pLabelName = tagLabel.c_str(),
-        .color = { 0.0f, 0.0f, 0.0f, 0.0f }
-    };
-
     // Release the lock to call into the driver
     lock.unlock();
-    layer->driver.vkCmdBeginDebugUtilsLabelEXT(commandBuffer, &tagInfo);
+    emitStartTag(layer, commandBuffer, tagID);
     layer->driver.vkCmdDispatchIndirect(commandBuffer, buffer, offset);
     layer->driver.vkCmdEndDebugUtilsLabelEXT(commandBuffer);
 }
diff --git a/layer_gpu_timeline/source/layer_device_functions_trace_rays.cpp b/layer_gpu_timeline/source/layer_device_functions_trace_rays.cpp
index 5373747..2d99a3b 100644
--- a/layer_gpu_timeline/source/layer_device_functions_trace_rays.cpp
+++ b/layer_gpu_timeline/source/layer_device_functions_trace_rays.cpp
@@ -32,13 +32,33 @@
 
 extern std::mutex g_vulkanLock;
 
-static void registerTraceRays(
+static uint64_t registerTraceRays(
     Device* layer,
-    VkCommandBuffer commandBuffer
+    VkCommandBuffer commandBuffer,
+    int64_t itemsX,
+    int64_t itemsY,
+    int64_t itemsZ
+) {
+    auto& tracker = layer->getStateTracker();
+    auto& cb = tracker.getCommandBuffer(commandBuffer);
+    return cb.traceRays(itemsX, itemsY, itemsZ);
+}
+
+static void emitStartTag(
+    Device* layer,
+    VkCommandBuffer commandBuffer,
+    uint64_t tagID
 ) {
-    auto& state = layer->getStateTracker();
-    auto& stats = state.getCommandBuffer(commandBuffer).getStats();
-    stats.incTraceRaysCount();
+    // Emit the unique workload tag into the command stream
+    std::string tagLabel = formatString("t%" PRIu64, tagID);
+    VkDebugUtilsLabelEXT tagInfo {
+        .sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT,
+        .pNext = nullptr,
+        .pLabelName = tagLabel.c_str(),
+        .color = { 0.0f, 0.0f, 0.0f, 0.0f }
+    };
+
+    layer->driver.vkCmdBeginDebugUtilsLabelEXT(commandBuffer, &tagInfo);
 }
 
 /* See Vulkan API for documentation. */
@@ -53,11 +73,13 @@ VKAPI_ATTR void VKAPI_CALL layer_vkCmdTraceRaysIndirect2KHR<user_tag>(
     std::unique_lock<std::mutex> lock { g_vulkanLock };
     auto* layer = Device::retrieve(commandBuffer);
 
-    registerTraceRays(layer, commandBuffer);
+    uint64_t tagID = registerTraceRays(layer, commandBuffer, -1, -1, -1);
 
     // Release the lock to call into the driver
     lock.unlock();
+    emitStartTag(layer, commandBuffer, tagID);
     layer->driver.vkCmdTraceRaysIndirect2KHR(commandBuffer, indirectDeviceAddress);
+    layer->driver.vkCmdEndDebugUtilsLabelEXT(commandBuffer);
 }
 
 /* See Vulkan API for documentation. */
@@ -76,10 +98,12 @@ VKAPI_ATTR void VKAPI_CALL layer_vkCmdTraceRaysIndirectKHR<user_tag>(
     std::unique_lock<std::mutex> lock { g_vulkanLock };
     auto* layer = Device::retrieve(commandBuffer);
 
-    registerTraceRays(layer, commandBuffer);
+    uint64_t tagID = registerTraceRays(layer, commandBuffer, -1, -1, -1);
 
     // Release the lock to call into the driver
     lock.unlock();
+    emitStartTag(layer, commandBuffer, tagID);
     layer->driver.vkCmdTraceRaysIndirectKHR(commandBuffer, pRaygenShaderBindingTable, pMissShaderBindingTable, pHitShaderBindingTable, pCallableShaderBindingTable, indirectDeviceAddress);
+    layer->driver.vkCmdEndDebugUtilsLabelEXT(commandBuffer);
 }
 
@@ -101,9 +125,11 @@ VKAPI_ATTR void VKAPI_CALL layer_vkCmdTraceRaysKHR<user_tag>(
     std::unique_lock<std::mutex> lock { g_vulkanLock };
     auto* layer = Device::retrieve(commandBuffer);
 
-    registerTraceRays(layer, commandBuffer);
+    uint64_t tagID = registerTraceRays(layer, commandBuffer, width, height, depth);
 
     // Release the lock to call into the driver
     lock.unlock();
+    emitStartTag(layer, commandBuffer, tagID);
     layer->driver.vkCmdTraceRaysKHR(commandBuffer, pRaygenShaderBindingTable, pMissShaderBindingTable, pHitShaderBindingTable, pCallableShaderBindingTable, width, height, depth);
+    layer->driver.vkCmdEndDebugUtilsLabelEXT(commandBuffer);
 }
\ No newline at end of file
diff --git a/layer_gpu_timeline/source/layer_device_functions_transfer.cpp b/layer_gpu_timeline/source/layer_device_functions_transfer.cpp
new file mode 100644
index 0000000..066c23f
--- /dev/null
+++ b/layer_gpu_timeline/source/layer_device_functions_transfer.cpp
@@ -0,0 +1,476 @@
+/*
+ * SPDX-License-Identifier: MIT
+ * ----------------------------------------------------------------------------
+ * Copyright (c) 2024 Arm Limited
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ * ----------------------------------------------------------------------------
+ */
+
+#include <memory>
+#include <mutex>
+#include <thread>
+
+#include "device.hpp"
+#include "layer_device_functions.hpp"
+
+extern std::mutex g_vulkanLock;
+
+static uint64_t registerBufferTransfer(
+    Device* layer,
+    VkCommandBuffer commandBuffer,
+    const std::string& transferType,
+    int64_t byteCount
+) {
+    auto& tracker = layer->getStateTracker();
+    auto& cb = tracker.getCommandBuffer(commandBuffer);
+    return cb.bufferTransfer(transferType, byteCount);
+}
+
+static uint64_t registerImageTransfer(
+    Device* layer,
+    VkCommandBuffer commandBuffer,
+    const std::string& transferType,
+    int64_t pixelCount
+) {
+    auto& tracker = layer->getStateTracker();
+    auto& cb = tracker.getCommandBuffer(commandBuffer);
+    return cb.imageTransfer(transferType, pixelCount);
+}
+
+static void emitStartTag(
+    Device* layer,
+    VkCommandBuffer commandBuffer,
+    uint64_t tagID
+) {
+    // Emit the unique workload tag into the command stream
+    std::string tagLabel = formatString("t%" PRIu64, tagID);
+    VkDebugUtilsLabelEXT tagInfo {
+        .sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT,
+        .pNext = nullptr,
+        .pLabelName = tagLabel.c_str(),
+        .color = { 0.0f, 0.0f, 0.0f, 0.0f }
+    };
+
+    layer->driver.vkCmdBeginDebugUtilsLabelEXT(commandBuffer, &tagInfo);
+}
+
+// Commands for transfers
+
+/* See Vulkan API for documentation. */
+template <>
+VKAPI_ATTR void VKAPI_CALL layer_vkCmdFillBuffer<user_tag>(
+    VkCommandBuffer commandBuffer,
+    VkBuffer dstBuffer,
+    VkDeviceSize dstOffset,
+    VkDeviceSize size,
+    uint32_t data
+) {
+    LAYER_TRACE(__func__);
+
+    // Hold the lock to access layer-wide global store
+    std::unique_lock<std::mutex> lock { g_vulkanLock };
+    auto* layer = Device::retrieve(commandBuffer);
+
+    uint64_t tagID = registerBufferTransfer(
+        layer,
+        commandBuffer,
+        "Fill buffer",
+        -1);
+
+    // Release the lock to call into the driver
+    lock.unlock();
+    emitStartTag(layer, commandBuffer, tagID);
+    layer->driver.vkCmdFillBuffer(commandBuffer, dstBuffer, dstOffset, size, data);
+    layer->driver.vkCmdEndDebugUtilsLabelEXT(commandBuffer);
+}
+
+/* See Vulkan API for documentation. */
+template <>
+VKAPI_ATTR void VKAPI_CALL layer_vkCmdClearColorImage<user_tag>(
+    VkCommandBuffer commandBuffer,
+    VkImage image,
+    VkImageLayout imageLayout,
+    const VkClearColorValue* pColor,
+    uint32_t rangeCount,
+    const VkImageSubresourceRange* pRanges
+) {
+    LAYER_TRACE(__func__);
+
+    // Hold the lock to access layer-wide global store
+    std::unique_lock<std::mutex> lock { g_vulkanLock };
+    auto* layer = Device::retrieve(commandBuffer);
+
+    uint64_t tagID = registerImageTransfer(
+        layer,
+        commandBuffer,
+        "Clear image",
+        -1);
+
+    // Release the lock to call into the driver
+    lock.unlock();
+    emitStartTag(layer, commandBuffer, tagID);
+    layer->driver.vkCmdClearColorImage(commandBuffer, image, imageLayout, pColor, rangeCount, pRanges);
+    layer->driver.vkCmdEndDebugUtilsLabelEXT(commandBuffer);
+}
+
+/* See Vulkan API for documentation. */
+template <>
+VKAPI_ATTR void VKAPI_CALL layer_vkCmdClearDepthStencilImage<user_tag>(
+    VkCommandBuffer commandBuffer,
+    VkImage image,
+    VkImageLayout imageLayout,
+    const VkClearDepthStencilValue* pDepthStencil,
+    uint32_t rangeCount,
+    const VkImageSubresourceRange* pRanges
+) {
+    LAYER_TRACE(__func__);
+
+    // Hold the lock to access layer-wide global store
+    std::unique_lock<std::mutex> lock { g_vulkanLock };
+    auto* layer = Device::retrieve(commandBuffer);
+
+    uint64_t tagID = registerImageTransfer(
+        layer,
+        commandBuffer,
+        "Clear image",
+        -1);
+
+    // Release the lock to call into the driver
+    lock.unlock();
+    emitStartTag(layer, commandBuffer, tagID);
+    layer->driver.vkCmdClearDepthStencilImage(commandBuffer, image, imageLayout, pDepthStencil, rangeCount, pRanges);
+    layer->driver.vkCmdEndDebugUtilsLabelEXT(commandBuffer);
+}
+
+/* See Vulkan API for documentation. */
+template <>
+VKAPI_ATTR void VKAPI_CALL layer_vkCmdCopyBuffer<user_tag>(
+    VkCommandBuffer commandBuffer,
+    VkBuffer srcBuffer,
+    VkBuffer dstBuffer,
+    uint32_t regionCount,
+    const VkBufferCopy* pRegions
+) {
+    LAYER_TRACE(__func__);
+
+    // Hold the lock to access layer-wide global store
+    std::unique_lock<std::mutex> lock { g_vulkanLock };
+    auto* layer = Device::retrieve(commandBuffer);
+
+    uint64_t tagID = registerBufferTransfer(
+        layer,
+        commandBuffer,
+        "Copy buffer",
+        -1);
+
+    // Release the lock to call into the driver
+    lock.unlock();
+    emitStartTag(layer, commandBuffer, tagID);
+    layer->driver.vkCmdCopyBuffer(commandBuffer, srcBuffer, dstBuffer, regionCount, pRegions);
+    layer->driver.vkCmdEndDebugUtilsLabelEXT(commandBuffer);
+}
+
+/* See Vulkan API for documentation. */
+template <>
+VKAPI_ATTR void VKAPI_CALL layer_vkCmdCopyBuffer2<user_tag>(
+    VkCommandBuffer commandBuffer,
+    const VkCopyBufferInfo2* pCopyBufferInfo
+) {
+    LAYER_TRACE(__func__);
+
+    // Hold the lock to access layer-wide global store
+    std::unique_lock<std::mutex> lock { g_vulkanLock };
+    auto* layer = Device::retrieve(commandBuffer);
+
+    uint64_t tagID = registerBufferTransfer(
+        layer,
+        commandBuffer,
+        "Copy buffer",
+        -1);
+
+    // Release the lock to call into the driver
+    lock.unlock();
+    emitStartTag(layer, commandBuffer, tagID);
+    layer->driver.vkCmdCopyBuffer2(commandBuffer, pCopyBufferInfo);
+    layer->driver.vkCmdEndDebugUtilsLabelEXT(commandBuffer);
+}
+
+/* See Vulkan API for documentation. */
+template <>
+VKAPI_ATTR void VKAPI_CALL layer_vkCmdCopyBuffer2KHR<user_tag>(
+    VkCommandBuffer commandBuffer,
+    const VkCopyBufferInfo2* pCopyBufferInfo
+) {
+    LAYER_TRACE(__func__);
+
+    // Hold the lock to access layer-wide global store
+    std::unique_lock<std::mutex> lock { g_vulkanLock };
+    auto* layer = Device::retrieve(commandBuffer);
+
+    uint64_t tagID = registerBufferTransfer(
+        layer,
+        commandBuffer,
+        "Copy buffer",
+        -1);
+
+    // Release the lock to call into the driver
+    lock.unlock();
+    emitStartTag(layer, commandBuffer, tagID);
+    layer->driver.vkCmdCopyBuffer2KHR(commandBuffer, pCopyBufferInfo);
+    layer->driver.vkCmdEndDebugUtilsLabelEXT(commandBuffer);
+}
+
+/* See Vulkan API for documentation. */
+template <>
+VKAPI_ATTR void VKAPI_CALL layer_vkCmdCopyBufferToImage<user_tag>(
+    VkCommandBuffer commandBuffer,
+    VkBuffer srcBuffer,
+    VkImage dstImage,
+    VkImageLayout dstImageLayout,
+    uint32_t regionCount,
+    const VkBufferImageCopy* pRegions
+) {
+    LAYER_TRACE(__func__);
+
+    // Hold the lock to access layer-wide global store
+    std::unique_lock<std::mutex> lock { g_vulkanLock };
+    auto* layer = Device::retrieve(commandBuffer);
+
+    uint64_t tagID = registerImageTransfer(
+        layer,
+        commandBuffer,
+        "Copy image",
+        -1);
+
+    // Release the lock to call into the driver
+    lock.unlock();
+    emitStartTag(layer, commandBuffer, tagID);
+    layer->driver.vkCmdCopyBufferToImage(commandBuffer, srcBuffer, dstImage, dstImageLayout, regionCount, pRegions);
+    layer->driver.vkCmdEndDebugUtilsLabelEXT(commandBuffer);
+}
+
+/* See Vulkan API for documentation. */
+template <>
+VKAPI_ATTR void VKAPI_CALL layer_vkCmdCopyBufferToImage2<user_tag>(
+    VkCommandBuffer commandBuffer,
+    const VkCopyBufferToImageInfo2* pCopyBufferToImageInfo
+) {
+    LAYER_TRACE(__func__);
+
+    // Hold the lock to access layer-wide global store
+    std::unique_lock<std::mutex> lock { g_vulkanLock };
+    auto* layer = Device::retrieve(commandBuffer);
+
+    uint64_t tagID = registerImageTransfer(
+        layer,
+        commandBuffer,
+        "Copy image",
+        -1);
+
+    // Release the lock to call into the driver
+    lock.unlock();
+    emitStartTag(layer, commandBuffer, tagID);
+    layer->driver.vkCmdCopyBufferToImage2(commandBuffer, pCopyBufferToImageInfo);
+    layer->driver.vkCmdEndDebugUtilsLabelEXT(commandBuffer);
+}
+
+/* See Vulkan API for documentation. */
+template <>
+VKAPI_ATTR void VKAPI_CALL layer_vkCmdCopyBufferToImage2KHR<user_tag>(
+    VkCommandBuffer commandBuffer,
+    const VkCopyBufferToImageInfo2* pCopyBufferToImageInfo
+) {
+    LAYER_TRACE(__func__);
+
+    // Hold the lock to access layer-wide global store
+    std::unique_lock<std::mutex> lock { g_vulkanLock };
+    auto* layer = Device::retrieve(commandBuffer);
+
+    uint64_t tagID = registerImageTransfer(
+        layer,
+        commandBuffer,
+        "Copy image",
+        -1);
+
+    // Release the lock to call into the driver
+    lock.unlock();
+    emitStartTag(layer, commandBuffer, tagID);
+    layer->driver.vkCmdCopyBufferToImage2KHR(commandBuffer, pCopyBufferToImageInfo);
+    layer->driver.vkCmdEndDebugUtilsLabelEXT(commandBuffer);
+}
+
+/* See Vulkan API for documentation. */
+template <>
+VKAPI_ATTR void VKAPI_CALL layer_vkCmdCopyImage<user_tag>(
+    VkCommandBuffer commandBuffer,
+    VkImage srcImage,
+    VkImageLayout srcImageLayout,
+    VkImage dstImage,
+    VkImageLayout dstImageLayout,
+    uint32_t regionCount,
+    const VkImageCopy* pRegions
+) {
+    LAYER_TRACE(__func__);
+
+    // Hold the lock to access layer-wide global store
+    std::unique_lock<std::mutex> lock { g_vulkanLock };
+    auto* layer = Device::retrieve(commandBuffer);
+
+    uint64_t tagID = registerImageTransfer(
+        layer,
+        commandBuffer,
+        "Copy image",
+        -1);
+
+    // Release the lock to call into the driver
+    lock.unlock();
+    emitStartTag(layer, commandBuffer, tagID);
+    layer->driver.vkCmdCopyImage(commandBuffer, srcImage, srcImageLayout, dstImage, dstImageLayout, regionCount, pRegions);
+    layer->driver.vkCmdEndDebugUtilsLabelEXT(commandBuffer);
+}
+
+/* See Vulkan API for documentation. */
+template <>
+VKAPI_ATTR void VKAPI_CALL layer_vkCmdCopyImage2<user_tag>(
+    VkCommandBuffer commandBuffer,
+    const VkCopyImageInfo2* pCopyImageInfo
+) {
+    LAYER_TRACE(__func__);
+
+    // Hold the lock to access layer-wide global store
+    std::unique_lock<std::mutex> lock { g_vulkanLock };
+    auto* layer = Device::retrieve(commandBuffer);
+
+    uint64_t tagID = registerImageTransfer(
+        layer,
+        commandBuffer,
+        "Copy image",
+        -1);
+
+    // Release the lock to call into the driver
+    lock.unlock();
+    emitStartTag(layer, commandBuffer, tagID);
+    layer->driver.vkCmdCopyImage2(commandBuffer, pCopyImageInfo);
+    layer->driver.vkCmdEndDebugUtilsLabelEXT(commandBuffer);
+}
+
+/* See Vulkan API for documentation. */
+template <>
+VKAPI_ATTR void VKAPI_CALL layer_vkCmdCopyImage2KHR<user_tag>(
+    VkCommandBuffer commandBuffer,
+    const VkCopyImageInfo2* pCopyImageInfo
+) {
+    LAYER_TRACE(__func__);
+
+    // Hold the lock to access layer-wide global store
+    std::unique_lock<std::mutex> lock { g_vulkanLock };
+    auto* layer = Device::retrieve(commandBuffer);
+
+    uint64_t tagID = registerImageTransfer(
+        layer,
+        commandBuffer,
+        "Copy image",
+        -1);
+
+    // Release the lock to call into the driver
+    lock.unlock();
+    emitStartTag(layer, commandBuffer, tagID);
+    layer->driver.vkCmdCopyImage2KHR(commandBuffer, pCopyImageInfo);
+    layer->driver.vkCmdEndDebugUtilsLabelEXT(commandBuffer);
+}
+
+/* See Vulkan API for documentation. */
+template <>
+VKAPI_ATTR void VKAPI_CALL layer_vkCmdCopyImageToBuffer<user_tag>(
+    VkCommandBuffer commandBuffer,
+    VkImage srcImage,
+    VkImageLayout srcImageLayout,
+    VkBuffer dstBuffer,
+    uint32_t regionCount,
+    const VkBufferImageCopy* pRegions
+) {
+    LAYER_TRACE(__func__);
+
+    // Hold the lock to access layer-wide global store
+    std::unique_lock<std::mutex> lock { g_vulkanLock };
+    auto* layer = Device::retrieve(commandBuffer);
+
+    uint64_t tagID = registerBufferTransfer(
+        layer,
+        commandBuffer,
+        "Copy buffer",
+        -1);
+
+    // Release the lock to call into the driver
+    lock.unlock();
+    emitStartTag(layer, commandBuffer, tagID);
+    layer->driver.vkCmdCopyImageToBuffer(commandBuffer, srcImage, srcImageLayout, dstBuffer, regionCount, pRegions);
+    layer->driver.vkCmdEndDebugUtilsLabelEXT(commandBuffer);
+}
+
+/* See Vulkan API for documentation. */
+template <>
+VKAPI_ATTR void VKAPI_CALL layer_vkCmdCopyImageToBuffer2<user_tag>(
+    VkCommandBuffer commandBuffer,
+    const VkCopyImageToBufferInfo2* pCopyImageToBufferInfo
+) {
+    LAYER_TRACE(__func__);
+
+    // Hold the lock to access layer-wide global store
+    std::unique_lock<std::mutex> lock { g_vulkanLock };
+    auto* layer = Device::retrieve(commandBuffer);
+
+    uint64_t tagID = registerBufferTransfer(
+        layer,
+        commandBuffer,
+        "Copy buffer",
+        -1);
+
+    // Release the lock to call into the driver
+    lock.unlock();
+    emitStartTag(layer, commandBuffer, tagID);
+    layer->driver.vkCmdCopyImageToBuffer2(commandBuffer, pCopyImageToBufferInfo);
+    layer->driver.vkCmdEndDebugUtilsLabelEXT(commandBuffer);
+}
+
+/* See Vulkan API for documentation. */
+template <>
+VKAPI_ATTR void VKAPI_CALL layer_vkCmdCopyImageToBuffer2KHR<user_tag>(
+    VkCommandBuffer commandBuffer,
+    const VkCopyImageToBufferInfo2* pCopyImageToBufferInfo
+) {
+    LAYER_TRACE(__func__);
+
+    // Hold the lock to access layer-wide global store
+    std::unique_lock<std::mutex> lock { g_vulkanLock };
+    auto* layer = Device::retrieve(commandBuffer);
+
+    uint64_t tagID = registerBufferTransfer(
+        layer,
+        commandBuffer,
+        "Copy buffer",
+        -1);
+
+    // Release the lock to call into the driver
+    lock.unlock();
+    emitStartTag(layer, commandBuffer, tagID);
+    layer->driver.vkCmdCopyImageToBuffer2KHR(commandBuffer, pCopyImageToBufferInfo);
+    layer->driver.vkCmdEndDebugUtilsLabelEXT(commandBuffer);
+}
diff --git a/lglpy/service_gpu_timeline.py b/lglpy/service_gpu_timeline.py
index 0f619ae..9ead818 100644
--- a/lglpy/service_gpu_timeline.py
+++ b/lglpy/service_gpu_timeline.py
@@ -77,8 +77,7 @@ def handle_renderpass(self, msg):
         else:
             self.frame['workloads'].append(msg)
 
-    def handle_dispatch(self, msg):
-        # Find the last workload
+    def handle_generic(self, msg):
         self.frame['workloads'].append(msg)
 
     def handle_message(self, message: Message):
@@ -93,8 +92,8 @@ def handle_message(self, message: Message):
         elif payloadType == 'renderpass':
             self.handle_renderpass(parsedPayload)
 
-        elif payloadType == 'dispatch':
-            self.handle_dispatch(parsedPayload)
+        elif payloadType in ('dispatch', 'tracerays', 'imagetransfer', 'buffertransfer'):
+            self.handle_generic(parsedPayload)
 
         else:
             assert False, f'Unknown payload type {payloadType}'
diff --git a/source_common/trackers/command_buffer.cpp b/source_common/trackers/command_buffer.cpp
index ef47730..b1b5d3c 100644
--- a/source_common/trackers/command_buffer.cpp
+++ b/source_common/trackers/command_buffer.cpp
@@ -137,7 +137,6 @@ uint64_t CommandBuffer::dispatch(
     int64_t yGroups,
     int64_t zGroups
 ) {
-    LAYER_LOG("Creating LCSDispatch workload");
     uint64_t tagID = Tracker::LCSWorkload::assignTagID();
     stats.incDispatchCount();
 
@@ -153,6 +152,67 @@ uint64_t CommandBuffer::dispatch(
     return tagID;
 }
 
+/* See header for documentation. */
+uint64_t CommandBuffer::traceRays(
+    int64_t xItems,
+    int64_t yItems,
+    int64_t zItems
+) {
+    uint64_t tagID = Tracker::LCSWorkload::assignTagID();
+    stats.incTraceRaysCount();
+
+    // Add a workload to the command buffer
+    auto workload = std::make_shared<LCSTraceRays>(
+        tagID, xItems, yItems, zItems);
+    workloads.push_back(workload);
+
+    // Add a command to the layer-side command stream
+    auto instr = std::make_pair(LCSOpcode::TRACE_RAYS, workload);
+    workloadCommandStream.push_back(instr);
+
+    return tagID;
+}
+
+/* See header for documentation. */
+uint64_t CommandBuffer::imageTransfer(
+    const std::string& transferType,
+    int64_t pixelCount
+) {
+    uint64_t tagID = Tracker::LCSWorkload::assignTagID();
+    stats.incImageTransferCount();
+
+    // Add a workload to the command buffer
+    auto workload = std::make_shared<LCSImageTransfer>(
+        tagID, transferType, pixelCount);
+    workloads.push_back(workload);
+
+    // Add a command to the layer-side command stream
+    auto instr = std::make_pair(LCSOpcode::IMAGE_TRANSFER, workload);
+    workloadCommandStream.push_back(instr);
+
+    return tagID;
+}
+
+/* See header for documentation. */
+uint64_t CommandBuffer::bufferTransfer(
+    const std::string& transferType,
+    int64_t byteCount
+) {
+    uint64_t tagID = Tracker::LCSWorkload::assignTagID();
+    stats.incBufferTransferCount();
+
+    // Add a workload to the command buffer
+    auto workload = std::make_shared<LCSBufferTransfer>(
+        tagID, transferType, byteCount);
+    workloads.push_back(workload);
+
+    // Add a command to the layer-side command stream
+    auto instr = std::make_pair(LCSOpcode::BUFFER_TRANSFER, workload);
+    workloadCommandStream.push_back(instr);
+
+    return tagID;
+}
+
 /* See header for documentation. */
 void CommandBuffer::executeCommands(
     CommandBuffer& secondary
diff --git a/source_common/trackers/command_buffer.hpp b/source_common/trackers/command_buffer.hpp
index c4b4828..ea474c0 100644
--- a/source_common/trackers/command_buffer.hpp
+++ b/source_common/trackers/command_buffer.hpp
@@ -128,6 +128,44 @@ class CommandBuffer
         int64_t yGroups,
         int64_t zGroups);
 
+    /**
+     * @brief Capture a trace rays dispatch.
+     *
+     * @param xItems   Number of work items in X dimension, or -1 if unknown.
+     * @param yItems   Number of work items in Y dimension, or -1 if unknown.
+     * @param zItems   Number of work items in Z dimension, or -1 if unknown.
+     *
+     * @return Returns the tagID assigned to this workload.
+     */
+    uint64_t traceRays(
+        int64_t xItems,
+        int64_t yItems,
+        int64_t zItems);
+
+    /**
+     * @brief Capture a transfer where the destination is an image.
+     *
+     * @param transferType   The type of the transfer.
+     * @param pixelCount     The number of pixels written.
+     *
+     * @return Returns the tagID assigned to this workload.
+     */
+    uint64_t imageTransfer(
+        const std::string& transferType,
+        int64_t pixelCount);
+
+    /**
+     * @brief Capture a transfer where the destination is a buffer.
+     *
+     * @param transferType   The type of the transfer.
+     * @param byteCount      The number of bytes written.
+     *
+     * @return Returns the tagID assigned to this workload.
+     */
+    uint64_t bufferTransfer(
+        const std::string& transferType,
+        int64_t byteCount);
+
     /**
      * @brief Begin a user debug marker range.
      */
diff --git a/source_common/trackers/layer_command_stream.cpp b/source_common/trackers/layer_command_stream.cpp
index 2b29d07..90c8c72 100644
--- a/source_common/trackers/layer_command_stream.cpp
+++ b/source_common/trackers/layer_command_stream.cpp
@@ -222,5 +222,115 @@ std::string LCSDispatch::getMetadata(
     return metadata.dump();
 }
 
+/* See header for details. */
+LCSTraceRays::LCSTraceRays(
+    uint64_t _tagID,
+    int64_t _xItems,
+    int64_t _yItems,
+    int64_t _zItems) :
+    LCSWorkload(_tagID),
+    xItems(_xItems),
+    yItems(_yItems),
+    zItems(_zItems)
+{
+
+}
+
+/* See header for details. */
+std::string LCSTraceRays::getMetadata(
+    const std::string* debugLabel,
+    uint64_t tagIDContinuation,
+    uint64_t submitID
+) const {
+    UNUSED(tagIDContinuation);
+    UNUSED(submitID);
+
+    json metadata = {
+        { "type", "tracerays" },
+        { "tid", tagID },
+        { "xItems", xItems },
+        { "yItems", yItems },
+        { "zItems", zItems }
+    };
+
+    if (debugLabel && debugLabel->size())
+    {
+        metadata["label"] = *debugLabel;
+    }
+
+    return metadata.dump();
+}
+
+/* See header for details. */
+LCSImageTransfer::LCSImageTransfer(
+    uint64_t _tagID,
+    const std::string& _transferType,
+    int64_t _pixelCount):
+    LCSWorkload(_tagID),
+    transferType(_transferType),
+    pixelCount(_pixelCount)
+{
+
+}
+
+/* See header for details. */
+std::string LCSImageTransfer::getMetadata(
+    const std::string* debugLabel,
+    uint64_t tagIDContinuation,
+    uint64_t submitID
+) const {
+    UNUSED(tagIDContinuation);
+    UNUSED(submitID);
+
+    json metadata = {
+        { "type", "imagetransfer" },
+        { "tid", tagID },
+        { "subtype", transferType },
+        { "pixels", pixelCount }
+    };
+
+    if (debugLabel && debugLabel->size())
+    {
+        metadata["label"] = *debugLabel;
+    }
+
+    return metadata.dump();
+}
+
+/* See header for details. */
+LCSBufferTransfer::LCSBufferTransfer(
+    uint64_t _tagID,
+    const std::string& _transferType,
+    int64_t _byteCount):
+    LCSWorkload(_tagID),
+    transferType(_transferType),
+    byteCount(_byteCount)
+{
+
+}
+
+/* See header for details. */
+std::string LCSBufferTransfer::getMetadata(
+    const std::string* debugLabel,
+    uint64_t tagIDContinuation,
+    uint64_t submitID
+) const {
+    UNUSED(tagIDContinuation);
+    UNUSED(submitID);
+
+    json metadata = {
+        { "type", "buffertransfer" },
+        { "tid", tagID },
+        { "subtype", transferType },
+        { "bytes", byteCount }
+    };
+
+    if (debugLabel && debugLabel->size())
+    {
+        metadata["label"] = *debugLabel;
+    }
+
+    return metadata.dump();
+}
 
 }
diff --git a/source_common/trackers/layer_command_stream.hpp b/source_common/trackers/layer_command_stream.hpp
index 246558d..29acdd6 100644
--- a/source_common/trackers/layer_command_stream.hpp
+++ b/source_common/trackers/layer_command_stream.hpp
@@ -204,6 +204,77 @@ class LCSDispatch : public LCSWorkload
     int64_t zGroups;
 };
 
+/**
+ * @brief Class representing a trace rays workload in the command stream.
+ */
+class LCSTraceRays : public LCSWorkload
+{
+public:
+    LCSTraceRays(
+        uint64_t tagID,
+        int64_t xItems,
+        int64_t yItems,
+        int64_t zItems);
+
+    virtual ~LCSTraceRays() = default;
+
+    virtual std::string getMetadata(
+        const std::string* debugLabel=nullptr,
+        uint64_t tagIDContinuation=0,
+        uint64_t submitID=0) const;
+
+private:
+    int64_t xItems;
+    int64_t yItems;
+    int64_t zItems;
+};
+
+/**
+ * @brief Class representing an image transfer workload in the command stream.
+ */
+class LCSImageTransfer : public LCSWorkload
+{
+public:
+    LCSImageTransfer(
+        uint64_t tagID,
+        const std::string& transferType,
+        int64_t pixelCount);
+
+    virtual ~LCSImageTransfer() = default;
+
+    virtual std::string getMetadata(
+        const std::string* debugLabel=nullptr,
+        uint64_t tagIDContinuation=0,
+        uint64_t submitID=0) const;
+
+private:
+    std::string transferType;
+    int64_t pixelCount;
+};
+
+/**
+ * @brief Class representing a buffer transfer workload in the command stream.
+ */
+class LCSBufferTransfer : public LCSWorkload
+{
+public:
+    LCSBufferTransfer(
+        uint64_t tagID,
+        const std::string& transferType,
+        int64_t byteCount);
+
+    virtual ~LCSBufferTransfer() = default;
+
+    virtual std::string getMetadata(
+        const std::string* debugLabel=nullptr,
+        uint64_t tagIDContinuation=0,
+        uint64_t submitID=0) const;
+
+private:
+    std::string transferType;
+    int64_t byteCount;
+};
+
 /**
  * @brief Baseclass representing a GPU workload in the command stream.
  */
diff --git a/source_common/trackers/queue.cpp b/source_common/trackers/queue.cpp
index 756ea07..7b79606 100644
--- a/source_common/trackers/queue.cpp
+++ b/source_common/trackers/queue.cpp
@@ -86,7 +86,10 @@ void Queue::runSubmitCommandStream(
                 }
             }
         }
-        else if (opCode == LCSOpcode::DISPATCH)
+        else if ((opCode == LCSOpcode::DISPATCH) ||
+                 (opCode == LCSOpcode::TRACE_RAYS) ||
+                 (opCode == LCSOpcode::IMAGE_TRANSFER) ||
+                 (opCode == LCSOpcode::BUFFER_TRANSFER))
         {
             uint64_t tagID = opData->getTagID();
             std::string log = joinString(debugStack, "|");

From 9ac3bc1cd97f2cdc24e8db1cd7d43863617c65a3 Mon Sep 17 00:00:00 2001
From: Peter Harris <peter.harris@arm.com>
Date: Thu, 12 Dec 2024 21:43:37 +0000
Subject: [PATCH 7/8] Code style cleanup

---
 generator/vk_codegen/device_defs.txt          |   2 -
 generator/vk_codegen/instance_defs.txt        |   2 -
 generator/vk_layer/source/device.cpp          |  24 +-
 generator/vk_layer/source/device.hpp          |  18 +-
 generator/vk_layer/source/instance.cpp        |  14 +-
 generator/vk_layer/source/instance.hpp        |  13 +-
 generator/vk_layer/source/version.hpp.in      |   4 +-
 layer_gpu_timeline/README_LAYER.md            | 225 +++++++-----------
 layer_gpu_timeline/android_build.sh           |   2 +-
 .../docs/command_buffer_model.md              | 155 ------------
 layer_gpu_timeline/source/device.cpp          |  24 +-
 layer_gpu_timeline/source/device.hpp          |  44 ++--
 layer_gpu_timeline/source/device_utils.hpp    |  56 +++++
 layer_gpu_timeline/source/instance.cpp        |  14 +-
 layer_gpu_timeline/source/instance.hpp        |  13 +-
 .../source/layer_device_functions.hpp         |   2 +
 .../layer_device_functions_command_buffer.cpp |   2 -
 .../layer_device_functions_command_pool.cpp   |   2 -
 .../source/layer_device_functions_debug.cpp   |   2 -
 .../layer_device_functions_dispatch.cpp       |  31 +--
 .../layer_device_functions_draw_call.cpp      |   6 +
 .../source/layer_device_functions_queue.cpp   |   4 +-
 .../layer_device_functions_render_pass.cpp    |  68 +-----
 .../layer_device_functions_trace_rays.cpp     |  29 +--
 .../layer_device_functions_transfer.cpp       |  38 +--
 layer_gpu_timeline/source/timeline_comms.cpp  |  13 +-
 layer_gpu_timeline/source/timeline_comms.hpp  |  31 ++-
 layer_gpu_timeline/source/version.hpp.in      |   4 +-
 source_common/comms/comms_message.cpp         |   3 +-
 source_common/comms/comms_module.cpp          |  22 +-
 source_common/comms/comms_receiver.cpp        |  21 +-
 source_common/comms/comms_transmitter.cpp     |  15 +-
 .../comms/test/comms_test_server.cpp          |  14 +-
 source_common/framework/device_functions.cpp  |   2 -
 .../framework/instance_functions.cpp          |   2 -
 source_common/trackers/command_buffer.hpp     |  11 +-
 source_common/trackers/device.cpp             |  10 +-
 .../trackers/layer_command_stream.cpp         |  26 +-
 source_common/trackers/queue.hpp              |   7 +-
 source_common/trackers/render_pass.cpp        |  44 +++-
 source_common/trackers/render_pass.hpp        |  93 +++++---
 source_common/utils/misc.hpp                  |  11 +-
 source_common/utils/queue.hpp                 |   7 +-
 43 files changed, 512 insertions(+), 618 deletions(-)
 delete mode 100644 layer_gpu_timeline/docs/command_buffer_model.md
 create mode 100644 layer_gpu_timeline/source/device_utils.hpp

diff --git a/generator/vk_codegen/device_defs.txt b/generator/vk_codegen/device_defs.txt
index 3b87173..b217a1d 100644
--- a/generator/vk_codegen/device_defs.txt
+++ b/generator/vk_codegen/device_defs.txt
@@ -1,6 +1,4 @@
-#include <memory>
 #include <mutex>
-#include <thread>
 
 // Include from per-layer code
 #include "utils.hpp"
diff --git a/generator/vk_codegen/instance_defs.txt b/generator/vk_codegen/instance_defs.txt
index d4891fb..400263c 100644
--- a/generator/vk_codegen/instance_defs.txt
+++ b/generator/vk_codegen/instance_defs.txt
@@ -1,6 +1,4 @@
-#include <memory>
 #include <mutex>
-#include <thread>
 
 // Include from per-layer code
 #include "device.hpp"
diff --git a/generator/vk_layer/source/device.cpp b/generator/vk_layer/source/device.cpp
index 580e339..3371cff 100644
--- a/generator/vk_layer/source/device.cpp
+++ b/generator/vk_layer/source/device.cpp
@@ -34,6 +34,9 @@
 #include "device.hpp"
 #include "instance.hpp"
 
+/**
+ * @brief The dispatch lookup for all of the created Vulkan devices.
+ */
 static std::unordered_map<void*, std::unique_ptr<Device>> g_devices;
 
 /* See header for documentation. */
@@ -47,8 +50,8 @@ void Device::store(
 
 /* See header for documentation. */
 Device* Device::retrieve(
-    VkDevice handle)
-{
+    VkDevice handle
+) {
     void* key = getDispatchKey(handle);
     assert(isInMap(key, g_devices));
     return g_devices.at(key).get();
@@ -56,8 +59,8 @@ Device* Device::retrieve(
 
 /* See header for documentation. */
 Device* Device::retrieve(
-    VkQueue handle)
-{
+    VkQueue handle
+) {
     void* key = getDispatchKey(handle);
     assert(isInMap(key, g_devices));
     return g_devices.at(key).get();
@@ -65,8 +68,8 @@ Device* Device::retrieve(
 
 /* See header for documentation. */
 Device* Device::retrieve(
-    VkCommandBuffer handle)
-{
+    VkCommandBuffer handle
+) {
     void* key = getDispatchKey(handle);
     assert(isInMap(key, g_devices));
     return g_devices.at(key).get();
@@ -85,15 +88,10 @@ Device::Device(
     VkPhysicalDevice _physicalDevice,
     VkDevice _device,
     PFN_vkGetDeviceProcAddr nlayerGetProcAddress
-):  instance(_instance),
+):
+    instance(_instance),
     physicalDevice(_physicalDevice),
     device(_device)
 {
     initDriverDeviceDispatchTable(device, nlayerGetProcAddress, driver);
 }
-
-/* See header for documentation. */
-Device::~Device()
-{
-
-}
diff --git a/generator/vk_layer/source/device.hpp b/generator/vk_layer/source/device.hpp
index d6ecad7..c0e1f0a 100644
--- a/generator/vk_layer/source/device.hpp
+++ b/generator/vk_layer/source/device.hpp
@@ -24,8 +24,7 @@
  */
 
 /**
- * @file
- * Declares the root class for layer management of VkDevice objects.
+ * @file Declares the root class for layer management of VkDevice objects.
  *
  * Role summary
  * ============
@@ -41,10 +40,9 @@
  * Key properties
  * ==============
  *
- * Unlike EGL contexts, Vulkan devices are designed to be used concurrently by
- * multiple application threads. An application can have multiple concurrent
- * devices (although this is less common than with OpenGL ES applications), and
- * use each device from multiple threads.
+ * Vulkan devices are designed to be used concurrently by multiple application
+ * threads. An application can have multiple concurrent devices, and use each
+ * device from multiple threads.
  *
  * Access to the layer driver structures must therefore be kept thread-safe.
  * For sake of simplicity, we generally implement this by:
@@ -80,6 +78,8 @@ class Device
      * @brief Fetch a device from the global store of dispatchable devices.
      *
      * @param handle   The dispatchable device handle to use as an indirect lookup.
+     *
+     * @return The layer device context.
      */
     static Device* retrieve(
         VkDevice handle);
@@ -88,6 +88,8 @@ class Device
      * @brief Fetch a device from the global store of dispatchable devices.
      *
      * @param handle   The dispatchable queue handle to use as an indirect lookup.
+     *
+     * @return The layer device context.
      */
     static Device* retrieve(
         VkQueue handle);
@@ -96,6 +98,8 @@ class Device
      * @brief Fetch a device from the global store of dispatchable devices.
      *
      * @param handle   The dispatchable command buffer handle to use as an indirect lookup.
+     *
+     * @return The layer device context.
      */
     static Device* retrieve(
         VkCommandBuffer handle);
@@ -125,7 +129,7 @@ class Device
     /**
      * @brief Destroy this layer device object.
      */
-    ~Device();
+    ~Device() = default;
 
 public:
     /**
diff --git a/generator/vk_layer/source/instance.cpp b/generator/vk_layer/source/instance.cpp
index 6ac278e..0b62857 100644
--- a/generator/vk_layer/source/instance.cpp
+++ b/generator/vk_layer/source/instance.cpp
@@ -29,6 +29,9 @@
 
 #include "instance.hpp"
 
+/**
+ * @brief The dispatch lookup for all of the created Vulkan instances.
+ */
 static std::unordered_map<void*, std::unique_ptr<Instance>> g_instances;
 
 /* See header for documentation. */
@@ -42,8 +45,8 @@ void Instance::store(
 
 /* See header for documentation. */
 Instance* Instance::retrieve(
-    VkInstance handle)
-{
+    VkInstance handle
+) {
     void* key = getDispatchKey(handle);
     assert(isInMap(key, g_instances));
     return g_instances.at(key).get();
@@ -51,8 +54,8 @@ Instance* Instance::retrieve(
 
 /* See header for documentation. */
 Instance* Instance::retrieve(
-    VkPhysicalDevice handle)
-{
+    VkPhysicalDevice handle
+) {
     void* key = getDispatchKey(handle);
     assert(isInMap(key, g_instances));
     return g_instances.at(key).get();
@@ -68,7 +71,8 @@ void Instance::destroy(
 /* See header for documentation. */
 Instance::Instance(
     VkInstance _instance,
-    PFN_vkGetInstanceProcAddr _nlayerGetProcAddress) :
+    PFN_vkGetInstanceProcAddr _nlayerGetProcAddress
+) :
     instance(_instance),
     nlayerGetProcAddress(_nlayerGetProcAddress)
 {
diff --git a/generator/vk_layer/source/instance.hpp b/generator/vk_layer/source/instance.hpp
index cfda54e..fc6af6b 100644
--- a/generator/vk_layer/source/instance.hpp
+++ b/generator/vk_layer/source/instance.hpp
@@ -42,9 +42,8 @@
  * Key properties
  * ==============
  *
- * Unlike EGL contexts, Vulkan instances are designed to be used concurrently
- * by multiple application threads. An application can have multiple concurrent
- * instances (although this is less common than with OpenGL ES applications),
+ * Vulkan instances are designed to be used concurrently by multiple
+ * application threads. An application can have multiple concurrent instances,
  * and use each instance from multiple threads.
  *
  * Access to the layer driver structures must therefore be kept thread-safe.
@@ -65,10 +64,6 @@
 
 /**
  * @brief This class implements the layer state tracker for a single instance.
- *
- * These objects are relatively light-weight, as they are rarely used once a VkDevice has been
- * created, but we need to track the chain-of-ownership as the instance is the root object that
- * the application creates when initializing a rendering context.
  */
 class Instance
 {
@@ -87,6 +82,8 @@ class Instance
      * @brief Fetch an instance from the global store of dispatchable instances.
      *
      * @param handle   The dispatchable instance handle to use as an indirect lookup.
+     *
+     * @return The layer instance context.
      */
     static Instance* retrieve(
         VkInstance handle);
@@ -95,6 +92,8 @@ class Instance
      * @brief Fetch an instance from the global store of dispatchable instances.
      *
      * @param handle   The dispatchable physical device handle to use as an indirect lookup.
+     *
+     * @return The layer instance context.
      */
     static Instance* retrieve(
         VkPhysicalDevice handle);
diff --git a/generator/vk_layer/source/version.hpp.in b/generator/vk_layer/source/version.hpp.in
index 50c30b9..5fcb9c3 100644
--- a/generator/vk_layer/source/version.hpp.in
+++ b/generator/vk_layer/source/version.hpp.in
@@ -24,9 +24,7 @@
  */
 
 /**
- * @file
- * This header implements placeholder templates that are populated by CMake
- * during configure.
+ * @file Placeholder templates that are populated by CMake during configure.
  */
 
 #pragma once
diff --git a/layer_gpu_timeline/README_LAYER.md b/layer_gpu_timeline/README_LAYER.md
index 8f8a56c..a3b0fee 100644
--- a/layer_gpu_timeline/README_LAYER.md
+++ b/layer_gpu_timeline/README_LAYER.md
@@ -1,163 +1,100 @@
 # Layer: GPU Timeline
 
 This layer is used with Arm GPUs for tracking submitted schedulable workloads
-and emitting semantic information about them. This data can be combined with
-the raw workload execution timing information captured using the Android
-Perfetto service, providing developers with a richer debug visualization.
+and emitting useful metadata that can be used in tooling visualizations. This
+data can be combined with raw workload execution timing information captured
+by the Android Perfetto service, providing developers with more useful
+information about how their application is scheduled on to the Arm GPU.
 
-## What devices?
+## What devices are supported?
 
 The Arm GPU driver integration with the Perfetto render stages scheduler event
 trace is supported at production quality since the r47p0 driver version.
-However, associating semantics from this layer relies on a further integration
-with debug labels which requires an r51p0 or later driver version.
+However, associating additional metadata from this layer relies on additional
+functionality which requires an r51p0 or later driver version.
 
-## What workloads?
+## What workloads are supported?
 
-A schedulable workload is the smallest workload that the Arm GPU command stream
-scheduler will issue to the GPU hardware work queues. This includes the
-following workload types:
+The Arm GPU scheduler event trace can generate timing events for each
+atomically schedulable workload submitted to the GPU scheduler.
 
-* Render passes, split into:
-  * Vertex or Binning phase
-  * Fragment or Main phase
+Most workloads submitted to a Vulkan queue by the application are a single
+schedulable entity, for example a compute dispatch or transfer is a single
+workload.
+
+The exception to this is the render pass workload. Arm GPUs are tile-based, so
+each group of merged subpasses from a render pass is processed as two
+schedulable phases. The first phase - the vertex or binning phase - determines
+which primitives contribute to which screen-space tiles. The second phase - the
+fragment or main phase - reads the binning information and completes fragment
+shading tile-by-tile.
+
+This layer tracks the following workloads:
+
+* Render passes
 * Compute dispatches
-* Trace rays
+* Trace rays dispatches
 * Transfers to a buffer
 * Transfers to an image
 
-Most workloads are dispatched using a single API call, and are trivial to
-manage in the layer. However, render passes are more complex and need extra
-handling. In particular:
-
-* Render passes are issued using multiple API calls.
-* Useful render pass properties, such as draw count, are not known until the
-  render pass recording has ended.
-* Dynamic render passes using `vkCmdBeginRendering()` and `vkCmdEndRendering()`
-  can be suspended and resumed across command buffer boundaries. Properties
-  such as draw count are not defined by the scope of a single command buffer.
-
 ## Tracking workloads
 
-This layer tracks workloads encoded in command buffers, and emits semantic
-metadata for each workload via a communications side-channel. A host tool
-combines the semantic data stream with the Perfetto data stream, using debug
-label tags injected by the layer as a common cross-reference to link across
-the streams.
-
-### Workload labelling
-
-Command stream labelling is implemented using `vkCmdDebugMarkerBeginEXT()`
-and `vkCmdDebugMarkerEndEXT()`, wrapping one layer-owned `tagID` label around
-each semantic workload. This `tagID` can unambiguously refer to this workload
-encoding, and metadata that we do not expect to change per submit will be
-emitted using the matching `tagID` as the sole identifier.
-
-_**TODO:** Dynamic `submitID` tracking is not yet implemented._
-
-The `tagID` label is encoded into the recorded command buffer which means, for
-reusable command buffers, it is not an unambiguous identifier of a specific
-running workload. To allow us to disambiguate specific workload instances, the
-layer can optionally add an outer wrapper of `submitID` labels around each
-submitted command buffer. This wrapper is only generated if the submit contains
-any command buffers that require the generation of a per-submit annex (see the
-following section for when this is needed).
-
-The `submitID.tagID` pair of IDs uniquely identifies a specific running
-workload, and can be used to attach an instance-specific metadata annex to a
-specific submitted workload rather than to the shared recorded command buffer.
-
-### Workload metadata for split render passes
-
-_**TODO:** Split render pass tracking is not yet implemented._
-
-Dynamic render passes can be split across multiple Begin/End pairs, including
-being split across command buffer boundaries. If these splits occur within a
-single primary command buffer, or its secondaries, it is handled transparently
-by the layer and it appears as a single message as if no splits occurred. If
-these splits occur across primary command buffer boundaries, then some
-additional work is required.
-
-In our design a `tagID` debug marker is only started when the render pass first
-starts (not on resume), and stopped at the end of the render pass (not on
-suspend). The same `tagID` is used to refer to all parts of the render pass,
-no matter how many times it was suspended and resumed.
-
-If a render pass splits across command buffers, we cannot precompute metrics
-based on `tagID` alone, even if the command buffers are one-time use. This is
-because we do not know what combination of submitted command buffers will be
-used, and so we cannot know what the render pass contains until submit time.
-Split render passes will emit a `submitID.tagID` metadata annex containing
-the parameters that can only be known at submit time.
-
-### Workload metadata for compute dispatches
-
-_**TODO:** Compute workgroup parsing from the SPIR-V is not yet implemented._
-
-Compute workload dispatch is simple to track, but one of the metadata items we
-want to export is the total size of the work space (work_group_count *
-work_group_size).
-
-The work group count is defined by the API call, but may be an indirect
-parameter (see indirect tracking above).
-
-The work group size is defined by the program pipeline, and is defined in the
-SPIR-V via a literal or a build-time specialization constant. To support this
-use case we will need to parse the SPIR-V when the pipeline is built, if
-SPIR-V is available.
-
-### Workload metadata for indirect calls
-
-_**TODO:** Indirect parameter tracking is not yet implemented._
-
-One of the valuable pieces of metadata that we want to present is the size of
-each workload. For render passes this is captured at API call time, but for
-other workloads the size can be an indirect parameter that is not known when
-the triggering API call is made.
-
-To capture indirect parameters we insert a transfer that copies the indirect
-parameters into a layer-owned buffer. To ensure exclusive use of the buffer and
-avoid data corruption, each buffer region used is unique to a specific `tagID`.
-Attempting to submit the same command buffer multiple times will result in
-the workload being serialized to avoid racy access to the buffer. Once the
-buffer has been retrieved by the layer, a metadata annex containing the
-indirect parameters will be emitted using the `submitID.tagID` pair. This may
-be some time later than the original submit.
-
-### Workload metadata for user-defined labels
-
-The workload metadata captures user-defined labels that the application
-provides using `vkCmdDebugMarkerBeginEXT()` and `vkCmdDebugMarkerEndEXT()`.
-These are a stack-based debug mechanism where `Begin` pushes a new entry on to
-to the stack, and `End` pops the the most recent level off the stack.
-
-Workloads are labelled with the stack values that existed when the workload
-was started. For render passes this is the value on the stack when, e.g.,
-`vkCmdBeginRenderPass()` was called. We do not capture any labels that exist
-inside the render pass.
-
-The debug label stack belongs to the queue, not to the command buffer, so the
-value of the label stack is not known until submit time. The debug information
-for a specific `submitID.tagID` pair is therefore provided as an annex at
-submit time once the stack can be resolved.
-
-## Message protocol
-
-For each workload in a command buffer, or part-workload in the case of a
-suspended render pass, we record a JSON metadata blob containing the payload
-we want to send.
-
-The low level protocol message contains:
-
-* Message type `uint8_t`
-* Sequence ID `uint64_t` (optional, implied by message type)
-* Tag ID `uint64_t`
-* JSON length `uint32_t`
-* JSON payload `uint8_t[]`
-
-Each workload will read whatever properties it can from the `tagID` metadata
-and will then merge in all fields from any subsequent `sequenceID.tagID`
-metadata that matches.
+The latest Arm driver integration with the Perfetto profiler propagates
+application debug labels into the GPU Render Stages scheduler events. The debug
+labels are the label stack created using either of these Vulkan methods:
+
+* `vkCmdBegin/EndDebugUtilsLabelEXT()`
+* `vkCmdDebugMarkerBegin/EndEXT()`
+
+This layer utilizes this mechanism to wrap each submitted workload in a command
+buffer with a unique `tagID` which identifies that recorded workload. A
+metadata side-channel provides the metadata for each workload, annotating each
+metadata record with the matching `tagID` to allow them to be cross-referenced
+later.
+
+### Limitation: Indirect dispatches and trace rays
+
+The current implementation captures the metadata parameters when the command
+buffer is recorded. The layer does not currently support asynchronous capture
+of indirect parameter buffers. Indirect dispatch and trace rays are still
+captured and reported, but with unknown workload dimensions.
+
+### Limitation: Compute dispatch sizes
+
+The current implementation reports the size of a compute workload as the
+number of work groups, because this is the parameter used by the API. We
+eventually want to report this as the number of work items, but the parsing
+of the SPIR-V and pipeline parameters has not yet been implemented.
+
+### Limitation: Dynamic render passes split over multiple command buffers
+
+The label containing the `tagID` is recorded into the application command
+buffer when the command buffer is recorded. The workload-to-metadata mapping
+requires that every use of a `tagID` has the same properties, or we will
+be unable to associate the correct metadata with its matching workload.
+
+Content that splits a render pass over multiple command buffers that
+are not one-time-submit violates this requirement. Multiple submits of a render
+pass with a single `tagID` may have different numbers of draw calls, depending
+on the number of draws that occur in the later command buffers that resume the
+render pass. When the layer detects a suspended render pass in a multi-submit
+command buffer, it will still capture and report the workload, but with an
+unknown draw call count.
+
+## Command stream modelling
+
+Most properties we track are a property of the command buffer recording in
+isolation. However, the user debug label stack is a property of the queue and
+persists across submits. We can therefore only determine the debug label
+associated with a workload in the command stream at submit time, and must
+resolve it per workload inside the command buffer.
+
+To support this we implement a software command stream that contains simple
+bytecode actions that represent the sequence of debug label and workload
+commands inside each command buffer. This "command stream" can be played to
+update the queue state at submit time, triggering metadata submission
+for each workload that can snapshot the current state of the user debug label
+stack at that point in the command stream.
 
 - - -
 
diff --git a/layer_gpu_timeline/android_build.sh b/layer_gpu_timeline/android_build.sh
index 5bdbff5..960b2b0 100644
--- a/layer_gpu_timeline/android_build.sh
+++ b/layer_gpu_timeline/android_build.sh
@@ -67,7 +67,7 @@ cmake \
     -DCMAKE_TOOLCHAIN_FILE="${ANDROID_NDK_HOME}/build/cmake/android.toolchain.cmake" \
     ..
 
-make -j8
+make -j1
 
 popd
 
diff --git a/layer_gpu_timeline/docs/command_buffer_model.md b/layer_gpu_timeline/docs/command_buffer_model.md
deleted file mode 100644
index f317b51..0000000
--- a/layer_gpu_timeline/docs/command_buffer_model.md
+++ /dev/null
@@ -1,155 +0,0 @@
-# Layer: GPU Timeline - Command Buffer Modelling
-
-One of the main challenges of this layer driver is modelling behavior in queues
-and command buffers that is not known until submit time, and then taking
-appropriate actions based on the combination of both the head state of the
-queue and the content of the pre-recorded command buffers.
-
-Our design to solve this is a lightweight software command stream which is
-recorded when a command buffer is recorded, and then executed when the
-command buffer is submitted to the queue. Just like a real hardware command
-stream these commands can update state or trigger some other action we need
-performed.
-
-## Layer commands
-
-**MARKER_BEGIN(const std::string\*):**
-
-* Push a new marker into the queue debug label stack.
-
-**MARKER_END():**
-
-* Pop the latest marker from the queue debug label stack.
-
-**RENDER_PASS(const json\*):**
-
-* Set the current workload to a new render pass with the passed metadata.
-
-**RENDERPASS_RESUME(const json\*):**
-
-* Update the current workload, which must be a render pass, with extra
-  draw count metadata.
-
-**COMPUTE_DISPATCH_BEGIN(const json\*):**
-
-* Set the current workload to a new compute dispatch with the passed metadata.
-
-**TRACE_RAYS_BEGIN(const json\*):**
-
-* Set the current workload to a new trace rays with the passed metadata.
-
-**BUFFER_TRANSFER_BEGIN(const json\*):**
-
-* Set the current workload to a new a buffer transfer.
-
-**IMAGE_TRANSFER(const json\*):**
-
-* Set the current workload to a new image transfer.
-
-**WORKLOAD_END():**
-
-* Mark the current workload as complete, and emit a built metadata entry for
-  it.
-
-## Layer command recording
-
-Command buffer recording is effectively building two separate state
-structures for the layer.
-
-The first is a per-workload or per-restart JSON structure that contains the
-metadata we need for that workload. For partial workloads - e.g. a dynamic
-render pass begin that has been suspended - this metadata will be partial and
-rely on later restart metadata to complete it.
-
-The second is the layer "command stream" that contains the bytecode commands
-to execute when the command buffer is submitted to the queue. These commands
-are very simple, consisting of a list of command+pointer pairs, where the
-pointer value may be unused by some commands. Commands are stored in a
-std::vector, but we reserve enough memory to store 256 commands without
-reallocating which is enough for the majority of command buffers we see in
-real applications.
-
-The command stream for a secondary command buffer is inlined into the primary
-command buffer during recording.
-
-###  Recording sequence
-
-When application records a new workload:
-
-  * A `tagID` is assigned and recorded using `vkCmdMarkerBegin()` label in the
-    Vulkan command stream _before_ the new workload is written to the command
-    stream.
-  * If workload is using indirect parameters, then a transfer job to copy
-    indirect parameters into a layer-owned buffer is emitted _before_ the new
-    workload. No additional barrier is needed because application barriers must
-    have already ensured that the indirect parameter buffer is valid.
-  * A proxy workload object is created in the layer storing the assigned
-    `tagID` and all settings that are known at command recording time.
-  * A layer command stream command is recorded into the submit time stream
-    indicating `<TYPE>_BEGIN` with a pointer to the proxy workload. Note that
-    this JSON may be modified later for some workloads.
-  * If workload is using indirect parameters, a layer command stream command is
-    recorded into the resolve time stream, which will handle cleanup and
-    emitting the `submitID.tagID` annex message for the indirect data.
-  * If the command buffer is not ONE_TIME_SUBMIT, if any workload is using
-    indirect parameters, or contains incomplete render passes, the command
-    buffer is marked as needing a `submitID` wrapper.
-  * The user command is written to the Vulkan command stream.
-
-When application resumes a render pass workload:
-
-  * A `tagID` of zero is assigned, but not emitted to the command stream.
-  * A layer command stream command is recorded into the submit time stream
-    indicating `<TYPE>_RESUME` with a pointer to the proxy workload. Note that
-    this JSON may be modified later for some workloads.
-  * The user command is written to the Vulkan command stream.
-
-When application ends a workload:
-
-  * For render pass workloads, any statistics accumulated since the last begin
-    are rolled up into the proxy workload object.
-  * For render pass workloads, the user command is written to the Vulkan
-    command stream.
-  * The command steam label scope is closed using `vkCmdMarkerEnd()`.
-
-## Layer command playback
-
-The persistent state for command playback belongs to the queues the command
-buffers are submitted to. The command stream bytecode is run by a bytecode
-interpreter associated with the state of the current queue, giving the
-interpreter access to the current `submitID` and queue debug label stack.
-
-###  Submitting sequence
-
-For each command buffer in the user submit:
-
-* If the command buffer needs a `submitID` we allocate a unique `submitID` and
-  create two new command buffers that will wrap the user command buffer with an
-  additional stack layer of debug label containing the `s<ID>` string. We will
-  inject a layer command stream async command to handle freeing the command
-  buffers.
-* The tool will process the submit-time layer commands, executing each command
-  to either update some state or emit
-* If there are any async layer commands, either recorded in the command buffer
-  or from the wrapping command buffers, we will need to add an async handler.
-  This cannot safely use the user fence or depend on any user object lifetime,
-  so we will add a layer-owned timeline semaphore to the submit which we can
-  wait on to determine when it is safe trigger the async work.
-
-## Future: Async commands
-
-One of our longer-term goals is to be able to capture indirect parameters,
-which will be available after-the-fact once the GPU has processed the command
-buffer. Once we have the data we can emit an annex message containing
-parameters for each indirect `submitID.tagID` pair in the command buffer.
-
-We need to be able to emit the metadata after the commands are complete,
-and correctly synchronize use of the indirect capture staging buffer
-if command buffers are reissued. My current thinking is that we would
-implement this using additional layer commands that are processed on submit,
-including support for async commands that run in a separate thread and
-wait on the command buffer completion fence before running.
-
-- - -
-
-_Copyright © 2024, Arm Limited and contributors._
diff --git a/layer_gpu_timeline/source/device.cpp b/layer_gpu_timeline/source/device.cpp
index 271aab3..1e140ff 100644
--- a/layer_gpu_timeline/source/device.cpp
+++ b/layer_gpu_timeline/source/device.cpp
@@ -35,6 +35,9 @@
 #include "device.hpp"
 #include "instance.hpp"
 
+/**
+ * @brief The dispatch lookup for all of the created Vulkan devices.
+ */
 static std::unordered_map<void*, std::unique_ptr<Device>> g_devices;
 
 /* See header for documentation. */
@@ -54,8 +57,8 @@ void Device::store(
 
 /* See header for documentation. */
 Device* Device::retrieve(
-    VkDevice handle)
-{
+    VkDevice handle
+) {
     void* key = getDispatchKey(handle);
     assert(isInMap(key, g_devices));
     return g_devices.at(key).get();
@@ -63,8 +66,8 @@ Device* Device::retrieve(
 
 /* See header for documentation. */
 Device* Device::retrieve(
-    VkQueue handle)
-{
+    VkQueue handle
+) {
     void* key = getDispatchKey(handle);
     assert(isInMap(key, g_devices));
     return g_devices.at(key).get();
@@ -72,8 +75,8 @@ Device* Device::retrieve(
 
 /* See header for documentation. */
 Device* Device::retrieve(
-    VkCommandBuffer handle)
-{
+    VkCommandBuffer handle
+) {
     void* key = getDispatchKey(handle);
     assert(isInMap(key, g_devices));
     return g_devices.at(key).get();
@@ -92,7 +95,8 @@ Device::Device(
     VkPhysicalDevice _physicalDevice,
     VkDevice _device,
     PFN_vkGetDeviceProcAddr nlayerGetProcAddress
-):  instance(_instance),
+):
+    instance(_instance),
     physicalDevice(_physicalDevice),
     device(_device)
 {
@@ -105,9 +109,3 @@ Device::Device(
         commsWrapper = std::make_unique<TimelineComms>(*commsModule);
     }
 }
-
-/* See header for documentation. */
-Device::~Device()
-{
-
-}
diff --git a/layer_gpu_timeline/source/device.hpp b/layer_gpu_timeline/source/device.hpp
index 2d5460d..b56de83 100644
--- a/layer_gpu_timeline/source/device.hpp
+++ b/layer_gpu_timeline/source/device.hpp
@@ -24,8 +24,7 @@
  */
 
 /**
- * @file
- * Declares the root class for layer management of VkDevice objects.
+ * @file Declares the root class for layer management of VkDevice objects.
  *
  * Role summary
  * ============
@@ -41,10 +40,9 @@
  * Key properties
  * ==============
  *
- * Unlike EGL contexts, Vulkan devices are designed to be used concurrently by
- * multiple application threads. An application can have multiple concurrent
- * devices (although this is less common than with OpenGL ES applications), and
- * use each device from multiple threads.
+ * Vulkan devices are designed to be used concurrently by multiple application
+ * threads. An application can have multiple concurrent devices, and use each
+ * device from multiple threads.
  *
  * Access to the layer driver structures must therefore be kept thread-safe.
  * For sake of simplicity, we generally implement this by:
@@ -83,6 +81,8 @@ class Device
      * @brief Fetch a device from the global store of dispatchable devices.
      *
      * @param handle   The dispatchable device handle to use as an indirect lookup.
+     *
+     * @return The layer device context.
      */
     static Device* retrieve(
         VkDevice handle);
@@ -91,6 +91,8 @@ class Device
      * @brief Fetch a device from the global store of dispatchable devices.
      *
      * @param handle   The dispatchable queue handle to use as an indirect lookup.
+     *
+     * @return The layer device context.
      */
     static Device* retrieve(
         VkQueue handle);
@@ -99,6 +101,8 @@ class Device
      * @brief Fetch a device from the global store of dispatchable devices.
      *
      * @param handle   The dispatchable command buffer handle to use as an indirect lookup.
+     *
+     * @return The layer device context.
      */
     static Device* retrieve(
         VkCommandBuffer handle);
@@ -117,7 +121,7 @@ class Device
      * @param instance               The layer instance object this device is created with.
      * @param physicalDevice         The physical device this logical device is for.
      * @param device                 The device handle this device is created with.
-     * @param nlayerGetProcAddress   The vkGetProcAddress function in the driver/next layer down.
+     * @param nlayerGetProcAddress   The vkGetDeviceProcAddr function for the driver.
      */
     Device(
         Instance* instance,
@@ -128,21 +132,27 @@ class Device
     /**
      * @brief Destroy this layer device object.
      */
-    ~Device();
+    ~Device() = default;
 
     /**
-     * @brief Callback for sending messages
+     * @brief Callback for sending messages on frame boundary.
+     *
+     * @param message   The message to send.
      */
-    void onFrame(const std::string& message)
-    {
+    void onFrame(
+        const std::string& message
+    ) {
         commsWrapper->txMessage(message);
     }
 
     /**
-     * @brief Callback for sending messages
+     * @brief Callback for sending messages on workload submit to a queue.
+     *
+     * @param message   The message to send.
      */
-    void onWorkloadSubmit(const std::string& message)
-    {
+    void onWorkloadSubmit(
+        const std::string& message
+    ) {
         commsWrapper->txMessage(message);
     }
 
@@ -177,17 +187,17 @@ class Device
     const VkDevice device;
 
     /**
-     * @brief State tracking for this device;
+     * @brief State tracker for this device.
      */
     Tracker::Device stateTracker;
 
     /**
-     * @brief Communications module.
+     * @brief Shared network communications module.
      */
     static std::unique_ptr<Comms::CommsModule> commsModule;
 
     /**
-     * @brief Communications module message encoder.
+     * @brief Shared network communications message encoder.
      */
     static std::unique_ptr<TimelineComms> commsWrapper;
 };
diff --git a/layer_gpu_timeline/source/device_utils.hpp b/layer_gpu_timeline/source/device_utils.hpp
new file mode 100644
index 0000000..eddf193
--- /dev/null
+++ b/layer_gpu_timeline/source/device_utils.hpp
@@ -0,0 +1,56 @@
+/*
+ * SPDX-License-Identifier: MIT
+ * ----------------------------------------------------------------------------
+ * Copyright (c) 2024 Arm Limited
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ * ----------------------------------------------------------------------------
+ */
+
+#pragma once
+
+#include <vulkan/vulkan.h>
+
+#include "framework/utils.hpp"
+
+#include "device.hpp"
+
+/**
+ * @brief Emit a start tag by beginning a driver debug utils label (not ended here).
+ *
+ * @param layer           The layer context for the device.
+ * @param commandBuffer   The command buffer we are recording.
+ * @param tagID           The tagID to emit into the label, formatted as t<tagID>.
+ */
+[[maybe_unused]] static void emitStartTag(
+    Device* layer,
+    VkCommandBuffer commandBuffer,
+    uint64_t tagID
+) {
+    // Encode the unique workload tag as the label name emitted into the command stream
+    std::string tagLabel = formatString("t%" PRIu64, tagID);
+    VkDebugUtilsLabelEXT tagInfo {
+        .sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT,
+        .pNext = nullptr,
+        .pLabelName = tagLabel.c_str(),
+        .color = { 0.0f, 0.0f, 0.0f, 0.0f }
+    };
+
+    layer->driver.vkCmdBeginDebugUtilsLabelEXT(commandBuffer, &tagInfo);
+}
diff --git a/layer_gpu_timeline/source/instance.cpp b/layer_gpu_timeline/source/instance.cpp
index 6ac278e..0b62857 100644
--- a/layer_gpu_timeline/source/instance.cpp
+++ b/layer_gpu_timeline/source/instance.cpp
@@ -29,6 +29,9 @@
 
 #include "instance.hpp"
 
+/**
+ * @brief The dispatch lookup for all of the created Vulkan instances.
+ */
 static std::unordered_map<void*, std::unique_ptr<Instance>> g_instances;
 
 /* See header for documentation. */
@@ -42,8 +45,8 @@ void Instance::store(
 
 /* See header for documentation. */
 Instance* Instance::retrieve(
-    VkInstance handle)
-{
+    VkInstance handle
+) {
     void* key = getDispatchKey(handle);
     assert(isInMap(key, g_instances));
     return g_instances.at(key).get();
@@ -51,8 +54,8 @@ Instance* Instance::retrieve(
 
 /* See header for documentation. */
 Instance* Instance::retrieve(
-    VkPhysicalDevice handle)
-{
+    VkPhysicalDevice handle
+) {
     void* key = getDispatchKey(handle);
     assert(isInMap(key, g_instances));
     return g_instances.at(key).get();
@@ -68,7 +71,8 @@ void Instance::destroy(
 /* See header for documentation. */
 Instance::Instance(
     VkInstance _instance,
-    PFN_vkGetInstanceProcAddr _nlayerGetProcAddress) :
+    PFN_vkGetInstanceProcAddr _nlayerGetProcAddress
+) :
     instance(_instance),
     nlayerGetProcAddress(_nlayerGetProcAddress)
 {
diff --git a/layer_gpu_timeline/source/instance.hpp b/layer_gpu_timeline/source/instance.hpp
index cfda54e..fc6af6b 100644
--- a/layer_gpu_timeline/source/instance.hpp
+++ b/layer_gpu_timeline/source/instance.hpp
@@ -42,9 +42,8 @@
  * Key properties
  * ==============
  *
- * Unlike EGL contexts, Vulkan instances are designed to be used concurrently
- * by multiple application threads. An application can have multiple concurrent
- * instances (although this is less common than with OpenGL ES applications),
+ * Vulkan instances are designed to be used concurrently by multiple
+ * application threads. An application can have multiple concurrent instances,
  * and use each instance from multiple threads.
  *
  * Access to the layer driver structures must therefore be kept thread-safe.
@@ -65,10 +64,6 @@
 
 /**
  * @brief This class implements the layer state tracker for a single instance.
- *
- * These objects are relatively light-weight, as they are rarely used once a VkDevice has been
- * created, but we need to track the chain-of-ownership as the instance is the root object that
- * the application creates when initializing a rendering context.
  */
 class Instance
 {
@@ -87,6 +82,8 @@ class Instance
      * @brief Fetch an instance from the global store of dispatchable instances.
      *
      * @param handle   The dispatchable instance handle to use as an indirect lookup.
+     *
+     * @return The layer instance context.
      */
     static Instance* retrieve(
         VkInstance handle);
@@ -95,6 +92,8 @@ class Instance
      * @brief Fetch an instance from the global store of dispatchable instances.
      *
      * @param handle   The dispatchable physical device handle to use as an indirect lookup.
+     *
+     * @return The layer instance context.
      */
     static Instance* retrieve(
         VkPhysicalDevice handle);
diff --git a/layer_gpu_timeline/source/layer_device_functions.hpp b/layer_gpu_timeline/source/layer_device_functions.hpp
index 3806398..8c2f8b5 100644
--- a/layer_gpu_timeline/source/layer_device_functions.hpp
+++ b/layer_gpu_timeline/source/layer_device_functions.hpp
@@ -23,6 +23,8 @@
  * ----------------------------------------------------------------------------
  */
 
+#pragma once
+
 #include <vulkan/vulkan.h>
 
 #include "framework/utils.hpp"
diff --git a/layer_gpu_timeline/source/layer_device_functions_command_buffer.cpp b/layer_gpu_timeline/source/layer_device_functions_command_buffer.cpp
index f0cc338..ef8e920 100644
--- a/layer_gpu_timeline/source/layer_device_functions_command_buffer.cpp
+++ b/layer_gpu_timeline/source/layer_device_functions_command_buffer.cpp
@@ -23,9 +23,7 @@
  * ----------------------------------------------------------------------------
  */
 
-#include <memory>
 #include <mutex>
-#include <thread>
 
 #include "device.hpp"
 #include "layer_device_functions.hpp"
diff --git a/layer_gpu_timeline/source/layer_device_functions_command_pool.cpp b/layer_gpu_timeline/source/layer_device_functions_command_pool.cpp
index 31bc1b4..a640a90 100644
--- a/layer_gpu_timeline/source/layer_device_functions_command_pool.cpp
+++ b/layer_gpu_timeline/source/layer_device_functions_command_pool.cpp
@@ -23,9 +23,7 @@
  * ----------------------------------------------------------------------------
  */
 
-#include <memory>
 #include <mutex>
-#include <thread>
 
 #include "device.hpp"
 #include "layer_device_functions.hpp"
diff --git a/layer_gpu_timeline/source/layer_device_functions_debug.cpp b/layer_gpu_timeline/source/layer_device_functions_debug.cpp
index 4c1e1d9..1905193 100644
--- a/layer_gpu_timeline/source/layer_device_functions_debug.cpp
+++ b/layer_gpu_timeline/source/layer_device_functions_debug.cpp
@@ -23,9 +23,7 @@
  * ----------------------------------------------------------------------------
  */
 
-#include <memory>
 #include <mutex>
-#include <thread>
 
 #include "device.hpp"
 #include "layer_device_functions.hpp"
diff --git a/layer_gpu_timeline/source/layer_device_functions_dispatch.cpp b/layer_gpu_timeline/source/layer_device_functions_dispatch.cpp
index 7555501..de5ee10 100644
--- a/layer_gpu_timeline/source/layer_device_functions_dispatch.cpp
+++ b/layer_gpu_timeline/source/layer_device_functions_dispatch.cpp
@@ -23,15 +23,25 @@
  * ----------------------------------------------------------------------------
  */
 
-#include <memory>
 #include <mutex>
-#include <thread>
 
 #include "device.hpp"
+#include "device_utils.hpp"
 #include "layer_device_functions.hpp"
 
 extern std::mutex g_vulkanLock;
 
+/**
+ * @brief Register a compute dispatch with the tracker.
+ *
+ * @param layer           The layer context for the device.
+ * @param commandBuffer   The command buffer we are recording.
+ * @param groupX          The X size of the dispatch in groups.
+ * @param groupY          The Y size of the dispatch in groups.
+ * @param groupZ          The Z size of the dispatch in groups.
+ *
+ * @return The assigned tagID for the workload.
+ */
 static uint64_t registerDispatch(
     Device* layer,
     VkCommandBuffer commandBuffer,
@@ -44,23 +54,6 @@ static uint64_t registerDispatch(
     return cb.dispatch(groupX, groupY, groupZ);
 }
 
-static void emitStartTag(
-    Device* layer,
-    VkCommandBuffer commandBuffer,
-    uint64_t tagID
-) {
-    // Emit the unique workload tag into the command stream
-    std::string tagLabel = formatString("t%" PRIu64, tagID);
-    VkDebugUtilsLabelEXT tagInfo {
-        .sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT,
-        .pNext = nullptr,
-        .pLabelName = tagLabel.c_str(),
-        .color = { 0.0f, 0.0f, 0.0f, 0.0f }
-    };
-
-    layer->driver.vkCmdBeginDebugUtilsLabelEXT(commandBuffer, &tagInfo);
-}
-
 /* See Vulkan API for documentation. */
 template <>
 VKAPI_ATTR void VKAPI_CALL layer_vkCmdDispatch<user_tag>(
diff --git a/layer_gpu_timeline/source/layer_device_functions_draw_call.cpp b/layer_gpu_timeline/source/layer_device_functions_draw_call.cpp
index 49cf669..42350d0 100644
--- a/layer_gpu_timeline/source/layer_device_functions_draw_call.cpp
+++ b/layer_gpu_timeline/source/layer_device_functions_draw_call.cpp
@@ -32,6 +32,12 @@
 
 extern std::mutex g_vulkanLock;
 
+/**
+ * @brief Register a draw call with the tracker.
+ *
+ * @param layer           The layer context for the device.
+ * @param commandBuffer   The command buffer we are recording.
+ */
 static void registerDrawCall(
     Device* layer,
     VkCommandBuffer commandBuffer
diff --git a/layer_gpu_timeline/source/layer_device_functions_queue.cpp b/layer_gpu_timeline/source/layer_device_functions_queue.cpp
index 30ca611..a5c92e2 100644
--- a/layer_gpu_timeline/source/layer_device_functions_queue.cpp
+++ b/layer_gpu_timeline/source/layer_device_functions_queue.cpp
@@ -23,12 +23,10 @@
  * ----------------------------------------------------------------------------
  */
 
-#include <memory>
 #include <mutex>
-#include <thread>
+#include <nlohmann/json.hpp>
 
 #include "utils/misc.hpp"
-#include "nlohmann/json.hpp"
 
 #include "device.hpp"
 #include "layer_device_functions.hpp"
diff --git a/layer_gpu_timeline/source/layer_device_functions_render_pass.cpp b/layer_gpu_timeline/source/layer_device_functions_render_pass.cpp
index b59f305..5d16880 100644
--- a/layer_gpu_timeline/source/layer_device_functions_render_pass.cpp
+++ b/layer_gpu_timeline/source/layer_device_functions_render_pass.cpp
@@ -23,16 +23,15 @@
  * ----------------------------------------------------------------------------
  */
 
-#include <memory>
 #include <mutex>
-#include <thread>
-
-#include "device.hpp"
-#include "layer_device_functions.hpp"
 
 #include "framework/utils.hpp"
 #include "trackers/render_pass.hpp"
 
+#include "device.hpp"
+#include "device_utils.hpp"
+#include "layer_device_functions.hpp"
+
 extern std::mutex g_vulkanLock;
 
 /* See Vulkan API for documentation. */
@@ -166,18 +165,9 @@ VKAPI_ATTR void VKAPI_CALL layer_vkCmdBeginRenderPass<user_tag>(
     // Notify the command buffer we are starting a new render pass
     uint64_t tagID = cb.renderPassBegin(rp, width, height);
 
-    // Emit the unique workload tag into the command stream
-    std::string tagLabel = formatString("t%" PRIu64, tagID);
-    VkDebugUtilsLabelEXT tagInfo {
-        .sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT,
-        .pNext = nullptr,
-        .pLabelName = tagLabel.c_str(),
-        .color = { 0.0f, 0.0f, 0.0f, 0.0f }
-    };
-
     // Release the lock to call into the driver
     lock.unlock();
-    layer->driver.vkCmdBeginDebugUtilsLabelEXT(commandBuffer, &tagInfo);
+    emitStartTag(layer, commandBuffer, tagID);
     layer->driver.vkCmdBeginRenderPass(commandBuffer, pRenderPassBegin, contents);
 }
 
@@ -204,18 +194,9 @@ VKAPI_ATTR void VKAPI_CALL layer_vkCmdBeginRenderPass2<user_tag>(
     // Notify the command buffer we are starting a new render pass
     uint64_t tagID = cb.renderPassBegin(rp, width, height);
 
-    // Emit the unique workload tag into the command stream
-    std::string tagLabel = formatString("t%" PRIu64, tagID);
-    VkDebugUtilsLabelEXT tagInfo {
-        .sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT,
-        .pNext = nullptr,
-        .pLabelName = tagLabel.c_str(),
-        .color = { 0.0f, 0.0f, 0.0f, 0.0f }
-    };
-
     // Release the lock to call into the driver
     lock.unlock();
-    layer->driver.vkCmdBeginDebugUtilsLabelEXT(commandBuffer, &tagInfo);
+    emitStartTag(layer, commandBuffer, tagID);
     layer->driver.vkCmdBeginRenderPass2(commandBuffer, pRenderPassBegin, pSubpassBeginInfo);
 }
 
@@ -242,18 +223,9 @@ VKAPI_ATTR void VKAPI_CALL layer_vkCmdBeginRenderPass2KHR<user_tag>(
     // Notify the command buffer we are starting a new render pass
     uint64_t tagID = cb.renderPassBegin(rp, width, height);
 
-    // Emit the unique workload tag into the command stream
-    std::string tagLabel = formatString("t%" PRIu64, tagID);
-    VkDebugUtilsLabelEXT tagInfo {
-        .sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT,
-        .pNext = nullptr,
-        .pLabelName = tagLabel.c_str(),
-        .color = { 0.0f, 0.0f, 0.0f, 0.0f }
-    };
-
     // Release the lock to call into the driver
     lock.unlock();
-    layer->driver.vkCmdBeginDebugUtilsLabelEXT(commandBuffer, &tagInfo);
+    emitStartTag(layer, commandBuffer, tagID);
     layer->driver.vkCmdBeginRenderPass2KHR(commandBuffer, pRenderPassBegin, pSubpassBeginInfo);
 }
 
@@ -286,22 +258,11 @@ VKAPI_ATTR void VKAPI_CALL layer_vkCmdBeginRendering<user_tag>(
 
     // Release the lock to call into the driver
     lock.unlock();
-
     // Emit the label only for new render passes
     if (!resuming)
     {
-        // Emit the unique workload tag into the command stream
-        std::string tagLabel = formatString("t%" PRIu64, tagID);
-        VkDebugUtilsLabelEXT tagInfo {
-            .sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT,
-            .pNext = nullptr,
-            .pLabelName = tagLabel.c_str(),
-            .color = { 0.0f, 0.0f, 0.0f, 0.0f }
-        };
-
-        layer->driver.vkCmdBeginDebugUtilsLabelEXT(commandBuffer, &tagInfo);
+        emitStartTag(layer, commandBuffer, tagID);
     }
-
     layer->driver.vkCmdBeginRendering(commandBuffer, pRenderingInfo);
 }
 
@@ -334,22 +295,11 @@ VKAPI_ATTR void VKAPI_CALL layer_vkCmdBeginRenderingKHR<user_tag>(
 
     // Release the lock to call into the driver
     lock.unlock();
-
     // Emit the label only for new render passes
     if (!resuming)
     {
-        // Emit the unique workload tag into the command stream
-        std::string tagLabel = formatString("t%" PRIu64, tagID);
-        VkDebugUtilsLabelEXT tagInfo {
-            .sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT,
-            .pNext = nullptr,
-            .pLabelName = tagLabel.c_str(),
-            .color = { 0.0f, 0.0f, 0.0f, 0.0f }
-        };
-
-        layer->driver.vkCmdBeginDebugUtilsLabelEXT(commandBuffer, &tagInfo);
+        emitStartTag(layer, commandBuffer, tagID);
     }
-
     layer->driver.vkCmdBeginRenderingKHR(commandBuffer, pRenderingInfo);
 }
 
diff --git a/layer_gpu_timeline/source/layer_device_functions_trace_rays.cpp b/layer_gpu_timeline/source/layer_device_functions_trace_rays.cpp
index 2d99a3b..d453a29 100644
--- a/layer_gpu_timeline/source/layer_device_functions_trace_rays.cpp
+++ b/layer_gpu_timeline/source/layer_device_functions_trace_rays.cpp
@@ -28,10 +28,22 @@
 #include <thread>
 
 #include "device.hpp"
+#include "device_utils.hpp"
 #include "layer_device_functions.hpp"
 
 extern std::mutex g_vulkanLock;
 
+/**
+ * @brief Register a trace rays dispatch with the tracker.
+ *
+ * @param layer           The layer context for the device.
+ * @param commandBuffer   The command buffer we are recording.
+ * @param itemsX          The X size of the dispatch in work items.
+ * @param itemsY          The Y size of the dispatch in work items.
+ * @param itemsZ          The Z size of the dispatch in work items.
+ *
+ * @return The assigned tagID for the workload.
+ */
 static uint64_t registerTraceRays(
     Device* layer,
     VkCommandBuffer commandBuffer,
@@ -44,23 +56,6 @@ static uint64_t registerTraceRays(
     return cb.traceRays(itemsX, itemsY, itemsZ);
 }
 
-static void emitStartTag(
-    Device* layer,
-    VkCommandBuffer commandBuffer,
-    uint64_t tagID
-) {
-    // Emit the unique workload tag into the command stream
-    std::string tagLabel = formatString("t%" PRIu64, tagID);
-    VkDebugUtilsLabelEXT tagInfo {
-        .sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT,
-        .pNext = nullptr,
-        .pLabelName = tagLabel.c_str(),
-        .color = { 0.0f, 0.0f, 0.0f, 0.0f }
-    };
-
-    layer->driver.vkCmdBeginDebugUtilsLabelEXT(commandBuffer, &tagInfo);
-}
-
 /* See Vulkan API for documentation. */
 template <>
 VKAPI_ATTR void VKAPI_CALL layer_vkCmdTraceRaysIndirect2KHR<user_tag>(
diff --git a/layer_gpu_timeline/source/layer_device_functions_transfer.cpp b/layer_gpu_timeline/source/layer_device_functions_transfer.cpp
index 066c23f..3aca1a7 100644
--- a/layer_gpu_timeline/source/layer_device_functions_transfer.cpp
+++ b/layer_gpu_timeline/source/layer_device_functions_transfer.cpp
@@ -28,10 +28,21 @@
 #include <thread>
 
 #include "device.hpp"
+#include "device_utils.hpp"
 #include "layer_device_functions.hpp"
 
 extern std::mutex g_vulkanLock;
 
+/**
+ * @brief Register a transfer to a buffer with the tracker.
+ *
+ * @param layer           The layer context for the device.
+ * @param commandBuffer   The command buffer we are recording.
+ * @param transferType    The type of transfer being performed.
+ * @param byteCount       The number of bytes transferred.
+ *
+ * @return The assigned tagID for the workload.
+ */
 static uint64_t registerBufferTransfer(
     Device* layer,
     VkCommandBuffer commandBuffer,
@@ -43,6 +54,16 @@ static uint64_t registerBufferTransfer(
     return cb.bufferTransfer(transferType, byteCount);
 }
 
+/**
+ * @brief Register a transfer to an image with the tracker.
+ *
+ * @param layer           The layer context for the device.
+ * @param commandBuffer   The command buffer we are recording.
+ * @param transferType    The type of transfer being performed.
+ * @param pixelCount      The number of pixels transferred.
+ *
+ * @return The assigned tagID for the workload.
+ */
 static uint64_t registerImageTransfer(
     Device* layer,
     VkCommandBuffer commandBuffer,
@@ -54,23 +75,6 @@ static uint64_t registerImageTransfer(
     return cb.imageTransfer(transferType, pixelCount);
 }
 
-static void emitStartTag(
-    Device* layer,
-    VkCommandBuffer commandBuffer,
-    uint64_t tagID
-) {
-    // Emit the unique workload tag into the command stream
-    std::string tagLabel = formatString("t%" PRIu64, tagID);
-    VkDebugUtilsLabelEXT tagInfo {
-        .sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT,
-        .pNext = nullptr,
-        .pLabelName = tagLabel.c_str(),
-        .color = { 0.0f, 0.0f, 0.0f, 0.0f }
-    };
-
-    layer->driver.vkCmdBeginDebugUtilsLabelEXT(commandBuffer, &tagInfo);
-}
-
 // Commands for transfers
 
 /* See Vulkan API for documentation. */
diff --git a/layer_gpu_timeline/source/timeline_comms.cpp b/layer_gpu_timeline/source/timeline_comms.cpp
index fbb496a..125acef 100644
--- a/layer_gpu_timeline/source/timeline_comms.cpp
+++ b/layer_gpu_timeline/source/timeline_comms.cpp
@@ -27,16 +27,19 @@
 
 #include "timeline_comms.hpp"
 
+/* See header for documentation. */
 TimelineComms::TimelineComms(
-    Comms::CommsInterface& commsIf)
+    Comms::CommsInterface& _comms
+):
+    comms(_comms)
 {
-    comms = &commsIf;
-    if (comms->isConnected())
+    if (comms.isConnected())
     {
-        endpoint = comms->getEndpointID("GPUTimeline");
+        endpoint = comms.getEndpointID("GPUTimeline");
     }
 }
 
+/* See header for documentation. */
 void TimelineComms::txMessage(
     const std::string& message)
 {
@@ -47,5 +50,5 @@ void TimelineComms::txMessage(
     }
 
     auto data = std::make_unique<Comms::MessageData>(message.begin(), message.end());
-    comms->txAsync(endpoint, std::move(data));
+    comms.txAsync(endpoint, std::move(data));
 }
diff --git a/layer_gpu_timeline/source/timeline_comms.hpp b/layer_gpu_timeline/source/timeline_comms.hpp
index 435f37a..84a3c8a 100644
--- a/layer_gpu_timeline/source/timeline_comms.hpp
+++ b/layer_gpu_timeline/source/timeline_comms.hpp
@@ -23,20 +23,49 @@
  * ----------------------------------------------------------------------------
  */
 
+/**
+ * @file Declares a simple comms encoder for the timeline layer.
+ */
+
 #pragma once
 
 #include "comms/comms_interface.hpp"
 
+/**
+ * @brief A simple message encoder for the timeline comms endpoint.
+ *
+ * TODO: This is currently a very simple implementation because we are simply
+ * passing JSON strings around. This is not the most efficient way of doing
+ * this and in future this module will be used to implement binary encoders
+ * for each specific message type that needs sending.
+ */
 class TimelineComms
 {
 public:
+    /**
+     * @brief Construct a new encoder.
+     *
+     * @param comms   The common comms module used by all services.
+     */
     TimelineComms(
         Comms::CommsInterface& comms);
 
+    /**
+     * @brief Send a message to the GPU timeline endpoint service.
+     *
+     * @param message   The message to send.
+     */
     void txMessage(
         const std::string& message);
 
 private:
+    /**
+     * @brief The endpoint ID of the service, or 0 if not found.
+     */
     Comms::EndpointID endpoint { 0 };
-    Comms::CommsInterface* comms { nullptr };
+
+    /**
+     * @brief The common module for network messaging.
+     */
+    Comms::CommsInterface& comms;
 };
diff --git a/layer_gpu_timeline/source/version.hpp.in b/layer_gpu_timeline/source/version.hpp.in
index 50c30b9..5fcb9c3 100644
--- a/layer_gpu_timeline/source/version.hpp.in
+++ b/layer_gpu_timeline/source/version.hpp.in
@@ -24,9 +24,7 @@
  */
 
 /**
- * @file
- * This header implements placeholder templates that are populated by CMake
- * during configure.
+ * @file Placeholder templates that are populated by CMake during configure.
  */
 
 #pragma once
diff --git a/source_common/comms/comms_message.cpp b/source_common/comms/comms_message.cpp
index 0e3df99..03f0e9f 100644
--- a/source_common/comms/comms_message.cpp
+++ b/source_common/comms/comms_message.cpp
@@ -37,7 +37,8 @@ Message::Message(
     EndpointID _endpointID,
     MessageType _messageType,
     MessageID _messageID,
-    std::unique_ptr<MessageData> _transmitData) :
+    std::unique_ptr<MessageData> _transmitData
+) :
     endpointID(_endpointID),
     messageType(_messageType),
     messageID(_messageID),
diff --git a/source_common/comms/comms_module.cpp b/source_common/comms/comms_module.cpp
index 42c815e..335b79f 100644
--- a/source_common/comms/comms_module.cpp
+++ b/source_common/comms/comms_module.cpp
@@ -42,7 +42,7 @@
 namespace Comms
 {
 
-/** See header for documentation. */
+/* See header for documentation. */
 CommsModule::CommsModule(
     const std::string& domainAddress
 ) {
@@ -76,7 +76,7 @@ CommsModule::CommsModule(
     receiver = std::make_unique<Receiver>(*this);
 }
 
-/** See header for documentation. */
+/* See header for documentation. */
 CommsModule::CommsModule(
     const std::string& hostAddress,
     int port
@@ -109,7 +109,7 @@ CommsModule::CommsModule(
     receiver = std::make_unique<Receiver>(*this);
 }
 
-/** See header for documentation. */
+/* See header for documentation. */
 CommsModule::~CommsModule()
 {
     // Stop async worker threads before closing the socket
@@ -132,13 +132,13 @@ CommsModule::~CommsModule()
     }
 }
 
-/** See header for documentation. */
+/* See header for documentation. */
 bool CommsModule::isConnected()
 {
     return sockfd >= 0;
 }
 
-/** See header for documentation. */
+/* See header for documentation. */
 EndpointID CommsModule::getEndpointID(
     const std::string& name
 ) {
@@ -192,7 +192,7 @@ EndpointID CommsModule::getEndpointID(
     }
 }
 
-/** See header for documentation. */
+/* See header for documentation. */
 void CommsModule::txAsync(
     EndpointID endpoint,
     std::unique_ptr<MessageData> data
@@ -206,7 +206,7 @@ void CommsModule::txAsync(
     enqueueMessage(std::move(message));
 }
 
-/** See header for documentation. */
+/* See header for documentation. */
 void CommsModule::tx(
     EndpointID endpoint,
     std::unique_ptr<MessageData> data
@@ -221,7 +221,7 @@ void CommsModule::tx(
     message->wait();
 }
 
-/** See header for documentation. */
+/* See header for documentation. */
 std::unique_ptr<MessageData> CommsModule::txRx(
     EndpointID endpoint,
     std::unique_ptr<MessageData> data
@@ -238,20 +238,20 @@ std::unique_ptr<MessageData> CommsModule::txRx(
     return std::move(message->responseData);
 }
 
-/** See header for documentation. */
+/* See header for documentation. */
 MessageID CommsModule::assignMessageID()
 {
     return nextMessageID.fetch_add(1, std::memory_order_relaxed);
 }
 
-/** See header for documentation. */
+/* See header for documentation. */
 void CommsModule::enqueueMessage(
     std::shared_ptr<Message> message
 ) {
     messageQueue.put(std::move(message));
 }
 
-/** See header for documentation. */
+/* See header for documentation. */
 std::shared_ptr<Message> CommsModule::dequeueMessage()
 {
     return messageQueue.get();
diff --git a/source_common/comms/comms_receiver.cpp b/source_common/comms/comms_receiver.cpp
index 6c3e40d..d27b868 100644
--- a/source_common/comms/comms_receiver.cpp
+++ b/source_common/comms/comms_receiver.cpp
@@ -37,13 +37,15 @@
 #include "comms/comms_receiver.hpp"
 #include "comms/comms_module.hpp"
 #include "framework/utils.hpp"
+#include "utils/misc.hpp"
 
 namespace Comms
 {
-/** See header for documentation. */
+/* See header for documentation. */
 Receiver::Receiver(
     CommsModule& _parent
-) : parent(_parent)
+) :
+    parent(_parent)
 {
     int pipe_err = pipe(stopRequestPipe);
     if (pipe_err)
@@ -55,7 +57,7 @@ Receiver::Receiver(
     worker = std::thread(&Receiver::runReceiver, this);
 }
 
-/** See header for documentation. */
+/* See header for documentation. */
 Receiver::~Receiver()
 {
     // Stop the worker thread if it's not stopped already
@@ -69,7 +71,7 @@ Receiver::~Receiver()
     close(stopRequestPipe[1]);
 }
 
-/** See header for documentation. */
+/* See header for documentation. */
 void Receiver::stop()
 {
     // Mark the engine as stopping
@@ -77,13 +79,14 @@ void Receiver::stop()
 
     // Poke the pipe to wake the worker thread if it is blocked on a read
     int data = 0xdead;
-    [[maybe_unused]] int _ = write(stopRequestPipe[1], &data, sizeof(int));
+    int ret = write(stopRequestPipe[1], &data, sizeof(int));
+    UNUSED(ret);
 
     // Join on the worker thread
     worker.join();
 }
 
-/** See header for documentation. */
+/* See header for documentation. */
 void Receiver::parkMessage(
     std::shared_ptr<Message> message
 ) {
@@ -91,7 +94,7 @@ void Receiver::parkMessage(
     parkingBuffer.insert({ message->messageID, std::move(message) });
 }
 
-/** See header for documentation. */
+/* See header for documentation. */
 void Receiver::runReceiver()
 {
     while (!stopRequested)
@@ -119,7 +122,7 @@ void Receiver::runReceiver()
     }
 }
 
-/** See header for documentation. */
+/* See header for documentation. */
 void Receiver::wakeMessage(
     MessageID messageID,
     std::unique_ptr<MessageData> data
@@ -142,7 +145,7 @@ void Receiver::wakeMessage(
     message->notify();
 }
 
-/** See header for documentation. */
+/* See header for documentation. */
 bool Receiver::receiveData(
     uint8_t* data,
     size_t dataSize
diff --git a/source_common/comms/comms_transmitter.cpp b/source_common/comms/comms_transmitter.cpp
index dddd290..3088754 100644
--- a/source_common/comms/comms_transmitter.cpp
+++ b/source_common/comms/comms_transmitter.cpp
@@ -37,16 +37,17 @@
 namespace Comms
 {
 
-/** See header for documentation. */
+/* See header for documentation. */
 Transmitter::Transmitter(
     CommsModule& _parent
-) : parent(_parent)
+) :
+    parent(_parent)
 {
     // Create and start a worker thread
     worker = std::thread(&Transmitter::runTransmitter, this);
 }
 
-/** See header for documentation. */
+/* See header for documentation. */
 Transmitter::~Transmitter()
 {
     // Stop the worker thread if it's not stopped already
@@ -56,7 +57,7 @@ Transmitter::~Transmitter()
     }
 }
 
-/** See header for documentation. */
+/* See header for documentation. */
 void Transmitter::runTransmitter()
 {
     // Keep looping until we are told to stop and message queue is empty
@@ -87,7 +88,7 @@ void Transmitter::runTransmitter()
     }
 }
 
-/** See header for documentation. */
+/* See header for documentation. */
 void Transmitter::stop()
 {
     // Mark the engine as stopping
@@ -103,7 +104,7 @@ void Transmitter::stop()
     worker.join();
 }
 
-/** See header for documentation. */
+/* See header for documentation. */
 void Transmitter::sendMessage(
     const Message& message
 ) {
@@ -124,7 +125,7 @@ void Transmitter::sendMessage(
     sendData(data, dataSize);
 }
 
-/** See header for documentation. */
+/* See header for documentation. */
 void Transmitter::sendData(
     uint8_t* data,
     size_t dataSize
diff --git a/source_common/comms/test/comms_test_server.cpp b/source_common/comms/test/comms_test_server.cpp
index de64196..116cfb2 100644
--- a/source_common/comms/test/comms_test_server.cpp
+++ b/source_common/comms/test/comms_test_server.cpp
@@ -40,7 +40,7 @@
 namespace CommsTest
 {
 
-/** See header for documentation. */
+/* See header for documentation. */
 CommsTestServer::CommsTestServer(
     const std::string& domainAddress
 ) {
@@ -94,7 +94,7 @@ CommsTestServer::CommsTestServer(
     worker = std::thread(&CommsTestServer::runServer, this);
 }
 
-/** See header for documentation. */
+/* See header for documentation. */
 CommsTestServer::CommsTestServer(
     int port
 ) {
@@ -152,7 +152,7 @@ CommsTestServer::CommsTestServer(
     worker = std::thread(&CommsTestServer::runServer, this);
 }
 
-/** See header for documentation. */
+/* See header for documentation. */
 CommsTestServer::~CommsTestServer()
 {
     // Stop the worker thread if it's not stopped already
@@ -172,7 +172,7 @@ CommsTestServer::~CommsTestServer()
     close(stopRequestPipe[1]);
 }
 
-/** See header for documentation. */
+/* See header for documentation. */
 void CommsTestServer::stop()
 {
     // Mark the engine as stopping
@@ -186,7 +186,7 @@ void CommsTestServer::stop()
     worker.join();
 }
 
-/** See header for documentation. */
+/* See header for documentation. */
 void CommsTestServer::runServer()
 {
     int dataSockfd = accept(listenSockfd, NULL, NULL);
@@ -251,7 +251,7 @@ void CommsTestServer::runServer()
     close(dataSockfd);
 }
 
-/** See header for documentation. */
+/* See header for documentation. */
 bool CommsTestServer::receiveData(
     int sockfd,
     uint8_t* data,
@@ -297,7 +297,7 @@ bool CommsTestServer::receiveData(
     return true;
 }
 
-/** See header for documentation. */
+/* See header for documentation. */
 void CommsTestServer::send_data(
     int sockfd,
     uint8_t* data,
diff --git a/source_common/framework/device_functions.cpp b/source_common/framework/device_functions.cpp
index 52c2dae..805aefb 100644
--- a/source_common/framework/device_functions.cpp
+++ b/source_common/framework/device_functions.cpp
@@ -23,9 +23,7 @@
  * ----------------------------------------------------------------------------
  */
 
-#include <memory>
 #include <mutex>
-#include <thread>
 
 // Include from per-layer code
 #include "utils.hpp"
diff --git a/source_common/framework/instance_functions.cpp b/source_common/framework/instance_functions.cpp
index 3c613f3..02ecf28 100644
--- a/source_common/framework/instance_functions.cpp
+++ b/source_common/framework/instance_functions.cpp
@@ -23,9 +23,7 @@
  * ----------------------------------------------------------------------------
  */
 
-#include <memory>
 #include <mutex>
-#include <thread>
 
 // Include from per-layer code
 #include "device.hpp"
diff --git a/source_common/trackers/command_buffer.hpp b/source_common/trackers/command_buffer.hpp
index ea474c0..3962f48 100644
--- a/source_common/trackers/command_buffer.hpp
+++ b/source_common/trackers/command_buffer.hpp
@@ -43,7 +43,6 @@
 
 #pragma once
 
-#include <atomic>
 #include <memory>
 #include <string>
 #include <unordered_map>
@@ -168,6 +167,8 @@ class CommandBuffer
 
     /**
      * @brief Begin a user debug marker range.
+     *
+     * @param marker   The marker label.
      */
     void debugMarkerBegin(
         std::string marker);
@@ -191,7 +192,7 @@ class CommandBuffer
     /**
      * @brief Begin recording back into the @a Recording state.
      *
-     * @param oneTimeSubmit   Is this a one-time submit recording.
+     * @param oneTimeSubmit   Is this a one-time submit recording?
      */
     void begin(
         bool oneTimeSubmit);
@@ -254,14 +255,16 @@ class CommandPool
      *
      * \return The layer wrapper object for the command buffer.
      */
-    CommandBuffer& allocateCommandBuffer(VkCommandBuffer commandBuffer);
+    CommandBuffer& allocateCommandBuffer(
+        VkCommandBuffer commandBuffer);
 
     /**
      * @brief Free the command buffer in the pool with the given handle.
      *
      * @param commandBuffer   The Vulkan handle of the command buffer to free.
      */
-    void freeCommandBuffer(VkCommandBuffer commandBuffer);
+    void freeCommandBuffer(
+        VkCommandBuffer commandBuffer);
 
     /**
      * @brief Reset all allocated command buffers into the @a Initial state.
diff --git a/source_common/trackers/device.cpp b/source_common/trackers/device.cpp
index a1f0687..af5fee7 100644
--- a/source_common/trackers/device.cpp
+++ b/source_common/trackers/device.cpp
@@ -23,12 +23,6 @@
  * ----------------------------------------------------------------------------
  */
 
-#include <array>
-#include <iostream>
-#include <fstream>
-#include <sys/stat.h>
-#include <vector>
-
 #include "trackers/device.hpp"
 #include "utils/misc.hpp"
 
@@ -70,12 +64,10 @@ void Device::allocateCommandBuffer(
     auto& buffer = pool.allocateCommandBuffer(commandBuffer);
 
     // Insert into the tracker lookup map
-    [[maybe_unused]] auto result = commandBuffers.insert({
+    commandBuffers.insert({
         commandBuffer,
         buffer
     });
-
-    assert(result.second);
 }
 
 /* See header for documentation. */
diff --git a/source_common/trackers/layer_command_stream.cpp b/source_common/trackers/layer_command_stream.cpp
index 90c8c72..e3df7d8 100644
--- a/source_common/trackers/layer_command_stream.cpp
+++ b/source_common/trackers/layer_command_stream.cpp
@@ -24,8 +24,7 @@
  */
 
 #include <cassert>
-
-#include "nlohmann/json.hpp"
+#include <nlohmann/json.hpp>
 
 #include "trackers/layer_command_stream.hpp"
 
@@ -37,7 +36,8 @@ namespace Tracker
 std::atomic<uint64_t> LCSWorkload::nextTagID { 1 };
 
 LCSWorkload::LCSWorkload(
-    uint64_t _tagID):
+    uint64_t _tagID
+):
     tagID(_tagID)
 {
 
@@ -45,7 +45,8 @@ LCSWorkload::LCSWorkload(
 
 /* See header for details. */
 LCSMarker::LCSMarker(
-    const std::string& _label) :
+    const std::string& _label
+) :
     LCSWorkload(0),
     label(_label)
 {
@@ -59,14 +60,15 @@ LCSRenderPass::LCSRenderPass(
     uint32_t _width,
     uint32_t _height,
     bool _suspending,
-    bool _oneTimeSubmit) :
+    bool _oneTimeSubmit
+) :
     LCSWorkload(_tagID),
     width(_width),
     height(_height),
     suspending(_suspending),
     oneTimeSubmit(_oneTimeSubmit)
 {
-    // Copy these as the renderpass object may be transient.
+    // Copy these as the render pass object may be transient.
     subpassCount = renderPass.getSubpassCount();
     attachments = renderPass.getAttachments();
 }
@@ -188,7 +190,8 @@ LCSDispatch::LCSDispatch(
     uint64_t _tagID,
     int64_t _xGroups,
     int64_t _yGroups,
-    int64_t _zGroups) :
+    int64_t _zGroups
+) :
     LCSWorkload(_tagID),
     xGroups(_xGroups),
     yGroups(_yGroups),
@@ -227,7 +230,8 @@ LCSTraceRays::LCSTraceRays(
     uint64_t _tagID,
     int64_t _xItems,
     int64_t _yItems,
-    int64_t _zItems) :
+    int64_t _zItems
+) :
     LCSWorkload(_tagID),
     xItems(_xItems),
     yItems(_yItems),
@@ -265,7 +269,8 @@ std::string LCSTraceRays::getMetadata(
 LCSImageTransfer::LCSImageTransfer(
     uint64_t _tagID,
     const std::string& _transferType,
-    int64_t _pixelCount):
+    int64_t _pixelCount
+):
     LCSWorkload(_tagID),
     transferType(_transferType),
     pixelCount(_pixelCount)
@@ -301,7 +306,8 @@ std::string LCSImageTransfer::getMetadata(
 LCSBufferTransfer::LCSBufferTransfer(
     uint64_t _tagID,
     const std::string& _transferType,
-    int64_t _byteCount):
+    int64_t _byteCount
+):
     LCSWorkload(_tagID),
     transferType(_transferType),
     byteCount(_byteCount)
diff --git a/source_common/trackers/queue.hpp b/source_common/trackers/queue.hpp
index ff62b87..b57a07b 100644
--- a/source_common/trackers/queue.hpp
+++ b/source_common/trackers/queue.hpp
@@ -64,6 +64,9 @@ class Queue
 
     /**
      * @brief Execute a layer command stream.
+     *
+     * @param stream     The layer command stream to execute.
+     * @param callback   The callback to pass submitted workloads to.
      */
     void runSubmitCommandStream(
         const std::vector<LCSInstruction>& stream,
@@ -91,7 +94,7 @@ class Queue
     std::vector<std::string> debugStack;
 
     /**
-     * @brief The last non-zero renderpass tagID submitted.
+     * @brief The last non-zero render pass tagID submitted.
      */
     uint64_t lastRenderPassTagID { 0 };
 
@@ -99,8 +102,6 @@ class Queue
      * @brief The command buffer submitID allocator.
      */
     static std::atomic<uint64_t> nextSubmitID;
-
-
 };
 
 }
diff --git a/source_common/trackers/render_pass.cpp b/source_common/trackers/render_pass.cpp
index 181cf53..8a6da5e 100644
--- a/source_common/trackers/render_pass.cpp
+++ b/source_common/trackers/render_pass.cpp
@@ -35,7 +35,8 @@ RenderPassAttachment::RenderPassAttachment(
     RenderPassAttachName _name,
     VkAttachmentLoadOp _loadOp,
     VkAttachmentStoreOp _storeOp,
-    bool _resolve) :
+    bool _resolve
+) :
     name(_name),
     loadOp(_loadOp),
     storeOp(_storeOp),
@@ -44,10 +45,43 @@ RenderPassAttachment::RenderPassAttachment(
 
 }
 
+/* See header for details. */
+std::string RenderPassAttachment::getAttachmentStr() const
+{
+    switch(name)
+    {
+    case RenderPassAttachName::COLOR0:
+        return "C0";
+    case RenderPassAttachName::COLOR1:
+        return "C1";
+    case RenderPassAttachName::COLOR2:
+        return "C2";
+    case RenderPassAttachName::COLOR3:
+        return "C3";
+    case RenderPassAttachName::COLOR4:
+        return "C4";
+    case RenderPassAttachName::COLOR5:
+        return "C5";
+    case RenderPassAttachName::COLOR6:
+        return "C6";
+    case RenderPassAttachName::COLOR7:
+        return "C7";
+    case RenderPassAttachName::DEPTH:
+        return "D";
+    case RenderPassAttachName::STENCIL:
+        return "S";
+    default:
+        assert(false);
+    }
+
+    return "U";
+}
+
 /* See header for details. */
 RenderPass::RenderPass(
     VkRenderPass _handle,
-    const VkRenderPassCreateInfo& createInfo) :
+    const VkRenderPassCreateInfo& createInfo
+) :
     handle(_handle)
 {
     subpassCount = createInfo.subpassCount;
@@ -155,7 +189,8 @@ RenderPass::RenderPass(
 /* See header for details. */
 RenderPass::RenderPass(
     VkRenderPass _handle,
-    const VkRenderPassCreateInfo2& createInfo) :
+    const VkRenderPassCreateInfo2& createInfo
+) :
     handle(_handle)
 {
     subpassCount = createInfo.subpassCount;
@@ -262,7 +297,8 @@ RenderPass::RenderPass(
 
 /* See header for details. */
 RenderPass::RenderPass(
-    const VkRenderingInfo& createInfo) :
+    const VkRenderingInfo& createInfo
+) :
     handle(VK_NULL_HANDLE)
 {
     // No subpasses in dynamic rendering
diff --git a/source_common/trackers/render_pass.hpp b/source_common/trackers/render_pass.hpp
index fc5044e..92d964d 100644
--- a/source_common/trackers/render_pass.hpp
+++ b/source_common/trackers/render_pass.hpp
@@ -61,58 +61,61 @@ enum class RenderPassAttachName
 };
 
 /**
- * @brief The state tracker for a render pass.
+ * @brief The state tracker for a single render pass attachment.
  */
 class RenderPassAttachment
 {
 public:
+    /**
+     * @brief Construct a new render pass attachment tracker.
+     *
+     * @param name      The name of the attachment point.
+     * @param loadOp    The render pass loadOp for this attachment.
+     * @param storeOp   The render pass storeOp for this attachment.
+     * @param resolve   Is this a resolve attachment or the main attachment?
+     */
     RenderPassAttachment(
         RenderPassAttachName name,
         VkAttachmentLoadOp loadOp,
         VkAttachmentStoreOp storeOp,
         bool resolve);
 
-    std::string getAttachmentStr() const
-    {
-        switch(name)
-        {
-        case RenderPassAttachName::COLOR0:
-            return "C0";
-        case RenderPassAttachName::COLOR1:
-            return "C1";
-        case RenderPassAttachName::COLOR2:
-            return "C2";
-        case RenderPassAttachName::COLOR3:
-            return "C3";
-        case RenderPassAttachName::COLOR4:
-            return "C4";
-        case RenderPassAttachName::COLOR5:
-            return "C5";
-        case RenderPassAttachName::COLOR6:
-            return "C6";
-        case RenderPassAttachName::COLOR7:
-            return "C7";
-        case RenderPassAttachName::DEPTH:
-            return "D";
-        case RenderPassAttachName::STENCIL:
-            return "S";
-        default:
-            assert(false);
-        }
-
-        return "U";
-    }
+    /**
+     * @brief Get a string form of the attachment point name.
+     *
+     * @return The attachment point name.
+     */
+    std::string getAttachmentStr() const;
 
+    /**
+     * @brief Is this attachment loaded at the start of the render pass?
+     *
+     * @return @c true if loaded from memory.
+     */
     bool isLoaded() const
     {
         return loadOp == VK_ATTACHMENT_LOAD_OP_LOAD;
     }
 
+    /**
+     * @brief Is this attachment stored at the end of the render pass?
+     *
+     * @return @c true if stored to memory.
+     */
     bool isStored() const
     {
-        return storeOp ==  VK_ATTACHMENT_STORE_OP_STORE;
+        return storeOp == VK_ATTACHMENT_STORE_OP_STORE;
     }
 
+
+    /**
+     * @brief Is this attachment a resolve attachment?
+     *
+     * Note that in dynamic rendering resolve attachments are implicitly not
+     * loaded and stored. There are no explicit settings for this.
+     *
+     * @return @c true if this is a resolve attachment.
+     */
     bool isResolved() const
     {
         return resolve;
@@ -146,22 +149,48 @@ class RenderPassAttachment
 class RenderPass
 {
 public:
+    /**
+     * @brief Construct a new render pass from Vulkan 1.0-style render passes.
+     *
+     * @param handle       The driver handle of the render pass.
+     * @param createInfo   The API context creating the render pass.
+     */
     RenderPass(
         VkRenderPass handle,
         const VkRenderPassCreateInfo& createInfo);
 
+    /**
+     * @brief Construct a new render pass from Vulkan 1.2-style render passes.
+     *
+     * @param handle       The driver handle of the render pass.
+     * @param createInfo   The API context creating the render pass.
+     */
     RenderPass(
         VkRenderPass handle,
         const VkRenderPassCreateInfo2& createInfo);
 
+    /**
+     * @brief Construct a new render pass from Vulkan 1.3 dynamic rendering.
+     *
+     * @param createInfo   The API context starting the render pass.
+     */
     RenderPass(
         const VkRenderingInfo& createInfo);
 
+    /**
+     * @brief Get the number of subpasses in the render pass.
+     *
+     * @return The number of subpasses. Always returns 1 for dynamic render
+     *         passes which no longer use subpasses.
+     */
     uint32_t getSubpassCount() const
     {
         return subpassCount;
     };
 
+    /**
+     * @brief Get the attachment list for the render pass.
+     */
     const std::vector<RenderPassAttachment>& getAttachments() const
     {
         return attachments;
diff --git a/source_common/utils/misc.hpp b/source_common/utils/misc.hpp
index e38e0b7..8709e2e 100644
--- a/source_common/utils/misc.hpp
+++ b/source_common/utils/misc.hpp
@@ -130,8 +130,8 @@ bool isInMap(
 /**
  * @brief Append all values in one vector to the back of another.
  *
- * @param src   The destination vector to append to.
- * @param dst   The source vector; must not be src vector.
+ * @param dst   The destination vector to append to; must not be source vector.
+ * @param src   The source vector to append.
  */
 template<typename T>
 void vecAppend(
@@ -149,7 +149,12 @@ void vecAppend(
 /**
  * @brief Get a displayable pointer.
  *
- * On 64-bit Arm systems this strips the MTE tag in the top byte.
+ * On 64-bit Arm systems this strips the MTE tag in the top byte, which means
+ * that the pointer cannot be converted back into a usable pointer without
+ * triggering an MTE tag violation, so the returned value is for cosmetic use
+ * only.
+ *
+ * @param pointer   The pointer to display.
  *
  * @return The displayable pointer.
  */
diff --git a/source_common/utils/queue.hpp b/source_common/utils/queue.hpp
index 9702bed..4434837 100644
--- a/source_common/utils/queue.hpp
+++ b/source_common/utils/queue.hpp
@@ -36,7 +36,7 @@
 #include <condition_variable>
 
 /**
- * @brief Baseclass for a task.
+ * @brief Base class for a task.
  */
 class Task
 {
@@ -98,8 +98,9 @@ class TaskQueue
      *
      * @param task   The new task to append to the queue.
      */
-    void put(T task)
-    {
+    void put(
+        T task
+    ) {
         std::lock_guard<std::mutex> lock(store_lock);
         store.push_back(task);
         condition.notify_one();

From bc8c3c98d7dd987c9c56a086291cdb4ec7588dfa Mon Sep 17 00:00:00 2001
From: Peter Harris <peter.harris@arm.com>
Date: Thu, 12 Dec 2024 22:45:12 +0000
Subject: [PATCH 8/8] Update example to generator version

---
 layer_example/source/device.cpp     | 24 +++++++++++-------------
 layer_example/source/device.hpp     | 18 +++++++++++-------
 layer_example/source/instance.cpp   | 14 +++++++++-----
 layer_example/source/instance.hpp   | 13 ++++++-------
 layer_example/source/version.hpp.in |  4 +---
 5 files changed, 38 insertions(+), 35 deletions(-)

diff --git a/layer_example/source/device.cpp b/layer_example/source/device.cpp
index 580e339..3371cff 100644
--- a/layer_example/source/device.cpp
+++ b/layer_example/source/device.cpp
@@ -34,6 +34,9 @@
 #include "device.hpp"
 #include "instance.hpp"
 
+/**
+ * @brief The dispatch lookup for all of the created Vulkan devices.
+ */
 static std::unordered_map<void*, std::unique_ptr<Device>> g_devices;
 
 /* See header for documentation. */
@@ -47,8 +50,8 @@ void Device::store(
 
 /* See header for documentation. */
 Device* Device::retrieve(
-    VkDevice handle)
-{
+    VkDevice handle
+) {
     void* key = getDispatchKey(handle);
     assert(isInMap(key, g_devices));
     return g_devices.at(key).get();
@@ -56,8 +59,8 @@ Device* Device::retrieve(
 
 /* See header for documentation. */
 Device* Device::retrieve(
-    VkQueue handle)
-{
+    VkQueue handle
+) {
     void* key = getDispatchKey(handle);
     assert(isInMap(key, g_devices));
     return g_devices.at(key).get();
@@ -65,8 +68,8 @@ Device* Device::retrieve(
 
 /* See header for documentation. */
 Device* Device::retrieve(
-    VkCommandBuffer handle)
-{
+    VkCommandBuffer handle
+) {
     void* key = getDispatchKey(handle);
     assert(isInMap(key, g_devices));
     return g_devices.at(key).get();
@@ -85,15 +88,10 @@ Device::Device(
     VkPhysicalDevice _physicalDevice,
     VkDevice _device,
     PFN_vkGetDeviceProcAddr nlayerGetProcAddress
-):  instance(_instance),
+):
+    instance(_instance),
     physicalDevice(_physicalDevice),
     device(_device)
 {
     initDriverDeviceDispatchTable(device, nlayerGetProcAddress, driver);
 }
-
-/* See header for documentation. */
-Device::~Device()
-{
-
-}
diff --git a/layer_example/source/device.hpp b/layer_example/source/device.hpp
index d6ecad7..c0e1f0a 100644
--- a/layer_example/source/device.hpp
+++ b/layer_example/source/device.hpp
@@ -24,8 +24,7 @@
  */
 
 /**
- * @file
- * Declares the root class for layer management of VkDevice objects.
+ * @file Declares the root class for layer management of VkDevice objects.
  *
  * Role summary
  * ============
@@ -41,10 +40,9 @@
  * Key properties
  * ==============
  *
- * Unlike EGL contexts, Vulkan devices are designed to be used concurrently by
- * multiple application threads. An application can have multiple concurrent
- * devices (although this is less common than with OpenGL ES applications), and
- * use each device from multiple threads.
+ * Vulkan devices are designed to be used concurrently by multiple application
+ * threads. An application can have multiple concurrent devices, and use each
+ * device from multiple threads.
  *
  * Access to the layer driver structures must therefore be kept thread-safe.
  * For sake of simplicity, we generally implement this by:
@@ -80,6 +78,8 @@ class Device
      * @brief Fetch a device from the global store of dispatchable devices.
      *
      * @param handle   The dispatchable device handle to use as an indirect lookup.
+     *
+     * @return The layer device context.
      */
     static Device* retrieve(
         VkDevice handle);
@@ -88,6 +88,8 @@ class Device
      * @brief Fetch a device from the global store of dispatchable devices.
      *
      * @param handle   The dispatchable queue handle to use as an indirect lookup.
+     *
+     * @return The layer device context.
      */
     static Device* retrieve(
         VkQueue handle);
@@ -96,6 +98,8 @@ class Device
      * @brief Fetch a device from the global store of dispatchable devices.
      *
      * @param handle   The dispatchable command buffer handle to use as an indirect lookup.
+     *
+     * @return The layer device context.
      */
     static Device* retrieve(
         VkCommandBuffer handle);
@@ -125,7 +129,7 @@ class Device
     /**
      * @brief Destroy this layer device object.
      */
-    ~Device();
+    ~Device() = default;
 
 public:
     /**
diff --git a/layer_example/source/instance.cpp b/layer_example/source/instance.cpp
index 6ac278e..0b62857 100644
--- a/layer_example/source/instance.cpp
+++ b/layer_example/source/instance.cpp
@@ -29,6 +29,9 @@
 
 #include "instance.hpp"
 
+/**
+ * @brief The dispatch lookup for all of the created Vulkan instances.
+ */
 static std::unordered_map<void*, std::unique_ptr<Instance>> g_instances;
 
 /* See header for documentation. */
@@ -42,8 +45,8 @@ void Instance::store(
 
 /* See header for documentation. */
 Instance* Instance::retrieve(
-    VkInstance handle)
-{
+    VkInstance handle
+) {
     void* key = getDispatchKey(handle);
     assert(isInMap(key, g_instances));
     return g_instances.at(key).get();
@@ -51,8 +54,8 @@ Instance* Instance::retrieve(
 
 /* See header for documentation. */
 Instance* Instance::retrieve(
-    VkPhysicalDevice handle)
-{
+    VkPhysicalDevice handle
+) {
     void* key = getDispatchKey(handle);
     assert(isInMap(key, g_instances));
     return g_instances.at(key).get();
@@ -68,7 +71,8 @@ void Instance::destroy(
 /* See header for documentation. */
 Instance::Instance(
     VkInstance _instance,
-    PFN_vkGetInstanceProcAddr _nlayerGetProcAddress) :
+    PFN_vkGetInstanceProcAddr _nlayerGetProcAddress
+) :
     instance(_instance),
     nlayerGetProcAddress(_nlayerGetProcAddress)
 {
diff --git a/layer_example/source/instance.hpp b/layer_example/source/instance.hpp
index cfda54e..fc6af6b 100644
--- a/layer_example/source/instance.hpp
+++ b/layer_example/source/instance.hpp
@@ -42,9 +42,8 @@
  * Key properties
  * ==============
  *
- * Unlike EGL contexts, Vulkan instances are designed to be used concurrently
- * by multiple application threads. An application can have multiple concurrent
- * instances (although this is less common than with OpenGL ES applications),
+ * Vulkan instances are designed to be used concurrently by multiple
+ * application threads. An application can have multiple concurrent instances,
  * and use each instance from multiple threads.
  *
  * Access to the layer driver structures must therefore be kept thread-safe.
@@ -65,10 +64,6 @@
 
 /**
  * @brief This class implements the layer state tracker for a single instance.
- *
- * These objects are relatively light-weight, as they are rarely used once a VkDevice has been
- * created, but we need to track the chain-of-ownership as the instance is the root object that
- * the application creates when initializing a rendering context.
  */
 class Instance
 {
@@ -87,6 +82,8 @@ class Instance
      * @brief Fetch an instance from the global store of dispatchable instances.
      *
      * @param handle   The dispatchable instance handle to use as an indirect lookup.
+     *
+     * @return The layer instance context.
      */
     static Instance* retrieve(
         VkInstance handle);
@@ -95,6 +92,8 @@ class Instance
      * @brief Fetch an instance from the global store of dispatchable instances.
      *
      * @param handle   The dispatchable physical device handle to use as an indirect lookup.
+     *
+     * @return The layer instance context.
      */
     static Instance* retrieve(
         VkPhysicalDevice handle);
diff --git a/layer_example/source/version.hpp.in b/layer_example/source/version.hpp.in
index 50c30b9..5fcb9c3 100644
--- a/layer_example/source/version.hpp.in
+++ b/layer_example/source/version.hpp.in
@@ -24,9 +24,7 @@
  */
 
 /**
- * @file
- * This header implements placeholder templates that are populated by CMake
- * during configure.
+ * @file Placeholder templates that are populated by CMake during configure.
  */
 
 #pragma once