Skip to content

Commit f829962

Browse files
committed
Add preliminary gpu_timeline layer
1 parent 6b5bef7 commit f829962

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

49 files changed

+5680
-26
lines changed

.github/workflows/build_test.yaml

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,14 @@ jobs:
3030
cmake -G "Unix Makefiles" -DCMAKE_BUILD_TYPE=Release ..
3131
make -j4
3232
33+
- name: Build layer_gpu_timeline
34+
run: |
35+
export CXX=clang++
36+
mkdir layer_gpu_timeline/build_rel
37+
cd layer_gpu_timeline/build_rel
38+
cmake -G "Unix Makefiles" -DCMAKE_BUILD_TYPE=Release ..
39+
make -j4
40+
3341
- name: Build and run unit tests
3442
run: |
3543
export CXX=clang++
@@ -56,6 +64,14 @@ jobs:
5664
cmake -G "Unix Makefiles" -DCMAKE_BUILD_TYPE=Release ..
5765
make -j4
5866
67+
- name: Build layer_gpu_timeline
68+
run: |
69+
export CXX=g++
70+
mkdir layer_gpu_timeline/build_rel
71+
cd layer_gpu_timeline/build_rel
72+
cmake -G "Unix Makefiles" -DCMAKE_BUILD_TYPE=Release ..
73+
make -j4
74+
5975
build-android:
6076
name: Android
6177
runs-on: ubuntu-22.04

generator/vk_layer/source/entry.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,8 +35,8 @@
3535
#include <mutex>
3636
#include <thread>
3737

38-
#include "framework/utils.hpp"
3938
#include "framework/instance_functions_manual.hpp"
39+
#include "framework/utils.hpp"
4040

4141
std::mutex g_vulkanLock;
4242

layer_example/source/entry.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,8 +35,8 @@
3535
#include <mutex>
3636
#include <thread>
3737

38-
#include "framework/utils.hpp"
3938
#include "framework/instance_functions_manual.hpp"
39+
#include "framework/utils.hpp"
4040

4141
std::mutex g_vulkanLock;
4242

layer_example/source/layer_device_functions.hpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,8 @@
2323
* ----------------------------------------------------------------------------
2424
*/
2525

26+
#include <vulkan/vulkan.h>
27+
2628
#include "framework/utils.hpp"
2729

2830
/* See Vulkan API for documentation. */

layer_gpu_timeline/CMakeLists.txt

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
# SPDX-License-Identifier: MIT
2+
# -----------------------------------------------------------------------------
3+
# Copyright (c) 2024 Arm Limited
4+
#
5+
# Permission is hereby granted, free of charge, to any person obtaining a copy
6+
# of this software and associated documentation files (the "Software"), to
7+
# deal in the Software without restriction, including without limitation the
8+
# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
9+
# sell copies of the Software, and to permit persons to whom the Software is
10+
# furnished to do so, subject to the following conditions:
11+
#
12+
# The above copyright notice and this permission notice shall be included in
13+
# all copies or substantial portions of the Software.
14+
#
15+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20+
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21+
# SOFTWARE.
22+
# -----------------------------------------------------------------------------
23+
24+
cmake_minimum_required(VERSION 3.17)
25+
26+
set(CMAKE_CXX_STANDARD 20)
27+
28+
project(VkLayerGPUTimeline VERSION 1.0.0)
29+
30+
# Common configuration
31+
set(LGL_LOG_TAG "VkLayerGPUTimeline")
32+
include(../source_common/compiler_helper.cmake)
33+
34+
# Build steps
35+
add_subdirectory(../source_common/comms source_common/comms)
36+
add_subdirectory(../source_common/framework source_common/framework)
37+
add_subdirectory(../source_common/trackers source_common/trackers)
38+
add_subdirectory(source)

layer_gpu_timeline/README_LAYER.md

Lines changed: 164 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,164 @@
1+
# Layer: GPU Timeline
2+
3+
This layer is used with Arm GPUs for tracking submitted schedulable workloads
4+
and emitting semantic information about them. This data can be combined with
5+
the raw workload execution timing information captured using the Android
6+
Perfetto service, providing developers with a richer debug visualization.
7+
8+
## What devices?
9+
10+
The Arm GPU driver integration with the Perfetto render stages scheduler event
11+
trace has been supported at production quality since the r47p0 driver version.
12+
However, associating semantics from this layer relies on a further integration
13+
with debug labels which requires an r51p0 or later driver version.
14+
15+
## What workloads?
16+
17+
A schedulable workload is the smallest workload that the Arm GPU command stream
18+
scheduler will issue to the GPU hardware work queues. This includes the
19+
following workload types:
20+
21+
* Render passes, split into:
22+
* Vertex or Binning phase
23+
* Fragment or Main phase
24+
* Compute dispatches
25+
* Trace rays
26+
* Transfers to a buffer
27+
* Transfers to an image
28+
29+
Most workloads are dispatched using a single API call, and are trivial to
30+
manage in the layer. However, render passes are more complex and need extra
31+
handling. In particular:
32+
33+
* Render passes are issued using multiple API calls.
34+
* Useful render pass properties, such as draw count, are not known until the
35+
render pass recording has ended.
36+
* Dynamic render passes using `vkCmdBeginRendering()` and `vkCmdEndRendering()`
37+
can be suspended and resumed across command buffer boundaries. Properties
38+
such as draw count are not defined by the scope of a single command buffer.
39+
40+
## Tracking workloads
41+
42+
This layer tracks workloads encoded in command buffers, and emits semantic
43+
metadata for each workload via a communications side-channel. A host tool
44+
combines the semantic data stream with the Perfetto data stream, using debug
45+
label tags injected by the layer as a common cross-reference to link across
46+
the streams.
47+
48+
### Workload labelling
49+
50+
Command stream labelling is implemented using `vkCmdDebugMarkerBeginEXT()`
51+
and `vkCmdDebugMarkerEndEXT()`, wrapping one layer-owned `tagID` label around
52+
each semantic workload. This `tagID` can unambiguously refer to this workload
53+
encoding, and metadata that we do not expect to change per submit will be
54+
emitted using the matching `tagID` as the sole identifier.
55+
56+
_**TODO:** Dynamic `submitID` tracking is not yet implemented._
57+
58+
The `tagID` label is encoded into the recorded command buffer which means, for
59+
reusable command buffers, it is not an unambiguous identifier of a specific
60+
running workload. To allow us to disambiguate specific workload instances, the
61+
layer can optionally add an outer wrapper of `submitID` labels around each
62+
submitted command buffer. This wrapper is only generated if the submit contains
63+
any command buffers that require the generation of a per-submit annex (see the
64+
following section for when this is needed).
65+
66+
The `submitID.tagID` pair of IDs uniquely identifies a specific running
67+
workload, and can be used to attach an instance-specific metadata annex to a
68+
specific submitted workload rather than to the shared recorded command buffer.
69+
70+
### Workload metadata for split render passes
71+
72+
_**TODO:** Split render pass tracking is not yet implemented._
73+
74+
Dynamic render passes can be split across multiple Begin/End pairs, including
75+
being split across command buffer boundaries. If these splits occur within a
76+
single primary command buffer, or its secondaries, they are handled transparently
77+
by the layer and the render pass appears as a single message as if no splits occurred. If
78+
these splits occur across primary command buffer boundaries, then some
79+
additional work is required.
80+
81+
In our design a `tagID` debug marker is only started when the render pass first
82+
starts (not on resume), and stopped at the end of the render pass (not on
83+
suspend). The same `tagID` is used to refer to all parts of the render pass,
84+
no matter how many times it was suspended and resumed.
85+
86+
If a render pass splits across command buffers, we cannot precompute metrics
87+
based on `tagID` alone, even if the command buffers are one-time use. This is
88+
because we do not know what combination of submitted command buffers will be
89+
used, and so we cannot know what the render pass contains until submit time.
90+
Split render passes will emit a `submitID.tagID` metadata annex containing
91+
the parameters that can only be known at submit time.
92+
93+
### Workload metadata for compute dispatches
94+
95+
_**TODO:** Compute workgroup parsing from the SPIR-V is not yet implemented._
96+
97+
Compute workload dispatch is simple to track, but one of the metadata items we
98+
want to export is the total size of the work space (work_group_count *
99+
work_group_size).
100+
101+
The work group count is defined by the API call, but may be an indirect
102+
parameter (see indirect tracking below).
103+
104+
The work group size is defined by the program pipeline, and is defined in the
105+
SPIR-V via a literal or a build-time specialization constant. To support this
106+
use case we will need to parse the SPIR-V when the pipeline is built, if
107+
SPIR-V is available.
108+
109+
### Workload metadata for indirect calls
110+
111+
_**TODO:** Indirect parameter tracking is not yet implemented._
112+
113+
One of the valuable pieces of metadata that we want to present is the size of
114+
each workload. For render passes this is captured at API call time, but for
115+
other workloads the size can be an indirect parameter that is not known when
116+
the triggering API call is made.
117+
118+
To capture indirect parameters we insert a transfer that copies the indirect
119+
parameters into a layer-owned buffer. To ensure exclusive use of the buffer and
120+
avoid data corruption, each buffer region used is unique to a specific `tagID`.
121+
Attempting to submit the same command buffer multiple times will result in
122+
the workload being serialized to avoid racy access to the buffer. Once the
123+
buffer has been retrieved by the layer, a metadata annex containing the
124+
indirect parameters will be emitted using the `submitID.tagID` pair. This may
125+
be some time later than the original submit.
126+
127+
### Workload metadata for user-defined labels
128+
129+
The workload metadata captures user-defined labels that the application
130+
provides using `vkCmdDebugMarkerBeginEXT()` and `vkCmdDebugMarkerEndEXT()`.
131+
These are a stack-based debug mechanism where `Begin` pushes a new entry on to
132+
the stack, and `End` pops the most recent level off the stack.
133+
134+
Workloads are labelled with the stack values that existed when the workload
135+
was started. For render passes this is the value on the stack when, e.g.,
136+
`vkCmdBeginRenderPass()` was called. We do not capture any labels that exist
137+
inside the render pass.
138+
139+
The debug label stack belongs to the queue, not to the command buffer, so the
140+
value of the label stack is not known until submit time. The debug information
141+
for a specific `submitID.tagID` pair is therefore provided as an annex at
142+
submit time once the stack can be resolved.
143+
144+
## Message protocol
145+
146+
For each workload in a command buffer, or part-workload in the case of a
147+
suspended render pass, we record a JSON metadata blob containing the payload
148+
we want to send.
149+
150+
The low level protocol message contains:
151+
152+
* Message type `uint8_t`
153+
* Sequence ID `uint64_t` (optional, implied by message type)
154+
* Tag ID `uint64_t`
155+
* JSON length `uint32_t`
156+
* JSON payload `uint8_t[]`
157+
158+
Each workload will read whatever properties it can from the `tagID` metadata
159+
and will then merge in all fields from any subsequent `sequenceID.tagID`
160+
metadata that matches.
161+
162+
- - -
163+
164+
_Copyright © 2024, Arm Limited and contributors._
Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
#!/usr/bin/env bash
2+
# SPDX-License-Identifier: MIT
3+
# ----------------------------------------------------------------------------
4+
# Copyright (c) 2024 Arm Limited
5+
#
6+
# Permission is hereby granted, free of charge, to any person obtaining a copy
7+
# of this software and associated documentation files (the "Software"), to
8+
# deal in the Software without restriction, including without limitation the
9+
# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10+
# sell copies of the Software, and to permit persons to whom the Software is
11+
# furnished to do so, subject to the following conditions:
12+
#
13+
# The above copyright notice and this permission notice shall be included in
14+
# all copies or substantial portions of the Software.
15+
#
16+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21+
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
22+
# IN THE SOFTWARE.
23+
# ----------------------------------------------------------------------------
24+
25+
# ----------------------------------------------------------------------------
26+
# Configuration
27+
28+
# Exit immediately if any component command errors
29+
set -e
30+
31+
BUILD_DIR_64=build_arm64
32+
BUILD_DIR_PACK=build_package
33+
34+
# ----------------------------------------------------------------------------
35+
# Process command line options
36+
if [ "$#" -lt 1 ]; then
37+
BUILD_TYPE=Release
38+
else
39+
BUILD_TYPE=$1
40+
fi
41+
42+
# Process command line options
43+
if [ "$#" -lt 2 ]; then
44+
PACKAGE=0
45+
else
46+
PACKAGE=$2
47+
fi
48+
49+
if [ "${PACKAGE}" -gt "0" ]; then
50+
echo "Building a ${BUILD_TYPE} build with packaging"
51+
else
52+
echo "Building a ${BUILD_TYPE} build without packaging"
53+
fi
54+
55+
# ----------------------------------------------------------------------------
56+
# Build the 64-bit layer
57+
mkdir -p ${BUILD_DIR_64}
58+
pushd ${BUILD_DIR_64}
59+
60+
cmake \
61+
-DCMAKE_SYSTEM_NAME=Android \
62+
-DANDROID_PLATFORM=29 \
63+
-DANDROID_ABI=arm64-v8a \
64+
-DANDROID_TOOLCHAIN=clang \
65+
-DANDROID_STL=c++_static \
66+
-DCMAKE_BUILD_TYPE=${BUILD_TYPE} \
67+
-DCMAKE_TOOLCHAIN_FILE="${ANDROID_NDK_HOME}/build/cmake/android.toolchain.cmake" \
68+
..
69+
70+
make -j8
71+
72+
popd
73+
74+
# ----------------------------------------------------------------------------
75+
# Build the release package
76+
if [ "${PACKAGE}" -gt "0" ]; then
77+
# Setup the package directories
78+
mkdir -p ${BUILD_DIR_PACK}/bin/android/arm64
79+
80+
# Install the 64-bit layer
81+
cp ${BUILD_DIR_64}/source/*.so ${BUILD_DIR_PACK}/bin/android/arm64
82+
fi

0 commit comments

Comments
 (0)