Skip to content

Commit 00286be

Browse files
authored
ICB Execution Range (#78)
1 parent 4f7ffd2 commit 00286be

File tree

6 files changed

+154
-83
lines changed

6 files changed

+154
-83
lines changed

Sources/VimKit/Geometry.swift

Lines changed: 17 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -118,6 +118,7 @@ public class Geometry: ObservableObject, @unchecked Sendable {
118118
publish(state: .loading)
119119

120120
let device = MTLContext.device
121+
let supportsIndirectCommandBuffers = device.supportsFamily(.apple4)
121122
let cacheDir = FileManager.default.cacheDirectory
122123

123124
// 1) Build the positions (vertex) buffer
@@ -158,7 +159,11 @@ public class Geometry: ObservableObject, @unchecked Sendable {
158159

159160
// 10) Start indexing the file
160161
publish(state: .indexing)
161-
await bvh = BVH(self)
162+
163+
// Don't bother building the bvh tree if indirect command buffers are supported
164+
if !supportsIndirectCommandBuffers {
165+
await bvh = BVH(self)
166+
}
162167
incrementProgressCount()
163168

164169
publish(state: .ready)
@@ -540,9 +545,7 @@ public class Geometry: ObservableObject, @unchecked Sendable {
540545
/// Provides the offset into instanced meshes where the transparent instanced meshes begin.
541546
/// This value can be used as the buffer offset by multiplying it by `MemoryLayout<InstancedMesh>.size`.
542547
public lazy var transparentInstancedMeshesOffset: Int = {
543-
instancedMeshes.firstIndex { instancedMesh in
544-
instancedMesh.transparent == true
545-
} ?? .zero
548+
instancedMeshes.firstIndex { $0.transparent == true } ?? .zero
546549
}()
547550

548551
// MARK: Materials
@@ -670,9 +673,10 @@ extension Geometry {
670673
computeEncoder.setBytes(&indicesCount, length: MemoryLayout<Int>.size, index: 5)
671674

672675
// Set the thread group size and dispatch
673-
let gridSize = MTLSizeMake(1, 1, 1)
674-
let maxThreadsPerGroup = pipelineState.maxTotalThreadsPerThreadgroup
675-
let threadgroupSize = MTLSizeMake(maxThreadsPerGroup, 1, 1)
676+
let gridSize: MTLSize = .init(width: 1, height: 1, depth: 1)
677+
let width = pipelineState.threadExecutionWidth
678+
let height = pipelineState.maxTotalThreadsPerThreadgroup / width
679+
let threadgroupSize: MTLSize = .init(width: width, height: height, depth: 1)
676680
computeEncoder.dispatchThreadgroups(gridSize, threadsPerThreadgroup: threadgroupSize)
677681

678682
computeEncoder.endEncoding()
@@ -698,7 +702,7 @@ extension Geometry {
698702
}
699703

700704
let commandQueue = device.makeCommandQueue()
701-
var instanceCount = instances.count
705+
var instanceCount = instances.count - 1
702706

703707
guard !Task.isCancelled,
704708
let library = MTLContext.makeLibrary(),
@@ -719,13 +723,13 @@ extension Geometry {
719723
computeEncoder.setBuffer(instancesBuffer, offset: 0, index: 2)
720724
computeEncoder.setBuffer(meshesBuffer, offset: 0, index: 3)
721725
computeEncoder.setBuffer(submeshesBuffer, offset: 0, index: 4)
722-
computeEncoder.setBytes(&instanceCount, length: MemoryLayout<Int>.size, index: 5)
723726

724727
// Set the thread group size and dispatch
725-
let gridSize: MTLSize = MTLSizeMake(1, 1, 1)
726-
let maxThreadsPerGroup = pipelineState.maxTotalThreadsPerThreadgroup
727-
let threadgroupSize = MTLSizeMake(maxThreadsPerGroup, 1, 1)
728-
computeEncoder.dispatchThreadgroups(gridSize, threadsPerThreadgroup: threadgroupSize)
728+
let gridSize: MTLSize = .init(width: instanceCount, height: 1, depth: 1)
729+
let width = pipelineState.threadExecutionWidth
730+
let height = pipelineState.maxTotalThreadsPerThreadgroup / width
731+
let threadsPerThreadgroup: MTLSize = .init(width: width, height: height, depth: 1)
732+
computeEncoder.dispatchThreads(gridSize, threadsPerThreadgroup: threadsPerThreadgroup)
729733

730734
computeEncoder.endEncoding()
731735
commandBuffer.commit()

Sources/VimKit/Renderer/RenderPass+Indirect.swift

Lines changed: 98 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -4,14 +4,14 @@
44
//
55
// Created by Kevin McKee
66
//
7-
7+
import Combine
88
import MetalKit
99
import VimKitShaders
1010

1111
private let functionNameVertex = "vertexMain"
1212
private let functionNameVertexDepthOnly = "vertexDepthOnly"
1313
private let functionNameFragment = "fragmentMain"
14-
private let functionNameEncodeIndirectCommands = "encodeIndirectCommands"
14+
private let functionNameEncodeIndirectRenderCommands = "encodeIndirectRenderCommands"
1515
private let functionNameDepthPyramid = "depthPyramid"
1616
private let labelICB = "IndirectCommandBuffer"
1717
private let labelICBAlphaMask = "IndirectCommandBufferAlphaMask"
@@ -26,9 +26,9 @@ private let labelRasterizationRateMapData = "RenderRasterizationMapData"
2626
private let labelDepthPyramidGeneration = "DepthPyramidGeneration"
2727
private let labelTextureDepth = "DepthTexture"
2828
private let labelTextureDepthPyramid = "DepthPyramidTexture"
29-
private let maxCommandCount = 1024 * 64
3029
private let maxBufferBindCount = 24
31-
private let executionRangeCount = 3
30+
private let maxCommandCount = 1024 * 64
31+
private let maxExecutionRange = 1024 * 16
3232

3333
/// Provides an indirect render pass using indirect command buffers.
3434
class RenderPassIndirect: RenderPass {
@@ -44,6 +44,8 @@ class RenderPassIndirect: RenderPass {
4444
var commandBufferDepthOnlyAlphaMask: MTLIndirectCommandBuffer
4545
/// A metal buffer storing the icb execution range
4646
var executionRange: MTLBuffer
47+
/// The number of execution ranges.
48+
var executionRangeCount: Int
4749
/// The icb encoder argument buffers consisting of MTLArgumentEncoders
4850
var argumentEncoder: MTLBuffer
4951
var argumentEncoderAlphaMask: MTLBuffer
@@ -67,13 +69,17 @@ class RenderPassIndirect: RenderPass {
6769
private var depthPyramidTexture: MTLTexture?
6870

6971
/// The compute pipeline state.
72+
private var computeFunction: MTLFunction?
7073
private var computePipelineState: MTLComputePipelineState?
7174
/// The render pipeline state.
7275
private var pipelineState: MTLRenderPipelineState?
7376
private var pipelineStateDepthOnly: MTLRenderPipelineState?
7477
private var depthStencilState: MTLDepthStencilState?
7578
private var samplerState: MTLSamplerState?
7679

80+
/// Combine subscribers.
81+
var subscribers = Set<AnyCancellable>()
82+
7783
/// Initializes the render pass with the provided rendering context.
7884
/// - Parameter context: the rendering context.
7985
init?(_ context: RendererContext) {
@@ -87,7 +93,21 @@ class RenderPassIndirect: RenderPass {
8793
self.samplerState = makeSamplerState()
8894
self.depthPyramid = DepthPyramid(device, library)
8995
makeComputePipelineState(library)
96+
makeIndirectCommandBuffers()
9097
makeRasterizationMap()
98+
99+
context.vim.geometry?.$state.sink { [weak self] state in
100+
guard let self, let geometry else { return }
101+
switch state {
102+
case .ready:
103+
let gridSize = geometry.gridSize
104+
let totalCommands = gridSize.width * gridSize.height
105+
debugPrint("􀬨 Building indirect command buffers [\(totalCommands)]")
106+
makeIndirectCommandBuffers(totalCommands)
107+
case .indexing, .loading, .unknown, .error:
108+
break
109+
}
110+
}.store(in: &subscribers)
91111
}
92112

93113
/// Performs all encoding and setup options before drawing.
@@ -104,8 +124,8 @@ class RenderPassIndirect: RenderPass {
104124
guard let renderPassDescriptor = makeRenderPassDescriptor(),
105125
let renderEncoder = descriptor.commandBuffer.makeRenderCommandEncoder(descriptor: renderPassDescriptor) else { return }
106126

107-
// Draw the geometry occluder
108-
drawCulling(descriptor: descriptor, renderEncoder: renderEncoder)
127+
// Draw the geometry occluder offscreen
128+
drawDepthOffscreen(descriptor: descriptor, renderEncoder: renderEncoder)
109129

110130
// End encoding
111131
renderEncoder.endEncoding()
@@ -156,10 +176,12 @@ class RenderPassIndirect: RenderPass {
156176
computeEncoder.setBuffer(materialsBuffer, offset: 0, index: .materials)
157177
computeEncoder.setBuffer(colorsBuffer, offset: 0, index: .colors)
158178
computeEncoder.setBuffer(icb.argumentEncoder, offset: 0, index: .commandBufferContainer)
179+
computeEncoder.setBuffer(icb.executionRange, offset: 0, index: .executionRange)
159180
computeEncoder.setTexture(depthPyramidTexture, index: 0)
160181

161182
// 2) Use Resources
162183
computeEncoder.useResource(icb.commandBuffer, usage: .read)
184+
computeEncoder.useResource(icb.executionRange, usage: .write)
163185
computeEncoder.useResource(framesBuffer, usage: .read)
164186
computeEncoder.useResource(materialsBuffer, usage: .read)
165187
computeEncoder.useResource(instancesBuffer, usage: .read)
@@ -171,8 +193,9 @@ class RenderPassIndirect: RenderPass {
171193

172194
// 3) Dispatch the threads
173195
let gridSize = geometry.gridSize
174-
let threadExecutionWidth = computePipelineState.maxTotalThreadsPerThreadgroup
175-
let threadgroupSize: MTLSize = .init(width: threadExecutionWidth, height: 1, depth: 1)
196+
let width = computePipelineState.threadExecutionWidth
197+
let height = computePipelineState.maxTotalThreadsPerThreadgroup / width
198+
let threadgroupSize: MTLSize = .init(width: width, height: height, depth: 1)
176199
computeEncoder.dispatchThreads(gridSize, threadsPerThreadgroup: threadgroupSize)
177200

178201
// 4) End Encoding
@@ -193,23 +216,36 @@ class RenderPassIndirect: RenderPass {
193216
renderEncoder.setFragmentBuffer(rasterizationRateMapData, offset: 0, index: .rasterizationRateMapData)
194217
}
195218

219+
/// Optimizes the icb.
220+
/// - Parameters:
221+
/// - descriptor: the draw descriptor
222+
/// - renderEncoder: the render encoder
223+
private func optimize(descriptor: DrawDescriptor, renderEncoder: MTLRenderCommandEncoder) {
224+
225+
guard let icb, let geometry else { return }
226+
guard let blitEncoder = descriptor.commandBuffer.makeBlitCommandEncoder() else { return }
227+
let range = 0..<geometry.instancedMeshes.count
228+
blitEncoder.optimizeIndirectCommandBuffer(icb.commandBuffer, range: range)
229+
blitEncoder.endEncoding()
230+
}
231+
196232
/// Performs the indirect drawing via icb.
197233
/// - Parameters:
198234
/// - descriptor: the draw descriptor to use
199235
/// - renderEncoder: the render encoder to use
200236
private func drawIndirect(descriptor: DrawDescriptor, renderEncoder: MTLRenderCommandEncoder) {
201-
guard let geometry, let icb else { return }
202-
203-
let gridSize = geometry.gridSize
204-
// Build the range of commands to execute
205-
//let range = 0..<geometry.instancedMeshes.count
206-
let range = 0..<geometry.gridSize.width * geometry.gridSize.height
207-
208-
// Execute the commands in range
209-
renderEncoder.executeCommandsInBuffer(icb.commandBuffer, range: range)
237+
guard let icb else { return }
238+
for i in 0..<icb.executionRangeCount {
239+
let offset = MemoryLayout<MTLIndirectCommandBufferExecutionRange>.size * i
240+
renderEncoder.executeCommandsInBuffer(icb.commandBuffer, indirectBuffer: icb.executionRange, offset: offset)
241+
}
210242
}
211243

212-
private func drawCulling(descriptor: DrawDescriptor, renderEncoder: MTLRenderCommandEncoder) {
244+
/// Draws the depth pyramid offscreen.
245+
/// - Parameters:
246+
/// - descriptor: the draw descriptor to use
247+
/// - renderEncoder: the render encoder to use
248+
private func drawDepthOffscreen(descriptor: DrawDescriptor, renderEncoder: MTLRenderCommandEncoder) {
213249

214250
guard let geometry,
215251
let icb,
@@ -272,9 +308,17 @@ class RenderPassIndirect: RenderPass {
272308
}
273309

274310
// Make the compute pipeline state
275-
guard let function = library.makeFunction(name: functionNameEncodeIndirectCommands),
276-
let computePipelineState = try? device.makeComputePipelineState(function: function) else { return }
311+
guard let computeFunction = library.makeFunction(name: functionNameEncodeIndirectRenderCommands),
312+
let computePipelineState = try? device.makeComputePipelineState(function: computeFunction) else { return }
277313
self.computePipelineState = computePipelineState
314+
self.computeFunction = computeFunction
315+
}
316+
317+
/// Makes the indirect command buffer struct.
318+
/// - Parameter totalCommands: the total amount of commands the indirect command buffer supports.
319+
private func makeIndirectCommandBuffers(_ totalCommands: Int = maxCommandCount) {
320+
321+
guard let computeFunction else { return }
278322

279323
// Make the indirect command buffer descriptor
280324
let descriptor = MTLIndirectCommandBufferDescriptor()
@@ -285,11 +329,11 @@ class RenderPassIndirect: RenderPass {
285329
descriptor.maxFragmentBufferBindCount = maxBufferBindCount
286330

287331
// Make the indirect command buffers and label them
288-
guard let commandBuffer = device.makeIndirectCommandBuffer(descriptor: descriptor, maxCommandCount: maxCommandCount),
289-
let commandBufferAlphaMask = device.makeIndirectCommandBuffer(descriptor: descriptor, maxCommandCount: maxCommandCount),
290-
let commandBufferTransparent = device.makeIndirectCommandBuffer(descriptor: descriptor, maxCommandCount: maxCommandCount),
291-
let commandBufferDepthOnly = device.makeIndirectCommandBuffer(descriptor: descriptor, maxCommandCount: maxCommandCount),
292-
let commandBufferDepthOnlyAlphaMask = device.makeIndirectCommandBuffer(descriptor: descriptor, maxCommandCount: maxCommandCount) else { return }
332+
guard let commandBuffer = device.makeIndirectCommandBuffer(descriptor: descriptor, maxCommandCount: totalCommands),
333+
let commandBufferAlphaMask = device.makeIndirectCommandBuffer(descriptor: descriptor, maxCommandCount: totalCommands),
334+
let commandBufferTransparent = device.makeIndirectCommandBuffer(descriptor: descriptor, maxCommandCount: totalCommands),
335+
let commandBufferDepthOnly = device.makeIndirectCommandBuffer(descriptor: descriptor, maxCommandCount: totalCommands),
336+
let commandBufferDepthOnlyAlphaMask = device.makeIndirectCommandBuffer(descriptor: descriptor, maxCommandCount: totalCommands) else { return }
293337

294338
commandBuffer.label = labelICB
295339
commandBufferAlphaMask.label = labelICBAlphaMask
@@ -298,11 +342,12 @@ class RenderPassIndirect: RenderPass {
298342
commandBufferDepthOnlyAlphaMask.label = labelICBDepthOnlyAlphaMask
299343

300344
// Make the execution range buffer
301-
guard let executionRange = device.makeBuffer(length: MemoryLayout<MTLIndirectCommandBufferExecutionRange>.size * executionRangeCount,
302-
options: [.storageModeShared]) else { return }
345+
let executionRangeResult = makeExecutionRange(totalCommands)
346+
guard let executionRange = executionRangeResult.buffer else { return }
347+
let executionRangeCount = executionRangeResult.count
303348

304349
// Make the argument encoders
305-
let icbArgumentEncoder = function.makeArgumentEncoder(.commandBufferContainer)
350+
let icbArgumentEncoder = computeFunction.makeArgumentEncoder(.commandBufferContainer)
306351
guard let argumentEncoder = device.makeBuffer(length: icbArgumentEncoder.encodedLength),
307352
let argumentEncoderAlphaMask = device.makeBuffer(length: icbArgumentEncoder.encodedLength),
308353
let argumentEncoderTransparent = device.makeBuffer(length: icbArgumentEncoder.encodedLength) else { return }
@@ -326,12 +371,13 @@ class RenderPassIndirect: RenderPass {
326371
commandBufferDepthOnly: commandBufferDepthOnly,
327372
commandBufferDepthOnlyAlphaMask: commandBufferDepthOnlyAlphaMask,
328373
executionRange: executionRange,
374+
executionRangeCount: executionRangeCount,
329375
argumentEncoder: argumentEncoder,
330376
argumentEncoderAlphaMask: argumentEncoderAlphaMask,
331377
argumentEncoderTransparent: argumentEncoderTransparent)
332378
}
333379

334-
/// Rebuilds the depth textures
380+
/// Makes the depth textures.
335381
private func makeTextures() {
336382
guard screenSize != .zero else { return }
337383

@@ -362,6 +408,7 @@ class RenderPassIndirect: RenderPass {
362408
depthPyramidTexture?.label = labelTextureDepthPyramid
363409
}
364410

411+
/// Makes the rasterization map data.
365412
private func makeRasterizationMap() {
366413

367414
guard screenSize != .zero else { return }
@@ -384,6 +431,7 @@ class RenderPassIndirect: RenderPass {
384431
}
385432

386433
/// Builds an offscreen render pass descriptor.
434+
/// - Returns: the offscreen render pass descriptor
387435
private func makeRenderPassDescriptor() -> MTLRenderPassDescriptor? {
388436

389437
let renderPassDescriptor = MTLRenderPassDescriptor()
@@ -396,6 +444,27 @@ class RenderPassIndirect: RenderPass {
396444
renderPassDescriptor.rasterizationRateMap = rasterizationRateMap
397445
return renderPassDescriptor
398446
}
447+
448+
/// Makes the execution range buffer.
449+
/// - Parameter totalCommands: the total amount of commands the indirect command buffer supports.
450+
/// - Returns: the number of execution ranges and a new metal buffer of MTLIndirectCommandBufferExecutionRange values
451+
private func makeExecutionRange(_ totalCommands: Int) -> (count: Int, buffer: MTLBuffer?) {
452+
453+
let rangeCount = Int(ceilf(Float(totalCommands)/Float(maxExecutionRange)))
454+
var executionRanges: [MTLIndirectCommandBufferExecutionRange] = .init()
455+
456+
for i in 0..<rangeCount {
457+
let offset = i * maxExecutionRange
458+
let commandsInRange = totalCommands - offset
459+
let length = min(commandsInRange, maxExecutionRange)
460+
let range = MTLIndirectCommandBufferExecutionRange(location: UInt32(offset), length: UInt32(length))
461+
executionRanges.append(range)
462+
}
463+
464+
let length = MemoryLayout<MTLIndirectCommandBufferExecutionRange>.size * executionRanges.count
465+
let buffer = device.makeBuffer(bytes: &executionRanges, length: length, options: [.storageModeShared])
466+
return (executionRanges.count, buffer)
467+
}
399468
}
400469

401470
@MainActor

Sources/VimKit/Renderer/RenderPass+Visibility.swift

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ class RenderPassVisibility: RenderPass {
4646
var currentResults: [Int] = .init()
4747
/// Returns the subset of instanced mesh indexes that have returned true from the occlusion query.
4848
var currentVisibleResults: [Int] = .init()
49-
/// Combine Subscribers which drive rendering events
49+
/// Combine subscribers.
5050
var subscribers = Set<AnyCancellable>()
5151

5252
/// Initializes the render pass with the provided rendering context.

0 commit comments

Comments
 (0)