From c5713159e153c5ee0378fae5690a87f70d5f8ce6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20Be=C3=9Fler?= Date: Fri, 2 Jan 2026 13:12:26 +0100 Subject: [PATCH 01/23] Fixed use of temporary reference --- regen/camera/reflection-camera.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/regen/camera/reflection-camera.cpp b/regen/camera/reflection-camera.cpp index 42226955dd..cb9c06a32f 100644 --- a/regen/camera/reflection-camera.cpp +++ b/regen/camera/reflection-camera.cpp @@ -49,7 +49,8 @@ ReflectionCamera::ReflectionCamera( transform_ = modelMat.value().in; if (transform_.get() != nullptr) { transformStamp_ = transform_->stampOfReadData() - 1; - const Mat4f &M = transform_->mapClientData(BUFFER_GPU_READ).r[0]; + auto mapped = transform_->mapClientData(BUFFER_GPU_READ); + const Mat4f &M = mapped.r[0]; posWorld_ = M.mul_t31(posWorld_); norWorld_ = M.mul_t30(norWorld_); norWorld_.normalize(); From a51c189835824a617b9e679e588e50675be114fc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20Be=C3=9Fler?= Date: Fri, 2 Jan 2026 13:13:10 +0100 Subject: [PATCH 02/23] Added rotateBuffers function to staging system --- .../scene-display/scene-display-widget.cpp | 5 +- regen/memory/staged-buffer.cpp | 29 ++- regen/memory/staged-buffer.h | 4 +- regen/memory/staging-buffer.cpp | 96 +++---- regen/memory/staging-system.cpp | 236 ++++++++++-------- regen/memory/staging-system.h | 18 +- 6 files changed, 210 insertions(+), 178 deletions(-) diff --git a/applications/scene-display/scene-display-widget.cpp b/applications/scene-display/scene-display-widget.cpp index e013d686e0..1799dc4275 100644 --- a/applications/scene-display/scene-display-widget.cpp +++ b/applications/scene-display/scene-display-widget.cpp @@ -1102,9 +1102,12 @@ void SceneDisplayWidget::loadSceneGraphicsThread(const string &sceneFile) { animations_.emplace_back(timeWidgetAnimation_); loadAnim_ = ref_ptr(); lightStates_ = sceneParser.getResources()->getLights(); + + // Make sure all staging 
operations are done before resuming animations. + StagingSystem::instance().rotateBuffers(); + AnimationManager::get().setSpatialIndices(spatialIndexList_); AnimationManager::get().resetTime(); - AnimationManager::get().resume(); REGEN_INFO("XML Scene Loaded."); } diff --git a/regen/memory/staged-buffer.cpp b/regen/memory/staged-buffer.cpp index d5a9b62b5b..e9311ac60a 100644 --- a/regen/memory/staged-buffer.cpp +++ b/regen/memory/staged-buffer.cpp @@ -7,10 +7,11 @@ using namespace regen; -//#define REGEN_DISABLE_GLOBAL_STAGING -//#define REGEN_DISABLE_EXPLICIT_FLUSHING -//#define REGEN_FORCE_IMPLICIT_STAGING -#define REGEN_FORCE_CLIENT_DOUBLE_BUFFER +namespace regen { + static constexpr bool REGEN_FORCE_IMPLICIT_STAGING = false; + static constexpr bool REGEN_DISABLE_GLOBAL_STAGING = false; + static constexpr bool REGEN_FORCE_CLIENT_DOUBLE_BUFFER = true; +} uint32_t StagedBuffer::MIN_SEGMENTS_PARTIAL_TEMPORARY = 6; float StagedBuffer::MAX_UPDATE_RATIO_PARTIAL_TEMPORARY = 0.33f; @@ -47,9 +48,9 @@ StagedBuffer::StagedBuffer( setSyncFlag(BUFFER_SYNC_IMPLICIT_STAGING); } clientBuffer_->setFrameLocked(hints.frequency < BUFFER_UPDATE_PER_DRAW); -#ifdef REGEN_FORCE_IMPLICIT_STAGING - setSyncFlag(BUFFER_SYNC_IMPLICIT_STAGING); -#endif + if constexpr(REGEN_FORCE_IMPLICIT_STAGING) { + setSyncFlag(BUFFER_SYNC_IMPLICIT_STAGING); + } adoptBufferRange_ = [this](uint32_t requiredSize) { return adoptBufferRange(requiredSize); }; @@ -143,9 +144,9 @@ void StagedBuffer::updateStorageFlags() { // input has client data, so we need to set the access mode such that the CPU can write to it. 
enableWriteAccess(); setStagingBuffering(SINGLE_BUFFER); -#ifdef REGEN_FORCE_CLIENT_DOUBLE_BUFFER - clientBuffer_->setClientBufferMode(ClientBuffer::DoubleBuffer); -#endif + if constexpr(REGEN_FORCE_CLIENT_DOUBLE_BUFFER) { + clientBuffer_->setClientBufferMode(ClientBuffer::DoubleBuffer); + } // NOTE: in local staging we avoid persistent mapping the buffer to CPU memory to avoid performance issues // with fencing, as currently local staging uses per-BO and per-segment fences which is overkill @@ -542,12 +543,10 @@ void StagedBuffer::resetStagingBuffer(bool removeFromStagingSystem) { } void StagedBuffer::createStagingBuffer() { -#ifdef REGEN_DISABLE_GLOBAL_STAGING - // disable global staging, falling back to local staging buffer. ref_ptr buf; -#else - auto buf = StagingSystem::instance().addBufferBlock(this); -#endif + if constexpr(!REGEN_DISABLE_GLOBAL_STAGING) { + buf = StagingSystem::instance().addBufferBlock(this); + } shared_->stagingOffset_ = 0; if (buf.get() != nullptr) { // the block was added to the staging system. diff --git a/regen/memory/staged-buffer.h b/regen/memory/staged-buffer.h index 1b0c4fa977..966bb1e2c5 100644 --- a/regen/memory/staged-buffer.h +++ b/regen/memory/staged-buffer.h @@ -188,7 +188,7 @@ namespace regen { /** * Set status for the current frame, i.e. if the buffer block was updated or not. - * @param isStalled true if the frame was stalled, false otherwise. + * @param isUpdated true if the buffer block was updated in the current frame, false otherwise. 
*/ inline void setUpdatedFrame(bool isUpdated) { shared_->setUpdatedFrame(isUpdated); } @@ -240,7 +240,7 @@ namespace regen { inputSize = other.inputSize; } - ShaderInput *input; + ShaderInput *input = nullptr; uint32_t offset = 0; std::vector lastStamp = {0, 0}; uint32_t inputSize = 0; diff --git a/regen/memory/staging-buffer.cpp b/regen/memory/staging-buffer.cpp index c6129c6f9c..244298100e 100644 --- a/regen/memory/staging-buffer.cpp +++ b/regen/memory/staging-buffer.cpp @@ -2,11 +2,14 @@ #include "regen/gl/gl-param.h" #include "staging-system.h" -#define REGEN_USE_STAGING_ALLOCATOR -//#define REGEN_STAGING_USE_DIRECT_FLUSHING - using namespace regen; +namespace regen { + // static constants + static constexpr bool REGEN_USE_STAGING_ALLOCATOR = true; + static constexpr bool REGEN_STAGING_USE_DIRECT_FLUSHING = false; +} + float StagingBuffer::MAX_ACCEPTABLE_STALL_RATE = 0.1f; // 10% of frames can stall // note: camera is currently with 448 bytes slightly below 512 bytes uint32_t StagingBuffer::MIN_SIZE_MEDIUM = 512; // Bytes @@ -107,14 +110,14 @@ bool StagingBuffer::resizeBuffer(uint32_t segmentSize, uint32_t numRingSegments) if (stagingRef_.get()) { BufferObject::orphanBufferRange(stagingRef_.get()); } -#ifdef REGEN_USE_STAGING_ALLOCATOR - stagingRef_ = BufferObject::adoptBufferRange( - segmentSize_ * numSegments, - getStagingAllocator(storageMode_)); -#else - stagingRef_ = stagingBO_->adoptBufferRange( - segmentSize_ * numSegments); -#endif + if constexpr(REGEN_USE_STAGING_ALLOCATOR) { + stagingRef_ = BufferObject::adoptBufferRange( + segmentSize_ * numSegments, + getStagingAllocator(storageMode_)); + } else { + stagingRef_ = stagingBO_->adoptBufferRange( + segmentSize_ * numSegments); + } if (!stagingRef_->mappedData() && (storageFlags_ & MAP_PERSISTENT)) { REGEN_ERROR("Failed to map buffer " << " target: " << flags_.target << @@ -183,24 +186,24 @@ void StagingBuffer::resetStallRate() { } void StagingBuffer::pushToFlushQueue(const BufferRange2ui 
*dirtySegments, uint32_t numDirtySegments) { -#ifndef REGEN_STAGING_USE_DIRECT_FLUSHING - if (flags_.mapMode == BUFFER_MAP_PERSISTENT_FLUSH) { - RingSegment &writeSegment = bufferSegments_[writeBufferIndex_]; - const uint32_t totalDirtySegments = writeSegment.numDirtySegments + numDirtySegments; - // ensure the vector has enough space - if (totalDirtySegments > writeSegment.dirtySegments.size()) { - writeSegment.dirtySegments.resize(totalDirtySegments); + if constexpr(REGEN_STAGING_USE_DIRECT_FLUSHING) { + if (flags_.mapMode == BUFFER_MAP_PERSISTENT_FLUSH) { + RingSegment &writeSegment = bufferSegments_[writeBufferIndex_]; + const uint32_t totalDirtySegments = writeSegment.numDirtySegments + numDirtySegments; + // ensure the vector has enough space + if (totalDirtySegments > writeSegment.dirtySegments.size()) { + writeSegment.dirtySegments.resize(totalDirtySegments); + } + // copy the dirty segments into the vector + auto *dataStart = writeSegment.dirtySegments.data() + writeSegment.numDirtySegments; + std::memcpy( + (byte *) dataStart, + (byte *) dirtySegments, + numDirtySegments * sizeof(BufferRange2ui)); + // finally increment the number of dirty segments + writeSegment.numDirtySegments = totalDirtySegments; } - // copy the dirty segments into the vector - auto *dataStart = writeSegment.dirtySegments.data() + writeSegment.numDirtySegments; - std::memcpy( - (byte *) dataStart, - (byte *) dirtySegments, - numDirtySegments * sizeof(BufferRange2ui)); - // finally increment the number of dirty segments - writeSegment.numDirtySegments = totalDirtySegments; } -#endif } byte *StagingBuffer::getMappedSegment( @@ -315,31 +318,30 @@ void StagingBuffer::endMappedWrite( // non-persistent mapping glUnmapNamedBuffer(targetRef->bufferID()); } -#ifdef REGEN_STAGING_USE_DIRECT_FLUSHING - else if (accessFlags_ & MAP_FLUSH_EXPLICIT) { - // direct flushing - RingSegment &writeSegment = bufferSegments_[writeBufferIndex_]; - glFlushMappedNamedBufferRange(targetRef->bufferID(), - 
writeSegment.offset + localOffset, - drawBufferRef->allocatedSize()); + else if constexpr(REGEN_STAGING_USE_DIRECT_FLUSHING) { + if (accessFlags_ & MAP_FLUSH_EXPLICIT) { + // direct flushing + glFlushMappedNamedBufferRange(targetRef->bufferID(), + writeSegment.offset + localOffset, + drawBufferRef->allocatedSize()); + } } -#endif if (flags_.useExplicitStaging()) { -#ifndef REGEN_STAGING_USE_DIRECT_FLUSHING - // Make sure the last write to current readBuffer is flushed before we copy the data. - if (accessFlags_ & MAP_FLUSH_EXPLICIT) { - for (uint32_t flushIdx = 0; flushIdx < readSegment.numDirtySegments; ++flushIdx) { - // get the segment to flush - const BufferRange2ui &flushSegment = readSegment.dirtySegments[flushIdx]; - glFlushMappedNamedBufferRange( - targetRef->bufferID(), - readSegment.offset + localOffset + flushSegment.offset, - flushSegment.size); + if constexpr(REGEN_STAGING_USE_DIRECT_FLUSHING) { + // Make sure the last write to current readBuffer is flushed before we copy the data. + if (accessFlags_ & MAP_FLUSH_EXPLICIT) { + for (uint32_t flushIdx = 0; flushIdx < readSegment.numDirtySegments; ++flushIdx) { + // get the segment to flush + const BufferRange2ui &flushSegment = readSegment.dirtySegments[flushIdx]; + glFlushMappedNamedBufferRange( + targetRef->bufferID(), + readSegment.offset + localOffset + flushSegment.offset, + flushSegment.size); + } + readSegment.numDirtySegments = 0; // reset the dirty segments } - readSegment.numDirtySegments = 0; // reset the dirty segments } -#endif // NOTE: We delay copy to draw buffer until we reach a read segment that has been written to. // This causes some frames of delay until the upload starts. The draw buffer best has some // meaningful initial value that can be drawn first few frames! 
diff --git a/regen/memory/staging-system.cpp b/regen/memory/staging-system.cpp index 75871401c5..6aaabf7cd9 100644 --- a/regen/memory/staging-system.cpp +++ b/regen/memory/staging-system.cpp @@ -475,112 +475,7 @@ void StagingSystem::updateData(float dt_ms) { } for (uint32_t arenaIdx = 0; arenaIdx < ARENA_TYPE_LAST; arenaIdx++) { - auto &arena = arenas_[arenaIdx]; - // skip inactive arenas: those that are not initialized, and those that are cooling down. - if (!arena || arena->cooldown(dt_ms)) continue; - - // Dynamically resize the arena if needed. - // NOTE: the arena will also indicate size change in case of adaptive size change in ring buffers, - // or the arena is not large enough to hold all BOs. - if (updateArenaSize(arena)) { - arena->resize(); - arena->sort(); - } - if constexpr(STAGING_DEBUG_TIME) { - elapsedTime().push(REGEN_STRING(arena->type << " resized")); - } - if (!arena->flags.isReadable() && !arena->isDirty) { - // early exit writing arenas before fencing in case of no updates. - continue; - } - if constexpr(STAGING_DEBUG_STATISTICS) { - stats_.numDirtyArenas++; - } - - const uint32_t copyIdx = arena->stagingBuffer->nextWriteIndex(); - const uint32_t drawIdx = arena->stagingBuffer->nextReadIndex(); - // we do manual synchronization in case of persistent mapping arenas. - const bool useFence = isMapModePersistent(arena->flags.mapMode); - // For now, reading arenas must not be marked dirty, it is assumed the draw - // buffer is written to every frame. - // Reason: the use as output buffer is currently not tracked, but could be done to mark - // GPU write buffers as dirty -- but must be careful with syncing then! - const bool forceUpdate = arena->flags.isReadable(); - - // Wait for the fence in case of persistent mapped arenas. - // This might block the CPU in case of the last write into this segment - // has not been consumed by the GPU yet. - // TODO: The interaction with the fence still consumes a lot of CPU time. 
- // - The main bottleneck now seems *setFencePoint*. Reason might be that - // we do glDeleteSync/glFenceSync calls every time setFencePoint is called. - // - As far as I know, we cannot re-use fences across frames. - // - Maybe the only way to improve would be to reduce the number of fences. - // - Idea: Let arenas share fences. However, this is difficult because arenas - // currently may have ring buffers of different sizes. - // - Maybe the mechanism can be adjusted such that we do not need a fence every - // frame for every arena. - if (useFence) { - arena->stagingBuffer->fence(copyIdx).wait(); - } - - // Copy data from CPU to staging to draw buffer, - // or in case of reading, the other way around. - for (auto &managed: arena->bufferObjects) { - // NOTE: temporary mapping is only used for rare updates, - // so it is not really worth it to consider temporary mapping on arena level. - // NOTE: This will only copy data if the BO is dirty, i.e. has new data to write. - //if (managed.bo->hasDirtySegments()) { - // REGEN_INFO("Dirty BO: " << managed.bo->name()); - //} - if constexpr(STAGING_DEBUG_STATISTICS) { - if (managed.bo->hasDirtySegments()) { - stats_.numDirtyBOs++; - stats_.numDirtySegments += managed.bo->numDirtySegments(); - } - stats_.numTotalBOs++; - } - managed.bo->copyStagingData(forceUpdate); - } - - // Do the actual copy from staging to draw buffer. - // We do this here as we attempted to coalesce the copy ranges into - // larger contiguous ranges for fewer copies. 
- for (uint32_t scheduleIdx = 0; scheduleIdx < numScheduledCopies_; scheduleIdx++) { - auto &copy = scheduledCopies_[scheduleIdx]; - glCopyNamedBufferSubData( - copy.srcBufferID, copy.dstBufferID, - copy.srcOffset, copy.dstOffset, copy.size); - //REGEN_INFO("Scheduled copy " << copy); - } - numScheduledCopies_ = 0; // reset scheduled copies - if constexpr(STAGING_DEBUG_TIME) { - elapsedTime().push(REGEN_STRING(arena->type << " copied")); - } - - // Create a fence just after glCopyNamedBufferSubData -- marking the point where the - // written data of this frame has been consumed by the GPU. - if (useFence) { - arena->stagingBuffer->fence(drawIdx).setFencePoint(); - } - if constexpr(STAGING_DEBUG_TIME) { - elapsedTime().push(REGEN_STRING(arena->type << " synced")); - } - - // Advance to next segment in case of multi-buffering and ring buffers. - arena->stagingBuffer->swapBuffers(); - arena->isDirty = false; // reset dirty flag - if constexpr(STAGING_DEBUG_TIME) { - elapsedTime().push(REGEN_STRING(arena->type << " swapped")); - } - - if constexpr(STAGING_DEBUG_STALLS) { - if (useFence) { - REGEN_INFO("Arena " << arena->type << " stall rate: " - << arena->stagingBuffer->fence(copyIdx).getStallRate()); - } - REGEN_INFO("Arena " << arena->type << " fragmentation: " - << arena->freeList->getFragmentationScore()); - } + updateArenaData(dt_ms, static_cast<ArenaType>(arenaIdx)); } if constexpr (!ANIMATION_THREAD_SWAPS_CLIENT_BUFFERS) { @@ -602,6 +497,115 @@ void StagingSystem::updateData(float dt_ms) { } } +void StagingSystem::updateArenaData(float dt_ms, ArenaType arenaType) { + auto &arena = arenas_[arenaType]; + // skip inactive arenas: those that are not initialized, and those that are cooling down. + if (!arena || arena->cooldown(dt_ms)) return; + + // Dynamically resize the arena if needed. + // NOTE: the arena will also indicate size change in case of adaptive size change in ring buffers, + // or the arena is not large enough to hold all BOs. 
+ if (updateArenaSize(arena)) { + arena->resize(); + arena->sort(); + } + if constexpr(STAGING_DEBUG_TIME) { + elapsedTime().push(REGEN_STRING(arena->type << " resized")); + } + if (!arena->flags.isReadable() && !arena->isDirty) { + // early exit writing arenas before fencing in case of no updates. + return; + } + if constexpr(STAGING_DEBUG_STATISTICS) { + stats_.numDirtyArenas++; + } + + const uint32_t copyIdx = arena->stagingBuffer->nextWriteIndex(); + const uint32_t drawIdx = arena->stagingBuffer->nextReadIndex(); + // we do manual synchronization in case of persistent mapping arenas. + const bool useFence = isMapModePersistent(arena->flags.mapMode); + // For now, reading arenas must not be marked dirty, it is assumed the draw + // buffer is written to every frame. + // Reason: the use as output buffer is currently not tracked, but could be done to mark + // GPU write buffers as dirty -- but must be careful with syncing then! + const bool forceUpdate = arena->flags.isReadable(); + + // Wait for the fence in case of persistent mapped arenas. + // This might block the CPU in case of the last write into this segment + // has not been consumed by the GPU yet. + // TODO: The interaction with the fence still consumes a lot of CPU time. + // - The main bottleneck now seems *setFencePoint*. Reason might be that + // we do glDeleteSync/glFenceSync calls every time setFencePoint is called. + // - As far as I know, we cannot re-use fences across frames. + // - Maybe the only way to improve would be to reduce the number of fences. + // - Idea: Let arenas share fences. However, this is difficult because arenas + // currently may have ring buffers of different sizes. + // - Maybe the mechanism can be adjusted such that we do not need a fence every + // frame for every arena. + if (useFence) { + arena->stagingBuffer->fence(copyIdx).wait(); + } + + // Copy data from CPU to staging to draw buffer, + // or in case of reading, the other way around. 
+ for (auto &managed: arena->bufferObjects) { + // NOTE: temporary mapping is only used for rare updates, + // so it is not really worth it to consider temporary mapping on arena level. + // NOTE: This will only copy data if the BO is dirty, i.e. has new data to write. + //if (managed.bo->hasDirtySegments()) { + // REGEN_INFO("Dirty BO: " << managed.bo->name()); + //} + if constexpr(STAGING_DEBUG_STATISTICS) { + if (managed.bo->hasDirtySegments()) { + stats_.numDirtyBOs++; + stats_.numDirtySegments += managed.bo->numDirtySegments(); + } + stats_.numTotalBOs++; + } + managed.bo->copyStagingData(forceUpdate); + } + + // Do the actual copy from staging to draw buffer. + // We do this here as we attempted to coalesce the copy ranges into + // larger contiguous ranges for fewer copies. + for (uint32_t scheduleIdx = 0; scheduleIdx < numScheduledCopies_; scheduleIdx++) { + auto &copy = scheduledCopies_[scheduleIdx]; + glCopyNamedBufferSubData( + copy.srcBufferID, copy.dstBufferID, + copy.srcOffset, copy.dstOffset, copy.size); + //REGEN_INFO("Scheduled copy " << copy); + } + numScheduledCopies_ = 0; // reset scheduled copies + if constexpr(STAGING_DEBUG_TIME) { + elapsedTime().push(REGEN_STRING(arena->type << " copied")); + } + + // Create a fence just after glCopyNamedBufferSubData -- marking the point where the + // written data of this frame has been consumed by the GPU. + if (useFence) { + arena->stagingBuffer->fence(drawIdx).setFencePoint(); + } + if constexpr(STAGING_DEBUG_TIME) { + elapsedTime().push(REGEN_STRING(arena->type << " synced")); + } + + // Advance to next segment in case of multi-buffering and ring buffers. 
+ arena->stagingBuffer->swapBuffers(); + arena->isDirty = false; // reset dirty flag + if constexpr(STAGING_DEBUG_TIME) { + elapsedTime().push(REGEN_STRING(arena->type << " swapped")); + } + + if constexpr(STAGING_DEBUG_STALLS) { + if (useFence) { + REGEN_INFO("Arena " << arena->type << " stall rate: " + << arena->stagingBuffer->fence(copyIdx).getStallRate()); + } + REGEN_INFO("Arena " << arena->type << " fragmentation: " + << arena->freeList->getFragmentationScore()); + } +} + void StagingSystem::swapClientData() { if constexpr(STAGING_DEBUG_STATISTICS) { stats_.numSwapCopies = 0; @@ -609,7 +613,6 @@ void StagingSystem::swapClientData() { for (uint32_t arenaIdx = 0; arenaIdx < ARENA_TYPE_LAST; arenaIdx++) { auto &arena = arenas_[arenaIdx]; if (!arena) continue; // skip uninitialized arenas - for (auto &managed: arena->bufferObjects) { if constexpr(STAGING_DEBUG_STATISTICS) { stats_.numSwapCopies += managed.bo->clientBuffer()->swapData(); @@ -623,6 +626,25 @@ void StagingSystem::swapClientData() { } } +void StagingSystem::rotateBuffers() { + for (uint32_t arenaIdx = 0; arenaIdx < ARENA_TYPE_LAST; arenaIdx++) { + auto &arena = arenas_[arenaIdx]; + if (!arena) continue; // skip uninitialized arenas + + // First swap the client buffers such that we can read the latest data from CPU side. + for (auto &managed: arena->bufferObjects) { + managed.bo->clientBuffer()->swapData(); + } + + // Second, rotate the staging buffers. + // We rotate through all ring segments to ensure that + // the staging buffer is in sync with the client buffers. 
+ for (uint32_t ringIdx=0; ringIdx < arena->numRingSegments; ringIdx++) { + updateArenaData(0.0f, arena->type); + } + } +} + bool StagingSystem::moveAdaptive(Arena *arena, ManagedBO &managed, float boUpdateRate) { managed.maxUpdateRate = std::max(boUpdateRate, managed.maxUpdateRate); diff --git a/regen/memory/staging-system.h b/regen/memory/staging-system.h index 23b950714d..c15f38d7dc 100644 --- a/regen/memory/staging-system.h +++ b/regen/memory/staging-system.h @@ -5,13 +5,11 @@ #include "regen/gl/queries/elapsed-time.h" #include "free-list.h" -// Note: We need to swap client buffers after each copy from -// client buffer into staging. We can either do it directly in the staging -// system right after the copy into staging, or we can let the animation thread -// perform the swapping, but it has to wait for the copy into staging to complete. -//#define REGEN_STAGING_ANIMATION_THREAD_SWAPS_CLIENT - namespace regen { + // Note: We need to swap client buffers after each copy from + // client buffer into staging. We can either do it directly in the staging + // system right after the copy into staging, or we can let the animation thread + // perform the swapping, but it has to wait for the copy into staging to complete. static constexpr bool ANIMATION_THREAD_SWAPS_CLIENT_BUFFERS = false; /** @@ -144,6 +142,12 @@ namespace regen { */ void swapClientData(); + /** + * This method can be called to rotate through all ring buffer segments in the staging buffers. + * This can be useful to make sure all buffer segments have initially the same data. + */ + void rotateBuffers(); + /** * \brief Clear the staging system. 
* @@ -196,6 +200,8 @@ namespace regen { bool updateArenaSize(Arena *arena); + void updateArenaData(float dt_ms, ArenaType arenaType); + struct StagingStatistics { uint32_t numDirtyArenas = 0; uint32_t numDirtyBOs = 0; From 449fab1a28f53962b4c25f694aacb2efd1e2fac7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20Be=C3=9Fler?= Date: Fri, 2 Jan 2026 14:08:55 +0100 Subject: [PATCH 03/23] Added comment --- applications/scene-display/examples/transparency.xml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/applications/scene-display/examples/transparency.xml b/applications/scene-display/examples/transparency.xml index 01a2b8d335..72fe615f9c 100644 --- a/applications/scene-display/examples/transparency.xml +++ b/applications/scene-display/examples/transparency.xml @@ -145,7 +145,9 @@ - + From 03f4d13cfe171a3f94ccbecfce216100838c02e9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20Be=C3=9Fler?= Date: Fri, 2 Jan 2026 14:17:48 +0100 Subject: [PATCH 04/23] Modernized code --- regen/simulation/boids-gpu.cpp | 97 +++++++++++++++++----------------- 1 file changed, 48 insertions(+), 49 deletions(-) diff --git a/regen/simulation/boids-gpu.cpp b/regen/simulation/boids-gpu.cpp index cc4faf19b5..a538f9563b 100644 --- a/regen/simulation/boids-gpu.cpp +++ b/regen/simulation/boids-gpu.cpp @@ -4,15 +4,16 @@ using namespace regen; #define BOID_USE_HALF_VELOCITY -#define BOID_USE_SORTED_DATA -//#define BOID_DEBUG_GRID_OFFSETS -//#define BOID_DEBUG_GRID_SORTING -//#define BOID_DEBUG_BBOX_TIME -//#define BOID_DEBUG_GRID_TIME -//#define BOID_DEBUG_SIMULATION_TIME -#if defined(BOID_DEBUG_SIMULATION_TIME) || defined(BOID_DEBUG_GRID_TIME) -#define BOID_DEBUG_TIME -#endif + +namespace regen { + static constexpr bool BOID_USE_SORTED_DATA = true; + static constexpr bool BOID_DEBUG_GRID_OFFSETS = false; + static constexpr bool BOID_DEBUG_GRID_SORTING = false; + static constexpr bool BOID_DEBUG_BBOX_TIME = false; + static constexpr bool BOID_DEBUG_GRID_TIME = false; + static 
constexpr bool BOID_DEBUG_SIMULATION_TIME = false; + static constexpr bool BOID_DEBUG_TIME = BOID_DEBUG_GRID_TIME || BOID_DEBUG_SIMULATION_TIME; +} BoidsGPU::BoidsGPU(const ref_ptr &tf) : BoidSimulation(tf), @@ -77,9 +78,9 @@ void BoidsGPU::createResource() { // compute initial bbox computeBBox(initialPositions.data()); updateGridSize(); -#ifdef BOID_DEBUG_TIME - timeElapsedQuery_ = ref_ptr::alloc(); -#endif + if constexpr(BOID_DEBUG_TIME) { + timeElapsedQuery_ = ref_ptr::alloc(); + } u_numCells_ = ref_ptr::alloc("numGridCells"); u_numCells_->setUniformData(numCells_); @@ -149,8 +150,7 @@ void BoidsGPU::createResource() { } // create a state that updates the boids grid updateGridState_ = ref_ptr::alloc(); - #ifdef BOID_USE_SORTED_DATA - { + if constexpr(BOID_USE_SORTED_DATA) { // NOTE: this might be a HUGE buffer. It stores boid // positions and velocities. boidDataBuffer_ = ref_ptr::alloc("BoidDataBuffer", @@ -158,7 +158,6 @@ void BoidsGPU::createResource() { boidDataBuffer_->addStagedInput(ref_ptr>::alloc("BoidData", "boidData", numBoids_)); boidDataBuffer_->update(); } - #endif { auto radixSort = ref_ptr::alloc(numBoids_); // note: with compaction enabled, there will be an additional "numVisibleKeys" field in the Key buffer, @@ -198,14 +197,14 @@ void BoidsGPU::createResource() { updateState->setInput(((RadixSort_GPU*)radixSort_.get())->keyBuffer()); updateState->setInput(((RadixSort_GPU*)radixSort_.get())->valueBuffer()); updateState->setInput(gridOffsetBuffer_); - #ifdef BOID_USE_SORTED_DATA - updateState->setInput(velBuffer_); - if (tf_.get()) { - updateState->setInput(tfBuffer_); + if constexpr(BOID_USE_SORTED_DATA) { + updateState->setInput(velBuffer_); + if (tf_.get()) { + updateState->setInput(tfBuffer_); + } + updateState->setInput(boidDataBuffer_); + updateState->shaderDefine("USE_SORTED_DATA", "TRUE"); } - updateState->setInput(boidDataBuffer_); - updateState->shaderDefine("USE_SORTED_DATA", "TRUE"); - #endif #ifdef BOID_USE_HALF_VELOCITY 
updateState->shaderDefine("USE_HALF_VELOCITY", "TRUE"); #endif @@ -246,9 +245,9 @@ void BoidsGPU::createResource() { if (tf_.get()) { simulationState_->setInput(tfBuffer_); } -#ifdef BOID_USE_SORTED_DATA - simulationState_->setInput(boidDataBuffer_); -#endif + if constexpr(BOID_USE_SORTED_DATA) { + simulationState_->setInput(boidDataBuffer_); + } if (heightMap_.get()) { simulationState_->setInput( createUniform("mapCenter", mapCenter_)); @@ -298,9 +297,9 @@ void BoidsGPU::gpuUpdate(RenderState *rs, double dt) { // limit FPS to 6, grid must not be entirely accurate. // usually the grid size changes only every second or so. if (bbox_time_ > 166.0) { -#ifdef BOID_DEBUG_BBOX_TIME - timeElapsedQuery_->begin(); -#endif + if constexpr(BOID_DEBUG_BBOX_TIME) { + timeElapsedQuery_->begin(); + } bboxBuffer_->clear(); bboxPass_->enable(rs); bboxPass_->disable(rs); @@ -314,35 +313,35 @@ void BoidsGPU::gpuUpdate(RenderState *rs, double dt) { vrStamp_ = vrStamp; } } -#ifdef BOID_DEBUG_BBOX_TIME - REGEN_INFO("BBox computation took: " << timeElapsedQuery_->end() << " ms"); -#endif + if constexpr(BOID_DEBUG_BBOX_TIME) { + REGEN_INFO("BBox computation took: " << timeElapsedQuery_->end() << " ms"); + } bbox_time_ = 0.0; } if (numCells_ > 0) { -#ifdef BOID_DEBUG_GRID_TIME - timeElapsedQuery_->begin(); -#endif + if constexpr(BOID_DEBUG_GRID_TIME) { + timeElapsedQuery_->begin(); + } updateGridState_->enable(rs); updateGridState_->disable(rs); -#ifdef BOID_DEBUG_GRID_TIME - REGEN_INFO("Grid update took: " << timeElapsedQuery_->end() << " ms"); -#endif -#ifdef BOID_DEBUG_GRID_OFFSETS - printOffsets(rs); -#endif -#ifdef BOID_DEBUG_GRID_SORTING - debugGridSorting(rs); -#endif -#ifdef BOID_DEBUG_SIMULATION_TIME - timeElapsedQuery_->begin(); -#endif + if constexpr(BOID_DEBUG_GRID_TIME) { + REGEN_INFO("Grid update took: " << timeElapsedQuery_->end() << " ms"); + } + if constexpr(BOID_DEBUG_GRID_OFFSETS) { + printOffsets(rs); + } + if constexpr(BOID_DEBUG_GRID_SORTING) { + debugGridSorting(rs); 
+ } + if constexpr(BOID_DEBUG_SIMULATION_TIME) { + timeElapsedQuery_->begin(); + } // update the boids positions and velocities simulate(rs, time_); -#ifdef BOID_DEBUG_SIMULATION_TIME - REGEN_INFO("Boid simulation took: " << timeElapsedQuery_->end() << " ms"); -#endif + if constexpr(BOID_DEBUG_SIMULATION_TIME) { + REGEN_INFO("Boid simulation took: " << timeElapsedQuery_->end() << " ms"); + } } time_ = 0.0; } From 474111f251b6f519b41e6214b5f84c6c4a1308f7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20Be=C3=9Fler?= Date: Fri, 2 Jan 2026 15:54:27 +0100 Subject: [PATCH 05/23] Make sure GL surface dimension is divisible by 2 --- applications/scene-display/scene-display-widget.cpp | 6 ++++++ regen/scene/scene.cpp | 6 +++++- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/applications/scene-display/scene-display-widget.cpp b/applications/scene-display/scene-display-widget.cpp index 1799dc4275..7c72dd6f57 100644 --- a/applications/scene-display/scene-display-widget.cpp +++ b/applications/scene-display/scene-display-widget.cpp @@ -149,6 +149,12 @@ SceneDisplayWidget::SceneDisplayWidget(QtApplication *app) // load window width/height from Qt settings int width = settings_.value("width", 1280).toInt(); int height = settings_.value("height", 960).toInt(); + // Make sure we do not have too small window + if (width < 400) { width = 400; } + if (height < 300) { height = 300; } + // Make sure dimensions are divisible by 2 + if (width % 2 != 0) { width += 1; } + if (height % 2 != 0) { height += 1; } REGEN_INFO("Initial window size: " << width << "x" << height); ui_.setupUi(this); diff --git a/regen/scene/scene.cpp b/regen/scene/scene.cpp index 9988421e18..a06ac6877f 100644 --- a/regen/scene/scene.cpp +++ b/regen/scene/scene.cpp @@ -204,7 +204,11 @@ void Scene::keyDown(const KeyEvent &ev) { } void Scene::resizeGL(const Vec2i &size) { - screen_->setViewport(size); + // make sure size is at least 2x2, non-negative and divisible by 2 + const Vec2i safeSize( + 
std::max(2, size.x + size.x % 2), + std::max(2, size.y + size.y % 2)); + screen_->setViewport(safeSize); queueEmit(RESIZE_EVENT); updateMousePosition(); } From bb6fcad886ae7bedd8ec12425dd5cbb62a8ca3ef Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20Be=C3=9Fler?= Date: Fri, 2 Jan 2026 17:43:03 +0100 Subject: [PATCH 06/23] Minor revision of volume shader --- .../examples/compute/bonsai-training-data.xml | 4 +-- regen/objects/volume.glsl | 31 ++++++++++++++++--- 2 files changed, 29 insertions(+), 6 deletions(-) diff --git a/applications/scene-display/examples/compute/bonsai-training-data.xml b/applications/scene-display/examples/compute/bonsai-training-data.xml index cc291ca0d7..57926b40c7 100644 --- a/applications/scene-display/examples/compute/bonsai-training-data.xml +++ b/applications/scene-display/examples/compute/bonsai-training-data.xml @@ -104,7 +104,7 @@ - + @@ -147,7 +147,7 @@ - + diff --git a/regen/objects/volume.glsl b/regen/objects/volume.glsl index c88054fbd3..c8b4b1254d 100644 --- a/regen/objects/volume.glsl +++ b/regen/objects/volume.glsl @@ -151,10 +151,10 @@ void main() { // do the ray casting from start to stop vec3 ray = rayStop - rayStart; + float stepSize = max(in_rayStep, 0.001); float rayLength = length(ray); - int numSteps = int(rayLength / in_rayStep); + int numSteps = int(rayLength / stepSize); vec3 stepVector = ray / float(numSteps); - //vec3 stepVector = normalize(ray) * max(in_rayStep, 0.001); vec3 pos = rayStart; vec4 dst = vec4(0); @@ -164,13 +164,14 @@ void main() { #if RAY_CASTING_MODE==MAX_INTENSITY float maxIntensity = 0.0; #endif + for(int i=0; i in_densityThreshold) { dst = volumeTransfer(value); + // make a look ahead to find a better maximum + for(int j=0; j<2; j++) { + vec3 lookAheadPos = pos + stepVector * float(j); + float lookAheadValue = texture(in_volumeTexture, lookAheadPos).x; + if(lookAheadValue > value) { + dst = volumeTransfer(lookAheadValue); + value = lookAheadValue; + i = j; // advance main loop + } + } break; } 
#else // emission/absorbtion @@ -291,7 +302,7 @@ void main() { // do the ray casting from start to stop vec3 ray = rayStop - rayStart; float rayLength = length(ray); - int numSteps = int(rayLength / in_rayStep); + int numSteps = int(rayLength / max(in_rayStep, 0.001)); vec3 stepVector = ray / float(numSteps); vec3 pos = rayStart; float density = 0.0; @@ -302,6 +313,18 @@ void main() { if(value > in_densityThreshold) { density = value; hit = true; + // make a look ahead to find a better maximum + for(int j=0; j<5; j++) { + vec3 lookAheadPos = pos + stepVector * float(j); + float lookAheadValue = texture(in_volumeTexture, lookAheadPos).x; + if(lookAheadValue > value) { + density = lookAheadValue; + value = lookAheadValue; + i = j; // advance main loop + } else { + break; + } + } break; } pos += stepVector; From 50217bb8b52f682284c398310bbdbf2a65e9d8c7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20Be=C3=9Fler?= Date: Fri, 2 Jan 2026 17:43:19 +0100 Subject: [PATCH 07/23] Improved error handling --- regen/av/video-recorder.cpp | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/regen/av/video-recorder.cpp b/regen/av/video-recorder.cpp index 226fa1c759..af1f28683b 100644 --- a/regen/av/video-recorder.cpp +++ b/regen/av/video-recorder.cpp @@ -93,7 +93,11 @@ void VideoRecorder::initialize() { codecCtx_->flags |= AV_CODEC_FLAG_GLOBAL_HEADER; } if (avcodec_open2(codecCtx_, codec, nullptr) < 0) { - throw std::runtime_error("Could not open codec"); + // print error message + char err_buf[AV_ERROR_MAX_STRING_SIZE]{}; + auto err_type = AVERROR(errno); + av_make_error_string(err_buf, AV_ERROR_MAX_STRING_SIZE, err_type); + throw std::runtime_error(REGEN_STRING("Could not open codec: " << err_buf << " (" << err_type << ")")); } stream_ = avformat_new_stream(formatCtx_, codec); From 137768340d19eb9a7066fedad68f6f5c4af6a507 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20Be=C3=9Fler?= Date: Fri, 2 Jan 2026 18:28:18 +0100 Subject: [PATCH 08/23] Code cleanup 
--- applications/scene-display/examples/character.xml | 2 +- regen/av/audio.cpp | 6 +++--- regen/av/audio.h | 11 ++++------- regen/av/demuxer.cpp | 2 ++ regen/av/demuxer.h | 11 ++--------- regen/behavior/world/body-part.cpp | 2 +- regen/camera/camera-anchor.h | 2 +- regen/compute/quadric.h | 2 +- regen/compute/quaternion.h | 1 - regen/compute/radix-sort-cpu.h | 2 +- regen/config.h.cmake | 1 + regen/gl/gl-object.h | 8 +------- regen/gl/gl-param.cpp | 2 ++ regen/gl/gl-param.h | 2 -- regen/gl/gl-util.h | 11 ----------- regen/gl/render-state.cpp | 4 +++- regen/gl/states/atomic-states.h | 2 +- regen/memory/aligned-allocator.h | 2 -- regen/memory/aligned-array.h | 4 +--- regen/memory/bbox-buffer.h | 1 - regen/memory/position-reader.h | 2 +- regen/memory/staging-buffer.cpp | 2 +- regen/memory/staging-buffer.h | 2 +- regen/objects/assimp-importer.cpp | 4 ++-- regen/objects/assimp-importer.h | 11 ----------- regen/objects/composite-mesh.cpp | 5 +++-- regen/objects/lod/tessellation.cpp | 2 +- regen/objects/lod/tessellation.h | 2 +- regen/objects/primitives/sphere.cpp | 2 +- regen/objects/sky/lightning-bolt.h | 4 ++-- regen/passes/filter.cpp | 2 +- regen/regen.h | 3 ++- regen/scene/resource-manager.h | 2 -- regen/scene/scene-input.cpp | 2 -- regen/shader/directive-processor.cpp | 6 +++--- regen/shader/input-schema.cpp | 2 +- regen/shader/io-processor.cpp | 5 +++-- regen/shader/shader-state.cpp | 2 +- regen/shader/shader.cpp | 2 +- regen/shader/shader.h | 7 +------ regen/shapes/batch-of-shapes.h | 2 -- regen/shapes/frustum.cpp | 2 +- regen/shapes/shape-processor.cpp | 2 -- 43 files changed, 52 insertions(+), 101 deletions(-) diff --git a/applications/scene-display/examples/character.xml b/applications/scene-display/examples/character.xml index 7c7182c5a2..b7a8c2eb72 100644 --- a/applications/scene-display/examples/character.xml +++ b/applications/scene-display/examples/character.xml @@ -784,7 +784,7 @@ - + diff --git a/regen/av/audio.cpp b/regen/av/audio.cpp index 
cdb26a4181..725776b240 100644 --- a/regen/av/audio.cpp +++ b/regen/av/audio.cpp @@ -186,7 +186,7 @@ void AudioListener::set3f(const ALenum &p, const Vec3f &v) { } Vec3f AudioListener::get3f(const ALenum &p) { - Vec3f v; + Vec3f v = Vec3f::zero(); alGetListenerf(p, &v.x); return v; } @@ -196,7 +196,7 @@ void AudioListener::set6f(const ALenum &p, const Vec6f &v) { } Vec6f AudioListener::get6f(const ALenum &p) { - Vec6f v; + auto v = Vec6f{ 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f }; alGetListenerf(p, &v.x0); return v; } @@ -281,7 +281,7 @@ void AudioSource::set3f(const ALenum &p, const Vec3f &v) const { } Vec3f AudioSource::get3f(const ALenum &p) const { - Vec3f v; + Vec3f v = Vec3f::zero(); alGetSourcef(id_, p, &v.x); return v; } diff --git a/regen/av/audio.h b/regen/av/audio.h index 58c77b7020..fd1747f5a1 100644 --- a/regen/av/audio.h +++ b/regen/av/audio.h @@ -8,8 +8,6 @@ #ifndef AUDIO_SOURCE_H_ #define AUDIO_SOURCE_H_ -#include - extern "C" { #include #include @@ -121,6 +119,10 @@ namespace regen { ~AudioSource() override; + AudioSource(const AudioSource &) = delete; + + AudioSource &operator=(const AudioSource &) = delete; + /** * The audio source ID. */ @@ -218,11 +220,6 @@ namespace regen { #endif void doClearQueue(); - - private: - AudioSource(const AudioSource &); - - AudioSource &operator=(const AudioSource &); }; } // namespace diff --git a/regen/av/demuxer.cpp b/regen/av/demuxer.cpp index 1b30a174f2..7dcc2dac1a 100644 --- a/regen/av/demuxer.cpp +++ b/regen/av/demuxer.cpp @@ -27,6 +27,8 @@ static void avLogCallback(void *, int level, const char *msg, va_list args) { break; case AV_LOG_WARNING: REGEN_WARN(buffer); break; + default: + break; } } diff --git a/regen/av/demuxer.h b/regen/av/demuxer.h index 2e764a71b9..53a1eb12a3 100644 --- a/regen/av/demuxer.h +++ b/regen/av/demuxer.h @@ -54,11 +54,6 @@ namespace regen { */ bool hasInput() const; - /** - * Total number of seconds elapsed in the stream. 
- */ - float elapsedSeconds() const; - /** * Total number of seconds of currently loaded stream. */ @@ -131,10 +126,8 @@ namespace regen { bool pauseFlag_; bool repeatStream_; - int videoStreamIndex_; - int audioStreamIndex_; - - //float elapsedSeconds_; + int videoStreamIndex_ = -1; + int audioStreamIndex_ = -1; struct SeekPosition { bool isRequired; diff --git a/regen/behavior/world/body-part.cpp b/regen/behavior/world/body-part.cpp index 928ea5c105..7a01aeaf0f 100644 --- a/regen/behavior/world/body-part.cpp +++ b/regen/behavior/world/body-part.cpp @@ -32,7 +32,7 @@ std::istream ®en::operator>>(std::istream &in, BodyPart &v) { else if (val == "LEG") v = BodyPart::LEG; else { REGEN_WARN("Unknown BodyPart value: " << val); - val = "NO_BODY_PART"; + v = BodyPart::HEAD; } return in; } diff --git a/regen/camera/camera-anchor.h b/regen/camera/camera-anchor.h index 7da0531fe2..2c904917dd 100644 --- a/regen/camera/camera-anchor.h +++ b/regen/camera/camera-anchor.h @@ -72,7 +72,7 @@ namespace regen { protected: ref_ptr transform_; - Vec3f offset_; + Vec3f offset_ = Vec3f::zero(); Mode mode_; }; } diff --git a/regen/compute/quadric.h b/regen/compute/quadric.h index e325da9ee5..bab4ff9591 100644 --- a/regen/compute/quadric.h +++ b/regen/compute/quadric.h @@ -10,7 +10,7 @@ namespace regen { */ class quadric { public: - float a[10] = {0}; // 10 unique components of the symmetric 4x4 matrix + float a[10] = {}; // 10 unique components of the symmetric 4x4 matrix quadric() = default; diff --git a/regen/compute/quaternion.h b/regen/compute/quaternion.h index fdf0782a5d..dcffa1d123 100644 --- a/regen/compute/quaternion.h +++ b/regen/compute/quaternion.h @@ -303,7 +303,6 @@ namespace regen { // adjust signs (if necessary) Quaternion end = pEnd; if (cosom < 0.0f) { - cosom = -cosom; end.x = -end.x; // Reverse all signs end.y = -end.y; end.z = -end.z; diff --git a/regen/compute/radix-sort-cpu.h b/regen/compute/radix-sort-cpu.h index 6493311f7b..04442f11c2 100644 --- 
a/regen/compute/radix-sort-cpu.h +++ b/regen/compute/radix-sort-cpu.h @@ -34,7 +34,7 @@ namespace regen { std::vector tmp_indices_; std::vector histogram_; alignas(32) KeyType tmpBins_[KEYS_PER_SIMD_PASS] = {0}; - alignas(32) int32_t tmpKeys32[8] = {0}; + alignas(32) int32_t tmpKeys32[8] = {}; /** * @brief Constructor diff --git a/regen/config.h.cmake b/regen/config.h.cmake index b3ba225a22..4fabf5acda 100644 --- a/regen/config.h.cmake +++ b/regen/config.h.cmake @@ -50,4 +50,5 @@ namespace regen { }; #endif // REGEN_CONFIG_H_ +#include #include diff --git a/regen/gl/gl-object.h b/regen/gl/gl-object.h index ac69114d14..de5361eb91 100644 --- a/regen/gl/gl-object.h +++ b/regen/gl/gl-object.h @@ -1,7 +1,6 @@ #ifndef REGEN_GL_OBJECT_H_ #define REGEN_GL_OBJECT_H_ -#include #include #include @@ -56,12 +55,7 @@ namespace regen { GLObject(const GLObject &other); - virtual ~GLObject(); - - /** - * Releases and allocates resources again. - */ - void resetGL(); + ~GLObject() override; /** * Switch to the next allocated buffer. 
diff --git a/regen/gl/gl-param.cpp b/regen/gl/gl-param.cpp index c55619650b..822bfb7c1e 100644 --- a/regen/gl/gl-param.cpp +++ b/regen/gl/gl-param.cpp @@ -1,5 +1,7 @@ #include "gl-param.h" +#include "regen/compute/vector.h" + namespace regen { template<> bool glParam(GLenum param) { auto &store = GLParameterStore::instance(); diff --git a/regen/gl/gl-param.h b/regen/gl/gl-param.h index 4cbe68ae60..a779868e25 100644 --- a/regen/gl/gl-param.h +++ b/regen/gl/gl-param.h @@ -4,8 +4,6 @@ #include #include -#include - namespace regen { // introduce a template function to get the value of a GL parameter template T glParam(GLenum param); diff --git a/regen/gl/gl-util.h b/regen/gl/gl-util.h index 34e06fe0fd..d1cfb2eec7 100644 --- a/regen/gl/gl-util.h +++ b/regen/gl/gl-util.h @@ -1,19 +1,8 @@ -/* - * gl-util.h - * - * Created on: 20.03.2011 - * Author: daniel - */ - #ifndef __GL_UTIL__ #define __GL_UTIL__ -#include - #include #include -#include -#include namespace regen { /** diff --git a/regen/gl/render-state.cpp b/regen/gl/render-state.cpp index 463f9a0223..3b436d2a26 100644 --- a/regen/gl/render-state.cpp +++ b/regen/gl/render-state.cpp @@ -2,6 +2,8 @@ #include "render-state.h" +#include "regen/utility/logging.h" + using namespace regen; #ifndef GL_DEBUG_OUTPUT @@ -446,7 +448,7 @@ GLenum RenderState::toggleToID(Toggle t) { return GL_NONE; } return GL_NONE; -}; +} namespace regen { std::ostream &operator<<(std::ostream &out, const RenderState::Toggle &mode) { diff --git a/regen/gl/states/atomic-states.h b/regen/gl/states/atomic-states.h index 3a752acce9..97eefe8270 100644 --- a/regen/gl/states/atomic-states.h +++ b/regen/gl/states/atomic-states.h @@ -392,7 +392,7 @@ namespace regen { */ class ClearState : public ServerSideState { public: - ClearState(const ref_ptr &fbo) + explicit ClearState(const ref_ptr &fbo) : ServerSideState(), fbo_(fbo) {} void addClearBit(GLbitfield clearBit) { diff --git a/regen/memory/aligned-allocator.h b/regen/memory/aligned-allocator.h index 
859628aa2c..e4ba0ac0ee 100644 --- a/regen/memory/aligned-allocator.h +++ b/regen/memory/aligned-allocator.h @@ -1,10 +1,8 @@ #ifndef REGEN_ALIGNED_ALLOCATOR_H_ #define REGEN_ALIGNED_ALLOCATOR_H_ -#include #include #include -#include namespace regen { /** diff --git a/regen/memory/aligned-array.h b/regen/memory/aligned-array.h index f58e118d18..a63e58d676 100644 --- a/regen/memory/aligned-array.h +++ b/regen/memory/aligned-array.h @@ -1,11 +1,9 @@ #ifndef REGEN_ALIGNED_ARRAY_H_ #define REGEN_ALIGNED_ARRAY_H_ -#include #include #include -#include -#include +#include namespace regen { /** diff --git a/regen/memory/bbox-buffer.h b/regen/memory/bbox-buffer.h index f89cab0a31..14144e7a65 100644 --- a/regen/memory/bbox-buffer.h +++ b/regen/memory/bbox-buffer.h @@ -2,7 +2,6 @@ #define REGEN_BOUNDING_BOX_BUFFER_H_ #include "ssbo.h" -#include "staging-buffer.h" #include "regen/shapes/bounds.h" namespace regen { diff --git a/regen/memory/position-reader.h b/regen/memory/position-reader.h index e03b85d909..2d3100f64a 100644 --- a/regen/memory/position-reader.h +++ b/regen/memory/position-reader.h @@ -60,7 +60,7 @@ namespace regen { private: ClientDataRaw_ro rawData_mat; ClientDataRaw_ro rawData_offset; - mutable Vec3f tmpPos_; + mutable Vec3f tmpPos_ = Vec3f::zero(); public: /** * The mapped data for reading. 
diff --git a/regen/memory/staging-buffer.cpp b/regen/memory/staging-buffer.cpp index 244298100e..e1b1f5b6d4 100644 --- a/regen/memory/staging-buffer.cpp +++ b/regen/memory/staging-buffer.cpp @@ -60,7 +60,7 @@ BufferPool* StagingBuffer::getStagingAllocator(BufferStorageMode storageMode) { stagingAllocator->set_index((int)storageMode); stagingAllocator->set_alignment(StagingSystem::STAGING_BUFFER_ALIGNMENT); stagingAllocator->set_minSize(8u * 1024u * 1024u); // 2048 pages = 8 MiB - bufferPools[(int)storageMode] = stagingAllocator; + bufferPools[static_cast(storageMode)] = stagingAllocator; } return stagingAllocator; } diff --git a/regen/memory/staging-buffer.h b/regen/memory/staging-buffer.h index 2eff96b7b0..8da308312b 100644 --- a/regen/memory/staging-buffer.h +++ b/regen/memory/staging-buffer.h @@ -79,7 +79,7 @@ namespace regen { * Set whether the buffer should swap on each access. * @param v true if the buffer should swap on each access, false otherwise. */ - void setSwappingOnAccess(bool v) { useSwappingOnAccess_ = v; }; + void setSwappingOnAccess(bool v) { useSwappingOnAccess_ = v; } /** * Set whether the buffer should clear its segments on resize. 
diff --git a/regen/objects/assimp-importer.cpp b/regen/objects/assimp-importer.cpp index 3630c5fe77..dcb4c53874 100644 --- a/regen/objects/assimp-importer.cpp +++ b/regen/objects/assimp-importer.cpp @@ -160,8 +160,8 @@ vector > AssetImporter::loadLights() { light->setPosition(0, *((Vec3f *) &lightPos.x)); light->setDirection(0, *((Vec3f *) &assimpLight->mDirection.x)); light->setConeAngles( - acos(assimpLight->mAngleOuterCone) * 360.0f / (2.0f * M_PI), - acos(assimpLight->mAngleInnerCone) * 360.0f / (2.0f * M_PI)); + acosf(assimpLight->mAngleOuterCone) * 360.0f / (2.0f * M_PIf), + acosf(assimpLight->mAngleInnerCone) * 360.0f / (2.0f * M_PIf)); setLightRadius(assimpLight, light); break; } diff --git a/regen/objects/assimp-importer.h b/regen/objects/assimp-importer.h index b839b438e7..12b04bab7f 100644 --- a/regen/objects/assimp-importer.h +++ b/regen/objects/assimp-importer.h @@ -1,10 +1,3 @@ -/* - * assimp-loader.h - * - * Created on: 24.10.2011 - * Author: daniel - */ - #ifndef ASSIMP_LOADER_H_ #define ASSIMP_LOADER_H_ @@ -12,10 +5,6 @@ #include #include -#include -#include -#include -#include #include #include diff --git a/regen/objects/composite-mesh.cpp b/regen/objects/composite-mesh.cpp index 1faf932b01..4ec6421593 100644 --- a/regen/objects/composite-mesh.cpp +++ b/regen/objects/composite-mesh.cpp @@ -21,6 +21,7 @@ #include "silhouette-mesh.h" #include "primitives/blanket.h" #include "primitives/cone.h" +#include "regen/animation/bones.h" #include "terrain/blanket-trail.h" #include "terrain/ground-path.h" @@ -811,7 +812,7 @@ void CompositeMesh::loadIndexRange( const ref_ptr &compositeMesh, std::queue, uint32_t> > &meshQueue, const std::string &prefix) { - auto indexRange = CompositeMesh::loadIndexRange(input, prefix); + std::vector indexRange = CompositeMesh::loadIndexRange(input, prefix); if (indexRange.empty()) { uint32_t idx = 0u; for (auto &it: compositeMesh->meshes()) { @@ -819,7 +820,7 @@ void CompositeMesh::loadIndexRange( } } else { for (auto &index: 
indexRange) { - if (index >= 0 && index < static_cast(compositeMesh->meshes().size())) { + if (index < static_cast(compositeMesh->meshes().size())) { meshQueue.push({compositeMesh->meshes()[index], index}); } else { REGEN_WARN("Ignoring " << input.getDescription() << ", invalid mesh index '" << index << "'."); diff --git a/regen/objects/lod/tessellation.cpp b/regen/objects/lod/tessellation.cpp index 70186909f0..e56ea7504b 100644 --- a/regen/objects/lod/tessellation.cpp +++ b/regen/objects/lod/tessellation.cpp @@ -20,7 +20,7 @@ struct Edge { namespace std { template <> struct hash { - std::size_t operator()(const Edge &e) const { + std::size_t operator()(const Edge &e) const noexcept { return std::hash()(e.v1) ^ std::hash()(e.v2); } }; diff --git a/regen/objects/lod/tessellation.h b/regen/objects/lod/tessellation.h index bcdc15aa5a..2d602c3321 100644 --- a/regen/objects/lod/tessellation.h +++ b/regen/objects/lod/tessellation.h @@ -16,7 +16,7 @@ namespace regen { */ TriangleVertex(const Vec3f &_p, const uint32_t &_i) : p(_p), i(_i) {} - TriangleVertex() : i(0) {} + TriangleVertex() : p(Vec3f::zero()), i(0) {} /** The vertex position. 
*/ Vec3f p; diff --git a/regen/objects/primitives/sphere.cpp b/regen/objects/primitives/sphere.cpp index 65dfdf3281..9c00e50c7b 100644 --- a/regen/objects/primitives/sphere.cpp +++ b/regen/objects/primitives/sphere.cpp @@ -51,7 +51,7 @@ Sphere::Config::Config() } static Vec3f computeSphereTangent(const Vec3f &v) { - Vec3f vAbs = Vec3f(abs(v.x), abs(v.y), abs(v.z)); + auto vAbs = Vec3f(abs(v.x), abs(v.y), abs(v.z)); Vec3f v_; if (1.0f - v.z < std::numeric_limits::epsilon()) { // there is a singularity at the back pole diff --git a/regen/objects/sky/lightning-bolt.h b/regen/objects/sky/lightning-bolt.h index 8213813446..99999e379b 100644 --- a/regen/objects/sky/lightning-bolt.h +++ b/regen/objects/sky/lightning-bolt.h @@ -24,9 +24,9 @@ namespace regen { const StrikePoint &target, const ref_ptr &alpha); - LightningStrike(const LightningStrike &) = default; + LightningStrike(const LightningStrike &) = delete; - LightningStrike &operator=(const LightningStrike &) = default; + LightningStrike &operator=(const LightningStrike &) = delete; /** * Sets the frequency of lightning strikes. diff --git a/regen/passes/filter.cpp b/regen/passes/filter.cpp index a729efa4dc..451b41dc01 100644 --- a/regen/passes/filter.cpp +++ b/regen/passes/filter.cpp @@ -299,7 +299,7 @@ void FilterSequence::enable(RenderState *rs) { resize(); if (clearFirstFilter_) { - Filter *firstFilter = (*filterSequence_.begin()).filter.get(); + Filter *firstFilter = filterSequence_.begin()->filter.get(); firstFilter->output()->fbo_->clearAllColorAttachments(clearColor_); } auto oldViewport = rs->viewport().current(); diff --git a/regen/regen.h b/regen/regen.h index fe4994f086..02eae0a094 100644 --- a/regen/regen.h +++ b/regen/regen.h @@ -29,5 +29,6 @@ namespace regen { static inline const T &getClamped(const std::span &vec, uint32_t idx) { return vec.size() <= idx ? 
vec[0] : vec[idx]; } -}; +} + #endif //REGEN_REGEN_H diff --git a/regen/scene/resource-manager.h b/regen/scene/resource-manager.h index 70cc6cd01a..cb899dd43c 100644 --- a/regen/scene/resource-manager.h +++ b/regen/scene/resource-manager.h @@ -82,8 +82,6 @@ namespace regen { */ ref_ptr getAsset(SceneLoader *parser, const std::string &id); - ref_ptr getState(SceneLoader *parser, const std::string &id); - /** * @param id the resource id. * @param cam A Camera instance. diff --git a/regen/scene/scene-input.cpp b/regen/scene/scene-input.cpp index ed6ff12622..142eacd182 100644 --- a/regen/scene/scene-input.cpp +++ b/regen/scene/scene-input.cpp @@ -122,7 +122,6 @@ list > SceneInputNode::getChildren(const string &categor ref_ptr SceneInputNode::getFirstChild(const string &category, const string &name) { const list > &children = getChildren(); - list > out; for (const auto& n : children) { if (n->getName() == name && n->getCategory() == category) return n; @@ -132,7 +131,6 @@ ref_ptr SceneInputNode::getFirstChild(const string &category, co ref_ptr SceneInputNode::getFirstChild(const string &category) { const list > &children = getChildren(); - list > out; for (const auto& n : children) { if (n->getCategory() == category) return n; } diff --git a/regen/shader/directive-processor.cpp b/regen/shader/directive-processor.cpp index 0e8a635490..103ec567cd 100644 --- a/regen/shader/directive-processor.cpp +++ b/regen/shader/directive-processor.cpp @@ -123,11 +123,11 @@ string DirectiveProcessor::MacroTree::define(const string &arg) { } else if (isNumber(arg)) { return arg; } else { - map::iterator it = defines_.find(arg); - if (it == defines_.end()) { + auto jt = defines_.find(arg); + if (jt == defines_.end()) { return arg; } else { - return it->second; + return jt->second; } } } diff --git a/regen/shader/input-schema.cpp b/regen/shader/input-schema.cpp index d24608cc5a..3c1d9ebff9 100644 --- a/regen/shader/input-schema.cpp +++ b/regen/shader/input-schema.cpp @@ -50,7 +50,7 @@ 
const InputSchema *InputSchema::unknown() { } const InputSchema *InputSchema::color() { - static const InputSchema *color_ = []() { + static const InputSchema *color_ = [] { auto *schema = new InputSchema(InputSchema::COLOR); schema->setLimits(0, 0, 1); schema->setLimits(1, 0, 1); diff --git a/regen/shader/io-processor.cpp b/regen/shader/io-processor.cpp index 609bb2c51e..97d97b496e 100644 --- a/regen/shader/io-processor.cpp +++ b/regen/shader/io-processor.cpp @@ -175,6 +175,7 @@ void IOProcessor::defineHandleIO(PreProcessorState &state) { " " << outName << " = " << inName << "[i];")); break; case GL_FRAGMENT_SHADER: + default: break; } @@ -283,7 +284,7 @@ void IOProcessor::declareSpecifiedInput(PreProcessorState &state) { stringstream val; val << io.dataType << "("; - (*in.get()).write(val); + in.get()->write(val); val << ")"; io.value = val.str(); } else if (in->isBufferBlock()) { @@ -549,7 +550,7 @@ bool IOProcessor::process(PreProcessorState &state, string &line) { stringstream val; val << io.dataType << "("; - (*in.get()).write(val); + in.get()->write(val); val << ")"; io.value = val.str(); } else { diff --git a/regen/shader/shader-state.cpp b/regen/shader/shader-state.cpp index 946aab4e6c..b6a01eb156 100644 --- a/regen/shader/shader-state.cpp +++ b/regen/shader/shader-state.cpp @@ -132,7 +132,7 @@ void ShaderState::enable(RenderState *rs) { ref_ptr ShaderState::findShader(State *s) { auto joined = s->joined(); for (auto it = joined->rbegin(); it != joined->rend(); ++it) { - ref_ptr out = findShader((*it).get()); + ref_ptr out = findShader(it->get()); if (out.get() != nullptr) return out; } diff --git a/regen/shader/shader.cpp b/regen/shader/shader.cpp index 866484a962..96693861e7 100644 --- a/regen/shader/shader.cpp +++ b/regen/shader/shader.cpp @@ -339,7 +339,7 @@ bool Shader::link() { } } -bool Shader::validate() { +bool Shader::validate() const { glValidateProgram(id()); int status; glGetProgramiv(id(), GL_VALIDATE_STATUS, &status); diff --git 
a/regen/shader/shader.h b/regen/shader/shader.h index 5732ffb151..0cb7d62e48 100644 --- a/regen/shader/shader.h +++ b/regen/shader/shader.h @@ -96,18 +96,13 @@ namespace regen { /** * @return true if the validation was successful. */ - bool validate(); + bool validate() const; /** * The program object. */ unsigned int id() const; - /** - * Returns true if the given name is a valid vertex attribute name. - */ - bool isAttribute(const std::string &name) const; - /** * Returns the locations for a given vertex attribute name or -1 if the name is not known. */ diff --git a/regen/shapes/batch-of-shapes.h b/regen/shapes/batch-of-shapes.h index 831df72172..7e22ad01b6 100644 --- a/regen/shapes/batch-of-shapes.h +++ b/regen/shapes/batch-of-shapes.h @@ -2,9 +2,7 @@ #define REGEN_BATCH_OF_SHAPES_H_ #include -#include #include -#include namespace regen { class BoundingShape; diff --git a/regen/shapes/frustum.cpp b/regen/shapes/frustum.cpp index 057856d106..7db9a9290a 100644 --- a/regen/shapes/frustum.cpp +++ b/regen/shapes/frustum.cpp @@ -172,7 +172,7 @@ void Frustum::split(double splitWeight, std::vector &frustumSplit) cons } Vec3f Frustum::closestPointOnSurface(const Vec3f &point) const { - Vec3f closestPoint; + Vec3f closestPoint = point; float minDistanceSqr = std::numeric_limits::max(); for (const auto &plane: planes) { diff --git a/regen/shapes/shape-processor.cpp b/regen/shapes/shape-processor.cpp index 10f7177780..25c2f9e9b6 100644 --- a/regen/shapes/shape-processor.cpp +++ b/regen/shapes/shape-processor.cpp @@ -133,8 +133,6 @@ createTriangleMesh(SceneInputNode &input, const ref_ptr &mesh) { indexType = PHY_UCHAR; break; case GL_UNSIGNED_INT: - indexType = PHY_INTEGER; - break; default: indexType = PHY_INTEGER; break; From addd46b7791cf2b88a8ca3885e3d32537a8049eb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20Be=C3=9Fler?= Date: Fri, 2 Jan 2026 18:47:15 +0100 Subject: [PATCH 09/23] Code cleanup --- regen/shapes/cull-shape.cpp | 2 +- regen/shapes/cull-shape.h | 6 
------ 2 files changed, 1 insertion(+), 7 deletions(-) diff --git a/regen/shapes/cull-shape.cpp b/regen/shapes/cull-shape.cpp index 995a8cd474..a215187af5 100644 --- a/regen/shapes/cull-shape.cpp +++ b/regen/shapes/cull-shape.cpp @@ -36,7 +36,7 @@ void CullShape::initCullShape( } } boundingShape_ = boundingShape; - numInstances_ = boundingShape->numInstances(); + set_numInstances(boundingShape->numInstances()); } ref_ptr CullShape::getIndirectDrawBuffer(const ref_ptr &mesh) const { diff --git a/regen/shapes/cull-shape.h b/regen/shapes/cull-shape.h index 0f7c004dcf..155a49a08b 100644 --- a/regen/shapes/cull-shape.h +++ b/regen/shapes/cull-shape.h @@ -35,11 +35,6 @@ namespace regen { const ref_ptr &boundingShape, std::string_view shapeName); - /** - * @return the number of instances of this shape. - */ - uint32_t numInstances() const { return numInstances_; } - /** * @return the name of the shape. */ @@ -122,7 +117,6 @@ namespace regen { protected: std::string shapeName_; std::vector> parts_; - uint32_t numInstances_ = 1u; ref_ptr spatialIndex_; ref_ptr boundingShape_; SortMode instanceSortMode_ = SortMode::FRONT_TO_BACK; From 9f6f818e4698174fdba2f292d4899b8107ea1b79 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20Be=C3=9Fler?= Date: Fri, 2 Jan 2026 18:50:02 +0100 Subject: [PATCH 10/23] Default to alignment based on SIMD register width --- regen/memory/aligned-array.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/regen/memory/aligned-array.h b/regen/memory/aligned-array.h index a63e58d676..91fcf30b68 100644 --- a/regen/memory/aligned-array.h +++ b/regen/memory/aligned-array.h @@ -4,6 +4,8 @@ #include #include #include +#include +#include namespace regen { /** @@ -90,7 +92,7 @@ namespace regen { uint32_t capacity_; uint32_t allocatedSize_ = 0; - static constexpr size_t Alignment = 32; + static constexpr size_t Alignment = simd::RegisterWidth * 4; void allocate(size_t count) { const size_t totalSize = count * sizeof(T); From 
1c15a40d664092f7460705c86b78ceff8a48e2b0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20Be=C3=9Fler?= Date: Fri, 2 Jan 2026 21:45:32 +0100 Subject: [PATCH 11/23] minor --- applications/mesh-viewer/mesh-viewer-widget.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/applications/mesh-viewer/mesh-viewer-widget.cpp b/applications/mesh-viewer/mesh-viewer-widget.cpp index 029c8ae470..731019e40f 100644 --- a/applications/mesh-viewer/mesh-viewer-widget.cpp +++ b/applications/mesh-viewer/mesh-viewer-widget.cpp @@ -20,6 +20,8 @@ #include #include +#include "regen/animation/bones.h" + using namespace std; // Resizes Framebuffer texture when the window size changed @@ -492,12 +494,15 @@ void MeshViewerWidget::gl_loadScene() { sceneRoot_->state()->joinStates(blit); GL_ERROR_LOG(); + app_->initializeScene(); + // resize fbo with window app_->connect(Scene::RESIZE_EVENT, ref_ptr::alloc(fboState)); // Update frustum when window size changes app_->connect(Scene::RESIZE_EVENT, ref_ptr::alloc(userCamera_, app_->screen())); + AnimationManager::get().resetTime(); AnimationManager::get().resume(); REGEN_INFO("Scene Loaded."); createCameraController(); From 8ea96c0aa4ac650ea1bb85dc736e1f3a35cbf4bb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20Be=C3=9Fler?= Date: Fri, 2 Jan 2026 22:35:04 +0100 Subject: [PATCH 12/23] fix: call updateBuffers on camera --- applications/mesh-viewer/mesh-viewer-widget.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/applications/mesh-viewer/mesh-viewer-widget.cpp b/applications/mesh-viewer/mesh-viewer-widget.cpp index 731019e40f..6c9ff50a10 100644 --- a/applications/mesh-viewer/mesh-viewer-widget.cpp +++ b/applications/mesh-viewer/mesh-viewer-widget.cpp @@ -390,6 +390,7 @@ static ref_ptr createUserCamera(const Vec2i &viewport) { cam->setPosition(0, Vec3f(0.0f, 0.0f, -3.0f)); cam->setDirection(0, Vec3f(0.0f, 0.0f, 1.0f)); cam->setPerspective(aspect, 45.0f, 0.1f, 100.0f); + cam->updateBuffers(); cam->updateCamera(); 
cam->updateShaderData(0.0f); return cam; From 97e5b3539c26cdfc7e2e691e8af6c2d1aa46845e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20Be=C3=9Fler?= Date: Sat, 3 Jan 2026 12:37:19 +0100 Subject: [PATCH 13/23] Use uniform size of ring buffers in staging system --- regen/memory/staging-system.cpp | 193 ++++++++++++++++++++++---------- regen/memory/staging-system.h | 11 +- 2 files changed, 141 insertions(+), 63 deletions(-) diff --git a/regen/memory/staging-system.cpp b/regen/memory/staging-system.cpp index 6aaabf7cd9..86fba1e8bc 100644 --- a/regen/memory/staging-system.cpp +++ b/regen/memory/staging-system.cpp @@ -28,6 +28,9 @@ float StagingSystem::MAX_COOLDOWN_NEVER_WRITE = 2000.0f; namespace regen { static constexpr bool STAGING_DEBUG_STALLS = false; static constexpr bool STAGING_EXPLICIT_FLUSH = false; + // Use uniform ring buffer sizes for all arenas. + // This allows to reduce number of fences for synchronization. + static constexpr bool STAGING_UNIFORM_RING_SIZE = true; static constexpr bool STAGING_DEBUG_TIME = false; static constexpr bool STAGING_DEBUG_STATISTICS = false; @@ -60,7 +63,7 @@ namespace regen { // plus some extra space to handle dynamic allocation of segments without resizing. uint32_t alignedSize = 0; // the current number of segments in the ring buffer - uint32_t numRingSegments = 2; + uint32_t numRingSegments = 1; // indicates if the arena has new CPU data to flush bool isDirty = false; // for rare updates, we use a cooldown to avoid updating too often. @@ -79,8 +82,6 @@ namespace regen { // the ranges are relative to the buffer segments, i.e. the same offset applies to all. 
ref_ptr freeList; - static Arena *create(ArenaType arenaType, ClientAccessMode accessMode); - static void setStagingOffset(ManagedBO &managed, uint32_t offset, uint32_t size); void sort(); @@ -103,6 +104,7 @@ namespace regen { StagingSystem::StagingSystem() : arenas_() { + ringFences_.resize(numRingSegments_); if (STAGING_RANGE_ALIGNMENT == 0) { // make sure to meet all alignment requirements STAGING_RANGE_ALIGNMENT = std::max(16u, @@ -208,7 +210,7 @@ StagingSystem::Arena *StagingSystem::addBufferBlock_writeOnly( return nullptr; } -StagingSystem::Arena *StagingSystem::Arena::create(ArenaType arenaType, ClientAccessMode accessMode) { +StagingSystem::Arena *StagingSystem::createArena(ArenaType arenaType, ClientAccessMode accessMode) const { auto *arena = new Arena(); arena->type = arenaType; arena->flags.accessMode = accessMode; @@ -219,13 +221,6 @@ StagingSystem::Arena *StagingSystem::Arena::create(ArenaType arenaType, ClientAc // the maximum number of segments in the ring buffer uint32_t maxRingSegments = 16; - // TODO: Special attention is needed for synchronization of different per-frame buffers when they - // have different number of buffer segments! Though it does not seem to be a problem so far... - // - the easiest way would be to use same number of segments for all per-frame buffers. - // - in some cases it could be useful to skip frames of buffers with less segments, - // but then we would get into synchronization issues. - // - but often it might not matter, i.e. in case there are no data dependencies. - // probably this should be modeled and taken into account here! 
switch (arenaType) { case WRITE_PER_FRAME_SMALL_DATA: // PER-FRAME updated SMALL to MEDIUM Staging + LARGE @@ -242,8 +237,13 @@ StagingSystem::Arena *StagingSystem::Arena::create(ArenaType arenaType, ClientAc arena->flags.mapMode = BUFFER_MAP_PERSISTENT_COHERENT; } arena->flags.bufferingMode = RING_BUFFER; - arena->numRingSegments = 3; - maxRingSegments = 16; + if constexpr(STAGING_UNIFORM_RING_SIZE) { + arena->numRingSegments = numRingSegments_; + maxRingSegments = maxRingSegments_; + } else { + arena->numRingSegments = 3; + maxRingSegments = 16; + } break; case WRITE_PER_FRAME_LARGE_DATA: arena->flags.updateHints.frequency = BUFFER_UPDATE_PER_FRAME; @@ -254,8 +254,13 @@ StagingSystem::Arena *StagingSystem::Arena::create(ArenaType arenaType, ClientAc arena->flags.mapMode = BUFFER_MAP_PERSISTENT_COHERENT; } arena->flags.bufferingMode = RING_BUFFER; - arena->numRingSegments = 2; - maxRingSegments = 4; + if constexpr(STAGING_UNIFORM_RING_SIZE) { + arena->numRingSegments = numRingSegments_; + maxRingSegments = maxRingSegments_; + } else { + arena->numRingSegments = 2; + maxRingSegments = 4; + } break; case WRITE_PER_FRAME_HUGE_DATA: // PER-FRAME (or PER-DRAW) updated VERY-LARGE Staging @@ -303,8 +308,13 @@ StagingSystem::Arena *StagingSystem::Arena::create(ArenaType arenaType, ClientAc arena->flags.updateHints.scope = BUFFER_UPDATE_PARTIALLY; arena->flags.mapMode = BUFFER_MAP_PERSISTENT_COHERENT; arena->flags.bufferingMode = RING_BUFFER; - arena->numRingSegments = 2; - maxRingSegments = 16; + if constexpr(STAGING_UNIFORM_RING_SIZE) { + arena->numRingSegments = numRingSegments_; + maxRingSegments = maxRingSegments_; + } else { + arena->numRingSegments = 2; + maxRingSegments = 16; + } break; case READ_RARELY: arena->flags.updateHints.frequency = BUFFER_UPDATE_RARE; @@ -332,7 +342,7 @@ StagingSystem::Arena *StagingSystem::Arena::create(ArenaType arenaType, ClientAc StagingSystem::Arena *StagingSystem::addToArena(const BlockPtr &block, ArenaType arenaType, bool isMoved) { 
if (!arenas_[arenaType]) { - arenas_[arenaType] = Arena::create(arenaType, block->stagingFlags().accessMode); + arenas_[arenaType] = createArena(arenaType, block->stagingFlags().accessMode); } auto &targetArena = arenas_[arenaType]; // Note: the buffer will reserve the required size in the next loop of updateRequiredSize @@ -466,6 +476,7 @@ void StagingSystem::updateData(float dt_ms) { //copyInProgress_.store(true, std::memory_order_release); if constexpr(STAGING_DEBUG_TIME) { elapsedTime().beginFrame(); + elapsedTime().push("updateData"); } if constexpr(STAGING_DEBUG_STATISTICS) { stats_.numDirtyArenas = 0; @@ -474,9 +485,75 @@ void StagingSystem::updateData(float dt_ms) { stats_.numTotalBOs = 0; } + bool needsRotate = false; + + if constexpr(STAGING_UNIFORM_RING_SIZE) { + auto &writeFence = ringFences_[writeBufferIndex_]; + // Wait for the fence in case of persistent mapped arenas. + // This might block the CPU in case of the last write into this segment + // has not been consumed by the GPU yet. + // Note that the interaction with the fence is quite expensive, + // so it should only be done when really needed. + writeFence.wait(); + + // Check if there is too much stall in the ring buffers. + // If so, we try to increase the number of segments in the ring buffers. + const float stallRate = writeFence.getStallRate(); + if (stallRate > StagingBuffer::MAX_ACCEPTABLE_STALL_RATE) { + // if the stall rate is too high, we need to increase the number of segments in the ring buffer. + // this will be done in resize() function. 
+ const uint32_t newNumSegments = std::min(numRingSegments_ + 1u, maxRingSegments_); + if (newNumSegments != numRingSegments_) { + REGEN_INFO("High stall rate (" << stallRate << ") detected" + << ", increasing to " << newNumSegments); + ringFences_.resize(newNumSegments); + numRingSegments_ = newNumSegments; + needsRotate = true; + } + for (auto &fence : ringFences_) { + fence.resetStallHistory(); + } + } + } + if constexpr(STAGING_DEBUG_TIME) { + elapsedTime().push("fence waited"); + } + + // Update each arena. + // NOTE: There could be glitches when ring sizes are not uniform as arenas could be + // out of sync then. But it did not seem to be a problem so far. for (uint32_t arenaIdx = 0; arenaIdx < ARENA_TYPE_LAST; arenaIdx++) { updateArenaData(dt_ms, static_cast(arenaIdx)); } + if constexpr(STAGING_DEBUG_TIME) { + elapsedTime().push("arenas updated"); + } + + if constexpr(STAGING_UNIFORM_RING_SIZE) { + // Mark the point where we finished writing into the current write buffer segment. + // This fence will be waited upon in the next frame before writing into this segment again. + ringFences_[readBufferIndex_].setFencePoint(); + + // Rotate ring buffers in case the number of segments changed. + // This is done to ensure that all segments have up-to-date data after the change. + if (needsRotate) { + rotateBuffers(); + } + + // Advance to next fence index. 
+ readBufferIndex_ += 1; + if (readBufferIndex_ >= numRingSegments_) { + readBufferIndex_ = 0; + } + writeBufferIndex_ += 1; + if (writeBufferIndex_ >= numRingSegments_) { + writeBufferIndex_ = 0; + } + } + + if constexpr(STAGING_DEBUG_TIME) { + elapsedTime().push("fence set"); + } if constexpr (!ANIMATION_THREAD_SWAPS_CLIENT_BUFFERS) { // finally, swap client buffers @@ -509,9 +586,6 @@ void StagingSystem::updateArenaData(float dt_ms, ArenaType arenaType) { arena->resize(); arena->sort(); } - if constexpr(STAGING_DEBUG_TIME) { - elapsedTime().push(REGEN_STRING(arena->type << " resized")); - } if (!arena->flags.isReadable() && !arena->isDirty) { // early exit writing arenas before fencing in case of no updates. return; @@ -530,20 +604,15 @@ void StagingSystem::updateArenaData(float dt_ms, ArenaType arenaType) { // GPU write buffers as dirty -- but must be careful with syncing then! const bool forceUpdate = arena->flags.isReadable(); - // Wait for the fence in case of persistent mapped arenas. - // This might block the CPU in case of the last write into this segment - // has not been consumed by the GPU yet. - // TODO: The interaction with the fence still consumes a lot of CPU time. - // - The main bottleneck now seems *setFencePoint*. Reason might be that - // we do glDeleteSync/glFenceSync calls every time setFencePoint is called. - // - As far as I know, we cannot re-use fences across frames. - // - Maybe the only way to improve would be to reduce the number of fences. - // - Idea: Let arenas share fences. However, this is difficult because arenas - // currently may have ring buffers of different sizes. - // - Maybe the mechanism can be adjusted such that we do not need a fence every - // frame for every arena. - if (useFence) { - arena->stagingBuffer->fence(copyIdx).wait(); + if constexpr(!STAGING_UNIFORM_RING_SIZE) { + // Wait for the fence in case of persistent mapped arenas. 
+ // This might block the CPU in case of the last write into this segment + // has not been consumed by the GPU yet. + // Note that the interaction with the fence is quite expensive, + // so it should only be done when really needed. + if (useFence) { + arena->stagingBuffer->fence(copyIdx).wait(); + } } // Copy data from CPU to staging to draw buffer, @@ -576,27 +645,20 @@ void StagingSystem::updateArenaData(float dt_ms, ArenaType arenaType) { //REGEN_INFO("Scheduled copy " << copy); } numScheduledCopies_ = 0; // reset scheduled copies - if constexpr(STAGING_DEBUG_TIME) { - elapsedTime().push(REGEN_STRING(arena->type << " copied")); - } - // Create a fence just after glCopyNamedBufferSubData -- marking the point where the - // written data of this frame has been consumed by the GPU. - if (useFence) { - arena->stagingBuffer->fence(drawIdx).setFencePoint(); - } - if constexpr(STAGING_DEBUG_TIME) { - elapsedTime().push(REGEN_STRING(arena->type << " synced")); + if constexpr(!STAGING_UNIFORM_RING_SIZE) { + // Create a fence just after glCopyNamedBufferSubData -- marking the point where the + // written data of this frame has been consumed by the GPU. + if (useFence) { + arena->stagingBuffer->fence(drawIdx).setFencePoint(); + } } // Advance to next segment in case of multi-buffering and ring buffers. arena->stagingBuffer->swapBuffers(); arena->isDirty = false; // reset dirty flag - if constexpr(STAGING_DEBUG_TIME) { - elapsedTime().push(REGEN_STRING(arena->type << " swapped")); - } - if constexpr(STAGING_DEBUG_STALLS) { + if constexpr(STAGING_DEBUG_STALLS && !STAGING_UNIFORM_RING_SIZE) { if (useFence) { REGEN_INFO("Arena " << arena->type << " stall rate: " << arena->stagingBuffer->fence(copyIdx).getStallRate()); @@ -795,21 +857,28 @@ bool StagingSystem::updateArenaSize(Arena *arena) { return true; // size changed } - // size did not change, next check if there is too much stall in the ring buffer. 
- const uint32_t copyIdx = arena->stagingBuffer->nextWriteIndex(); - const float stallRate = arena->stagingBuffer->fence(copyIdx).getStallRate(); - if (stallRate > StagingBuffer::MAX_ACCEPTABLE_STALL_RATE) { - // if the stall rate is too high, we need to increase the number of segments in the ring buffer. - // this will be done in resize() function. - uint32_t newNumSegments = std::min(arena->numRingSegments + 1u, arena->stagingBuffer->maxRingSegments()); - if (newNumSegments != arena->numRingSegments) { - REGEN_INFO("High stall rate (" << stallRate << ") detected in \"" << arena->type - << "\" arena with " << arena->numRingSegments << " segments" - << ", increasing to " << newNumSegments); - arena->numRingSegments = newNumSegments; - arena->stagingBuffer->resetStallRate(); + if constexpr(STAGING_UNIFORM_RING_SIZE) { + if (arena->numRingSegments>1 && arena->numRingSegments!=numRingSegments_) { + arena->numRingSegments = numRingSegments_; return true; // size changed } + } else { + // size did not change, next check if there is too much stall in the ring buffer. + const uint32_t copyIdx = arena->stagingBuffer->nextWriteIndex(); + const float stallRate = arena->stagingBuffer->fence(copyIdx).getStallRate(); + if (stallRate > StagingBuffer::MAX_ACCEPTABLE_STALL_RATE) { + // if the stall rate is too high, we need to increase the number of segments in the ring buffer. + // this will be done in resize() function. 
+ uint32_t newNumSegments = std::min(arena->numRingSegments + 1u, arena->stagingBuffer->maxRingSegments()); + if (newNumSegments != arena->numRingSegments) { + REGEN_INFO("High stall rate (" << stallRate << ") detected in \"" << arena->type + << "\" arena with " << arena->numRingSegments << " segments" + << ", increasing to " << newNumSegments); + arena->numRingSegments = newNumSegments; + arena->stagingBuffer->resetStallRate(); + return true; // size changed + } + } } return false; diff --git a/regen/memory/staging-system.h b/regen/memory/staging-system.h index c15f38d7dc..7cd161c51d 100644 --- a/regen/memory/staging-system.h +++ b/regen/memory/staging-system.h @@ -182,6 +182,13 @@ namespace regen { std::vector scheduledCopies_; uint32_t numScheduledCopies_ = 0; + // Used only if ring size is uniform among arenas: + uint32_t numRingSegments_ = 2; + uint32_t maxRingSegments_ = 4; + std::vector ringFences_; // size: numRingSegments_ + uint32_t readBufferIndex_ = 0u; // < numRingSegments_ + uint32_t writeBufferIndex_ = 1u; // < numRingSegments_ + Arena *addBufferBlock_readOnly( const BlockPtr &block, const BufferFlags &flags, @@ -202,6 +209,8 @@ namespace regen { void updateArenaData(float dt_ms, ArenaType arenaType); + Arena *createArena(ArenaType arenaType, ClientAccessMode accessMode) const; + struct StagingStatistics { uint32_t numDirtyArenas = 0; uint32_t numDirtyBOs = 0; @@ -210,7 +219,7 @@ namespace regen { uint32_t numSwapCopies = 0; } stats_; - static ElapsedTimeDebugger elapsedTime() { + static ElapsedTimeDebugger& elapsedTime() { static ElapsedTimeDebugger x("Staging System", 300); return x; } From 61985636bb1aacaaf32bf8b96a0b7abb3cd5b3f3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20Be=C3=9Fler?= Date: Sat, 3 Jan 2026 14:33:30 +0100 Subject: [PATCH 14/23] Use uniform size of ring buffers in staging system --- regen/memory/staging-system.cpp | 2 ++ regen/memory/staging-system.h | 2 ++ 2 files changed, 4 insertions(+) diff --git 
a/regen/memory/staging-system.cpp b/regen/memory/staging-system.cpp index 86fba1e8bc..c4133f5cea 100644 --- a/regen/memory/staging-system.cpp +++ b/regen/memory/staging-system.cpp @@ -30,6 +30,8 @@ namespace regen { static constexpr bool STAGING_EXPLICIT_FLUSH = false; // Use uniform ring buffer sizes for all arenas. // This allows to reduce number of fences for synchronization. + // For some reason, does not seem to make a huge difference in practice though + // but in theory it should help a bit with reducing fence interactions. static constexpr bool STAGING_UNIFORM_RING_SIZE = true; static constexpr bool STAGING_DEBUG_TIME = false; static constexpr bool STAGING_DEBUG_STATISTICS = false; diff --git a/regen/memory/staging-system.h b/regen/memory/staging-system.h index 7cd161c51d..f457cc4567 100644 --- a/regen/memory/staging-system.h +++ b/regen/memory/staging-system.h @@ -186,6 +186,8 @@ namespace regen { uint32_t numRingSegments_ = 2; uint32_t maxRingSegments_ = 4; std::vector ringFences_; // size: numRingSegments_ + // read index is one behind as we set fence point for read slot, + // then the write index has max gap to it to avoid stalls. 
uint32_t readBufferIndex_ = 0u; // < numRingSegments_ uint32_t writeBufferIndex_ = 1u; // < numRingSegments_ From c3c8bd57c17211bef4eb12ca2eb89f7c1641b0c6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20Be=C3=9Fler?= Date: Sat, 3 Jan 2026 18:32:19 +0100 Subject: [PATCH 15/23] removed useless code --- regen/objects/volume.glsl | 2 -- 1 file changed, 2 deletions(-) diff --git a/regen/objects/volume.glsl b/regen/objects/volume.glsl index c8b4b1254d..1003a1bd51 100644 --- a/regen/objects/volume.glsl +++ b/regen/objects/volume.glsl @@ -197,7 +197,6 @@ void main() { if(lookAheadValue > value) { dst = volumeTransfer(lookAheadValue); value = lookAheadValue; - i = j; // advance main loop } } break; @@ -320,7 +319,6 @@ void main() { if(lookAheadValue > value) { density = lookAheadValue; value = lookAheadValue; - i = j; // advance main loop } else { break; } From 78ad01f58cec7341d1d90eaa7989f1f69cdb2b3c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20Be=C3=9Fler?= Date: Sat, 3 Jan 2026 18:32:29 +0100 Subject: [PATCH 16/23] improved error handling --- regen/av/video-recorder.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/regen/av/video-recorder.cpp b/regen/av/video-recorder.cpp index af1f28683b..b34c40d5e7 100644 --- a/regen/av/video-recorder.cpp +++ b/regen/av/video-recorder.cpp @@ -92,10 +92,11 @@ void VideoRecorder::initialize() { if (formatCtx_->oformat->flags & AVFMT_GLOBALHEADER) { codecCtx_->flags |= AV_CODEC_FLAG_GLOBAL_HEADER; } - if (avcodec_open2(codecCtx_, codec, nullptr) < 0) { + int status = avcodec_open2(codecCtx_, codec, nullptr); + if (status < 0) { // print error message - char err_buf[AV_ERROR_MAX_STRING_SIZE]{}; - auto err_type = AVERROR(errno); + char err_buf[AV_ERROR_MAX_STRING_SIZE] = {}; + auto err_type = AVERROR(status); av_make_error_string(err_buf, AV_ERROR_MAX_STRING_SIZE, err_type); throw std::runtime_error(REGEN_STRING("Could not open codec: " << err_buf << " (" << err_type << ")")); } From 
0392423d94eae069ca89b1a486a16c7c2bc6a461 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20Be=C3=9Fler?= Date: Sat, 3 Jan 2026 18:32:55 +0100 Subject: [PATCH 17/23] fixed typos --- regen/memory/staging-buffer.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/regen/memory/staging-buffer.cpp b/regen/memory/staging-buffer.cpp index e1b1f5b6d4..01d716790d 100644 --- a/regen/memory/staging-buffer.cpp +++ b/regen/memory/staging-buffer.cpp @@ -186,7 +186,7 @@ void StagingBuffer::resetStallRate() { } void StagingBuffer::pushToFlushQueue(const BufferRange2ui *dirtySegments, uint32_t numDirtySegments) { - if constexpr(REGEN_STAGING_USE_DIRECT_FLUSHING) { + if constexpr(!REGEN_STAGING_USE_DIRECT_FLUSHING) { if (flags_.mapMode == BUFFER_MAP_PERSISTENT_FLUSH) { RingSegment &writeSegment = bufferSegments_[writeBufferIndex_]; const uint32_t totalDirtySegments = writeSegment.numDirtySegments + numDirtySegments; @@ -328,7 +328,7 @@ void StagingBuffer::endMappedWrite( } if (flags_.useExplicitStaging()) { - if constexpr(REGEN_STAGING_USE_DIRECT_FLUSHING) { + if constexpr(!REGEN_STAGING_USE_DIRECT_FLUSHING) { // Make sure the last write to current readBuffer is flushed before we copy the data. 
if (accessFlags_ & MAP_FLUSH_EXPLICIT) { for (uint32_t flushIdx = 0; flushIdx < readSegment.numDirtySegments; ++flushIdx) { From cd15feab161647650e0bc20b33eda0b8245377ee Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20Be=C3=9Fler?= Date: Sat, 3 Jan 2026 18:33:13 +0100 Subject: [PATCH 18/23] minor improvements --- regen/av/demuxer.cpp | 10 +++++++--- regen/shader/io-processor.cpp | 2 ++ 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/regen/av/demuxer.cpp b/regen/av/demuxer.cpp index 7dcc2dac1a..51f86b6074 100644 --- a/regen/av/demuxer.cpp +++ b/regen/av/demuxer.cpp @@ -18,16 +18,20 @@ static void avLogCallback(void *, int level, const char *msg, va_list args) { int count = vsprintf(buffer, msg, args); buffer[count - 1] = '\0'; switch (level) { - case AV_LOG_ERROR: REGEN_ERROR(buffer); + case AV_LOG_ERROR: + REGEN_ERROR(buffer); break; - case AV_LOG_INFO: REGEN_INFO(buffer); + case AV_LOG_INFO: + REGEN_INFO(buffer); break; case AV_LOG_DEBUG: //REGEN_DEBUG(buffer); break; - case AV_LOG_WARNING: REGEN_WARN(buffer); + case AV_LOG_WARNING: + REGEN_WARN(buffer); break; default: + REGEN_DEBUG(buffer); break; } } diff --git a/regen/shader/io-processor.cpp b/regen/shader/io-processor.cpp index 97d97b496e..ef993fca21 100644 --- a/regen/shader/io-processor.cpp +++ b/regen/shader/io-processor.cpp @@ -175,7 +175,9 @@ void IOProcessor::defineHandleIO(PreProcessorState &state) { " " << outName << " = " << inName << "[i];")); break; case GL_FRAGMENT_SHADER: + break; default: + REGEN_WARN("Unhandled shader stage '" << state.currStage << "'."); break; } From 39fce3e32941f837f32fbfba31e3ae39e491fd5e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20Be=C3=9Fler?= Date: Sat, 3 Jan 2026 22:43:50 +0100 Subject: [PATCH 19/23] improved error handling --- regen/av/video-recorder.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/regen/av/video-recorder.cpp b/regen/av/video-recorder.cpp index b34c40d5e7..02052fc60a 100644 --- 
a/regen/av/video-recorder.cpp +++ b/regen/av/video-recorder.cpp @@ -96,9 +96,8 @@ void VideoRecorder::initialize() { if (status < 0) { // print error message char err_buf[AV_ERROR_MAX_STRING_SIZE] = {}; - auto err_type = AVERROR(status); - av_make_error_string(err_buf, AV_ERROR_MAX_STRING_SIZE, err_type); - throw std::runtime_error(REGEN_STRING("Could not open codec: " << err_buf << " (" << err_type << ")")); + av_make_error_string(err_buf, AV_ERROR_MAX_STRING_SIZE, status); + throw std::runtime_error(REGEN_STRING("Could not open codec: " << err_buf << " (" << status << ")")); } stream_ = avformat_new_stream(formatCtx_, codec); From 1b442626d2c596a2f64adc93efb555734985fe9d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20Be=C3=9Fler?= Date: Sat, 3 Jan 2026 22:44:49 +0100 Subject: [PATCH 20/23] added deletePoolMemory function --- regen/memory/memory-allocator.h | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/regen/memory/memory-allocator.h b/regen/memory/memory-allocator.h index 86be461f49..5428f7200c 100644 --- a/regen/memory/memory-allocator.h +++ b/regen/memory/memory-allocator.h @@ -69,6 +69,13 @@ namespace regen { index_(0) {} ~AllocatorPool() { + deletePoolMemory(); + } + + /** + * Free all allocated memory. 
+ */ + void deletePoolMemory() { poolLock_.lock(); for (Node *n = allocators_; n != nullptr;) { Node *buf = n; From 9612784a6511356400fecc7ba8da6c643fb5f29c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20Be=C3=9Fler?= Date: Sat, 3 Jan 2026 22:45:38 +0100 Subject: [PATCH 21/23] avoid repeated flushing of fence --- regen/gl/gpu-fence.cpp | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/regen/gl/gpu-fence.cpp b/regen/gl/gpu-fence.cpp index f9a47f51e6..17e0fd8050 100644 --- a/regen/gl/gpu-fence.cpp +++ b/regen/gl/gpu-fence.cpp @@ -62,6 +62,11 @@ bool GPUFence::wait(bool allowFrameDropping) { } // poll the fence status GLenum status = glClientWaitSync(fence_, GL_SYNC_FLUSH_COMMANDS_BIT, 0); + if (status == GL_WAIT_FAILED) { + GL_ERROR_LOG(); + return false; + } + if (allowFrameDropping) { if (status == GL_TIMEOUT_EXPIRED) { setStalledFrame(true); @@ -74,8 +79,7 @@ bool GPUFence::wait(bool allowFrameDropping) { // Note: we use a wait timeout here to avoid wasting too much time in the loop. 
setStalledFrame(status == GL_TIMEOUT_EXPIRED); while (status == GL_TIMEOUT_EXPIRED) { - status = glClientWaitSync(fence_, - GL_SYNC_FLUSH_COMMANDS_BIT, WAIT_TIMEOUT); + status = glClientWaitSync(fence_, 0, WAIT_TIMEOUT); } } From 54ed55963ceb801c274a3797ea5e010345266762 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20Be=C3=9Fler?= Date: Sat, 3 Jan 2026 23:02:28 +0100 Subject: [PATCH 22/23] added resetMemoryPools method --- regen/memory/staging-buffer.cpp | 16 +++++++++++++++- regen/memory/staging-buffer.h | 7 +++++++ 2 files changed, 22 insertions(+), 1 deletion(-) diff --git a/regen/memory/staging-buffer.cpp b/regen/memory/staging-buffer.cpp index 01d716790d..e79639c41e 100644 --- a/regen/memory/staging-buffer.cpp +++ b/regen/memory/staging-buffer.cpp @@ -52,8 +52,13 @@ BufferSizeClass StagingBuffer::getBufferSizeClass(uint32_t size) { } } -BufferPool* StagingBuffer::getStagingAllocator(BufferStorageMode storageMode) { +BufferPool **StagingBuffer::stagingPools() { static std::array bufferPools; + return bufferPools.data(); +} + +BufferPool* StagingBuffer::getStagingAllocator(BufferStorageMode storageMode) { + auto *bufferPools = StagingBuffer::stagingPools(); BufferPool *stagingAllocator = bufferPools[(int)storageMode]; if (stagingAllocator == nullptr) { stagingAllocator = new BufferPool(); @@ -65,6 +70,15 @@ BufferPool* StagingBuffer::getStagingAllocator(BufferStorageMode storageMode) { return stagingAllocator; } +void StagingBuffer::resetMemoryPools() { + // delete all memory pools, they are re-created on demand + auto *bufferPools = StagingBuffer::stagingPools(); + for (int i = 0; i < (int) BUFFER_STORAGE_MODE_LAST; ++i) { + delete bufferPools[i]; + bufferPools[i] = nullptr; + } +} + bool StagingBuffer::resizeBuffer(uint32_t segmentSize, uint32_t numRingSegments) { uint32_t numSegments = (flags_.bufferingMode == RING_BUFFER ? 
numRingSegments : diff --git a/regen/memory/staging-buffer.h b/regen/memory/staging-buffer.h index 8da308312b..dc6b746a8c 100644 --- a/regen/memory/staging-buffer.h +++ b/regen/memory/staging-buffer.h @@ -70,6 +70,11 @@ namespace regen { // delete copy constructor StagingBuffer(const StagingBuffer &) = delete; + /** + * Delete allocated memory pools for staging buffers. + */ + static void resetMemoryPools(); + /** * @return the buffer flags used for staging. */ @@ -317,6 +322,8 @@ namespace regen { uint32_t readBufferIndex_ = 0u; uint32_t writeBufferIndex_ = 0u; + static BufferPool **stagingPools(); + static BufferPool *getStagingAllocator(BufferStorageMode storageMode); static byte *getMappedSegment( From 2b955f2eedcb7181484b1c648de9fc196d723abc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20Be=C3=9Fler?= Date: Sat, 3 Jan 2026 23:03:29 +0100 Subject: [PATCH 23/23] release allocated GPU memory and unmap buffers on clear --- regen/memory/staging-system.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/regen/memory/staging-system.cpp b/regen/memory/staging-system.cpp index c4133f5cea..5386f0c42d 100644 --- a/regen/memory/staging-system.cpp +++ b/regen/memory/staging-system.cpp @@ -143,6 +143,11 @@ void StagingSystem::clear() { arena = nullptr; } } + // Free memory allocated for staging buffers + StagingBuffer::resetMemoryPools(); + // Clear any fences + ringFences_.clear(); + ringFences_.resize(numRingSegments_, GPUFence()); copyInProgress_.store(false, std::memory_order_relaxed); }