From 9ae52f3d7a1b98c66bef8f8f56715d902ccf7523 Mon Sep 17 00:00:00 2001
From: reuk
Date: Wed, 19 Jun 2024 15:29:56 +0100
Subject: [PATCH] Direct2D: Update backbuffer implementation

Problem description
===================

Firstly, the linked-list of pending presentations acted as a stack (FILO).
If the swap chain thread and main thread processed frames at varying
rates, then the following sequence of events was possible:

Main thread         Swap chain thread       Queue state
---------------------------------------------------------
Push frame (1)                              [1]
Push frame (2)                              [2, 1]
                    Pop frame (2)           [1]
Push frame (3)                              [3, 1]
                    Pop frame (3)           [1]
                    Pop frame (1)           []   <-- Out of sequence!

Secondly, the swap chain's sequential flip model can only maintain a valid
back-buffer state as long as the list of dirty rects is correct, and every
pixel within the dirty rects is painted incrementally. In the example
above, if the main thread were to produce two frames before the swap chain
thread could present any frame, then presenting *only* frame 2 (skipping
frame 1) may produce incorrect results when combined with the existing
back buffer. This is because regions updated in frame 1 may not be updated
in frame 2, so regions *only* updated in frame 1 will be omitted from the
back buffer.

Mitigation
==========

This patch removes the old stack of presentations and replaces it with a
slightly more complex mechanism that tracks two different Presentation
objects. At any time, up to one Presentation may be in use by the swap
chain thread (i.e. actively presenting), up to one Presentation may be
accumulating updated/dirty regions (i.e. painting), and up to one
Presentation may be ready, awaiting display.

This scheme resolves the first issue described above by ensuring that old
frame data is not kept around. There is never more than one frame awaiting
display, which means that if the swap chain thread attempts to display
twice in a row (before the main thread produces a new frame), the second
attempt will be a no-op.

The second issue is resolved by accumulating changes into a single Presentation whenever the main thread produces two or more frames in a row. If there is already a 'ready' Presentation when the main thread finishes painting, then all updated regions from the newest Presentation will be added to the 'ready' Presentation, rather than replacing it. When the swap chain thread is ready to present, it will therefore see the result of all the accumulated Presentations produced by the main thread, instead of just the newest Presentation. --- .../juce_Direct2DHwndContext_windows.cpp | 249 ++++++++++-------- 1 file changed, 146 insertions(+), 103 deletions(-) diff --git a/modules/juce_graphics/native/juce_Direct2DHwndContext_windows.cpp b/modules/juce_graphics/native/juce_Direct2DHwndContext_windows.cpp index f195842caf..ca592165d0 100644 --- a/modules/juce_graphics/native/juce_Direct2DHwndContext_windows.cpp +++ b/modules/juce_graphics/native/juce_Direct2DHwndContext_windows.cpp @@ -35,15 +35,9 @@ namespace juce { -//============================================================================== -class alignas (MEMORY_ALLOCATION_ALIGNMENT) Presentation +class Presentation { public: - SLIST_ENTRY& getListEntry() - { - return listEntry; - } - auto getPresentationBitmap() const { jassert (presentationBitmap != nullptr); @@ -82,58 +76,142 @@ public: return paintAreas; } - void setResult (HRESULT x) - { - hr = x; - } - - auto getResult() const - { - return hr; - } - private: - SLIST_ENTRY listEntry; ComSmartPtr presentationBitmap; RectangleList paintAreas; - HRESULT hr = S_OK; }; -class SList +class PresentationQueue { public: - void push (SLIST_ENTRY& item) + Presentation* lockFront() { - jassert ((reinterpret_cast (&item) % MEMORY_ALLOCATION_ALIGNMENT) == 0); - InterlockedPushEntrySList (head.get(), &item); + const std::scoped_lock lock { mutex }; + displaying = std::exchange (readyToDisplay, nullptr); + return displaying; } - auto* pop() + void unlockFront() { - return 
InterlockedPopEntrySList (head.get()); + const std::scoped_lock lock { mutex }; + displaying = nullptr; + } + + Presentation* lockBack() + { + const std::scoped_lock lock { mutex }; + + preparing = [&]() -> Presentation* + { + for (auto& p : presentations) + if (&p != displaying && &p != readyToDisplay) + return &p; + + return nullptr; + }(); + + return preparing; + } + + void unlockBack() + { + { + const std::scoped_lock lock { mutex }; + + if (readyToDisplay != nullptr) + { + // Copy the dirty regions from the newest presentation over the top of the 'ready' + // presentation, then combine dirty regions. + // We're effectively combining several frames of dirty regions into one, until + // the screen update catches up. + + for (const auto& area : preparing->getPaintAreas()) + { + D2D1_POINT_2U destPoint { (uint32) area.getX(), (uint32) area.getY() }; + D2D1_RECT_U sourceRect { (uint32) area.getX(), + (uint32) area.getY(), + (uint32) area.getRight(), + (uint32) area.getBottom() }; + readyToDisplay->getPresentationBitmap()->CopyFromBitmap (&destPoint, preparing->getPresentationBitmap(), &sourceRect); + } + + auto areas = readyToDisplay->getPaintAreas(); + areas.add (preparing->getPaintAreas()); + readyToDisplay->setPaintAreas (std::move (areas)); + } + else + { + readyToDisplay = std::exchange (preparing, nullptr); + } + } + + SetEvent (wakeEvent.getHandle()); + } + + HANDLE getWakeEvent() const + { + return wakeEvent.getHandle(); } private: - struct Destructor - { - void operator() (void* ptr) const - { - _aligned_free (ptr); - } - }; + WindowsScopedEvent wakeEvent; - std::unique_ptr head { []() -> SLIST_HEADER* - { - auto* result = static_cast (_aligned_malloc (sizeof (SLIST_HEADER), MEMORY_ALLOCATION_ALIGNMENT)); - - if (result == nullptr) - return nullptr; - - InitializeSListHead (result); - return result; - }() }; + std::mutex mutex; + std::array presentations; + Presentation* preparing = nullptr; + Presentation* readyToDisplay = nullptr; + Presentation* 
displaying = nullptr; }; +template +class PresentationQueueLock +{ +public: + PresentationQueueLock() = default; + + explicit PresentationQueueLock (PresentationQueue& q) + : queue (&q), + presentation (queue != nullptr ? (queue->*lock)() : nullptr) + { + } + + ~PresentationQueueLock() + { + if (queue != nullptr) + (queue->*unlock)(); + } + + PresentationQueueLock (PresentationQueueLock&& other) noexcept + : queue (std::exchange (other.queue, nullptr)), + presentation (std::exchange (other.presentation, nullptr)) + { + } + + PresentationQueueLock& operator= (PresentationQueueLock&& other) noexcept + { + PresentationQueueLock { std::move (other) }.swap (*this); + return *this; + } + + PresentationQueueLock (const PresentationQueueLock&) = delete; + PresentationQueueLock& operator= (const PresentationQueueLock&) = delete; + + Presentation* getPresentation() const { return presentation; } + +private: + void swap (PresentationQueueLock& other) noexcept + { + std::swap (other.queue, queue); + std::swap (other.presentation, presentation); + } + + PresentationQueue* queue = nullptr; + Presentation* presentation = nullptr; +}; + +using BackBufferLock = PresentationQueueLock<&PresentationQueue::lockBack, &PresentationQueue::unlockBack>; +using FrontBufferLock = PresentationQueueLock<&PresentationQueue::lockFront, &PresentationQueue::unlockFront>; + struct Direct2DHwndContext::HwndPimpl : public Direct2DGraphicsContext::Pimpl { private: @@ -145,8 +223,6 @@ private: multithread (multithreadIn), swapChainEventHandle (ownerIn.swap.swapChainEvent->getHandle()) { - for (auto& p : presentations) - retired.push (p.getListEntry()); } ~SwapChainThread() @@ -155,38 +231,22 @@ private: thread.join(); } - Presentation* getFreshPresentation() + BackBufferLock getFreshPresentation() { - if (auto* listEntry = reinterpret_cast (retired.pop())) - return listEntry; - - return nullptr; - } - - void pushPaintedPresentation (Presentation* presentationIn) - { - painted.push 
(presentationIn->getListEntry()); - SetEvent (wakeEvent.getHandle()); - } - - void retirePresentation (Presentation* presentationIn) - { - retired.push (presentationIn->getListEntry()); + return BackBufferLock (queue); } void notify() { - SetEvent (wakeEvent.getHandle()); + SetEvent (queue.getWakeEvent()); } private: - SList painted, retired; Direct2DHwndContext::HwndPimpl& owner; + PresentationQueue queue; ComSmartPtr multithread; HANDLE swapChainEventHandle = nullptr; - std::vector presentations = std::vector (2); - WindowsScopedEvent wakeEvent; WindowsScopedEvent quitEvent; std::thread thread { [&] { threadLoop(); } }; @@ -201,25 +261,25 @@ private: if (! swapChainReady) return; - auto* listEntry = reinterpret_cast (painted.pop()); + FrontBufferLock frontBufferLock { queue }; + auto* frontBuffer = frontBufferLock.getPresentation(); - if (listEntry == nullptr) + if (frontBuffer == nullptr) return; JUCE_D2DMETRICS_SCOPED_ELAPSED_TIME (owner.owner.metrics, swapChainThreadTime); { ScopedMultithread scopedMultithread { multithread }; - owner.present (listEntry, 0); + owner.present (frontBuffer, 0); } - retired.push (listEntry->getListEntry()); swapChainReady = false; }; for (;;) { - const HANDLE handles[] { swapChainEventHandle, quitEvent.getHandle(), wakeEvent.getHandle() }; + const HANDLE handles[] { swapChainEventHandle, quitEvent.getHandle(), queue.getWakeEvent() }; const auto waitResult = WaitForMultipleObjects ((DWORD) std::size (handles), handles, FALSE, INFINITE); @@ -252,7 +312,7 @@ private: SwapChain swap; std::unique_ptr swapChainThread; - Presentation* presentation = nullptr; + BackBufferLock presentation; CompositionTree compositionTree; UpdateRegion updateRegion; RectangleList deferredRepaints; @@ -335,14 +395,9 @@ private: if (auto now = Time::getHighResolutionTicks(); Time::highResolutionTicksToSeconds (now - lastFinishFrameTicks) < 0.001) return false; - if (! 
presentation) - { + if (presentation.getPresentation() == nullptr) presentation = swapChainThread->getFreshPresentation(); - if (presentation && FAILED (presentation->getResult())) - teardown(); - } - // Paint if: // resources are allocated // deferredRepaints has areas to be painted @@ -351,7 +406,7 @@ private: ready &= swap.canPaint(); ready &= compositionTree.canPaint(); ready &= deferredRepaints.getNumRectangles() > 0 || resizing; - ready &= presentation != nullptr; + ready &= presentation.getPresentation() != nullptr; return ready; } @@ -395,8 +450,8 @@ public: ComSmartPtr getDeviceContextTarget() const override { - if (presentation != nullptr) - return presentation->getPresentationBitmap (swap.getSize(), deviceResources.deviceContext.context); + if (auto* p = presentation.getPresentation()) + return p->getPresentationBitmap (swap.getSize(), deviceResources.deviceContext.context); return {}; } @@ -494,19 +549,19 @@ public: setSize (getClientRect()); } - auto savedState = Pimpl::startFrame (dpiScale); + auto* savedState = Pimpl::startFrame (dpiScale); + + if (savedState == nullptr) + return nullptr; // If a new frame is starting, clear deferredAreas in case repaint is called // while the frame is being painted to ensure the new areas are painted on the // next frame - if (savedState) - { - JUCE_TRACE_LOG_D2D_PAINT_CALL (etw::direct2dHwndPaintStart, owner.getFrameId()); + JUCE_TRACE_LOG_D2D_PAINT_CALL (etw::direct2dHwndPaintStart, owner.getFrameId()); - presentation->setPaintAreas (paintAreas); + presentation.getPresentation()->setPaintAreas (paintAreas); - deferredRepaints.clear(); - } + deferredRepaints.clear(); return savedState; } @@ -515,24 +570,11 @@ public: { const ScopeGuard scope { [this] { - presentation = nullptr; + presentation = {}; lastFinishFrameTicks = Time::getHighResolutionTicks(); } }; - if (auto hr = Pimpl::finishFrame(); FAILED (hr)) - return hr; - - if (resizing) - { - present (presentation, 0); - swapChainThread->retirePresentation 
(presentation); - } - else - { - swapChainThread->pushPaintedPresentation (presentation); - } - - return S_OK; + return Pimpl::finishFrame(); } void present (Presentation* paintedPresentation, uint32 flags) @@ -606,8 +648,7 @@ public: // Present the freshly painted buffer const auto hr = swap.chain->Present1 (swap.presentSyncInterval, swap.presentFlags | flags, &presentParameters); - jassert (SUCCEEDED (hr)); - paintedPresentation->setResult (hr); + jassertquiet (SUCCEEDED (hr)); // The buffer is now completely filled and ready for dirty rectangles for the next frame swap.state = SwapChain::State::bufferFilled; @@ -633,6 +674,8 @@ public: if (const auto hr = deviceResources.deviceContext.context->CreateBitmap (size, nullptr, 0, bitmapProperties, snapshot.resetAndGetPointerAddress()); FAILED (hr)) return {}; + const ScopedMultithread scope { directX->getD2DMultithread() }; + swap.chain->Present (0, DXGI_PRESENT_DO_NOT_WAIT); // Copy the swap chain buffer to the bitmap snapshot