From 53e602e92d790e56e66de4a18b4ecdb3bfea28f9 Mon Sep 17 00:00:00 2001 From: johnwait Date: Fri, 16 Aug 2024 16:19:07 -0400 Subject: [PATCH] BUGFIX (definitive) for regex search matching deleted text --- scintilla/Scintilla.vcxproj | 2 +- scintilla/src/CellBuffer.cxx | 6 ---- scintilla/src/CellBuffer.h | 3 -- scintilla/src/Document.h | 4 --- scintilla/src/SplitVector.h | 35 ---------------------- scionigmo/OnigmoRegExEngine.cxx | 51 +++++++++++++++++++++++++++++---- 6 files changed, 47 insertions(+), 54 deletions(-) diff --git a/scintilla/Scintilla.vcxproj b/scintilla/Scintilla.vcxproj index 6c746573..b8c1b1fe 100644 --- a/scintilla/Scintilla.vcxproj +++ b/scintilla/Scintilla.vcxproj @@ -102,7 +102,7 @@ true Disabled NotUsing - X_SCINTILLA_RANGEGAP_FIX;JRB_BUILD;SCI_NAMESPACE;NO_CXX11_REGEX;SCI_OWNREGEX;ONIG_EXTERN=extern;_WIN64;_DEBUG;_WINDOWS;_CRT_SECURE_NO_DEPRECATE;_SCL_SECURE_NO_WARNINGS;STATIC_BUILD;SCI_LEXER;USE_D2D;%(PreprocessorDefinitions) + JRB_BUILD;SCI_NAMESPACE;NO_CXX11_REGEX;SCI_OWNREGEX;ONIG_EXTERN=extern;_WIN64;_DEBUG;_WINDOWS;_CRT_SECURE_NO_DEPRECATE;_SCL_SECURE_NO_WARNINGS;STATIC_BUILD;SCI_LEXER;USE_D2D;%(PreprocessorDefinitions) MultiThreadedDebug Level3 diff --git a/scintilla/src/CellBuffer.cxx b/scintilla/src/CellBuffer.cxx index 9fdf92bf..66caa71d 100644 --- a/scintilla/src/CellBuffer.cxx +++ b/scintilla/src/CellBuffer.cxx @@ -598,12 +598,6 @@ const char *CellBuffer::RangePointer(Sci::Position position, Sci::Position range return substance.RangePointer(position, rangeLength); } -#ifdef X_SCINTILLA_RANGEGAP_FIX -const char *CellBuffer::DataPointer(Sci::Position position, Sci::Position rangeLength) { - return substance.DataPointer(position, rangeLength); -} - -#endif Sci::Position CellBuffer::GapPosition() const noexcept { return substance.GapPosition(); } diff --git a/scintilla/src/CellBuffer.h b/scintilla/src/CellBuffer.h index 85fad50d..f1489809 100644 --- a/scintilla/src/CellBuffer.h +++ b/scintilla/src/CellBuffer.h @@ -150,9 +150,6 @@ class CellBuffer { void GetStyleRange(unsigned char *buffer, Sci::Position position, Sci::Position lengthRetrieve) const; const char *BufferPointer(); const char *RangePointer(Sci::Position position, Sci::Position rangeLength); -#ifdef X_SCINTILLA_RANGEGAP_FIX - const char *DataPointer(Sci::Position position, Sci::Position rangeLength); -#endif Sci::Position GapPosition() const noexcept; Sci::Position Length() const noexcept; diff --git a/scintilla/src/Document.h b/scintilla/src/Document.h index a92f374d..61a841be 100644 --- a/scintilla/src/Document.h +++ b/scintilla/src/Document.h @@ -377,11 +377,7 @@ class Document : PerLine, public IDocumentWithLineEnd, public ILoader { const char * SCI_METHOD BufferPointer() override { return cb.BufferPointer(); } std::string SCI_METHOD GetRegexLastError() { return regex->GetErrorInfo(); } -#ifdef X_SCINTILLA_RANGEGAP_FIX - const char *RangePointer(Sci::Position position, Sci::Position rangeLength) { return cb.DataPointer(position, rangeLength); } -#else // !X_SCINTILLA_RANGEGAP_FIX const char *RangePointer(Sci::Position position, Sci::Position rangeLength) { return cb.RangePointer(position, rangeLength); } -#endif Sci::Position GapPosition() const { return cb.GapPosition(); } int SCI_METHOD GetLineIndentation(Sci_Position line) override; diff --git a/scintilla/src/SplitVector.h b/scintilla/src/SplitVector.h index 7cde90fc..39895a70 100644 --- a/scintilla/src/SplitVector.h +++ b/scintilla/src/SplitVector.h @@ -321,41 +321,6 @@ class SplitVector { } } -#ifdef X_SCINTILLA_RANGEGAP_FIX - /// Return a pointer to a range of elements *excluding* the gap, while still - /// rearranging the buffer to make that range contiguous. - /// If the pointer is to end up at or past the end of the data (i.e. within - /// the gap once it's been pushed after all elements), the position returned - /// will be the end of the data block, never past it. - T *DataPointer(ptrdiff_t position, ptrdiff_t rangeLength) noexcept { - if (position >= part1Length) { - // Furthest position is at the end of the contiguous - // data block - return body.data() + part1Length; - } else { // position < part1Length - // Check if we have a contiguous data block of - // covering the whole range - // (i.e. from [position .. position + rangeLength] ) - if ((position + rangeLength) > part1Length) { - // Range overlaps gap, so move gap to *end* of range. - // (i.e. moving the gap past position + rangeLength) - GapTo(position + rangeLength); - // Now, part1Length might have changed; - // check if we still go over - if (position > part1Length) { - // Return furthest position possible - return body.data() + part1Length; - } else { - // We're good now - return body.data() + position; - } - } else { - return body.data() + position; - } - } - } - -#endif /// Return the position of the gap within the buffer. ptrdiff_t GapPosition() const noexcept { return part1Length; diff --git a/scionigmo/OnigmoRegExEngine.cxx b/scionigmo/OnigmoRegExEngine.cxx index 9b4ddebe..dee860f7 100644 --- a/scionigmo/OnigmoRegExEngine.cxx +++ b/scionigmo/OnigmoRegExEngine.cxx @@ -359,20 +359,61 @@ Sci::Position OnigmoRegExEngine::FindText(Document* doc, Sci::Position minPos, S // --- search document range for pattern match --- // !!! Performance issue: Scintilla: moving Gap needs memcopy - high costs for find/replace in large document + // 2019-08-12: Modified Scintilla's code to use something other than // RangePointer(), which is optimized for insertion of text // and thus has a habit of giving out pointers passed // deleted text. // // TODO: Report bug at least to the Notepad3 project + // [2024-08-16: Notepad3 fixed it (unintentionally?) thru their commit 66c1209] + // // Steps to reproduce bug: - // 1) Paste some predefined text in a Scntilla's edit window. - // 2) Devise a regular expression that matches the whole text, using + // 1) Devise a regular expression to match an end of line, using // at least the $ anchor and making sure the regex doesn't // end with any of the quantifiers (?, *, + or {m,n}) but instead - // a fixed length section, e.g. [..](?:-(?:20|19)[0-9][0-9])) - // 3) Match the regex pattern - auto const docBegPtr = UCharCPtr(doc->RangePointer(0, docLen)); + // a fixed length section. + // e.g. (?:-(?:20|19)[0-9][0-9])$ + // 2) Build the subject string that is expected to match pattern + // devised in step 1), and then paste it into Scintilla's edit + // window _after_ having it typed & copied from *another* text + // editor. If caret ends on the same line of text, type a line + // return manually. + // IMPORTANT: Text in step 2) MUST have been pasted, and in one go; + // typing the text directly within Scintilla's edit window + // instead (char-by-char) will FAIL to reproduce the bug. + // If needed, go back to step 2), copy the whole text, + // create a new document discarding the current one, + // and paste back the copied text. + // e.g.: 16-08-2024 + // 3) Match the regex pattern. + // 4) Delete part of the pasted text from its end, e.g. by a few + // characters *including* the final line return, such that the + // pattern should no longer match. + // 5) Try matching the regex pattern again. + // If it STILL matches: the regex engine is given access to a version of + // the buffer still containing deleted text/chars, + // => bug successfully reproduced + // If it DOES NOT match: the regex engine is given access to an up-to-date + // version of the document buffer. + // => bug absent/fixed + // If bug was reproduced: + // 6) Delete the first char on the text's line and try matching the + // regex pattern again. It should no longer match, indicating that + // the document's buffer has collapsed into a single memory block. + // + // 2024-08-16: Revisited bug, so as to support updating Scintilla's + // code base by undoing modifications by the original fix. + // **BTW: The original bugfix was left out of all Release builds + // due to its dedicated preprocessor define missing (!!!) + // in the project's configuration (sorry!) + // + // BUGFIX: (definitive): Using Document::BufferPointer() collapses + // the buffer into a single block while returning the same + // pointer as doc->RangePointer(0, docLen) does. + // + ///auto const docBegPtr = UCharCPtr(doc->RangePointer(0, docLen)); + auto const docBegPtr = UCharCPtr(doc->BufferPointer()); auto const docEndPtr = UCharCPtr(doc->RangePointer(docLen, 0)); auto const rangeBegPtr = UCharCPtr(doc->RangePointer(rangeBeg, rangeLen)); auto const rangeEndPtr = UCharCPtr(doc->RangePointer(rangeEnd, 0));