From 9076639742bbc7ebd5863d1818570ae647ca1e4e Mon Sep 17 00:00:00 2001 From: reuk Date: Tue, 9 Jul 2024 20:21:19 +0100 Subject: [PATCH] UnicodeBidi: Replace implementation with SheenBidi --- .../fonts/juce_SimpleShapedText.cpp | 177 +-- .../juce_graphics/unicode/juce_Unicode.cpp | 132 +- .../unicode/juce_UnicodeBidi.cpp | 1214 +++-------------- .../unicode/juce_UnicodeUtils.cpp | 16 - 4 files changed, 292 insertions(+), 1247 deletions(-) diff --git a/modules/juce_graphics/fonts/juce_SimpleShapedText.cpp b/modules/juce_graphics/fonts/juce_SimpleShapedText.cpp index eda8e4d624..e0bb709edf 100644 --- a/modules/juce_graphics/fonts/juce_SimpleShapedText.cpp +++ b/modules/juce_graphics/fonts/juce_SimpleShapedText.cpp @@ -289,12 +289,6 @@ constexpr hb_script_t getScriptTag (TextScript type) return HB_SCRIPT_COMMON; } -struct ResolvedFontRun -{ - Font font; - Span run; -}; - SimpleShapedText::SimpleShapedText (const String* data, const ShapedTextOptions& options) : string (*data) @@ -393,7 +387,7 @@ static std::vector lowLevelShape (const String& string, const Font& font, TextScript script, const String& language, - TextDirection direction) + uint8_t embeddingLevel) { HbBuffer buffer { hb_buffer_create() }; hb_buffer_clear_contents (buffer.get()); @@ -403,7 +397,7 @@ static std::vector lowLevelShape (const String& string, hb_buffer_set_language (buffer.get(), hb_language_from_string (language.toRawUTF8(), -1)); hb_buffer_set_direction (buffer.get(), - direction == TextDirection::ltr ? HB_DIRECTION_LTR : HB_DIRECTION_RTL); + (embeddingLevel % 2) == 0 ? HB_DIRECTION_LTR : HB_DIRECTION_RTL); Utf8Lookup utf8Lookup { string }; @@ -495,7 +489,7 @@ static std::vector lowLevelShape (const String& string, // control characters that JUCE doesn't know how to handle appropriately. jassert (unknownGlyph == infos.end()); - [[maybe_unused]] const auto trackingAmount = (! HB_DIRECTION_IS_VERTICAL (direction) && ! trackingIsDefault) + [[maybe_unused]] const auto trackingAmount = ! trackingIsDefault ? font.getHeight() * tracking : 0; @@ -505,7 +499,7 @@ static std::vector lowLevelShape (const String& string, for (size_t i = 0; i < infos.size(); ++i) { - const auto j = direction == TextDirection::ltr ? i : infos.size() - 1 - i; + const auto j = (embeddingLevel % 2) == 0 ? i : infos.size() - 1 - i; const auto glyphId = infos[j].codepoint; const auto xAdvance = positions[j].x_advance; @@ -644,7 +638,7 @@ struct ShapingParams { TextScript script; String language; - TextDirection direction; + uint8_t embeddingLevel; Font resolvedFont; }; @@ -784,7 +778,7 @@ private: shapingParams.resolvedFont, shapingParams.script, shapingParams.language, - shapingParams.direction); + shapingParams.embeddingLevel); recalculateAdvances(); } @@ -820,50 +814,6 @@ struct LineChunkInLogicalOrder int bidiLevel{}; }; -static auto getStartingVisualIndices (const std::vector& chunks, - const Array& analysis) -{ - std::vector indices (chunks.size()); - - std::transform (chunks.cbegin(), - chunks.cend(), - indices.begin(), - [&analysis] (auto& c) { return analysis[(int) c.textRange.getStart()].visualIndex; }); - - return indices; -} - -static auto getChunkIndicesInVisualOrder (const std::vector& chunks, - const Array& analysis) -{ - const auto startingVisualIndices = getStartingVisualIndices (chunks, analysis); - - struct ChunkIndexWithVisualIndex - { - size_t chunkIndex{}; - size_t visualIndex{}; - }; - - std::vector sortableIndices; - sortableIndices.reserve (std::size (startingVisualIndices)); - - for (const auto [i, visualIndex] : enumerate (startingVisualIndices)) - sortableIndices.push_back (ChunkIndexWithVisualIndex { (size_t) i, visualIndex }); - - std::sort (sortableIndices.begin(), - sortableIndices.end(), - [] (const auto& a, const auto& b) { return a.visualIndex < b.visualIndex; }); - - std::vector result (std::size (sortableIndices)); - - std::transform (sortableIndices.begin(), - sortableIndices.end(), - result.begin(), - [] (auto x) { return x.chunkIndex; }); - - return result; -} - // Used to avoid signedness warning for types for which std::size() is int template static auto makeSpan (T& array) @@ -960,7 +910,11 @@ void SimpleShapedText::shape (const String& data, std::vector lineChunks; int64 numGlyphsInLine = 0; - const auto analysis = Unicode::performAnalysis (data, options.getReadingDirection()); + const auto analysis = Unicode::performAnalysis (data); + + std::vector data32 ((size_t) data.length()); + data.copyToUTF32 (data32.data(), data32.size() * sizeof (juce_wchar)); + const BidiAlgorithm bidiAlgorithm (data32); IntegralCanBreakBeforeIterator softBreakIterator { makeSpan (analysis) }; @@ -969,9 +923,44 @@ void SimpleShapedText::shape (const String& data, auto remainingWidth = options.getMaxWidth().has_value() ? (*options.getMaxWidth() - options.getFirstLineIndent()) : std::optional{}; - const auto commitLine = [&] + std::vector visualOrder; + + const auto commitLine = [&] (const BidiParagraph& bidiParagraph) { - const auto indicesInVisualOrder = getChunkIndicesInVisualOrder (lineChunks, analysis); + if (lineChunks.empty()) + return; + + const auto begin = (size_t) lineChunks.front().textRange.getStart(); + const auto end = (size_t) lineChunks.back().textRange.getEnd(); + const auto bidiLine = bidiParagraph.createLine (begin, end - begin); + + bidiLine.computeVisualOrder (visualOrder); + + const auto indicesInVisualOrder = [&] + { + std::vector result; + result.reserve (lineChunks.size()); + + for (auto it = visualOrder.begin(); it != visualOrder.end();) + { + const auto logicalIndex = *it; + const auto chunk = std::lower_bound (lineChunks.begin(), + lineChunks.end(), + logicalIndex, + [] (const LineChunkInLogicalOrder& c, size_t x) + { + return (size_t) c.textRange.getEnd() <= x; + }); + + jassert (chunk != lineChunks.end()); + + result.push_back ((size_t) std::distance (lineChunks.begin(), chunk)); + it += std::min ((ptrdiff_t) std::distance (it, visualOrder.end()), + (ptrdiff_t) chunk->textRange.getLength()); + } + + return result; + }(); for (auto chunkIndex : indicesInVisualOrder) { @@ -1020,11 +1009,11 @@ void SimpleShapedText::shape (const String& data, remainingWidth = options.getMaxWidth(); }; - const auto append = [&] (Range range, const ShapingParams& shapingParams) + const auto append = [&] (const BidiParagraph& bidiParagraph, Range range, const ShapingParams& shapingParams) { jassert (! range.isEmpty()); - auto glyphsToConsume = ConsumableGlyphs { data, range, shapingParams }; + ConsumableGlyphs glyphsToConsume { data, range, shapingParams }; while (! glyphsToConsume.isEmpty()) { @@ -1035,7 +1024,7 @@ void SimpleShapedText::shape (const String& data, { int64 breakBefore{}; - // We need to use maybeIgnoringWhitespace in comparisions, but + // We need to use maybeIgnoringWhitespace in comparisons, but // includingTrailingWhitespace when using subtraction to calculate the remaining // space. LineAdvance advance{}; @@ -1049,7 +1038,7 @@ void SimpleShapedText::shape (const String& data, static constexpr auto floatMax = std::numeric_limits::max(); for (auto breakBefore = softBreakIterator.next(); - breakBefore.has_value() && (lineNumbers.size() == 0 + breakBefore.has_value() && (lineNumbers.isEmpty() || (int64) lineNumbers.size() < options.getMaxNumLines() - 1); breakBefore = softBreakIterator.next()) { @@ -1069,7 +1058,7 @@ void SimpleShapedText::shape (const String& data, shapingParams.resolvedFont, shapingParams.script, shapingParams.language, - shapingParams.direction); + shapingParams.embeddingLevel); const auto beyondEnd = [&] { @@ -1089,7 +1078,7 @@ void SimpleShapedText::shape (const String& data, const auto advance = std::accumulate (glyphs.cbegin(), beyondEnd, - float {}, + float{}, [] (auto acc, const auto& elem) { return acc + elem.advance.getX(); }); if (advance < remainingWidth.value_or (floatMax) || ! bestMatch.has_value()) @@ -1104,7 +1093,7 @@ void SimpleShapedText::shape (const String& data, *glyphsToConsume.getAdvanceXUpToBreakPointIfSafe (glyphsToConsume.getCodepointRange().getEnd(), options.getTrailingWhitespacesShouldFit()), false, - std::vector {} }; + std::vector{} }; } jassert (bestMatch.has_value()); @@ -1121,12 +1110,10 @@ void SimpleShapedText::shape (const String& data, const auto textRange = glyphsToConsume.getCodepointRange().withEnd (bestMatch->breakBefore); - const auto createFakeBidiNestingLevel = [] (TextDirection dir) { return dir == TextDirection::ltr ? 0 : 1; }; - lineChunks.push_back ({ textRange, { glyphs.begin(), glyphs.end() }, shapingParams.resolvedFont, - createFakeBidiNestingLevel (shapingParams.direction) }); + shapingParams.embeddingLevel }); numGlyphsInLine += (int64) glyphs.size(); @@ -1142,14 +1129,14 @@ void SimpleShapedText::shape (const String& data, if (numGlyphsInLine == 0 && exactlyEqual (remainingWidth, options.getMaxWidth())) consumeBestMatch(); - commitLine(); + commitLine (bidiParagraph); } else { consumeBestMatch(); if (! glyphsToConsume.isEmpty()) - commitLine(); + commitLine (bidiParagraph); } } }; @@ -1172,29 +1159,51 @@ void SimpleShapedText::shape (const String& data, return resolved; }(); - for (Unicode::LineBreakIterator lineIter { makeSpan (analysis) }; auto lineRun = lineIter.next();) + bidiAlgorithm.forEachParagraph ([&] (const BidiParagraph& bidiParagraph) { - for (Unicode::ScriptRunIterator scriptIter { *lineRun }; - auto scriptRun = scriptIter.next();) + const auto bidiLevels = bidiParagraph.getResolvedLevels(); + const Span paragraphSpan { analysis.getRawDataPointer() + bidiParagraph.getOffset(), bidiParagraph.getLength() }; + + for (Unicode::LineBreakIterator lineIter { paragraphSpan }; auto lineRun = lineIter.next();) { - for (Unicode::BidiRunIterator bidiIter { *scriptRun }; auto bidiRun = bidiIter.next();) + for (Unicode::ScriptRunIterator scriptIter { *lineRun }; auto scriptRun = scriptIter.next();) { - for (const auto& [range, font] : fontsWithFallback.getIntersectionsWith (spanLookup.getRange (*bidiRun))) + const auto offsetInText = (size_t) std::distance (analysis.getRawDataPointer(), scriptRun->data()); + const auto offsetInParagraph = offsetInText - bidiParagraph.getOffset(); + const auto length = scriptRun->size(); + + const auto begin = bidiLevels.data() + offsetInParagraph; + const auto end = begin + length; + + for (auto it = begin; it != end;) { - append (range, { scriptRun->front().script, - options.getLanguage(), - bidiRun->front().direction, - font }); + const auto next = std::find_if (it, end, [&] (const auto& l) { return l != *it; }); + const auto bidiRunOffset = std::distance (begin, it); + const auto bidiRunLength = std::distance (it, next); + const Span bidiRun { analysis.getRawDataPointer() + bidiRunOffset + offsetInText, (size_t) bidiRunLength }; + + for (const auto& [range, font] : fontsWithFallback.getIntersectionsWith (spanLookup.getRange (bidiRun))) + { + append (bidiParagraph, + range, + { scriptRun->front().script, + options.getLanguage(), + *it, + font }); + } + + it = next; } } + + if (! lineChunks.empty()) + commitLine (bidiParagraph); } if (! lineChunks.empty()) - commitLine(); - } + commitLine (bidiParagraph); - if (! lineChunks.empty()) - commitLine(); + }, options.getReadingDirection()); } Range SimpleShapedText::getGlyphRangeForLine (size_t line) const diff --git a/modules/juce_graphics/unicode/juce_Unicode.cpp b/modules/juce_graphics/unicode/juce_Unicode.cpp index 0b4024d6be..f1cef2a07e 100644 --- a/modules/juce_graphics/unicode/juce_Unicode.cpp +++ b/modules/juce_graphics/unicode/juce_Unicode.cpp @@ -41,14 +41,7 @@ namespace juce */ class Unicode { - struct Key - { - String text; - std::optional directionOverride; - - auto tie() const { return std::tie (text, directionOverride); } - bool operator< (const Key& other) const { return tie() < other.tie(); } - }; + using Key = String; public: Unicode() = delete; @@ -61,13 +54,8 @@ public: { uint32_t codepoint; - size_t logicalIndex; // Index of the character in the source string - size_t visualIndex; - TextBreakType breaking; // Breaking characteristics of this codepoint - TextDirection direction; // Direction of this codepoint - TextScript script; // Script class for this codepoint }; @@ -88,14 +76,14 @@ public: /* Performs unicode analysis on a piece of text and returns an array of Codepoints in logical order. */ - static Array performAnalysis (const String& string, std::optional textDirection = {}) + static Array performAnalysis (const String& string) { if (string.isEmpty()) return {}; thread_local LruCache> cache; - return cache.get ({ string, textDirection }, analysisCallback); + return cache.get (string, analysisCallback); } //============================================================================== @@ -125,20 +113,6 @@ public: Span data; }; - struct BidiTraits - { - using ValueType = const Codepoint; - - static bool compare (const Codepoint& t1, const Codepoint& t2) - { - return t1.direction == t2.direction; - } - - static bool includeBreakingIndex() { return false; } - }; - - using BidiRunIterator = Iterator; - struct LineTraits { using ValueType = const Codepoint; @@ -182,43 +156,13 @@ public: using ScriptRunIterator = Iterator; private: - struct ParagraphIterator - { - explicit ParagraphIterator (Span Span) : data (Span) {} - - std::optional> next() - { - const auto start = head; - auto end = start; - - if ((size_t) start >= data.size()) - return std::nullopt; - - while ((size_t) end < data.size()) - { - constexpr auto paragraphSeparator = 0x2029; - - if (data[(size_t) end].character == paragraphSeparator) - break; - - end++; - } - - head = end + 1; - return std::make_optional (Range { start, end }); - } - - Span data; - int head = 0; - }; - static Array analysisCallback (const Key& key) { auto analysisBuffer = [&key] { std::vector points; - const auto data = key.text.toUTF32(); + const auto data = key.toUTF32(); const auto length = data.length(); points.reserve (length); @@ -256,76 +200,8 @@ private: result.getReference (index).breaking = type; }); - ParagraphIterator iter { analysisBuffer }; - - TR9::BidiOutput bidiOutput; - - while (auto range = iter.next()) - { - const auto run = Span { analysisBuffer.data() + (size_t) range->getStart(), (size_t) range->getLength() }; - - TR9::analyseBidiRun (bidiOutput, run, key.directionOverride); - - for (size_t i = 0; i < (size_t) range->getLength(); i++) - { - auto& point = result.getReference ((int) i + range->getStart()); - - point.direction = bidiOutput.resolvedLevels[i] % 2 == 0 ? TextDirection::ltr : TextDirection::rtl; - point.logicalIndex = (size_t) range->getStart() + i; - point.visualIndex = (size_t) bidiOutput.visualOrder[i]; - } - } - return result; } }; -#if JUCE_UNIT_TESTS - -class NumericalVisualOrderTest : UnitTest -{ -public: - NumericalVisualOrderTest() : UnitTest ("NumericalVisualOrderTest", UnitTestCategories::text) - { - } - - void runTest() override - { - auto doTest = [this] (const String& text) - { - String visual; - String logical; - - for (auto cp : Unicode::performAnalysis (text)) - { - visual << text[(int) cp.visualIndex]; - logical << text[(int) cp.logicalIndex]; - } - - beginTest (text); - expectEquals (visual, logical); - }; - - doTest ("12345"); - doTest ("12345_00001"); - doTest ("1_3(1)"); - doTest ("-12323"); - doTest ("8784-43_-33"); - doTest ("[v = get()](vector _arr) -> v2 { return _arr[5]; };"); - doTest (R"([(lambda x: (x, len(x), x.upper(), x[::-1]))(word) for word in "JUCE is great".split()])"); - doTest (R"(table.concat({table.unpack({string.reverse(string.gsub("JUCE is great", "%a", string.upper))})}, " "))"); - doTest (R"(result = sum([(mod(i, 2) * i**2, i = 1, 100)], mask = [(mod(i, 2) == 0, i = 1, 100)]))"); - doTest ("100 +100"); - doTest ("100+ 100"); - doTest ("100 - +100"); - doTest ("abs= +100"); - doTest ("1.19.0 [1]"); - } -}; - -static NumericalVisualOrderTest visualOrderTest; - -#endif - - } // namespace juce diff --git a/modules/juce_graphics/unicode/juce_UnicodeBidi.cpp b/modules/juce_graphics/unicode/juce_UnicodeBidi.cpp index 145644eccf..bd28dacf53 100644 --- a/modules/juce_graphics/unicode/juce_UnicodeBidi.cpp +++ b/modules/juce_graphics/unicode/juce_UnicodeBidi.cpp @@ -35,667 +35,185 @@ namespace juce { -class TR9 +class BidiLine { public: - TR9() = delete; + using ParagraphPtr = std::unique_ptr, FunctionPointerDestructor>; + using LinePtr = std::unique_ptr, FunctionPointerDestructor>; - struct BidiOutput + explicit BidiLine (ParagraphPtr p, LinePtr l) : paragraph (std::move (p)), line (std::move (l)) {} + + Span getRuns() const { - int embeddingLevel = -1; - std::vector resolvedLevels; - std::vector visualOrder; - }; - - static void analyseBidiRun (BidiOutput& output, - Span stream, - std::optional directionOverride = {}) - { - // BD1 - const auto paragraphLevel = directionOverride.has_value() ? (*directionOverride == TextDirection::rtl ? 1 : 0) - : resolveParagraphEmbeddingLevel (stream); - - for (auto& atom : stream) - atom.embeddingLevel = (uint16_t) paragraphLevel; - - // X1-8 - resolveExplicitLevels (stream, paragraphLevel); - - // X9 - for (auto& atom : stream) - if (contains ({ BidiType::rle, BidiType::lre, BidiType::rlo, BidiType::lro, BidiType::pdf }, atom.getBidiType())) - atom.setBidiType (BidiType::bn); - - // W1-W7 - resolveWeakTypes (stream, paragraphLevel); - - // N1-N2 - resolveNeutralTypes (stream, paragraphLevel); - - // I1-I2 - resolveImplicitTypes (stream); - - output.embeddingLevel = paragraphLevel; - - output.resolvedLevels.clear(); - - for (const auto& atom : stream) - output.resolvedLevels.push_back (atom.embeddingLevel); - - resolveReorderedIndices (output.visualOrder, stream, paragraphLevel); + return { SBLineGetRunsPtr (line.get()), SBLineGetRunCount (line.get()) }; } - static auto isOdd (int level) { return bool (level & 1); } - static auto computeLeastEven (int level) { return isOdd (level) ? level + 1 : level + 2; } - static auto computeLeastOdd (int level) { return isOdd (level) ? level + 2 : level + 1; } - - static BidiType getEmbeddingDirection (int level) + void computeVisualOrder (std::vector& result) const { - return isOdd (level) ? BidiType::rtl : BidiType::ltr; + result.clear(); + + const auto runs = getRuns(); + + if (runs.empty()) + return; + + return computeResultVector (SBLineGetOffset (line.get()), + SBLineGetLength (line.get()), + SBParagraphGetBaseLevel (paragraph.get()), + runs, + result); } - static bool isNeutralIsolate (BidiType x) + static void computeResultVector (SBUInteger offset, + SBUInteger length, + SBLevel baseLevel, + Span runs, + std::vector& result) { - return isNeutral (x) || isIsolate (x); - } - - static bool isNeutral (BidiType x) - { - return contains ({ BidiType::b, BidiType::s, BidiType::ws, BidiType::on }, x); - } - - static bool isIsolate (BidiType x) - { - return isIsolateInitiator (x) || isIsolateTerminator (x); - } - - static bool isStrong (const UnicodeAnalysisPoint& x) - { - return contains ({ BidiType::rtl, BidiType::ltr, BidiType::al }, x.getBidiType()); - } - - static bool isIsolateInitiator (BidiType x) - { - return contains ({ BidiType::lri, BidiType::rli, BidiType::fsi }, x); - } - - static bool isIsolateTerminator (BidiType x) - { - return contains ({ BidiType::pdi }, x); - } - - static void resolveNeutralTypes (Span buffer, int paragraphLevel) - { - n1 (buffer, paragraphLevel); - n2 (buffer); - } - - static void n1 (Span buffer, int paragraphLevel) - { - static auto getStrongType = [] (BidiType bt) + const auto level = [] (const SBRun& x) { - if (bt == BidiType::rtl || bt == BidiType::en || bt == BidiType::an) - return BidiType::rtl; - - return BidiType::ltr; + return x.level; }; - const auto begin = buffer.begin(); - const auto end = buffer.end(); - - const auto iso = getEmbeddingDirection (paragraphLevel); - - for (auto iter = begin; iter != end;) + const auto high = level (*std::max_element (runs.begin(), runs.end(), [&] (const auto& a, const auto& b) { - const auto predicate = [] (const UnicodeAnalysisPoint& uap) { return isNeutralIsolate (uap.getBidiType()); }; - const auto niBegin = std::find_if (iter, end, predicate); + return level (a) < level (b); + })); - if (niBegin == end) - break; - - const auto niEnd = std::find_if_not (niBegin, end, predicate); - const auto pre = niBegin == begin ? iso : getStrongType ((niBegin - 1)->getBidiType()); - const auto post = niEnd == end ? iso : getStrongType ((niEnd)->getBidiType()); - - if (pre == post) - std::for_each (niBegin, niEnd, [pre] (auto& uap) { uap.setBidiType (pre); }); - - iter = niEnd; - } - } - - static void n2 (Span buffer) - { - for (auto& uap : buffer) - if (isNeutralIsolate (uap.getBidiType())) - uap.setBidiType (getEmbeddingDirection (uap.embeddingLevel)); - } - - static int resolveParagraphEmbeddingLevel (const Span buffer) - { - auto seek = [buffer] (BidiType type, Range seekRange) -> int + const auto pseudoLevel = [] (const SBRun& x) { - for (int i = seekRange.getStart(); i < seekRange.getEnd(); i++) - { - if (buffer[(size_t) i].data.bidi == type) - return i; - } - - return seekRange.getEnd(); + const auto l = x.level; + return (l % 2) == 1 ? l : 0xff; }; - const auto bufferSize = (int) buffer.size(); - - for (int i = 0; i < bufferSize; i++) + const auto low = pseudoLevel (*std::min_element (runs.begin(), runs.end(), [&] (const auto& a, const auto& b) { - const auto& atom = buffer[(size_t) i]; + return pseudoLevel (a) < pseudoLevel (b); + })); - if (isStrong (atom)) - return atom == BidiType::ltr ? 0 : 1; + result.resize (length); + std::iota (result.begin(), result.end(), offset); - if (isIsolateInitiator (atom.getBidiType())) + for (auto currentLevel = high; currentLevel >= low; --currentLevel) + { + const auto doFlip = [&] (auto beginRuns, auto endRuns) { - // skip to past matching PDI or EOS - const auto end = seek (BidiType::pdi, { i, bufferSize }); - i = end != bufferSize ? end + 1 : bufferSize; - } - } + const auto getStartOfRunInResult = [&] (const auto runIterator) + { + return runIterator == endRuns ? result.end() + : result.begin() + (ptrdiff_t) (runIterator->offset - offset); + }; - return 0; - } + for (auto it = beginRuns; it != endRuns;) + { + const auto begin = std::find_if (it, endRuns, [&] (const SBRun& x) { return currentLevel <= x.level; }); + it = std::find_if (begin, endRuns, [&] (const SBRun& x) { return x.level < currentLevel; }); - struct WeakContext - { - size_t position; - Span buffer; - BidiType prev, next; - int paragraphLevel; - }; + std::reverse (getStartOfRunInResult (begin), getStartOfRunInResult (it)); + } + }; - static void w1 (const WeakContext& context) - { - const auto prev = context.prev; - auto& curr = context.buffer[context.position]; - - if (curr == BidiType::nsm) - { - if (context.position == 0) - curr.setBidiType (getEmbeddingDirection (context.paragraphLevel)); + if (baseLevel % 2 == 0) + doFlip (runs.begin(), runs.end()); else - curr.setBidiType (isIsolate (prev) ? BidiType::on : prev); + doFlip (std::make_reverse_iterator (runs.end()), std::make_reverse_iterator (runs.begin())); } } - static void w2 (const WeakContext& context) +private: + ParagraphPtr paragraph; + LinePtr line; +}; + +class BidiParagraph +{ +public: + using ParagraphPtr = BidiLine::ParagraphPtr; + + explicit BidiParagraph (ParagraphPtr p) + : paragraph (std::move (p)) { - const auto& buffer = context.buffer; - auto& curr = context.buffer[context.position]; + } - if (curr == BidiType::en) + size_t getOffset() const + { + return SBParagraphGetOffset (paragraph.get()); + } + + size_t getLength() const + { + return SBParagraphGetLength (paragraph.get()); + } + + Span getResolvedLevels() const + { + return { SBParagraphGetLevelsPtr (paragraph.get()), getLength() }; + } + + BidiLine createLine (size_t offset, size_t length) const + { + jassert (getOffset() <= offset); + jassert (length <= getLength()); + return BidiLine { ParagraphPtr { SBParagraphRetain (paragraph.get()) }, + BidiLine::LinePtr { SBParagraphCreateLine (paragraph.get(), offset, length) } }; + } + +private: + ParagraphPtr paragraph; +}; + +class BidiAlgorithm +{ +public: + using AlgorithmPtr = std::unique_ptr, FunctionPointerDestructor>; + + explicit BidiAlgorithm (Span t) + : text (t.begin(), t.end()) + { + } + + size_t getLength() const + { + return text.size(); + } + + BidiParagraph createParagraph (size_t offset, std::optional d = {}) const + { + BidiParagraph::ParagraphPtr result { SBAlgorithmCreateParagraph (algorithm.get(), offset, text.size() - offset, [&]() -> SBLevel { - for (int j = (int) context.position - 1; j >= 0; j--) - { - if (buffer[(size_t) j] == BidiType::al) - { - curr.setBidiType (BidiType::an); - break; - } + if (! d.has_value()) + return SBLevelDefaultLTR; + return *d == TextDirection::rtl ? 1 : 0; + }()) }; - if (isStrong (buffer[(size_t) j])) - break; - } + jassert (result != nullptr); + + return BidiParagraph { std::move (result) }; + } + + template + void forEachParagraph (Fn&& callback, std::optional dir = {}) const + { + for (size_t i = 0; i < text.size();) + { + const auto paragraph = createParagraph (i, dir); + callback (paragraph); + i += paragraph.getLength(); } } - static void w3 (const WeakContext& context) +private: + std::vector text; + AlgorithmPtr algorithm { [&] { - auto& curr = context.buffer[context.position]; - - if (curr == BidiType::al) - curr.setBidiType (BidiType::rtl); - } - - static void w4 (const WeakContext& context) - { - const auto prevBidiType = context.prev; - const auto nextBidiType = context.next; - - auto& curr = context.buffer[context.position]; - - if (curr == BidiType::es || curr == BidiType::cs) - { - if (prevBidiType == BidiType::en && nextBidiType == BidiType::en) - { - curr.setBidiType (BidiType::en); - } - else if (curr == BidiType::cs) - { - if (prevBidiType == BidiType::an && nextBidiType == BidiType::an) - curr.setBidiType (BidiType::an); - } - } - } - - static void w5 (const WeakContext& context) - { - const auto& buffer = context.buffer; - const auto& curr = buffer[context.position]; - - if (curr == BidiType::en) - { - // seek backwards - for (int j = (int) context.position - 1; j >= 0; j--) - { - if (buffer[(size_t) j] == BidiType::et) - buffer[(size_t) j].setBidiType (BidiType::en); - else - break; - } - - // seek forwards - for (size_t j = context.position + 1; j < buffer.size(); j++) - { - if (buffer[j] == BidiType::et) - buffer[j].setBidiType (BidiType::en); - else - break; - } - } - } - - static void w6 (const WeakContext& context) - { - auto& curr = context.buffer[context.position]; - - if (contains ({ BidiType::et, BidiType::cs, BidiType::es }, curr.getBidiType())) - curr.setBidiType (BidiType::on); - } - - static void w7 (const WeakContext& context) - { - const auto& buffer = context.buffer; - auto& curr = buffer[context.position]; - - if (curr == BidiType::en) - { - bool strongFound = false; - - for (int j = (int) context.position - 1; j >= 0; j--) - { - if (buffer[(size_t) j] == BidiType::ltr) - { - curr.setBidiType (BidiType::ltr); - strongFound = true; - break; - } - - if (isStrong (buffer[(size_t) j])) - break; - } - - if (! strongFound && getEmbeddingDirection (context.paragraphLevel) == BidiType::ltr) - curr.setBidiType (BidiType::ltr); - } - } - - static void resolveWeakTypes (Span buffer, int paragraphLevel) - { - for (size_t i = 0; i < buffer.size(); i++) - { - const auto sos = i == 0; - const auto eos = i == buffer.size() - 1; - - auto context = WeakContext { i, - buffer, - sos ? BidiType::on : buffer[i - 1].getBidiType(), - eos ? BidiType::on : buffer[i + 1].getBidiType(), - paragraphLevel }; - - w1 (context); - w2 (context); - w3 (context); - w4 (context); - w5 (context); - w6 (context); - w7 (context); - } - } - - static void resolveImplicitTypes (Span buffer) - { - // I1, I2 - // https://www.unicode.org/reports/tr9/#Resolving_Implicit_Levels - for (auto& point : buffer) - { - const auto level = point.embeddingLevel; - const auto isEven = ! isOdd (level); - - if (point.getGeneralCategory() != SBGeneralCategoryPC) - { - JUCE_BEGIN_IGNORE_WARNINGS_GCC_LIKE ("-Wswitch-enum") - switch (point.getBidiType()) - { - case BidiType::ltr: point.embeddingLevel = (uint16_t) (isEven ? level : level + 1); break; - case BidiType::rtl: point.embeddingLevel = (uint16_t) (isEven ? level + 1 : level ); break; - case BidiType::an: point.embeddingLevel = (uint16_t) (isEven ? level + 2 : level + 1); break; - case BidiType::en: point.embeddingLevel = (uint16_t) (isEven ? level + 2 : level + 1); break; - - default: break; - } - JUCE_END_IGNORE_WARNINGS_GCC_LIKE - } - } - } - - static void resolveExplicitLevels (Span buffer, int paragraphLevel) - { - struct DirectionalState - { - enum DirectionalOverride { Neutral, rtl, ltr }; - - DirectionalState (int level, DirectionalOverride dir, bool isolate) - : embeddingLevel (level), - directionalOverride (dir), - isolateStatus (isolate) - { - } - - auto getEmbeddingLevel() const { return embeddingLevel; } - auto getDirectionalOverride() const { return directionalOverride; } - auto getIsolateStatus() const { return isolateStatus; } - - void setEmbeddingLevel (int level) { embeddingLevel = level; } - void setDirectionalOverride (DirectionalOverride dir) { directionalOverride = dir; } - void setIsolateStatus (bool status) { isolateStatus = status; } - - private: - int embeddingLevel; - DirectionalOverride directionalOverride; - bool isolateStatus; - }; - - // X1 - struct OverflowState - { - int isolate = 0; - int embedded = 0; - }; - - // https://www.unicode.org/reports/tr9/#BD2 - static constexpr auto maxStackSize = 125; - std::vector stack; - stack.reserve (maxStackSize); - OverflowState overflowCounter; - int validIsolate = 0; - int previousEmbeddingLevel = paragraphLevel; - - static auto getEmbeddingDirection = [] (BidiType bt) - { - JUCE_BEGIN_IGNORE_WARNINGS_GCC_LIKE ("-Wswitch-enum") - switch (bt) - { - case BidiType::lre: return BidiType::ltr; - case BidiType::lro: return BidiType::ltr; - case BidiType::lri: return BidiType::ltr; - case BidiType::rle: return BidiType::rtl; - case BidiType::rlo: return BidiType::rtl; - case BidiType::rli: return BidiType::rtl; - - default: break; - } - JUCE_END_IGNORE_WARNINGS_GCC_LIKE - - jassertfalse; - return BidiType::ltr; - }; - - [[maybe_unused]] const auto canPush = [&stack] { return stack.size() < maxStackSize; }; - const auto isValid = [&] (auto value) { return (value < maxStackSize) && (overflowCounter.isolate == 0 && overflowCounter.embedded == 0); }; - - stack.push_back ({ paragraphLevel, DirectionalState::Neutral, false }); - - for (auto& atom : buffer) - { - // X2-X5b - if (contains ({ BidiType::lre, BidiType::lro, BidiType::rle, BidiType::rlo, BidiType::rli, BidiType::lri }, atom.getBidiType())) - { - if (stack.empty()) - break; - - const auto isIsolate = contains ({ BidiType::rli, BidiType::lri }, atom.getBidiType()); - const auto isOverride = contains ({ BidiType::lro, BidiType::rlo }, atom.getBidiType()); - - auto head = stack.back(); - - // This has to be applied regardless of the new embeddeding level's validity - if (isIsolate) - { - // X5ab: - // Set the RLIs embedding level to the embedding level of the last entry on the directional status stack. - atom.embeddingLevel = (uint16_t) head.getEmbeddingLevel(); - - // If the directional override status of the last entry on the directional status stack is not neutral, reset - // the current character type from RLI to L if the override status is left-to-right, and to R if the override - // status is right-to-left. - if (head.getDirectionalOverride() != DirectionalState::Neutral) - atom.setBidiType (head.getDirectionalOverride() == DirectionalState::ltr ? BidiType::ltr : BidiType::rtl); - } - else - { - atom.embeddingLevel = (uint16_t) previousEmbeddingLevel; - } - - // X2-5b: - // Compute the least odd/even embedding level greater than the embedding level of the last entry on the directional - // status stack. - const auto isRTL = getEmbeddingDirection (atom.getBidiType()) == BidiType::rtl; - const auto newLevel = isRTL ? computeLeastOdd (head.getEmbeddingLevel()) - : computeLeastEven (head.getEmbeddingLevel()); - - // X2-5b: - // If this new level would be valid, and the overflow isolate count and overflow embedding count are both zero, - // then this is valid. - if (isValid (newLevel)) - { - // Push an entry consisting of the new embedding level. - head.setEmbeddingLevel (newLevel); - - // X5ab: - // Increment the valid isolate count by one and true directional isolate status. - validIsolate += isIsolate ? 1 : 0; - head.setIsolateStatus (isIsolate); - - // X4-X5: - // Push an entry consisting of the new embedding level, RTL/LTR directional override status - if (isOverride) - head.setDirectionalOverride (isRTL ? DirectionalState::rtl : DirectionalState::ltr); - else - head.setDirectionalOverride (DirectionalState::Neutral); - - jassert (canPush()); - stack.push_back (head); - } - - // Otherwise, this is an overflow. If the overflow isolate count is zero, increment the overflow - // embedding count by one. Leave all other variables unchanged. - else if (overflowCounter.isolate == 0) - { - overflowCounter.embedded++; - } - } - - // X6 - //B, BN, RLE, LRE, RLO, LRO, PDF, RLI, LRI, FSI, and PDI - if (! contains ({ BidiType::b, BidiType::bn, BidiType::rle, BidiType::lre, BidiType::rlo, BidiType::lro, BidiType::pdf, - BidiType::rli, BidiType::lri, BidiType::fsi, BidiType::pdi }, atom.getBidiType())) - { - if (stack.empty()) - break; - - auto head = stack.back(); - atom.embeddingLevel = (uint16_t) head.getEmbeddingLevel(); - - previousEmbeddingLevel = stack.empty() ? paragraphLevel : stack.back().getEmbeddingLevel(); - - if (head.getDirectionalOverride() != DirectionalState::Neutral) - atom.setBidiType (head.getDirectionalOverride() == DirectionalState::ltr ? BidiType::ltr : BidiType::rtl); - } - - // X6a: Terminating Isolates - if (atom == BidiType::pdi) - { - // If the overflow isolate count is greater than zero, this PDI matches an overflow isolate - // initiator. Decrement the overflow isolate count by one. - if (overflowCounter.isolate > 0) - { - overflowCounter.isolate--; - } - // Otherwise, if the valid isolate count is > 0, this PDI matches a valid isolate initiator. - else if (validIsolate > 0) - { - // Reset the overflow embedding count to zero. - overflowCounter.embedded = 0; - - // While the directional isolate status of the last entry on the stack is false, pop the - // last entry from the directional status stack. - while (! stack.empty() && ! stack.back().getIsolateStatus()) - stack.pop_back(); - - // Pop the last entry from the directional status stack and decrement the valid isolate - // count by one. - if (! stack.empty()) - stack.pop_back(); - - validIsolate--; - } - - if (stack.empty()) - break; - - // In all cases, look up the last entry on the directional status stack left after the steps above and: - auto& head = stack.back(); - - // Set the PDIs level to the entry's embedding level. - atom.embeddingLevel = (uint16_t) head.getEmbeddingLevel(); - - // If the entry's directional override status is not neutral, reset the current character type from PDI - // to L if the override status is left-to-right, and to R if the override status is right-to-left. - if (head.getDirectionalOverride() != DirectionalState::Neutral) - atom.setBidiType (head.getDirectionalOverride() == DirectionalState::ltr ? BidiType::ltr : BidiType::rtl); - } - - // X7 - else if (atom == BidiType::pdf) - { - atom.embeddingLevel = (uint16_t) previousEmbeddingLevel; - - // If the overflow isolate count is greater than zero, do nothing. - // Otherwise, if the overflow embedding count is greater than zero, decrement it by one. - if (overflowCounter.isolate == 0 && overflowCounter.embedded > 0) - { - overflowCounter.embedded--; - } - else if (stack.size() >= 2 && ! stack.back().getIsolateStatus()) - { - stack.pop_back(); - } - } - - // X8 - else if (atom == BidiType::b) - { - if (stack.empty()) - break; - - atom.embeddingLevel = (uint16_t) stack.back().getEmbeddingLevel(); - - overflowCounter.embedded = 0; - overflowCounter.isolate = 0; - validIsolate = 0; - previousEmbeddingLevel = paragraphLevel; - - stack.clear(); - stack.push_back ({ paragraphLevel, DirectionalState::Neutral, false }); - } - } - } - - static void l1 (Span buffer, int paragraphLevel) - { - auto reset = [buffer, paragraphLevel] (size_t position) - { - for (int i = (int) position; i >= 0; i--) - { - auto& atom = buffer[(size_t) i]; - - if (contains ({ BidiType::s, BidiType::b, BidiType::ws, BidiType::fsi, BidiType::lri, BidiType::rli, BidiType::pdi }, atom.getBidiType())) - atom.embeddingLevel = (uint16_t) paragraphLevel; - else - break; - } - }; - - // On each line, reset the embedding level of the following characters to the paragraph embedding level: - for (size_t i = 0; i < buffer.size(); i++) - { - // Segment separators, Paragraph separators. - // Any sequence of whitespace characters and/or isolate formatting characters (FSI, LRI, RLI, and PDI). - if (contains ({ BidiType::s, BidiType::b }, buffer[i].getBidiType())) - reset (i); - } - - // Any sequence of whitespace characters and/or isolate formatting characters (FSI, LRI, RLI, and PDI) at the end of the line. - reset (buffer.size() - 1); - } - - static void l2 (Span result, Span levels) - { - jassert (levels.size() == result.size()); - - std::iota (result.begin(), result.end(), 0); - const auto high = *std::max_element (levels.begin(), levels.end()); - - for (int level = high; level > 0; level--) - { - for (size_t i = 0; i < levels.size();) - { - if (levels[i] >= level) - { - // Find the longest consecutive run of the current level and above - // 1111 = 4 - // 1001 = 1 - // 1123 = 4 - const auto start = i; - - while (i < levels.size() && levels[i] >= level) - i++; - - std::reverse (result.begin() + start, result.begin() + i); - continue; - } - - i++; - } - } - } - - static void resolveReorderedIndices (std::vector& result, - const Span buffer, - int paragraphLevel) - { - std::vector levels; - - result.resize (buffer.size()); - levels.resize (buffer.size()); - std::transform (buffer.begin(), buffer.end(), levels.begin(), [] (auto& point) { return (int) point.embeddingLevel; }); - - l1 (buffer, paragraphLevel); - l2 (result, levels); - } + SBCodepointSequence sequence { SBStringEncodingUTF32, text.data(), text.size() }; + return SBAlgorithmCreate (&sequence); + }() }; }; //============================================================================== //============================================================================== + #if JUCE_UNIT_TESTS class BidiTests : public UnitTest @@ -705,430 +223,88 @@ public: void runTest() override { - using BT = BidiType; - - const String LRE { L"\u202A" }; // LTR embed - const String RLE { L"\u202B" }; // RTL embed - const String PDF { L"\u202C" }; // Pop directional embedding - const String RLO { L"\u202E" }; // RTL override - const String RLI { L"\u2067" }; // RTL isolate - const String PDI { L"\u2069" }; // Pop Directional isolate - - beginTest ("TR9 N1"); + beginTest ("visual order rtl"); { - // Examples from TR9 - expect (n1 (0, { BT::ltr, BT::b, BT::ltr }, { BT::ltr, BT::ltr, BT::ltr })); - expect (n1 (0, { BT::rtl, BT::b, BT::rtl }, { BT::rtl, BT::rtl, BT::rtl })); - expect (n1 (0, { BT::rtl, BT::b, BT::an }, { BT::rtl, BT::rtl, BT::an })); - expect (n1 (0, { BT::rtl, BT::b, BT::en }, { BT::rtl, BT::rtl, BT::en })); - expect (n1 (0, { BT::an, BT::b, BT::rtl }, { BT::an, BT::rtl, BT::rtl })); - expect (n1 (0, { BT::an, BT::b, BT::an }, { BT::an, BT::rtl, BT::an })); - expect (n1 (0, { BT::an, BT::b, BT::en }, { BT::an, BT::rtl, BT::en })); - expect (n1 (0, { BT::en, BT::b, BT::rtl }, { BT::en, BT::rtl, BT::rtl })); - expect (n1 (0, { BT::en, BT::b, BT::an }, { BT::en, BT::rtl, BT::an })); - expect (n1 (0, { BT::en, BT::b, BT::en }, { BT::en, BT::rtl, BT::en })); - - // NI affected by sos/eos - expect (n1 (0, { BT::b, BT::ltr }, { BT::ltr, BT::ltr })); - expect (n1 (1, { BT::b, BT::rtl }, { BT::rtl, BT::rtl })); - - expect (n1 (0, { BT::ltr, BT::b }, { BT::ltr, BT::ltr })); - expect (n1 (0, { BT::b, BT::b }, { BT::ltr, BT::ltr })); - - // NI not surrounded by similar types should not change. - expect (n1 (0, { BT::rtl, BT::b, BT::ltr }, { BT::rtl, BT::b, BT::ltr })); - expect (n1 (0, { BT::ltr, BT::b, BT::rtl }, { BT::ltr, BT::b, BT::rtl })); + const CharPointer_UTF8 text ("\xd9\x85\xd9\x85\xd9\x85 colour " + "\xd9\x85\xd9\x85\xd9\x85\xd9\x85\xd9\x85\xd9\x85\xd9\x85\xd9\x85\n"); + const std::vector result { 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 4, 5, 6, 7, 8, 9, 3, 2, 1, 0 }; + expect (computeVisualOrder (text) == result); } - beginTest ("TR9 N2"); + beginTest ("visual order ltr"); { - // Examples from TR9 - expect (n2 (0, { BT::ltr, BT::b }, { BT::ltr, BT::ltr })); - expect (n2 (1, { BT::ltr, BT::b }, { BT::ltr, BT::rtl })); - expect (n2 (0, { BT::rtl, BT::b }, { BT::rtl, BT::ltr })); - expect (n2 (1, { BT::rtl, BT::b }, { BT::rtl, BT::rtl })); - expect (n2 (0, { BT::b, BT::ltr }, { BT::ltr, BT::ltr })); - expect (n2 (1, { BT::b, BT::ltr }, { BT::rtl, BT::ltr })); - expect (n2 (0, { BT::b, BT::rtl }, { BT::ltr, BT::rtl })); - expect (n2 (1, { BT::b, BT::rtl }, { BT::rtl, BT::rtl })); + const CharPointer_UTF8 text ("hello \xd9\x85\xd9\x85\xd9\x85 world\n"); + const std::vector result { 0, 1, 2, 3, 4, 5, 8, 7, 6, 9, 10, 11, 12, 13, 14, 15 }; + expect (computeVisualOrder (text) == result); } - beginTest ("TR9 Paragraph Embedding Level"); + beginTest ("visual order core algorithm"); { - // Examples from TR9 - expect (resolveParagraphEmbeddingLevel ({ BT::ltr, BT::ltr, BT::ltr }, 0)); - expect (resolveParagraphEmbeddingLevel ({ BT::rtl, BT::ltr, BT::ltr }, 1)); - expect (resolveParagraphEmbeddingLevel ({ BT::ltr, BT::rtl, BT::ltr, BT::rtl }, 0)); - expect (resolveParagraphEmbeddingLevel ({ BT::rtl, BT::ltr, BT::rtl, BT::ltr }, 1)); - expect (resolveParagraphEmbeddingLevel ({}, 0)); - expect (resolveParagraphEmbeddingLevel ({ BT::ltr }, 0)); - expect (resolveParagraphEmbeddingLevel ({ BT::rtl }, 1)); - expect (resolveParagraphEmbeddingLevel ({ BT::ltr, BT::rtl, BT::rtl }, 0)); - expect (resolveParagraphEmbeddingLevel ({ BT::rtl, BT::ltr, BT::ltr }, 1)); - expect (resolveParagraphEmbeddingLevel ({ BT::ltr, BT::rtl, BT::ltr, BT::rtl, BT::ltr }, 0)); - expect (resolveParagraphEmbeddingLevel ({ BT::rtl, BT::ltr, BT::rtl, BT::ltr, BT::rtl }, 1)); - } + const char testInput[] { "DID YOU SAY 'he said \"car MEANS CAR\"'?" }; + const int testLevels[] { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 4, 4, 4, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 1, 1 }; + const char expectedOutput[] { "?'he said \"RAC SNAEM car\"' YAS UOY DID" }; - { - constexpr auto neutral = BT::bn; + static_assert (std::size (testInput) == std::size (expectedOutput)); + static_assert (std::size (testInput) - 1 == std::size (testLevels)); // ignore null terminator - beginTest ("TR9 W1"); - expect (resolveWeakTypes (TR9::w1, 1, { BT::al, BT::nsm, BT::nsm }, { BT::al, BT::al, BT::al })); - expect (resolveWeakTypes (TR9::w1, 1, { BT::nsm }, { BT::rtl })); - expect (resolveWeakTypes (TR9::w1, 1, { BT::lri, BT::nsm }, { BT::lri, BT::on })); - expect (resolveWeakTypes (TR9::w1, 1, { BT::pdi, BT::nsm }, { BT::pdi, BT::on })); + const auto [baseLevel, runs] = createRunsFromLevels (testLevels); - beginTest ("TR9 W2"); - expect (resolveWeakTypes (TR9::w2, 0, { BT::al, BT::en }, { BT::al, BT::an })); - expect (resolveWeakTypes (TR9::w2, 0, { BT::al, neutral, BT::en }, { BT::al, neutral, BT::an })); - expect (resolveWeakTypes (TR9::w2, 0, { neutral, BT::en }, { neutral, BT::en })); - expect (resolveWeakTypes (TR9::w2, 0, { BT::ltr, neutral, BT::en }, { BT::ltr, neutral, BT::en })); - expect (resolveWeakTypes (TR9::w2, 0, { BT::rtl, neutral, BT::en }, { BT::rtl, neutral, BT::en })); + std::vector result; + BidiLine::computeResultVector (0, std::size (testLevels), baseLevel, runs, result); - beginTest ("TR9 W3"); - expect (resolveWeakTypes (TR9::w3, 0, { BT::al }, { BT::rtl })); + std::vector output; - beginTest ("TR9 W4"); - expect (resolveWeakTypes (TR9::w4, 0, { BT::en, BT::es, BT::en }, { BT::en, BT::en, BT::en })); - expect (resolveWeakTypes (TR9::w4, 0, { BT::en, BT::cs, BT::en }, { BT::en, BT::en, BT::en })); - expect (resolveWeakTypes (TR9::w4, 0, { BT::an, BT::cs, BT::an }, { BT::an, BT::an, BT::an })); + for (auto i : result) + output.push_back (testInput[i]); - beginTest ("TR9 W5"); - expect (resolveWeakTypes (TR9::w5, 0, { BT::et, BT::et, BT::en }, { BT::en, BT::en, BT::en })); - expect (resolveWeakTypes (TR9::w5, 0, { BT::en, BT::et, BT::et }, { BT::en, BT::en, BT::en })); - expect (resolveWeakTypes (TR9::w5, 0, { BT::an, BT::et, BT::en }, { BT::an, BT::en, BT::en })); - - beginTest ("TR9 W6"); - expect (resolveWeakTypes (TR9::w6, 0, { BT::an, BT::et }, { BT::an, BT::on })); - expect (resolveWeakTypes (TR9::w6, 0, { BT::ltr, BT::es, BT::en }, { BT::ltr, BT::on, BT::en })); - expect (resolveWeakTypes (TR9::w6, 0, { BT::en, BT::cs, BT::an }, { BT::en, BT::on, BT::an })); - expect (resolveWeakTypes (TR9::w6, 0, { BT::et, BT::an }, { BT::on, BT::an })); - - beginTest ("TR9 W7"); - expect (resolveWeakTypes (TR9::w7, 0, { BT::ltr, neutral, BT::en }, { BT::ltr, neutral, BT::ltr })); - expect (resolveWeakTypes (TR9::w7, 1, { BT::rtl, neutral, BT::en }, { BT::rtl, neutral, BT::en })); - } - - beginTest ("TR9 I1"); - { - expect (resolveImplicitTypes (0, { BT::rtl, BT::an, BT::en }, { 1, 2, 2 })); - expect (resolveImplicitTypes (0, { BT::ltr, BT::ltr, BT::ltr }, { 0, 0, 0 })); - expect (resolveImplicitTypes (0, { BT::an, BT::ltr, BT::rtl }, { 2, 0, 1 })); - } - - beginTest ("TR9 I2"); - { - expect (resolveImplicitTypes (1, { BT::ltr, BT::en, BT::an }, { 2, 2, 2 })); - expect (resolveImplicitTypes (1, { BT::rtl, BT::rtl, BT::rtl }, { 1, 1, 1 })); - expect (resolveImplicitTypes (1, { BT::an }, { 2 })); - } - - beginTest ("X1 - X8"); - { - expect (resolveExplicitLevels (0, String { "zero" } + RLE + "one" + PDF + "zero", - "0000" "0" "111" "1" "0000")); - - expect (resolveExplicitLevels (1, String { "one" } + PDF + "one", - "111" "1" "111")); - - // Nested embedding: - expect (resolveExplicitLevels (0, String { "zero" } + RLE + "one" + RLE + "333" + PDF + PDF + "zero", - "0000" "0" "111" "1" "333" "3" "3" "0000")); - - // Directional override: - expect (resolveExplicitLevels (0, String { "abc" } + RLO + "def" + PDF + "ghi", - "000" "0" "111" "1" "000")); - // Mixed embedding and overrides: - expect (resolveExplicitLevels (0, String { "abc" } + LRE + "lmn" + PDF + "def" + RLO + "ghi" + PDF + "jkl", - "000" "0" "222" "2" "000" "0" "111" "1" "000")); - - // Multiple PDFs: - expect (resolveExplicitLevels (0, String { "abc" } + RLE + "def" + RLE + "ghi" + PDF + PDF + "jkl", - "000" "0" "111" "1" "333" "3" "3" "000")); - - // Isolates: - expect (resolveExplicitLevels (0, String { "abc" } + RLI + "def" + PDI + "ghi", - "000" "0" "111" "0" "000")); - - // Overflows and isolates: - // PDIs are not removed from the output string - expect (resolveExplicitLevels (0, String { "abc" } + LRE + "rlm" + RLE + "def" + PDF + "ghi" + PDI + "jkl", - "000" "0" "222" "2" "333" "3" "222" "2" "222")); - - // Paragraph separator: - expect (resolveExplicitLevels (0, String { "abc" } + RLE + "def" + PDF + "\nxyz", - "000" "0" "111" "1" "0000")); - - // Complex nesting: - expect (resolveExplicitLevels (0, String { "abc" } + RLE + "lmn" + RLO + "opq" + PDF + "xyz" + PDF + "jkl", - "000" "0" "111" "1" "333" "3" "111" "1" "000")); - - // Multiple embeddings: - expect (resolveExplicitLevels (0, String { "abc" } + RLE + "lmn" + RLE + "lpq" + PDF + "rs" + PDF + "xyz", - "000" "0" "111" "1" "333" "3" "11" "1" "000")); - } - - beginTest ("TR9 L1"); - { - expect (l1 (0, { BidiType::ws, BidiType::s }, "10", "00")); - expect (l1 (1, { BidiType::ws, BidiType::s }, "10", "11")); - expect (l1 (4, { BidiType::ws, BidiType::ws, BidiType::ws }, "000", "444")); - - expect (l1 (0, { BidiType::fsi, BidiType::pdi, BidiType::rli, BidiType::lri }, "1111", "0000")); - expect (l1 (0, { BidiType::fsi, BidiType::pdi, BidiType::ws, BidiType::lri }, "1111", "0000")); - - expect (l1 (0, { BidiType::fsi, BidiType::an, BidiType::rli, BidiType::lri }, "1100", "1100")); - } - - beginTest ("TR9 L2"); - { - expect (l2 ("abc", "000", "abc")); - expect (l2 ("abc", "111", "cba")); - - expect (l2 ("car MEANS CAR.", - "22211111111111", - ".RAC SNAEM car")); - - expect (l2 ("he said \"car MEANS CAR.\" \"IT DOES,\" she agreed.", - "00000000022211111111110000111111100000000000000", - "he said \"RAC SNAEM car.\" \"SEOD TI,\" she agreed.")); - - expect (l2 ("DID YOU SAY \'he said \"car MEANS CAR\"\'?", - "11111111111112222222224443333333333211", - "?\'he said \"RAC SNAEM car\"\' YAS UOY DID")); + expect (std::equal (output.begin(), output.end(), expectedOutput)); } } - static std::vector generateTestUAPs (std::initializer_list bts, int embeddingLevel = 0) + static std::vector computeVisualOrder (const String& text) { - static const std::unordered_map map = + std::vector chars; + + for (const auto t : text) + chars.push_back (t); + + BidiAlgorithm algorithm { chars }; + auto paragraph = algorithm.createParagraph (0); + auto line = paragraph.createLine (0, paragraph.getLength()); + + std::vector order; + line.computeVisualOrder (order); + return order; + } + + static std::pair> createRunsFromLevels (Span levels) + { + std::vector runs; + + for (size_t i = 0; i < levels.size();) { - { BidiType::ltr, 0x0041 }, - { BidiType::rtl, 0x05d0 }, - { BidiType::b, 0x2029 }, - { BidiType::s, 0x001F }, - { BidiType::en, 0x0032 }, - { BidiType::an, 0x0664 }, - { BidiType::es, '+' }, - { BidiType::et, '%' }, - { BidiType::cs, '.' }, - { BidiType::ws, ' ' }, - { BidiType::nsm, 0x0300 }, - { BidiType::al, 0x0642 }, - { BidiType::bn, 0x0000 }, - { BidiType::pdi, 0x2069 }, - { BidiType::lri, 0x2066 }, - { BidiType::rli, 0x2067 }, - { BidiType::fsi, 0x2068 } - }; + const auto level = levels[i]; - std::vector out; - out.reserve (bts.size()); - - for (const auto& bt : bts) - { - const auto character = map.find (bt); - - if (character == map.end()) + for (size_t j = i + 1; j < levels.size(); ++j) { - jassertfalse; - return {}; + const auto lastElement = j == levels.size() - 1; + const auto endIndex = lastElement ? j + 1 : j; + + if (levels[j] != level || lastElement) + { + runs.push_back ({ (SBUInteger) i, (SBUInteger) (endIndex - i), (SBLevel) level }); + i = endIndex; + break; + } } - - // Sanity check! - const auto realBT = UnicodeDataTable::getDataForCodepoint (character->second).bidi; - - if (bt != realBT) - { - jassertfalse; - return {}; - } - - UnicodeEntry entry{}; - entry.bidi = bt; - - out.emplace_back (character->second, std::move (entry)); } - for (auto& p : out) - p.embeddingLevel = (uint16_t) embeddingLevel; + const auto baseLevel = std::size (levels) == 0 ? 0 : *std::min_element (std::begin (levels), std::end (levels)); - return out; - } + if (baseLevel % 2 != 0) + std::reverse (runs.begin(), runs.end()); - static bool n1 (int paragraphLevel, std::initializer_list input, std::initializer_list expected) - { - auto uaps = generateTestUAPs (input); - - if (uaps.empty()) - return false; - - TR9::n1 (uaps, paragraphLevel); - return checkUAPs (uaps, expected); - } - - static bool n2 (int embeddingLevel, std::initializer_list input, std::initializer_list expected) - { - auto uaps = generateTestUAPs (input, embeddingLevel); - - if (uaps.empty()) - return false; - - TR9::n2 (uaps); - return checkUAPs (uaps, expected); - } - - static bool l1 (int paragraphLevel, std::initializer_list bts, const String& levels, const String& expected) - { - auto uaps = generateTestUAPs (bts); - - if (uaps.empty()) - return false; - - if ((int) uaps.size() != levels.length() || (int) uaps.size() != expected.length()) - { - jassertfalse; - return false; - } - - for (int i = 0; i < (int) uaps.size(); i++) - uaps[(size_t) i].embeddingLevel = (uint16_t) levels.substring (i, i + 1).getIntValue(); - - TR9::l1 (uaps, paragraphLevel); - - const auto result = [&uaps] - { - String s; - - for (auto uap : uaps) - s << (int) uap.embeddingLevel; - - return s; - }(); - - return result == expected; - } - - static bool l2 (const String& text, const String& levels, const String& expected) - { - const auto utf32 = text.toUTF32(); - const auto length = utf32.length(); - - std::vector levelVec; - std::vector reorderVec; - - levelVec.resize (length); - reorderVec.resize (length); - - for (size_t i = 0; i < length; i++) - { - reorderVec[0] = 0; - levelVec[i] = (uint16_t) levels.substring ((int) i, (int) i + 1).getIntValue(); - } - - TR9::l2 (reorderVec, levelVec); - - const auto result = [&reorderVec, text] - { - String s; - - for (auto level : reorderVec) - s << text[level]; - - return s; - }(); - - return result == expected; - } - - static bool resolveParagraphEmbeddingLevel (std::initializer_list bts, int expected) - { - auto uaps = generateTestUAPs (bts); - - if (bts.size() > 0 && uaps.empty()) - return false; - - return TR9::resolveParagraphEmbeddingLevel (uaps) == expected; - } - - static bool resolveWeakTypes (void (*func) (const TR9::WeakContext&), int paragraphLevel, std::initializer_list bts, - std::initializer_list expected) - { - auto uaps = generateTestUAPs (bts); - - if (uaps.empty()) - return false; - - for (size_t i = 0; i < uaps.size(); i++) - { - const auto sos = i == 0; - const auto eos = i == uaps.size() - 1; - - auto context = TR9::WeakContext { i, - uaps, - sos ? BidiType::on : uaps[i - 1].getBidiType(), - eos ? BidiType::on : uaps[i + 1].getBidiType(), - paragraphLevel }; - - func (context); - } - - return checkUAPs (uaps, expected); - } - - static bool resolveImplicitTypes (int embeddingLevel, std::initializer_list bts, std::initializer_list expected) - { - auto uaps = generateTestUAPs (bts, embeddingLevel); - - if (uaps.empty()) - return false; - - TR9::resolveImplicitTypes (uaps); - - for (size_t i = 0; i < uaps.size(); i++) - if (uaps[i].embeddingLevel != expected.begin()[i]) - return false; - - return true; - } - - static bool resolveExplicitLevels (int embeddingLevel, const String& input, const String& expectedLevels) - { - std::vector uaps; - - const auto inputChars = input.toUTF32(); - uaps.reserve (inputChars.length()); - - for (size_t i = 0; i < inputChars.length(); i++) - { - const auto codepoint = inputChars[(int) i]; - auto data = UnicodeDataTable::getDataForCodepoint ((uint32_t) codepoint); - - UnicodeAnalysisPoint uap { (char32_t) codepoint, data }; - uap.embeddingLevel = (uint16_t) embeddingLevel; - uaps.push_back (uap); - } - - TR9::resolveExplicitLevels (uaps, embeddingLevel); - - const auto levels = [&] { - String s; - - for (auto point : uaps) - s << (int) point.embeddingLevel; - - return s; - }(); - - return levels == expectedLevels; - } - - static bool checkUAPs (Span uaps, std::initializer_list expected) - { - return std::equal (uaps.begin(), uaps.end(), expected.begin(), [] (UnicodeAnalysisPoint uap, BidiType bt) - { - return uap.getBidiType() == bt; - }); + return { (SBLevel) baseLevel, runs }; } }; diff --git a/modules/juce_graphics/unicode/juce_UnicodeUtils.cpp b/modules/juce_graphics/unicode/juce_UnicodeUtils.cpp index 7011833f2c..8a08882b15 100644 --- a/modules/juce_graphics/unicode/juce_UnicodeUtils.cpp +++ b/modules/juce_graphics/unicode/juce_UnicodeUtils.cpp @@ -49,7 +49,6 @@ struct UnicodeAnalysisPoint { char32_t character = 0; UnicodeEntry data{}; - uint16_t embeddingLevel = 0; UnicodeAnalysisPoint (char32_t characterIn, UnicodeEntry entry) : character { characterIn }, @@ -66,25 +65,10 @@ struct UnicodeAnalysisPoint return SBCodepointGetGeneralCategory (character); } - BidiType getBidiType() const - { - return data.bidi; - } - - void setBidiType (BidiType newType) - { - data.bidi = newType; - } - auto getScriptType() const { return SBCodepointGetScript (character); } - - bool operator== (const BidiType& b) const - { - return getBidiType() == b; - } }; //==============================================================================