From 57d33150d3453709065a7609665a98399baf39d1 Mon Sep 17 00:00:00 2001 From: Oliver James Date: Fri, 12 Jul 2024 15:29:14 +0100 Subject: [PATCH] Unicode: Fix incorrect character ordering in Latin text This addresses issues that could occur when re-ordering text that contained brackets or numerical separators. --- .../juce_graphics/unicode/juce_Unicode.cpp | 9 + .../unicode/juce_UnicodeBidi.cpp | 1247 ++++++++++++----- .../unicode/juce_UnicodeUtils.cpp | 2 +- 3 files changed, 879 insertions(+), 379 deletions(-) diff --git a/modules/juce_graphics/unicode/juce_Unicode.cpp b/modules/juce_graphics/unicode/juce_Unicode.cpp index e215460c89..0b4024d6be 100644 --- a/modules/juce_graphics/unicode/juce_Unicode.cpp +++ b/modules/juce_graphics/unicode/juce_Unicode.cpp @@ -311,6 +311,15 @@ public: doTest ("1_3(1)"); doTest ("-12323"); doTest ("8784-43_-33"); + doTest ("[v = get()](vector _arr) -> v2 { return _arr[5]; };"); + doTest (R"([(lambda x: (x, len(x), x.upper(), x[::-1]))(word) for word in "JUCE is great".split()])"); + doTest (R"(table.concat({table.unpack({string.reverse(string.gsub("JUCE is great", "%a", string.upper))})}, " "))"); + doTest (R"(result = sum([(mod(i, 2) * i**2, i = 1, 100)], mask = [(mod(i, 2) == 0, i = 1, 100)]))"); + doTest ("100 +100"); + doTest ("100+ 100"); + doTest ("100 - +100"); + doTest ("abs= +100"); + doTest ("1.19.0 [1]"); } }; diff --git a/modules/juce_graphics/unicode/juce_UnicodeBidi.cpp b/modules/juce_graphics/unicode/juce_UnicodeBidi.cpp index 1fe70f9ed3..f98392b332 100644 --- a/modules/juce_graphics/unicode/juce_UnicodeBidi.cpp +++ b/modules/juce_graphics/unicode/juce_UnicodeBidi.cpp @@ -52,73 +52,66 @@ public: std::optional directionOverride = {}) { // BD1 - const auto baseLevel = directionOverride.has_value() ? (*directionOverride == TextDirection::rtl ? 1 : 0) - : resolveParagraphEmbeddingLevel (stream); + const auto paragraphLevel = directionOverride.has_value() ? (*directionOverride == TextDirection::rtl ? 1 : 0) + : resolveParagraphEmbeddingLevel (stream); - std::for_each (stream.begin(), stream.end(), [baseLevel] (auto& atom) - { - atom.bidiLevel = (uint16_t) baseLevel; - }); + for (auto& atom : stream) + atom.embeddingLevel = (uint16_t) paragraphLevel; - resolveExplicitLevels (stream, baseLevel); + // X1-8 + resolveExplicitLevels (stream, paragraphLevel); - // X9 replace override characters - const auto pred = [] (auto atom) { return contains ({ BidiType::rle, BidiType::lre, - BidiType::lro, BidiType::rlo, - BidiType::pdf }, atom.getBidiType()); }; - - std::for_each (stream.begin(), - stream.end(), - [&pred] (auto& atom) - { - if (! pred (atom)) - return; - - auto copy = atom; - copy.setBidiType (BidiType::bn); - - atom = copy; - }); + // X9 + for (auto& atom : stream) + if (contains ({ BidiType::rle, BidiType::lre, BidiType::rlo, BidiType::lro, BidiType::pdf }, atom.getBidiType())) + atom.setBidiType (BidiType::bn); // W1-W7 - resolveWeakTypes (stream, {}); + resolveWeakTypes (stream, paragraphLevel); - // N0-N2 - resolveNeutralTypes (stream, baseLevel, {}); + // N1-N2 + resolveNeutralTypes (stream, paragraphLevel); // I1-I2 - resolveImplicitTypes (stream, baseLevel, {}); + resolveImplicitTypes (stream); - output.embeddingLevel = baseLevel; + output.embeddingLevel = paragraphLevel; output.resolvedLevels.clear(); for (const auto& atom : stream) - output.resolvedLevels.push_back (atom.bidiLevel); + output.resolvedLevels.push_back (atom.embeddingLevel); - resolveReorderedIndices (output.visualOrder, stream, baseLevel, {}); + resolveReorderedIndices (output.visualOrder, stream, paragraphLevel); } -private: - enum EmbeddingLevel - { - left = 0, - right = 1 - }; + static auto isOdd (int level) { return bool (level & 1); } + static auto computeLeastEven (int level) { return isOdd (level) ? level + 1 : level + 2; } + static auto computeLeastOdd (int level) { return isOdd (level) ? level + 2 : level + 1; } - static auto isOdd (int level) { return bool (level & 1); } - static auto computeLeastEven (int level) { return isOdd (level) ? level + 1 : level + 2; } - static auto computeLeastOdd (int level) { return isOdd (level) ? level + 2 : level + 1; } - static auto getEmbeddingDirection (int level) { return isOdd (level) ? BidiType::rtl : BidiType::ltr; } + static BidiType getEmbeddingDirection (int level) + { + return isOdd (level) ? BidiType::rtl : BidiType::ltr; + } + + static bool isNeutralIsolate (BidiType x) + { + return isNeutral (x) || isIsolate (x); + } + + static bool isNeutral (BidiType x) + { + return contains ({ BidiType::b, BidiType::s, BidiType::ws, BidiType::on }, x); + } + + static bool isIsolate (BidiType x) + { + return isIsolateInitiator (x) || isIsolateTerminator (x); + } static bool isStrong (const UnicodeAnalysisPoint& x) { - return contains ({ BidiType::rtl, BidiType::ltr, BidiType::al }, x.getBidiType()); - } - - static bool isNeutral (const UnicodeAnalysisPoint& x) - { - return contains ({ BidiType::b, BidiType::s, BidiType::ws, BidiType::on }, x.getBidiType()); + return contains ({ BidiType::rtl, BidiType::ltr, BidiType::al }, x.getBidiType()); } static bool isIsolateInitiator (BidiType x) @@ -131,15 +124,55 @@ private: return contains ({ BidiType::pdi }, x); } - static bool isIsolate (BidiType x) + static void resolveNeutralTypes (Span buffer, int paragraphLevel) { - return isIsolateInitiator (x) || isIsolateTerminator (x); + n1 (buffer, paragraphLevel); + n2 (buffer); } - static int resolveParagraphEmbeddingLevel (const Span buffer, Range range = {}) + static void n1 (Span buffer, int paragraphLevel) { - range = range.isEmpty() ? Range { 0, (int) buffer.size() } : range; + static auto getStrongType = [] (BidiType bt) + { + if (bt == BidiType::rtl || bt == BidiType::en || bt == BidiType::an) + return BidiType::rtl; + return BidiType::ltr; + }; + + const auto begin = buffer.begin(); + const auto end = buffer.end(); + + const auto iso = getEmbeddingDirection (paragraphLevel); + + for (auto iter = begin; iter != end;) + { + const auto predicate = [] (const UnicodeAnalysisPoint& uap) { return isNeutralIsolate (uap.getBidiType()); }; + const auto niBegin = std::find_if (iter, end, predicate); + + if (niBegin == end) + break; + + const auto niEnd = std::find_if_not (niBegin, end, predicate); + const auto pre = niBegin == begin ? iso : getStrongType ((niBegin - 1)->getBidiType()); + const auto post = niEnd == end ? iso : getStrongType ((niEnd)->getBidiType()); + + if (pre == post) + std::for_each (niBegin, niEnd, [pre] (auto& uap) { uap.setBidiType (pre); }); + + iter = niEnd; + } + } + + static void n2 (Span buffer) + { + for (auto& uap : buffer) + if (isNeutralIsolate (uap.getBidiType())) + uap.setBidiType (getEmbeddingDirection (uap.embeddingLevel)); + } + + static int resolveParagraphEmbeddingLevel (const Span buffer) + { auto seek = [buffer] (BidiType type, Range seekRange) -> int { for (int i = seekRange.getStart(); i < seekRange.getEnd(); i++) @@ -151,249 +184,201 @@ private: return seekRange.getEnd(); }; - for (int i = range.getStart(); i < range.getEnd(); i++) + const auto bufferSize = (int) buffer.size(); + + for (int i = 0; i < bufferSize; i++) { const auto& atom = buffer[(size_t) i]; if (isStrong (atom)) - return atom == BidiType::ltr ? EmbeddingLevel::left : EmbeddingLevel::right; + return atom == BidiType::ltr ? 0 : 1; if (isIsolateInitiator (atom.getBidiType())) { // skip to past matching PDI or EOS - const auto end = seek (BidiType::pdi, { i, range.getEnd() }); - i = end != range.getEnd() ? end + 1 : range.getEnd(); + const auto end = seek (BidiType::pdi, { i, bufferSize }); + i = end != bufferSize ? end + 1 : bufferSize; } } - return EmbeddingLevel::left; + return 0; } - static void resolveNeutralTypes (Span buffer, int embeddingLevel, Range range) + struct WeakContext { - range = range.isEmpty() ? Range { 0, (int) buffer.size() } : range; + size_t position; + Span buffer; + BidiType prev, next; + int paragraphLevel; + }; - // BD13: - const auto bracketRanges = [buffer, range] + static void w1 (const WeakContext& context) + { + const auto prev = context.prev; + auto& curr = context.buffer[context.position]; + + if (curr == BidiType::nsm) { - struct Bracket + if (context.position == 0) + curr.setBidiType (getEmbeddingDirection (context.paragraphLevel)); + else + curr.setBidiType (isIsolate (prev) ? BidiType::on : prev); + } + } + + static void w2 (const WeakContext& context) + { + const auto& buffer = context.buffer; + auto& curr = context.buffer[context.position]; + + if (curr == BidiType::en) + { + for (int j = (int) context.position - 1; j >= 0; j--) { - int position; - uint32_t character; - Brackets::Kind type; - }; - - // https://www.unicode.org/reports/tr9/#BD16 - constexpr auto maxStackSize = 63; - std::vector stack; - stack.reserve (maxStackSize); - std::vector> brackets; - - for (int i = range.getStart(); i < range.getEnd(); i++) - { - const auto& curr = buffer[(size_t) i]; - - if (curr == BidiType::on) + if (buffer[(size_t) j] == BidiType::al) { - const auto type = Brackets::getKind (curr.character); - - if (type == Brackets::Kind::open) - { - if (stack.size() == stack.capacity()) - return brackets; - - stack.push_back ({ i, curr.character, Brackets::Kind::open }); - } - else if (type == Brackets::Kind::close) - { - while (! stack.empty()) - { - const auto head = stack.back(); - stack.pop_back(); - - if (head.type == Brackets::Kind::open) - { - if (Brackets::isMatchingPair (head.character, curr.character)) - { - brackets.push_back ({ head.position, i }); - break; - } - } - } - } + curr.setBidiType (BidiType::an); + break; } + + if (isStrong (buffer[(size_t) j])) + break; } + } + } - return brackets; - }(); + static void w3 (const WeakContext& context) + { + auto& curr = context.buffer[context.position]; - // N0: - for (auto bracketRange : bracketRanges) + if (curr == BidiType::al) + curr.setBidiType (BidiType::rtl); + } + + static void w4 (const WeakContext& context) + { + const auto prevBidiType = context.prev; + const auto nextBidiType = context.next; + + auto& curr = context.buffer[context.position]; + + if (curr == BidiType::es || curr == BidiType::cs) { - const auto dir = getEmbeddingDirection (embeddingLevel); - const Span span { buffer.data() + bracketRange.getStart(), (size_t) bracketRange.getLength() }; - const auto strong = std::find_if (span.begin(), span.end(), [] (const UnicodeAnalysisPoint& atom) + if (prevBidiType == BidiType::en && nextBidiType == BidiType::en) { - return isStrong (atom); - }); + curr.setBidiType (BidiType::en); + } + else if (curr == BidiType::cs) + { + if (prevBidiType == BidiType::an && nextBidiType == BidiType::an) + curr.setBidiType (BidiType::an); + } + } + } - if (strong != span.end()) + static void w5 (const WeakContext& context) + { + const auto& buffer = context.buffer; + const auto& curr = buffer[context.position]; + + if (curr == BidiType::en) + { + // seek backwards + for (int j = (int) context.position - 1; j >= 0; j--) { - if (*strong == dir) - { - // B: - buffer[(size_t) bracketRange.getStart()].setBidiType (dir); - buffer[(size_t) bracketRange.getEnd()].setBidiType (dir); - } + if (buffer[(size_t) j] == BidiType::et) + buffer[(size_t) j].setBidiType (BidiType::en); else - { - // C: - const auto strongContext = [&buf = std::as_const (buffer), - start = bracketRange.getStart(), - dir] - { - for (int i = start; i >= 0; i--) - { - if (isStrong (buf[(size_t) i])) - return buf[(size_t) i].getBidiType(); - } - - return dir; - }(); - - buffer[(size_t) bracketRange.getStart()].setBidiType (strongContext); - buffer[(size_t) bracketRange.getEnd()].setBidiType (strongContext); - } + break; } - } - // N1: - for (int i = range.getStart(); i < range.getEnd(); i++) - { - const auto curr = buffer[(size_t) i]; - const auto prevBidiType = i > 0 ? buffer[(size_t) i - 1].getBidiType() : BidiType::none; // SOS type? - - if (isNeutral (curr) || isIsolate (curr.getBidiType())) + // seek forwards + for (size_t j = context.position + 1; j < buffer.size(); j++) { - const auto endIndex = [buffer, start = i, end = range.getEnd()] - { - for (int j = start; j < end; j++) - { - const auto atom = buffer[(size_t) j]; - - if (! (isNeutral (atom) || isIsolate (atom.getBidiType()))) - return j; - } - - return end - 1; - }(); - - auto isNumber = [] (BidiType type) { return contains ({ BidiType::an, BidiType::en }, type); }; - const auto start = isNumber (prevBidiType) ? BidiType::rtl : prevBidiType; - const auto end = isNumber (buffer[(size_t) endIndex].getBidiType()) ? BidiType::rtl : buffer[(size_t) endIndex].getBidiType(); - - const auto type = start == end ? start : getEmbeddingDirection (embeddingLevel); - std::for_each (buffer.begin() + i, buffer.begin() + endIndex, [type] (UnicodeAnalysisPoint& atom) - { - atom.setBidiType (type); - }); - - i = endIndex; + if (buffer[j] == BidiType::et) + buffer[j].setBidiType (BidiType::en); + else + break; } } } - static void resolveWeakTypes (Span buffer, Range range) + static void w6 (const WeakContext& context) { - range = range.isEmpty() ? Range { 0, static_cast (buffer.size()) } : range; + auto& curr = context.buffer[context.position]; - for (int i = range.getStart(); i < range.getEnd(); i++) + if (contains ({ BidiType::et, BidiType::cs, BidiType::es }, curr.getBidiType())) + curr.setBidiType (BidiType::on); + } + + static void w7 (const WeakContext& context) + { + const auto& buffer = context.buffer; + auto& curr = buffer[context.position]; + + if (curr == BidiType::en) { - auto& curr = buffer[(size_t) i]; - const auto prevBidiType = i > 0 ? buffer[(size_t) i - 1].getBidiType() : BidiType::none; - const auto nextBidiType = i < range.getEnd() - 1 ? buffer[(size_t) i + 1].getBidiType() : BidiType::none; + bool strongFound = false; - // W1: - if (curr == BidiType::nsm) - curr.setBidiType (isIsolate (prevBidiType) ? BidiType::on : prevBidiType); - - // W2: - else if (curr == BidiType::en) + for (int j = (int) context.position - 1; j >= 0; j--) { - for (int j = i - 1; j >= 1; j--) + if (buffer[(size_t) j] == BidiType::ltr) { - if (buffer[(size_t) j] == BidiType::al) - curr.setBidiType (BidiType::al); - - if (isStrong (buffer[(size_t) j])) - break; + curr.setBidiType (BidiType::ltr); + strongFound = true; + break; } + + if (isStrong (buffer[(size_t) j])) + break; } - // W3: - else if (curr == BidiType::al) - curr.setBidiType (BidiType::rtl); - - // W4: - else if (curr == BidiType::es || curr == BidiType::cs) - { - if (prevBidiType == BidiType::en && nextBidiType == BidiType::en) - { - curr.setBidiType (BidiType::en); - } - else if (curr == BidiType::cs) - { - if (prevBidiType == BidiType::an && nextBidiType == BidiType::an) - curr.setBidiType (BidiType::an); - } - } - - // W5: - else if (curr == BidiType::et) - { - if (prevBidiType == BidiType::en || nextBidiType == BidiType::en) - curr.setBidiType (BidiType::en); - } - - // W6 - if (contains ({ BidiType::es, BidiType::cs }, curr.getBidiType())) - curr.setBidiType (BidiType::on); - - // W7: - else if (curr == BidiType::en) - { - for (int j = i - 1; j >= 1; j--) - { - if (buffer[(size_t) j] == BidiType::ltr) - curr.setBidiType (BidiType::ltr); - - if (isStrong (buffer[(size_t) j])) - break; - } - } + if (! strongFound && getEmbeddingDirection (context.paragraphLevel) == BidiType::ltr) + curr.setBidiType (BidiType::ltr); } } - static void resolveImplicitTypes (Span buffer, int embeddingLevel, Range range) + static void resolveWeakTypes (Span buffer, int paragraphLevel) { - range = range.isEmpty() ? Range { 0, static_cast (buffer.size()) } : range; + for (size_t i = 0; i < buffer.size(); i++) + { + const auto sos = i == 0; + const auto eos = i == buffer.size() - 1; + auto context = WeakContext { i, + buffer, + sos ? BidiType::on : buffer[i - 1].getBidiType(), + eos ? BidiType::on : buffer[i + 1].getBidiType(), + paragraphLevel }; + + w1 (context); + w2 (context); + w3 (context); + w4 (context); + w5 (context); + w6 (context); + w7 (context); + } + } + + static void resolveImplicitTypes (Span buffer) + { // I1, I2 // https://www.unicode.org/reports/tr9/#Resolving_Implicit_Levels - for (int i = range.getStart(); i < range.getEnd(); i++) + for (auto& point : buffer) { - auto& curr = buffer[(size_t) i]; + const auto level = point.embeddingLevel; + const auto isEven = ! isOdd (level); - const auto level = (uint16_t) embeddingLevel; - const auto isEven = isOdd (level) == false; - - if (curr.getGeneralCategory() != GeneralCategory::pc) + if (point.getGeneralCategory() != GeneralCategory::pc) { JUCE_BEGIN_IGNORE_WARNINGS_GCC_LIKE ("-Wswitch-enum") - switch (curr.getBidiType()) + switch (point.getBidiType()) { - case BidiType::ltr: curr.bidiLevel = (isEven ? level : level + 1); break; - case BidiType::rtl: curr.bidiLevel = (isEven ? level + 1 : level ); break; + case BidiType::ltr: point.embeddingLevel = (uint16_t) (isEven ? level : level + 1); break; + case BidiType::rtl: point.embeddingLevel = (uint16_t) (isEven ? level + 1 : level ); break; + case BidiType::an: point.embeddingLevel = (uint16_t) (isEven ? level + 2 : level + 1); break; + case BidiType::en: point.embeddingLevel = (uint16_t) (isEven ? level + 2 : level + 1); break; default: break; } @@ -402,11 +387,28 @@ private: } } - static void resolveExplicitLevels (Span buffer, int embeddingLevel) + static void resolveExplicitLevels (Span buffer, int paragraphLevel) { - struct State + struct DirectionalState { enum DirectionalOverride { Neutral, rtl, ltr }; + + DirectionalState (int level, DirectionalOverride dir, bool isolate) + : embeddingLevel (level), + directionalOverride (dir), + isolateStatus (isolate) + { + } + + auto getEmbeddingLevel() const { return embeddingLevel; } + auto getDirectionalOverride() const { return directionalOverride; } + auto getIsolateStatus() const { return isolateStatus; } + + void setEmbeddingLevel (int level) { embeddingLevel = level; } + void setDirectionalOverride (DirectionalOverride dir) { directionalOverride = dir; } + void setIsolateStatus (bool status) { isolateStatus = status; } + + private: int embeddingLevel; DirectionalOverride directionalOverride; bool isolateStatus; @@ -421,132 +423,179 @@ private: // https://www.unicode.org/reports/tr9/#BD2 static constexpr auto maxStackSize = 125; - std::vector stack; + std::vector stack; stack.reserve (maxStackSize); - OverflowState overflow; + OverflowState overflowCounter; + int validIsolate = 0; + int previousEmbeddingLevel = paragraphLevel; + + static auto getEmbeddingDirection = [] (BidiType bt) + { + JUCE_BEGIN_IGNORE_WARNINGS_GCC_LIKE ("-Wswitch-enum") + switch (bt) + { + case BidiType::lre: return BidiType::ltr; + case BidiType::lro: return BidiType::ltr; + case BidiType::lri: return BidiType::ltr; + case BidiType::rle: return BidiType::rtl; + case BidiType::rlo: return BidiType::rtl; + case BidiType::rli: return BidiType::rtl; + + default: break; + } + JUCE_END_IGNORE_WARNINGS_GCC_LIKE + + jassertfalse; + return BidiType::ltr; + }; [[maybe_unused]] const auto canPush = [&stack] { return stack.size() < maxStackSize; }; - const auto isValid = [&] (auto value) { return (value < maxStackSize) && (overflow.isolate == 0 && overflow.embedded == 0); }; + const auto isValid = [&] (auto value) { return (value < maxStackSize) && (overflowCounter.isolate == 0 && overflowCounter.embedded == 0); }; - stack.push_back ({ embeddingLevel, State::Neutral, false }); + stack.push_back ({ paragraphLevel, DirectionalState::Neutral, false }); - // X2-X6a for (auto& atom : buffer) { - // X2-X3: Explicit embeddings - if (atom == BidiType::rle || atom == BidiType::lre) + // X2-X5b + if (contains ({ BidiType::lre, BidiType::lro, BidiType::rle, BidiType::rlo, BidiType::rli, BidiType::lri }, atom.getBidiType())) { if (stack.empty()) break; - auto& head = stack.back(); - head.embeddingLevel = atom == BidiType::rle ? computeLeastOdd (head.embeddingLevel) - : computeLeastEven (head.embeddingLevel); + const auto isIsolate = contains ({ BidiType::rli, BidiType::lri }, atom.getBidiType()); + const auto isOverride = contains ({ BidiType::lro, BidiType::rlo }, atom.getBidiType()); - if (isValid (head.embeddingLevel)) + auto head = stack.back(); + + // This has to be applied regardless of the new embeddeding level's validity + if (isIsolate) { - head.directionalOverride = State::Neutral; - head.isolateStatus = false; + // X5ab: + // Set the RLIs embedding level to the embedding level of the last entry on the directional status stack. + atom.embeddingLevel = (uint16_t) head.getEmbeddingLevel(); - jassert (canPush()); - - stack.push_back (stack.back()); - } - else if (overflow.isolate == 0) - { - overflow.embedded++; - } - } - - // X4-X5: Explicit Overrides - else if (atom == BidiType::rlo || atom == BidiType::lro) - { - if (stack.empty()) - break; - - auto& head = stack.back(); - head.embeddingLevel = atom == BidiType::rlo ? computeLeastOdd (head.embeddingLevel) - : computeLeastEven (head.embeddingLevel); - - if (isValid (head.embeddingLevel)) - { - head.directionalOverride = atom == BidiType::rlo ? State::rtl : State::ltr; - head.isolateStatus = false; - - jassert (canPush()); - stack.push_back (stack.back()); - } - else if (overflow.isolate == 0) - { - overflow.embedded++; - } - } - - // X5a-X5b: Isolates - else if (atom == BidiType::rli || atom == BidiType::lri) - { - if (stack.empty()) - break; - - auto& head = stack.back(); - head.embeddingLevel = atom == BidiType::rli ? computeLeastOdd (head.embeddingLevel) - : computeLeastEven (head.embeddingLevel); - - if (head.directionalOverride == State::ltr) - atom.setBidiType (BidiType::ltr); - else if (head.directionalOverride == State::rtl) - atom.setBidiType (BidiType::rtl); - - if (isValid (head.embeddingLevel)) - { - head.directionalOverride = State::Neutral; - head.isolateStatus = true; - - jassert (canPush()); - stack.push_back (stack.back()); + // If the directional override status of the last entry on the directional status stack is not neutral, reset + // the current character type from RLI to L if the override status is left-to-right, and to R if the override + // status is right-to-left. + if (head.getDirectionalOverride() != DirectionalState::Neutral) + atom.setBidiType (head.getDirectionalOverride() == DirectionalState::ltr ? BidiType::ltr : BidiType::rtl); } else { - overflow.isolate++; + atom.embeddingLevel = (uint16_t) previousEmbeddingLevel; } + + // X2-5b: + // Compute the least odd/even embedding level greater than the embedding level of the last entry on the directional + // status stack. + const auto isRTL = getEmbeddingDirection (atom.getBidiType()) == BidiType::rtl; + const auto newLevel = isRTL ? computeLeastOdd (head.getEmbeddingLevel()) + : computeLeastEven (head.getEmbeddingLevel()); + + // X2-5b: + // If this new level would be valid, and the overflow isolate count and overflow embedding count are both zero, + // then this is valid. + if (isValid (newLevel)) + { + // Push an entry consisting of the new embedding level. + head.setEmbeddingLevel (newLevel); + + // X5ab: + // Increment the valid isolate count by one and true directional isolate status. + validIsolate += isIsolate ? 1 : 0; + head.setIsolateStatus (isIsolate); + + // X4-X5: + // Push an entry consisting of the new embedding level, RTL/LTR directional override status + if (isOverride) + head.setDirectionalOverride (isRTL ? DirectionalState::rtl : DirectionalState::ltr); + else + head.setDirectionalOverride (DirectionalState::Neutral); + + jassert (canPush()); + stack.push_back (head); + } + + // Otherwise, this is an overflow. If the overflow isolate count is zero, increment the overflow + // embedding count by one. Leave all other variables unchanged. + else if (overflowCounter.isolate == 0) + { + overflowCounter.embedded++; + } + } + + // X6 + //B, BN, RLE, LRE, RLO, LRO, PDF, RLI, LRI, FSI, and PDI + if (! contains ({ BidiType::b, BidiType::bn, BidiType::rle, BidiType::lre, BidiType::rlo, BidiType::lro, BidiType::pdf, + BidiType::rli, BidiType::lri, BidiType::fsi, BidiType::pdi }, atom.getBidiType())) + { + if (stack.empty()) + break; + + auto head = stack.back(); + atom.embeddingLevel = (uint16_t) head.getEmbeddingLevel(); + + previousEmbeddingLevel = stack.empty() ? paragraphLevel : stack.back().getEmbeddingLevel(); + + if (head.getDirectionalOverride() != DirectionalState::Neutral) + atom.setBidiType (head.getDirectionalOverride() == DirectionalState::ltr ? BidiType::ltr : BidiType::rtl); } // X6a: Terminating Isolates - else if (atom == BidiType::pdi) + if (atom == BidiType::pdi) { - if (overflow.isolate > 0) + // If the overflow isolate count is greater than zero, this PDI matches an overflow isolate + // initiator. Decrement the overflow isolate count by one. + if (overflowCounter.isolate > 0) { - overflow.isolate--; + overflowCounter.isolate--; } - else + // Otherwise, if the valid isolate count is > 0, this PDI matches a valid isolate initiator. + else if (validIsolate > 0) { - overflow.embedded = 0; + // Reset the overflow embedding count to zero. + overflowCounter.embedded = 0; - while (! stack.empty() && ! stack.back().isolateStatus) + // While the directional isolate status of the last entry on the stack is false, pop the + // last entry from the directional status stack. + while (! stack.empty() && ! stack.back().getIsolateStatus()) stack.pop_back(); + // Pop the last entry from the directional status stack and decrement the valid isolate + // count by one. if (! stack.empty()) stack.pop_back(); + + validIsolate--; } if (stack.empty()) break; - atom.bidiLevel = (uint16_t) stack.back().embeddingLevel; + // In all cases, look up the last entry on the directional status stack left after the steps above and: + auto& head = stack.back(); + + // Set the PDIs level to the entry's embedding level. + atom.embeddingLevel = (uint16_t) head.getEmbeddingLevel(); + + // If the entry's directional override status is not neutral, reset the current character type from PDI + // to L if the override status is left-to-right, and to R if the override status is right-to-left. + if (head.getDirectionalOverride() != DirectionalState::Neutral) + atom.setBidiType (head.getDirectionalOverride() == DirectionalState::ltr ? BidiType::ltr : BidiType::rtl); } // X7 else if (atom == BidiType::pdf) { - if (overflow.isolate > 0) + atom.embeddingLevel = (uint16_t) previousEmbeddingLevel; + + // If the overflow isolate count is greater than zero, do nothing. + // Otherwise, if the overflow embedding count is greater than zero, decrement it by one. + if (overflowCounter.isolate == 0 && overflowCounter.embedded > 0) { - overflow.isolate--; + overflowCounter.embedded--; } - else if (overflow.embedded > 0) - { - overflow.embedded--; - } - else if (stack.size() >= 2 && stack.back().isolateStatus == false) + else if (stack.size() >= 2 && ! stack.back().getIsolateStatus()) { stack.pop_back(); } @@ -558,91 +607,533 @@ private: if (stack.empty()) break; - atom.bidiLevel = (uint16_t) stack.back().embeddingLevel; + atom.embeddingLevel = (uint16_t) stack.back().getEmbeddingLevel(); + + overflowCounter.embedded = 0; + overflowCounter.isolate = 0; + validIsolate = 0; + previousEmbeddingLevel = paragraphLevel; - overflow.embedded = 0; - overflow.isolate = 0; stack.clear(); - stack.push_back ({ embeddingLevel, State::Neutral, false }); - } - - // X6: Everything else - // ! (B | BN | RLE | LRE | RLO | LRO | PDF | RLI | LRI | FSI | PDI) - else - { - if (stack.empty()) - break; - - atom.bidiLevel = (uint16_t) stack.back().embeddingLevel; + stack.push_back ({ paragraphLevel, DirectionalState::Neutral, false }); } } } - static void resolveReorderedIndices (std::vector& result, - const Span buffer, - int embeddingLevel, - Range range = {}) + static void l1 (Span buffer, int paragraphLevel) { - range = range.isEmpty() ? Range { 0, static_cast (buffer.size()) } : range; - - std::vector levels ((size_t) range.getLength()); - - for (int i = range.getStart(); i < range.getEnd(); i++) - levels[(size_t) i] = buffer[(size_t) i].bidiLevel; - - // L1: - for (int i = range.getStart(); i < range.getEnd(); i++) + auto reset = [buffer, paragraphLevel] (size_t position) { - auto curr = buffer[(size_t) i]; - - if (contains ({ BidiType::s, BidiType::b }, curr.getBidiType())) - levels[(size_t) i] = embeddingLevel; - - for (int j = i - 1; j >= 0; j--) + for (int i = (int) position; i >= 0; i--) { - curr = buffer[(size_t) j]; + auto& atom = buffer[(size_t) i]; - if (! isIsolate (curr.getBidiType())) + if (contains ({ BidiType::s, BidiType::b, BidiType::ws, BidiType::fsi, BidiType::lri, BidiType::rli, BidiType::pdi }, atom.getBidiType())) + atom.embeddingLevel = (uint16_t) paragraphLevel; + else break; - - levels[(size_t) j] = embeddingLevel; } + }; + + // On each line, reset the embedding level of the following characters to the paragraph embedding level: + for (size_t i = 0; i < buffer.size(); i++) + { + // Segment separators, Paragraph separators. + // Any sequence of whitespace characters and/or isolate formatting characters (FSI, LRI, RLI, and PDI). + if (contains ({ BidiType::s, BidiType::b }, buffer[i].getBidiType())) + reset (i); } - // L2: - result.resize ((size_t) range.getLength()); + // Any sequence of whitespace characters and/or isolate formatting characters (FSI, LRI, RLI, and PDI) at the end of the line. + reset (buffer.size() - 1); + } + + static void l2 (Span result, Span levels) + { + jassert (levels.size() == result.size()); + std::iota (result.begin(), result.end(), 0); const auto high = *std::max_element (levels.begin(), levels.end()); for (int level = high; level > 0; level--) { - for (int i = 0; i < range.getLength(); i++) + for (size_t i = 0; i < levels.size();) { - const auto indexLevel = levels[(size_t) i]; - - if (level > 0 && (indexLevel >= level)) + if (levels[i] >= level) { // Find the longest consecutive run of the current level and above // 1111 = 4 // 1001 = 1 // 1123 = 4 const auto start = i; - const auto end = [start, levels, level] - { - auto e = (size_t) start + 1; - while (e < levels.size() && levels[e] >= level) - e++; + while (i < levels.size() && levels[i] >= level) + i++; - return e; - }(); - - std::reverse (result.begin() + start, result.begin() + (int) end); - i = (int) end; + std::reverse (result.begin() + start, result.begin() + i); + continue; } + + i++; } } } + + static void resolveReorderedIndices (std::vector& result, + const Span buffer, + int paragraphLevel) + { + std::vector levels; + + result.resize (buffer.size()); + levels.resize (buffer.size()); + std::transform (buffer.begin(), buffer.end(), levels.begin(), [] (auto& point) { return (int) point.embeddingLevel; }); + + l1 (buffer, paragraphLevel); + l2 (result, levels); + } }; +//============================================================================== +//============================================================================== +#if JUCE_UNIT_TESTS + +class BidiTests : public UnitTest +{ +public: + BidiTests() : UnitTest ("Unicode Bidi", UnitTestCategories::text) {} + + void runTest() override + { + using BT = BidiType; + + const String LRE { L"\u202A" }; // LTR embed + const String RLE { L"\u202B" }; // RTL embed + const String PDF { L"\u202C" }; // Pop directional embedding + const String RLO { L"\u202E" }; // RTL override + const String RLI { L"\u2067" }; // RTL isolate + const String PDI { L"\u2069" }; // Pop Directional isolate + + beginTest ("TR9 N1"); + { + // Examples from TR9 + expect (n1 (0, { BT::ltr, BT::b, BT::ltr }, { BT::ltr, BT::ltr, BT::ltr })); + expect (n1 (0, { BT::rtl, BT::b, BT::rtl }, { BT::rtl, BT::rtl, BT::rtl })); + expect (n1 (0, { BT::rtl, BT::b, BT::an }, { BT::rtl, BT::rtl, BT::an })); + expect (n1 (0, { BT::rtl, BT::b, BT::en }, { BT::rtl, BT::rtl, BT::en })); + expect (n1 (0, { BT::an, BT::b, BT::rtl }, { BT::an, BT::rtl, BT::rtl })); + expect (n1 (0, { BT::an, BT::b, BT::an }, { BT::an, BT::rtl, BT::an })); + expect (n1 (0, { BT::an, BT::b, BT::en }, { BT::an, BT::rtl, BT::en })); + expect (n1 (0, { BT::en, BT::b, BT::rtl }, { BT::en, BT::rtl, BT::rtl })); + expect (n1 (0, { BT::en, BT::b, BT::an }, { BT::en, BT::rtl, BT::an })); + expect (n1 (0, { BT::en, BT::b, BT::en }, { BT::en, BT::rtl, BT::en })); + + // NI affected by sos/eos + expect (n1 (0, { BT::b, BT::ltr }, { BT::ltr, BT::ltr })); + expect (n1 (1, { BT::b, BT::rtl }, { BT::rtl, BT::rtl })); + + expect (n1 (0, { BT::ltr, BT::b }, { BT::ltr, BT::ltr })); + expect (n1 (0, { BT::b, BT::b }, { BT::ltr, BT::ltr })); + + // NI not surrounded by similar types should not change. + expect (n1 (0, { BT::rtl, BT::b, BT::ltr }, { BT::rtl, BT::b, BT::ltr })); + expect (n1 (0, { BT::ltr, BT::b, BT::rtl }, { BT::ltr, BT::b, BT::rtl })); + } + + beginTest ("TR9 N2"); + { + // Examples from TR9 + expect (n2 (0, { BT::ltr, BT::b }, { BT::ltr, BT::ltr })); + expect (n2 (1, { BT::ltr, BT::b }, { BT::ltr, BT::rtl })); + expect (n2 (0, { BT::rtl, BT::b }, { BT::rtl, BT::ltr })); + expect (n2 (1, { BT::rtl, BT::b }, { BT::rtl, BT::rtl })); + expect (n2 (0, { BT::b, BT::ltr }, { BT::ltr, BT::ltr })); + expect (n2 (1, { BT::b, BT::ltr }, { BT::rtl, BT::ltr })); + expect (n2 (0, { BT::b, BT::rtl }, { BT::ltr, BT::rtl })); + expect (n2 (1, { BT::b, BT::rtl }, { BT::rtl, BT::rtl })); + } + + beginTest ("TR9 Paragraph Embedding Level"); + { + // Examples from TR9 + expect (resolveParagraphEmbeddingLevel ({ BT::ltr, BT::ltr, BT::ltr }, 0)); + expect (resolveParagraphEmbeddingLevel ({ BT::rtl, BT::ltr, BT::ltr }, 1)); + expect (resolveParagraphEmbeddingLevel ({ BT::ltr, BT::rtl, BT::ltr, BT::rtl }, 0)); + expect (resolveParagraphEmbeddingLevel ({ BT::rtl, BT::ltr, BT::rtl, BT::ltr }, 1)); + expect (resolveParagraphEmbeddingLevel ({}, 0)); + expect (resolveParagraphEmbeddingLevel ({ BT::ltr }, 0)); + expect (resolveParagraphEmbeddingLevel ({ BT::rtl }, 1)); + expect (resolveParagraphEmbeddingLevel ({ BT::ltr, BT::rtl, BT::rtl }, 0)); + expect (resolveParagraphEmbeddingLevel ({ BT::rtl, BT::ltr, BT::ltr }, 1)); + expect (resolveParagraphEmbeddingLevel ({ BT::ltr, BT::rtl, BT::ltr, BT::rtl, BT::ltr }, 0)); + expect (resolveParagraphEmbeddingLevel ({ BT::rtl, BT::ltr, BT::rtl, BT::ltr, BT::rtl }, 1)); + } + + { + constexpr auto neutral = BT::bn; + + beginTest ("TR9 W1"); + expect (resolveWeakTypes (TR9::w1, 1, { BT::al, BT::nsm, BT::nsm }, { BT::al, BT::al, BT::al })); + expect (resolveWeakTypes (TR9::w1, 1, { BT::nsm }, { BT::rtl })); + expect (resolveWeakTypes (TR9::w1, 1, { BT::lri, BT::nsm }, { BT::lri, BT::on })); + expect (resolveWeakTypes (TR9::w1, 1, { BT::pdi, BT::nsm }, { BT::pdi, BT::on })); + + beginTest ("TR9 W2"); + expect (resolveWeakTypes (TR9::w2, 0, { BT::al, BT::en }, { BT::al, BT::an })); + expect (resolveWeakTypes (TR9::w2, 0, { BT::al, neutral, BT::en }, { BT::al, neutral, BT::an })); + expect (resolveWeakTypes (TR9::w2, 0, { neutral, BT::en }, { neutral, BT::en })); + expect (resolveWeakTypes (TR9::w2, 0, { BT::ltr, neutral, BT::en }, { BT::ltr, neutral, BT::en })); + expect (resolveWeakTypes (TR9::w2, 0, { BT::rtl, neutral, BT::en }, { BT::rtl, neutral, BT::en })); + + beginTest ("TR9 W3"); + expect (resolveWeakTypes (TR9::w3, 0, { BT::al }, { BT::rtl })); + + beginTest ("TR9 W4"); + expect (resolveWeakTypes (TR9::w4, 0, { BT::en, BT::es, BT::en }, { BT::en, BT::en, BT::en })); + expect (resolveWeakTypes (TR9::w4, 0, { BT::en, BT::cs, BT::en }, { BT::en, BT::en, BT::en })); + expect (resolveWeakTypes (TR9::w4, 0, { BT::an, BT::cs, BT::an }, { BT::an, BT::an, BT::an })); + + beginTest ("TR9 W5"); + expect (resolveWeakTypes (TR9::w5, 0, { BT::et, BT::et, BT::en }, { BT::en, BT::en, BT::en })); + expect (resolveWeakTypes (TR9::w5, 0, { BT::en, BT::et, BT::et }, { BT::en, BT::en, BT::en })); + expect (resolveWeakTypes (TR9::w5, 0, { BT::an, BT::et, BT::en }, { BT::an, BT::en, BT::en })); + + beginTest ("TR9 W6"); + expect (resolveWeakTypes (TR9::w6, 0, { BT::an, BT::et }, { BT::an, BT::on })); + expect (resolveWeakTypes (TR9::w6, 0, { BT::ltr, BT::es, BT::en }, { BT::ltr, BT::on, BT::en })); + expect (resolveWeakTypes (TR9::w6, 0, { BT::en, BT::cs, BT::an }, { BT::en, BT::on, BT::an })); + expect (resolveWeakTypes (TR9::w6, 0, { BT::et, BT::an }, { BT::on, BT::an })); + + beginTest ("TR9 W7"); + expect (resolveWeakTypes (TR9::w7, 0, { BT::ltr, neutral, BT::en }, { BT::ltr, neutral, BT::ltr })); + expect (resolveWeakTypes (TR9::w7, 1, { BT::rtl, neutral, BT::en }, { BT::rtl, neutral, BT::en })); + } + + beginTest ("TR9 I1"); + { + expect (resolveImplicitTypes (0, { BT::rtl, BT::an, BT::en }, { 1, 2, 2 })); + expect (resolveImplicitTypes (0, { BT::ltr, BT::ltr, BT::ltr }, { 0, 0, 0 })); + expect (resolveImplicitTypes (0, { BT::an, BT::ltr, BT::rtl }, { 2, 0, 1 })); + } + + beginTest ("TR9 I2"); + { + expect (resolveImplicitTypes (1, { BT::ltr, BT::en, BT::an }, { 2, 2, 2 })); + expect (resolveImplicitTypes (1, { BT::rtl, BT::rtl, BT::rtl }, { 1, 1, 1 })); + expect (resolveImplicitTypes (1, { BT::an }, { 2 })); + } + + beginTest ("X1 - X8"); + { + expect (resolveExplicitLevels (0, String { "zero" } + RLE + "one" + PDF + "zero", + "0000" "0" "111" "1" "0000")); + + expect (resolveExplicitLevels (1, String { "one" } + PDF + "one", + "111" "1" "111")); + + // Nested embedding: + expect (resolveExplicitLevels (0, String { "zero" } + RLE + "one" + RLE + "333" + PDF + PDF + "zero", + "0000" "0" "111" "1" "333" "3" "3" "0000")); + + // Directional override: + expect (resolveExplicitLevels (0, String { "abc" } + RLO + "def" + PDF + "ghi", + "000" "0" "111" "1" "000")); + // Mixed embedding and overrides: + expect (resolveExplicitLevels (0, String { "abc" } + LRE + "lmn" + PDF + "def" + RLO + "ghi" + PDF + "jkl", + "000" "0" "222" "2" "000" "0" "111" "1" "000")); + + // Multiple PDFs: + expect (resolveExplicitLevels (0, String { "abc" } + RLE + "def" + RLE + "ghi" + PDF + PDF + "jkl", + "000" "0" "111" "1" "333" "3" "3" "000")); + + // Isolates: + expect (resolveExplicitLevels (0, String { "abc" } + RLI + "def" + PDI + "ghi", + "000" "0" "111" "0" "000")); + + // Overflows and isolates: + // PDIs are not removed from the output string + expect (resolveExplicitLevels (0, String { "abc" } + LRE + "rlm" + RLE + "def" + PDF + "ghi" + PDI + "jkl", + "000" "0" "222" "2" "333" "3" "222" "2" "222")); + + // Paragraph separator: + expect (resolveExplicitLevels (0, String { "abc" } + RLE + "def" + PDF + "\nxyz", + "000" "0" "111" "1" "0000")); + + // Complex nesting: + expect (resolveExplicitLevels (0, String { "abc" } + RLE + "lmn" + RLO + "opq" + PDF + "xyz" + PDF + "jkl", + "000" "0" "111" "1" "333" "3" "111" "1" "000")); + + // Multiple embeddings: + expect (resolveExplicitLevels (0, String { "abc" } + RLE + "lmn" + RLE + "lpq" + PDF + "rs" + PDF + "xyz", + "000" "0" "111" "1" "333" "3" "11" "1" "000")); + } + + beginTest ("TR9 L1"); + { + expect (l1 (0, { BidiType::ws, BidiType::s }, "10", "00")); + expect (l1 (1, { BidiType::ws, BidiType::s }, "10", "11")); + expect (l1 (4, { BidiType::ws, BidiType::ws, BidiType::ws }, "000", "444")); + + expect (l1 (0, { BidiType::fsi, BidiType::pdi, BidiType::rli, BidiType::lri }, "1111", "0000")); + expect (l1 (0, { BidiType::fsi, BidiType::pdi, BidiType::ws, BidiType::lri }, "1111", "0000")); + + expect (l1 (0, { BidiType::fsi, BidiType::an, BidiType::rli, BidiType::lri }, "1100", "1100")); + } + + beginTest ("TR9 L2"); + { + expect (l2 ("abc", "000", "abc")); + expect (l2 ("abc", "111", "cba")); + + expect (l2 ("car MEANS CAR.", + "22211111111111", + ".RAC SNAEM car")); + + expect (l2 ("he said \"car MEANS CAR.\" \"IT DOES,\" she agreed.", + "00000000022211111111110000111111100000000000000", + "he said \"RAC SNAEM car.\" \"SEOD TI,\" she agreed.")); + + expect (l2 ("DID YOU SAY \'he said \"car MEANS CAR\"\'?", + "11111111111112222222224443333333333211", + "?\'he said \"RAC SNAEM car\"\' YAS UOY DID")); + } + } + + static std::vector generateTestUAPs (std::initializer_list bts, int embeddingLevel = 0) + { + static const std::unordered_map map = + { + { BidiType::ltr, 0x0041 }, + { BidiType::rtl, 0x05d0 }, + { BidiType::b, 0x2029 }, + { BidiType::s, 0x001F }, + { BidiType::en, 0x0032 }, + { BidiType::an, 0x0664 }, + { BidiType::es, '+' }, + { BidiType::et, '%' }, + { BidiType::cs, '.' }, + { BidiType::ws, ' ' }, + { BidiType::nsm, 0x0300 }, + { BidiType::al, 0x0642 }, + { BidiType::bn, 0x0000 }, + { BidiType::pdi, 0x2069 }, + { BidiType::lri, 0x2066 }, + { BidiType::rli, 0x2067 }, + { BidiType::fsi, 0x2068 } + }; + + std::vector out; + out.reserve (bts.size()); + + for (const auto& bt : bts) + { + const auto character = map.find (bt); + + if (character == map.end()) + { + jassertfalse; + return {}; + } + + // Sanity check! + const auto realBT = UnicodeDataTable::getDataForCodepoint (character->second).bidi; + + if (bt != realBT) + { + jassertfalse; + return {}; + } + + UnicodeEntry entry{}; + entry.bidi = bt; + + out.emplace_back (character->second, std::move (entry)); + } + + for (auto& p : out) + p.embeddingLevel = (uint16_t) embeddingLevel; + + return out; + } + + static bool n1 (int paragraphLevel, std::initializer_list input, std::initializer_list expected) + { + auto uaps = generateTestUAPs (input); + + if (uaps.empty()) + return false; + + TR9::n1 (uaps, paragraphLevel); + return checkUAPs (uaps, expected); + } + + static bool n2 (int embeddingLevel, std::initializer_list input, std::initializer_list expected) + { + auto uaps = generateTestUAPs (input, embeddingLevel); + + if (uaps.empty()) + return false; + + TR9::n2 (uaps); + return checkUAPs (uaps, expected); + } + + static bool l1 (int paragraphLevel, std::initializer_list bts, const String& levels, const String& expected) + { + auto uaps = generateTestUAPs (bts); + + if (uaps.empty()) + return false; + + if ((int) uaps.size() != levels.length() || (int) uaps.size() != expected.length()) + { + jassertfalse; + return false; + } + + for (int i = 0; i < (int) uaps.size(); i++) + uaps[(size_t) i].embeddingLevel = (uint16_t) levels.substring (i, i + 1).getIntValue(); + + TR9::l1 (uaps, paragraphLevel); + + const auto result = [&uaps] + { + String s; + + for (auto uap : uaps) + s << (int) uap.embeddingLevel; + + return s; + }(); + + return result == expected; + } + + static bool l2 (const String& text, const String& levels, const String& expected) + { + const auto utf32 = text.toUTF32(); + const auto length = utf32.length(); + + std::vector levelVec; + std::vector reorderVec; + + levelVec.resize (length); + reorderVec.resize (length); + + for (size_t i = 0; i < length; i++) + { + reorderVec[0] = 0; + levelVec[i] = (uint16_t) levels.substring ((int) i, (int) i + 1).getIntValue(); + } + + TR9::l2 (reorderVec, levelVec); + + const auto result = [&reorderVec, text] + { + String s; + + for (auto level : reorderVec) + s << text[level]; + + return s; + }(); + + return result == expected; + } + + static bool resolveParagraphEmbeddingLevel (std::initializer_list bts, int expected) + { + auto uaps = generateTestUAPs (bts); + + if (bts.size() > 0 && uaps.empty()) + return false; + + return TR9::resolveParagraphEmbeddingLevel (uaps) == expected; + } + + static bool resolveWeakTypes (void (*func) (const TR9::WeakContext&), int paragraphLevel, std::initializer_list bts, + std::initializer_list expected) + { + auto uaps = generateTestUAPs (bts); + + if (uaps.empty()) + return false; + + for (size_t i = 0; i < uaps.size(); i++) + { + const auto sos = i == 0; + const auto eos = i == uaps.size() - 1; + + auto context = TR9::WeakContext { i, + uaps, + sos ? BidiType::on : uaps[i - 1].getBidiType(), + eos ? BidiType::on : uaps[i + 1].getBidiType(), + paragraphLevel }; + + func (context); + } + + return checkUAPs (uaps, expected); + } + + static bool resolveImplicitTypes (int embeddingLevel, std::initializer_list bts, std::initializer_list expected) + { + auto uaps = generateTestUAPs (bts, embeddingLevel); + + if (uaps.empty()) + return false; + + TR9::resolveImplicitTypes (uaps); + + for (size_t i = 0; i < uaps.size(); i++) + if (uaps[i].embeddingLevel != expected.begin()[i]) + return false; + + return true; + } + + static bool resolveExplicitLevels (int embeddingLevel, const String& input, const String& expectedLevels) + { + std::vector uaps; + + const auto inputChars = input.toUTF32(); + uaps.reserve (inputChars.length()); + + for (size_t i = 0; i < inputChars.length(); i++) + { + const auto codepoint = inputChars[(int) i]; + auto data = UnicodeDataTable::getDataForCodepoint ((uint32_t) codepoint); + + UnicodeAnalysisPoint uap { (char32_t) codepoint, data }; + uap.embeddingLevel = (uint16_t) embeddingLevel; + uaps.push_back (uap); + } + + TR9::resolveExplicitLevels (uaps, embeddingLevel); + + const auto levels = [&] { + String s; + + for (auto point : uaps) + s << (int) point.embeddingLevel; + + return s; + }(); + + return levels == expectedLevels; + } + + static bool checkUAPs (Span uaps, std::initializer_list expected) + { + return std::equal (uaps.begin(), uaps.end(), expected.begin(), [] (UnicodeAnalysisPoint uap, BidiType bt) + { + return uap.getBidiType() == bt; + }); + } +}; + +static BidiTests bidiTests; + +#endif + } // namespace juce diff --git a/modules/juce_graphics/unicode/juce_UnicodeUtils.cpp b/modules/juce_graphics/unicode/juce_UnicodeUtils.cpp index 180581931b..d6da995fce 100644 --- a/modules/juce_graphics/unicode/juce_UnicodeUtils.cpp +++ b/modules/juce_graphics/unicode/juce_UnicodeUtils.cpp @@ -49,7 +49,7 @@ struct UnicodeAnalysisPoint { char32_t character = 0; UnicodeEntry data{}; - uint16_t bidiLevel = 0; + uint16_t embeddingLevel = 0; UnicodeAnalysisPoint (char32_t characterIn, UnicodeEntry entry) : character { characterIn },