1
0
Fork 0
mirror of https://github.com/juce-framework/JUCE.git synced 2026-01-11 23:54:18 +00:00
JUCE/modules/juce_graphics/unicode/juce_UnicodeBidi.cpp
Oliver James 44a750df40 Unicode: Ignore punctuation when resolving implicit characters
This commit implements fix for an issue where mixed punctuation can be rendered in the wrong order.

A regression test has been added to catch this in the future.
2024-05-23 12:54:13 +01:00

648 lines
22 KiB
C++

/*
==============================================================================
This file is part of the JUCE framework.
Copyright (c) Raw Material Software Limited
JUCE is an open source framework subject to commercial or open source
licensing.
By downloading, installing, or using the JUCE framework, or combining the
JUCE framework with any other source code, object code, content or any other
copyrightable work, you agree to the terms of the JUCE End User Licence
Agreement, and all incorporated terms including the JUCE Privacy Policy and
the JUCE Website Terms of Service, as applicable, which will bind you. If you
do not agree to the terms of these agreements, we will not license the JUCE
framework to you, and you must discontinue the installation or download
process and cease use of the JUCE framework.
JUCE End User Licence Agreement: https://juce.com/legal/juce-8-licence/
JUCE Privacy Policy: https://juce.com/juce-privacy-policy
JUCE Website Terms of Service: https://juce.com/juce-website-terms-of-service/
Or:
You may also use this code under the terms of the AGPLv3:
https://www.gnu.org/licenses/agpl-3.0.en.html
THE JUCE FRAMEWORK IS PROVIDED "AS IS" WITHOUT ANY WARRANTY, AND ALL
WARRANTIES, WHETHER EXPRESSED OR IMPLIED, INCLUDING WARRANTY OF
MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, ARE DISCLAIMED.
==============================================================================
*/
namespace juce
{
class TR9
{
public:
TR9() = delete;
struct BidiOutput
{
int embeddingLevel = -1;
std::vector<int> resolvedLevels;
std::vector<int> visualOrder;
};
static void analyseBidiRun (BidiOutput& output,
Span<UnicodeAnalysisPoint> stream,
std::optional<TextDirection> directionOverride = {})
{
// BD1
const auto baseLevel = directionOverride.has_value() ? (*directionOverride == TextDirection::rtl ? 1 : 0)
: resolveParagraphEmbeddingLevel (stream);
std::for_each (stream.begin(), stream.end(), [baseLevel] (auto& atom)
{
atom.bidiLevel = (uint16_t) baseLevel;
});
resolveExplicitLevels (stream, baseLevel);
// X9 replace override characters
const auto pred = [] (auto atom) { return contains ({ BidiType::rle, BidiType::lre,
BidiType::lro, BidiType::rlo,
BidiType::pdf }, atom.getBidiType()); };
std::for_each (stream.begin(),
stream.end(),
[&pred] (auto& atom)
{
if (! pred (atom))
return;
auto copy = atom;
copy.setBidiType (BidiType::bn);
atom = copy;
});
// W1-W7
resolveWeakTypes (stream, {});
// N0-N2
resolveNeutralTypes (stream, baseLevel, {});
// I1-I2
resolveImplicitTypes (stream, baseLevel, {});
output.embeddingLevel = baseLevel;
output.resolvedLevels.clear();
for (const auto& atom : stream)
output.resolvedLevels.push_back (atom.bidiLevel);
resolveReorderedIndices (output.visualOrder, stream, baseLevel, {});
}
private:
enum EmbeddingLevel
{
left = 0,
right = 1
};
static auto isOdd (int level) { return bool (level & 1); }
static auto computeLeastEven (int level) { return isOdd (level) ? level + 1 : level + 2; }
static auto computeLeastOdd (int level) { return isOdd (level) ? level + 2 : level + 1; }
static auto getEmbeddingDirection (int level) { return isOdd (level) ? BidiType::rtl : BidiType::ltr; }
static bool isStrong (const UnicodeAnalysisPoint& x)
{
return contains ({ BidiType::rtl, BidiType::ltr, BidiType::al }, x.getBidiType());
}
static bool isNeutral (const UnicodeAnalysisPoint& x)
{
return contains ({ BidiType::b, BidiType::s, BidiType::ws, BidiType::on }, x.getBidiType());
}
static bool isIsolateInitiator (BidiType x)
{
return contains ({ BidiType::lri, BidiType::rli, BidiType::fsi }, x);
}
static bool isIsolateTerminator (BidiType x)
{
return contains ({ BidiType::pdi }, x);
}
static bool isIsolate (BidiType x)
{
return isIsolateInitiator (x) || isIsolateTerminator (x);
}
static int resolveParagraphEmbeddingLevel (const Span<UnicodeAnalysisPoint> buffer, Range<int> range = {})
{
range = range.isEmpty() ? Range<int> { 0, (int) buffer.size() } : range;
auto seek = [buffer] (BidiType type, Range<int> seekRange) -> int
{
for (int i = seekRange.getStart(); i < seekRange.getEnd(); i++)
{
if (buffer[(size_t) i].data.bidi == type)
return i;
}
return seekRange.getEnd();
};
for (int i = range.getStart(); i < range.getEnd(); i++)
{
const auto& atom = buffer[(size_t) i];
if (isStrong (atom))
return atom == BidiType::ltr ? EmbeddingLevel::left : EmbeddingLevel::right;
if (isIsolateInitiator (atom.getBidiType()))
{
// skip to past matching PDI or EOS
const auto end = seek (BidiType::pdi, { i, range.getEnd() });
i = end != range.getEnd() ? end + 1 : range.getEnd();
}
}
return EmbeddingLevel::left;
}
static void resolveNeutralTypes (Span<UnicodeAnalysisPoint> buffer, int embeddingLevel, Range<int> range)
{
range = range.isEmpty() ? Range<int> { 0, (int) buffer.size() } : range;
// BD13:
const auto bracketRanges = [buffer, range]
{
struct Bracket
{
int position;
uint32_t character;
Brackets::Kind type;
};
// https://www.unicode.org/reports/tr9/#BD16
constexpr auto maxStackSize = 63;
std::vector<Bracket> stack;
stack.reserve (maxStackSize);
std::vector<Range<int>> brackets;
for (int i = range.getStart(); i < range.getEnd(); i++)
{
const auto& curr = buffer[(size_t) i];
if (curr == BidiType::on)
{
const auto type = Brackets::getKind (curr.character);
if (type == Brackets::Kind::open)
{
if (stack.size() == stack.capacity())
return brackets;
stack.push_back ({ i, curr.character, Brackets::Kind::open });
}
else if (type == Brackets::Kind::close)
{
while (! stack.empty())
{
const auto head = stack.back();
stack.pop_back();
if (head.type == Brackets::Kind::open)
{
if (Brackets::isMatchingPair (head.character, curr.character))
{
brackets.push_back ({ head.position, i });
break;
}
}
}
}
}
}
return brackets;
}();
// N0:
for (auto bracketRange : bracketRanges)
{
const auto dir = getEmbeddingDirection (embeddingLevel);
const Span span { buffer.data() + bracketRange.getStart(), (size_t) bracketRange.getLength() };
const auto strong = std::find_if (span.begin(), span.end(), [] (const UnicodeAnalysisPoint& atom)
{
return isStrong (atom);
});
if (strong != span.end())
{
if (*strong == dir)
{
// B:
buffer[(size_t) bracketRange.getStart()].setBidiType (dir);
buffer[(size_t) bracketRange.getEnd()].setBidiType (dir);
}
else
{
// C:
const auto strongContext = [&buf = std::as_const (buffer),
start = bracketRange.getStart(),
dir]
{
for (int i = start; i >= 0; i--)
{
if (isStrong (buf[(size_t) i]))
return buf[(size_t) i].getBidiType();
}
return dir;
}();
buffer[(size_t) bracketRange.getStart()].setBidiType (strongContext);
buffer[(size_t) bracketRange.getEnd()].setBidiType (strongContext);
}
}
}
// N1:
for (int i = range.getStart(); i < range.getEnd(); i++)
{
const auto curr = buffer[(size_t) i];
const auto prevBidiType = i > 0 ? buffer[(size_t) i - 1].getBidiType() : BidiType::none; // SOS type?
if (isNeutral (curr) || isIsolate (curr.getBidiType()))
{
const auto endIndex = [buffer, start = i, end = range.getEnd()]
{
for (int j = start; j < end; j++)
{
const auto atom = buffer[(size_t) j];
if (! (isNeutral (atom) || isIsolate (atom.getBidiType())))
return j;
}
return end - 1;
}();
auto isNumber = [] (BidiType type) { return contains ({ BidiType::an, BidiType::en }, type); };
const auto start = isNumber (prevBidiType) ? BidiType::rtl : prevBidiType;
const auto end = isNumber (buffer[(size_t) endIndex].getBidiType()) ? BidiType::rtl : buffer[(size_t) endIndex].getBidiType();
const auto type = start == end ? start : getEmbeddingDirection (embeddingLevel);
std::for_each (buffer.begin() + i, buffer.begin() + endIndex, [type] (UnicodeAnalysisPoint& atom)
{
atom.setBidiType (type);
});
i = endIndex;
}
}
}
static void resolveWeakTypes (Span<UnicodeAnalysisPoint> buffer, Range<int> range)
{
range = range.isEmpty() ? Range<int> { 0, static_cast<int> (buffer.size()) } : range;
for (int i = range.getStart(); i < range.getEnd(); i++)
{
auto& curr = buffer[(size_t) i];
const auto prevBidiType = i > 0 ? buffer[(size_t) i - 1].getBidiType() : BidiType::none;
const auto nextBidiType = i < range.getEnd() - 1 ? buffer[(size_t) i + 1].getBidiType() : BidiType::none;
// W1:
if (curr == BidiType::nsm)
curr.setBidiType (isIsolate (prevBidiType) ? BidiType::on : prevBidiType);
// W2:
else if (curr == BidiType::en)
{
for (int j = i - 1; j >= 1; j--)
{
if (buffer[(size_t) j] == BidiType::al)
curr.setBidiType (BidiType::al);
if (isStrong (buffer[(size_t) j]))
break;
}
}
// W3:
else if (curr == BidiType::al)
curr.setBidiType (BidiType::rtl);
// W4:
else if (curr == BidiType::es || curr == BidiType::cs)
{
if (prevBidiType == BidiType::en && nextBidiType == BidiType::en)
{
curr.setBidiType (BidiType::en);
}
else if (curr == BidiType::cs)
{
if (prevBidiType == BidiType::an && nextBidiType == BidiType::an)
curr.setBidiType (BidiType::an);
}
}
// W5:
else if (curr == BidiType::et)
{
if (prevBidiType == BidiType::en || nextBidiType == BidiType::en)
curr.setBidiType (BidiType::en);
}
// W6
if (contains ({ BidiType::es, BidiType::cs }, curr.getBidiType()))
curr.setBidiType (BidiType::on);
// W7:
else if (curr == BidiType::en)
{
for (int j = i - 1; j >= 1; j--)
{
if (buffer[(size_t) j] == BidiType::ltr)
curr.setBidiType (BidiType::ltr);
if (isStrong (buffer[(size_t) j]))
break;
}
}
}
}
static void resolveImplicitTypes (Span<UnicodeAnalysisPoint> buffer, int embeddingLevel, Range<int> range)
{
range = range.isEmpty() ? Range<int> { 0, static_cast<int> (buffer.size()) } : range;
// I1, I2
// https://www.unicode.org/reports/tr9/#Resolving_Implicit_Levels
for (int i = range.getStart(); i < range.getEnd(); i++)
{
auto& curr = buffer[(size_t) i];
const auto level = (uint16_t) embeddingLevel;
const auto isEven = isOdd (level) == false;
if (curr.getGeneralCategory() != GeneralCategory::pc)
{
JUCE_BEGIN_IGNORE_WARNINGS_GCC_LIKE ("-Wswitch-enum")
switch (curr.getBidiType())
{
case BidiType::ltr: curr.bidiLevel = (isEven ? level : level + 1); break;
case BidiType::rtl: curr.bidiLevel = (isEven ? level + 1 : level ); break;
default: break;
}
JUCE_END_IGNORE_WARNINGS_GCC_LIKE
}
}
}
static void resolveExplicitLevels (Span<UnicodeAnalysisPoint> buffer, int embeddingLevel)
{
struct State
{
enum DirectionalOverride { Neutral, rtl, ltr };
int embeddingLevel;
DirectionalOverride directionalOverride;
bool isolateStatus;
};
// X1
struct OverflowState
{
int isolate = 0;
int embedded = 0;
};
// https://www.unicode.org/reports/tr9/#BD2
static constexpr auto maxStackSize = 125;
std::vector<State> stack;
stack.reserve (maxStackSize);
OverflowState overflow;
[[maybe_unused]] const auto canPush = [&stack] { return stack.size() < maxStackSize; };
const auto isValid = [&] (auto value) { return (value < maxStackSize) && (overflow.isolate == 0 && overflow.embedded == 0); };
stack.push_back ({ embeddingLevel, State::Neutral, false });
// X2-X6a
for (auto& atom : buffer)
{
// X2-X3: Explicit embeddings
if (atom == BidiType::rle || atom == BidiType::lre)
{
if (stack.empty())
break;
auto& head = stack.back();
head.embeddingLevel = atom == BidiType::rle ? computeLeastOdd (head.embeddingLevel)
: computeLeastEven (head.embeddingLevel);
if (isValid (head.embeddingLevel))
{
head.directionalOverride = State::Neutral;
head.isolateStatus = false;
jassert (canPush());
stack.push_back (stack.back());
}
else if (overflow.isolate == 0)
{
overflow.embedded++;
}
}
// X4-X5: Explicit Overrides
else if (atom == BidiType::rlo || atom == BidiType::lro)
{
if (stack.empty())
break;
auto& head = stack.back();
head.embeddingLevel = atom == BidiType::rlo ? computeLeastOdd (head.embeddingLevel)
: computeLeastEven (head.embeddingLevel);
if (isValid (head.embeddingLevel))
{
head.directionalOverride = atom == BidiType::rlo ? State::rtl : State::ltr;
head.isolateStatus = false;
jassert (canPush());
stack.push_back (stack.back());
}
else if (overflow.isolate == 0)
{
overflow.embedded++;
}
}
// X5a-X5b: Isolates
else if (atom == BidiType::rli || atom == BidiType::lri)
{
if (stack.empty())
break;
auto& head = stack.back();
head.embeddingLevel = atom == BidiType::rli ? computeLeastOdd (head.embeddingLevel)
: computeLeastEven (head.embeddingLevel);
if (head.directionalOverride == State::ltr)
atom.setBidiType (BidiType::ltr);
else if (head.directionalOverride == State::rtl)
atom.setBidiType (BidiType::rtl);
if (isValid (head.embeddingLevel))
{
head.directionalOverride = State::Neutral;
head.isolateStatus = true;
jassert (canPush());
stack.push_back (stack.back());
}
else
{
overflow.isolate++;
}
}
// X6a: Terminating Isolates
else if (atom == BidiType::pdi)
{
if (overflow.isolate > 0)
{
overflow.isolate--;
}
else
{
overflow.embedded = 0;
while (! stack.empty() && ! stack.back().isolateStatus)
stack.pop_back();
if (! stack.empty())
stack.pop_back();
}
if (stack.empty())
break;
atom.bidiLevel = (uint16_t) stack.back().embeddingLevel;
}
// X7
else if (atom == BidiType::pdf)
{
if (overflow.isolate > 0)
{
overflow.isolate--;
}
else if (overflow.embedded > 0)
{
overflow.embedded--;
}
else if (stack.size() >= 2 && stack.back().isolateStatus == false)
{
stack.pop_back();
}
}
// X8
else if (atom == BidiType::b)
{
if (stack.empty())
break;
atom.bidiLevel = (uint16_t) stack.back().embeddingLevel;
overflow.embedded = 0;
overflow.isolate = 0;
stack.clear();
stack.push_back ({ embeddingLevel, State::Neutral, false });
}
// X6: Everything else
// ! (B | BN | RLE | LRE | RLO | LRO | PDF | RLI | LRI | FSI | PDI)
else
{
if (stack.empty())
break;
atom.bidiLevel = (uint16_t) stack.back().embeddingLevel;
}
}
}
static void resolveReorderedIndices (std::vector<int>& result,
const Span<UnicodeAnalysisPoint> buffer,
int embeddingLevel,
Range<int> range = {})
{
range = range.isEmpty() ? Range<int> { 0, static_cast<int> (buffer.size()) } : range;
std::vector<int> levels ((size_t) range.getLength());
for (int i = range.getStart(); i < range.getEnd(); i++)
levels[(size_t) i] = buffer[(size_t) i].bidiLevel;
// L1:
for (int i = range.getStart(); i < range.getEnd(); i++)
{
auto curr = buffer[(size_t) i];
if (contains ({ BidiType::s, BidiType::b }, curr.getBidiType()))
levels[(size_t) i] = embeddingLevel;
for (int j = i - 1; j >= 0; j--)
{
curr = buffer[(size_t) j];
if (! isIsolate (curr.getBidiType()))
break;
levels[(size_t) j] = embeddingLevel;
}
}
// L2:
result.resize ((size_t) range.getLength());
std::iota (result.begin(), result.end(), 0);
const auto high = *std::max_element (levels.begin(), levels.end());
for (int level = high; level > 0; level--)
{
for (int i = 0; i < range.getLength(); i++)
{
const auto indexLevel = levels[(size_t) i];
if (level > 0 && (indexLevel >= level))
{
// Find the longest consecutive run of the current level and above
// 1111 = 4
// 1001 = 1
// 1123 = 4
const auto start = i;
const auto end = [start, levels, level]
{
auto e = (size_t) start + 1;
while (e < levels.size() && levels[e] >= level)
e++;
return e;
}();
std::reverse (result.begin() + start, result.begin() + (int) end);
i = (int) end;
}
}
}
}
};
} // namespace juce