1
0
Fork 0
mirror of https://github.com/juce-framework/JUCE.git synced 2026-01-10 23:44:24 +00:00

UnicodeBidi: Replace implementation with SheenBidi

This commit is contained in:
reuk 2024-07-09 20:21:19 +01:00
parent 107a383bb4
commit 9076639742
No known key found for this signature in database
GPG key ID: FCB43929F012EE5C
4 changed files with 292 additions and 1247 deletions

View file

@ -289,12 +289,6 @@ constexpr hb_script_t getScriptTag (TextScript type)
return HB_SCRIPT_COMMON;
}
// Associates a Font with a span of Unicode codepoints.
// NOTE(review): presumably the font selected for rendering that run - confirm against the code that uses this struct.
struct ResolvedFontRun
{
Font font;
Span<const Unicode::Codepoint> run;
};
SimpleShapedText::SimpleShapedText (const String* data,
const ShapedTextOptions& options)
: string (*data)
@ -393,7 +387,7 @@ static std::vector<ShapedGlyph> lowLevelShape (const String& string,
const Font& font,
TextScript script,
const String& language,
TextDirection direction)
uint8_t embeddingLevel)
{
HbBuffer buffer { hb_buffer_create() };
hb_buffer_clear_contents (buffer.get());
@ -403,7 +397,7 @@ static std::vector<ShapedGlyph> lowLevelShape (const String& string,
hb_buffer_set_language (buffer.get(), hb_language_from_string (language.toRawUTF8(), -1));
hb_buffer_set_direction (buffer.get(),
direction == TextDirection::ltr ? HB_DIRECTION_LTR : HB_DIRECTION_RTL);
(embeddingLevel % 2) == 0 ? HB_DIRECTION_LTR : HB_DIRECTION_RTL);
Utf8Lookup utf8Lookup { string };
@ -495,7 +489,7 @@ static std::vector<ShapedGlyph> lowLevelShape (const String& string,
// control characters that JUCE doesn't know how to handle appropriately.
jassert (unknownGlyph == infos.end());
[[maybe_unused]] const auto trackingAmount = (! HB_DIRECTION_IS_VERTICAL (direction) && ! trackingIsDefault)
[[maybe_unused]] const auto trackingAmount = ! trackingIsDefault
? font.getHeight() * tracking
: 0;
@ -505,7 +499,7 @@ static std::vector<ShapedGlyph> lowLevelShape (const String& string,
for (size_t i = 0; i < infos.size(); ++i)
{
const auto j = direction == TextDirection::ltr ? i : infos.size() - 1 - i;
const auto j = (embeddingLevel % 2) == 0 ? i : infos.size() - 1 - i;
const auto glyphId = infos[j].codepoint;
const auto xAdvance = positions[j].x_advance;
@ -644,7 +638,7 @@ struct ShapingParams
{
TextScript script;
String language;
TextDirection direction;
uint8_t embeddingLevel;
Font resolvedFont;
};
@ -784,7 +778,7 @@ private:
shapingParams.resolvedFont,
shapingParams.script,
shapingParams.language,
shapingParams.direction);
shapingParams.embeddingLevel);
recalculateAdvances();
}
@ -820,50 +814,6 @@ struct LineChunkInLogicalOrder
int bidiLevel{};
};
static auto getStartingVisualIndices (const std::vector<LineChunkInLogicalOrder>& chunks,
const Array<Unicode::Codepoint>& analysis)
{
std::vector<size_t> indices (chunks.size());
std::transform (chunks.cbegin(),
chunks.cend(),
indices.begin(),
[&analysis] (auto& c) { return analysis[(int) c.textRange.getStart()].visualIndex; });
return indices;
}
static auto getChunkIndicesInVisualOrder (const std::vector<LineChunkInLogicalOrder>& chunks,
const Array<Unicode::Codepoint>& analysis)
{
const auto startingVisualIndices = getStartingVisualIndices (chunks, analysis);
struct ChunkIndexWithVisualIndex
{
size_t chunkIndex{};
size_t visualIndex{};
};
std::vector<ChunkIndexWithVisualIndex> sortableIndices;
sortableIndices.reserve (std::size (startingVisualIndices));
for (const auto [i, visualIndex] : enumerate (startingVisualIndices))
sortableIndices.push_back (ChunkIndexWithVisualIndex { (size_t) i, visualIndex });
std::sort (sortableIndices.begin(),
sortableIndices.end(),
[] (const auto& a, const auto& b) { return a.visualIndex < b.visualIndex; });
std::vector<size_t> result (std::size (sortableIndices));
std::transform (sortableIndices.begin(),
sortableIndices.end(),
result.begin(),
[] (auto x) { return x.chunkIndex; });
return result;
}
// Used to avoid signedness warning for types for which std::size() is int
template <typename T>
static auto makeSpan (T& array)
@ -960,7 +910,11 @@ void SimpleShapedText::shape (const String& data,
std::vector<LineChunkInLogicalOrder> lineChunks;
int64 numGlyphsInLine = 0;
const auto analysis = Unicode::performAnalysis (data, options.getReadingDirection());
const auto analysis = Unicode::performAnalysis (data);
std::vector<juce_wchar> data32 ((size_t) data.length());
data.copyToUTF32 (data32.data(), data32.size() * sizeof (juce_wchar));
const BidiAlgorithm bidiAlgorithm (data32);
IntegralCanBreakBeforeIterator softBreakIterator { makeSpan (analysis) };
@ -969,9 +923,44 @@ void SimpleShapedText::shape (const String& data,
auto remainingWidth = options.getMaxWidth().has_value() ? (*options.getMaxWidth() - options.getFirstLineIndent())
: std::optional<float>{};
const auto commitLine = [&]
std::vector<size_t> visualOrder;
const auto commitLine = [&] (const BidiParagraph& bidiParagraph)
{
const auto indicesInVisualOrder = getChunkIndicesInVisualOrder (lineChunks, analysis);
if (lineChunks.empty())
return;
const auto begin = (size_t) lineChunks.front().textRange.getStart();
const auto end = (size_t) lineChunks.back().textRange.getEnd();
const auto bidiLine = bidiParagraph.createLine (begin, end - begin);
bidiLine.computeVisualOrder (visualOrder);
const auto indicesInVisualOrder = [&]
{
std::vector<size_t> result;
result.reserve (lineChunks.size());
for (auto it = visualOrder.begin(); it != visualOrder.end();)
{
const auto logicalIndex = *it;
const auto chunk = std::lower_bound (lineChunks.begin(),
lineChunks.end(),
logicalIndex,
[] (const LineChunkInLogicalOrder& c, size_t x)
{
return (size_t) c.textRange.getEnd() <= x;
});
jassert (chunk != lineChunks.end());
result.push_back ((size_t) std::distance (lineChunks.begin(), chunk));
it += std::min ((ptrdiff_t) std::distance (it, visualOrder.end()),
(ptrdiff_t) chunk->textRange.getLength());
}
return result;
}();
for (auto chunkIndex : indicesInVisualOrder)
{
@ -1020,11 +1009,11 @@ void SimpleShapedText::shape (const String& data,
remainingWidth = options.getMaxWidth();
};
const auto append = [&] (Range<int64> range, const ShapingParams& shapingParams)
const auto append = [&] (const BidiParagraph& bidiParagraph, Range<int64> range, const ShapingParams& shapingParams)
{
jassert (! range.isEmpty());
auto glyphsToConsume = ConsumableGlyphs { data, range, shapingParams };
ConsumableGlyphs glyphsToConsume { data, range, shapingParams };
while (! glyphsToConsume.isEmpty())
{
@ -1035,7 +1024,7 @@ void SimpleShapedText::shape (const String& data,
{
int64 breakBefore{};
// We need to use maybeIgnoringWhitespace in comparisions, but
// We need to use maybeIgnoringWhitespace in comparisons, but
// includingTrailingWhitespace when using subtraction to calculate the remaining
// space.
LineAdvance advance{};
@ -1049,7 +1038,7 @@ void SimpleShapedText::shape (const String& data,
static constexpr auto floatMax = std::numeric_limits<float>::max();
for (auto breakBefore = softBreakIterator.next();
breakBefore.has_value() && (lineNumbers.size() == 0
breakBefore.has_value() && (lineNumbers.isEmpty()
|| (int64) lineNumbers.size() < options.getMaxNumLines() - 1);
breakBefore = softBreakIterator.next())
{
@ -1069,7 +1058,7 @@ void SimpleShapedText::shape (const String& data,
shapingParams.resolvedFont,
shapingParams.script,
shapingParams.language,
shapingParams.direction);
shapingParams.embeddingLevel);
const auto beyondEnd = [&]
{
@ -1121,12 +1110,10 @@ void SimpleShapedText::shape (const String& data,
const auto textRange = glyphsToConsume.getCodepointRange().withEnd (bestMatch->breakBefore);
const auto createFakeBidiNestingLevel = [] (TextDirection dir) { return dir == TextDirection::ltr ? 0 : 1; };
lineChunks.push_back ({ textRange,
{ glyphs.begin(), glyphs.end() },
shapingParams.resolvedFont,
createFakeBidiNestingLevel (shapingParams.direction) });
shapingParams.embeddingLevel });
numGlyphsInLine += (int64) glyphs.size();
@ -1142,14 +1129,14 @@ void SimpleShapedText::shape (const String& data,
if (numGlyphsInLine == 0 && exactlyEqual (remainingWidth, options.getMaxWidth()))
consumeBestMatch();
commitLine();
commitLine (bidiParagraph);
}
else
{
consumeBestMatch();
if (! glyphsToConsume.isEmpty())
commitLine();
commitLine (bidiParagraph);
}
}
};
@ -1172,29 +1159,51 @@ void SimpleShapedText::shape (const String& data,
return resolved;
}();
for (Unicode::LineBreakIterator lineIter { makeSpan (analysis) }; auto lineRun = lineIter.next();)
bidiAlgorithm.forEachParagraph ([&] (const BidiParagraph& bidiParagraph)
{
for (Unicode::ScriptRunIterator scriptIter { *lineRun };
auto scriptRun = scriptIter.next();)
const auto bidiLevels = bidiParagraph.getResolvedLevels();
const Span paragraphSpan { analysis.getRawDataPointer() + bidiParagraph.getOffset(), bidiParagraph.getLength() };
for (Unicode::LineBreakIterator lineIter { paragraphSpan }; auto lineRun = lineIter.next();)
{
for (Unicode::BidiRunIterator bidiIter { *scriptRun }; auto bidiRun = bidiIter.next();)
for (Unicode::ScriptRunIterator scriptIter { *lineRun }; auto scriptRun = scriptIter.next();)
{
for (const auto& [range, font] : fontsWithFallback.getIntersectionsWith (spanLookup.getRange (*bidiRun)))
const auto offsetInText = (size_t) std::distance (analysis.getRawDataPointer(), scriptRun->data());
const auto offsetInParagraph = offsetInText - bidiParagraph.getOffset();
const auto length = scriptRun->size();
const auto begin = bidiLevels.data() + offsetInParagraph;
const auto end = begin + length;
for (auto it = begin; it != end;)
{
append (range, { scriptRun->front().script,
const auto next = std::find_if (it, end, [&] (const auto& l) { return l != *it; });
const auto bidiRunOffset = std::distance (begin, it);
const auto bidiRunLength = std::distance (it, next);
const Span bidiRun { analysis.getRawDataPointer() + bidiRunOffset + offsetInText, (size_t) bidiRunLength };
for (const auto& [range, font] : fontsWithFallback.getIntersectionsWith (spanLookup.getRange (bidiRun)))
{
append (bidiParagraph,
range,
{ scriptRun->front().script,
options.getLanguage(),
bidiRun->front().direction,
*it,
font });
}
it = next;
}
}
if (! lineChunks.empty())
commitLine();
commitLine (bidiParagraph);
}
if (! lineChunks.empty())
commitLine();
commitLine (bidiParagraph);
}, options.getReadingDirection());
}
Range<int64> SimpleShapedText::getGlyphRangeForLine (size_t line) const

View file

@ -41,14 +41,7 @@ namespace juce
*/
class Unicode
{
/*  Key used for the analysis cache: the analysed text together with an optional
    direction override. Keys are ordered by text first, then by the override.
*/
struct Key
{
    String text;
    std::optional<TextDirection> directionOverride;

    auto tie() const { return std::tie (text, directionOverride); }

    bool operator< (const Key& other) const
    {
        const auto lhs = tie();
        const auto rhs = other.tie();
        return lhs < rhs;
    }
};
using Key = String;
public:
Unicode() = delete;
@ -61,13 +54,8 @@ public:
{
uint32_t codepoint;
size_t logicalIndex; // Index of the character in the source string
size_t visualIndex;
TextBreakType breaking; // Breaking characteristics of this codepoint
TextDirection direction; // Direction of this codepoint
TextScript script; // Script class for this codepoint
};
@ -88,14 +76,14 @@ public:
/* Performs unicode analysis on a piece of text and returns an array of Codepoints
in logical order.
*/
static Array<Codepoint> performAnalysis (const String& string, std::optional<TextDirection> textDirection = {})
static Array<Codepoint> performAnalysis (const String& string)
{
if (string.isEmpty())
return {};
thread_local LruCache<Key, Array<Unicode::Codepoint>> cache;
return cache.get ({ string, textDirection }, analysisCallback);
return cache.get (string, analysisCallback);
}
//==============================================================================
@ -125,20 +113,6 @@ public:
Span<ValueType> data;
};
struct BidiTraits
{
using ValueType = const Codepoint;
static bool compare (const Codepoint& t1, const Codepoint& t2)
{
return t1.direction == t2.direction;
}
static bool includeBreakingIndex() { return false; }
};
using BidiRunIterator = Iterator<BidiTraits>;
struct LineTraits
{
using ValueType = const Codepoint;
@ -182,43 +156,13 @@ public:
using ScriptRunIterator = Iterator<ScriptTraits>;
private:
struct ParagraphIterator
{
explicit ParagraphIterator (Span<UnicodeAnalysisPoint> Span) : data (Span) {}
std::optional<Range<int>> next()
{
const auto start = head;
auto end = start;
if ((size_t) start >= data.size())
return std::nullopt;
while ((size_t) end < data.size())
{
constexpr auto paragraphSeparator = 0x2029;
if (data[(size_t) end].character == paragraphSeparator)
break;
end++;
}
head = end + 1;
return std::make_optional (Range<int> { start, end });
}
Span<UnicodeAnalysisPoint> data;
int head = 0;
};
static Array<Unicode::Codepoint> analysisCallback (const Key& key)
{
auto analysisBuffer = [&key]
{
std::vector<UnicodeAnalysisPoint> points;
const auto data = key.text.toUTF32();
const auto data = key.toUTF32();
const auto length = data.length();
points.reserve (length);
@ -256,76 +200,8 @@ private:
result.getReference (index).breaking = type;
});
ParagraphIterator iter { analysisBuffer };
TR9::BidiOutput bidiOutput;
while (auto range = iter.next())
{
const auto run = Span { analysisBuffer.data() + (size_t) range->getStart(), (size_t) range->getLength() };
TR9::analyseBidiRun (bidiOutput, run, key.directionOverride);
for (size_t i = 0; i < (size_t) range->getLength(); i++)
{
auto& point = result.getReference ((int) i + range->getStart());
point.direction = bidiOutput.resolvedLevels[i] % 2 == 0 ? TextDirection::ltr : TextDirection::rtl;
point.logicalIndex = (size_t) range->getStart() + i;
point.visualIndex = (size_t) bidiOutput.visualOrder[i];
}
}
return result;
}
};
#if JUCE_UNIT_TESTS
class NumericalVisualOrderTest : UnitTest
{
public:
NumericalVisualOrderTest() : UnitTest ("NumericalVisualOrderTest", UnitTestCategories::text)
{
}
void runTest() override
{
auto doTest = [this] (const String& text)
{
String visual;
String logical;
for (auto cp : Unicode::performAnalysis (text))
{
visual << text[(int) cp.visualIndex];
logical << text[(int) cp.logicalIndex];
}
beginTest (text);
expectEquals (visual, logical);
};
doTest ("12345");
doTest ("12345_00001");
doTest ("1_3(1)");
doTest ("-12323");
doTest ("8784-43_-33");
doTest ("[v = get()](vector<int1> _arr) -> v2 { return _arr[5]; };");
doTest (R"([(lambda x: (x, len(x), x.upper(), x[::-1]))(word) for word in "JUCE is great".split()])");
doTest (R"(table.concat({table.unpack({string.reverse(string.gsub("JUCE is great", "%a", string.upper))})}, " "))");
doTest (R"(result = sum([(mod(i, 2) * i**2, i = 1, 100)], mask = [(mod(i, 2) == 0, i = 1, 100)]))");
doTest ("100 +100");
doTest ("100+ 100");
doTest ("100 - +100");
doTest ("abs= +100");
doTest ("1.19.0 [1]");
}
};
static NumericalVisualOrderTest visualOrderTest;
#endif
} // namespace juce

File diff suppressed because it is too large Load diff

View file

@ -49,7 +49,6 @@ struct UnicodeAnalysisPoint
{
char32_t character = 0;
UnicodeEntry data{};
uint16_t embeddingLevel = 0;
UnicodeAnalysisPoint (char32_t characterIn, UnicodeEntry entry)
: character { characterIn },
@ -66,25 +65,10 @@ struct UnicodeAnalysisPoint
return SBCodepointGetGeneralCategory (character);
}
BidiType getBidiType() const
{
return data.bidi;
}
void setBidiType (BidiType newType)
{
data.bidi = newType;
}
auto getScriptType() const
{
return SBCodepointGetScript (character);
}
bool operator== (const BidiType& b) const
{
return getBidiType() == b;
}
};
//==============================================================================