1
0
Fork 0
mirror of https://github.com/juce-framework/JUCE.git synced 2026-01-10 23:44:24 +00:00

Represent tab characters with non-breaking space during shaping

This avoids assertions raised when shaping text containing tabs.
This commit is contained in:
attila 2024-06-19 14:37:53 +02:00 committed by Attila Szarvas
parent 0d8f2c63ec
commit ef8417023e
2 changed files with 91 additions and 32 deletions

View file

@ -1,5 +1,35 @@
# JUCE breaking changes
# develop
## Change
The tab width when rendering text with the GlyphArrangement and TextLayout
classes now equals the width of a space. Previously it equaled the width of a
tofu character used for missing glyphs.
**Possible Issues**
User interfaces using the GlyphArrangement and TextLayout classes directly to
render text containing tabs will look different. The TextEditor and
CodeEditorComponent classes have special logic for replacing the tabs prior to
rendering, and consequently, these are not affected.
**Workaround**
Replace the tab characters prior to rendering and substitute them with the
required number of non-breaking spaces.
**Rationale**
Since the Unicode-related revamp of JUCE's text rendering classes, tab
characters would raise assertions and would be rendered with the tofu glyph.
This change visually treats tab characters as non-breaking spaces. Since the
JUCE 7 behaviour of using the tofu glyph's width was not a conscious decision,
but rather a side effect of ignoring unresolved glyphs, using a default width
of one space is more reasonable.
# Version 8.0.0
## Change

View file

@ -330,6 +330,49 @@ private:
size_t beyondEnd{};
};
/* Kinds of control character reported by findControlCharacters(). */
enum class ControlCharacter
{
// A carriage return (0x0d) whose immediately following character is a line feed (0x0a).
crFollowedByLf,
// A carriage return (0x0d) that is not immediately followed by a line feed.
cr,
// A line feed (0x0a).
lf,
// A horizontal tab (0x09).
tab
};
/*  Scans the text for line feed, carriage return and tab characters.

    Returns a map from the index of each such character within the span to its kind. A carriage
    return that is immediately followed by a line feed is reported as crFollowedByLf; the line feed
    that follows it still gets its own lf entry at the next index.
*/
static auto findControlCharacters (Span<juce_wchar> text)
{
    constexpr juce_wchar tabChar        = 0x09;
    constexpr juce_wchar lineFeed       = 0x0a;
    constexpr juce_wchar carriageReturn = 0x0d;

    std::map<size_t, ControlCharacter> found;
    const auto numChars = text.size();

    for (size_t index = 0; index < numChars; ++index)
    {
        const auto current = text[index];

        if (current == lineFeed)
        {
            found[index] = ControlCharacter::lf;
        }
        else if (current == carriageReturn)
        {
            // Only look ahead when there is at least one more character after this one.
            const auto isFollowedByLf = numChars - index > 1 && text[index + 1] == lineFeed;

            found[index] = isFollowedByLf ? ControlCharacter::crFollowedByLf
                                          : ControlCharacter::cr;
        }
        else if (current == tabChar)
        {
            found[index] = ControlCharacter::tab;
        }
    }

    return found;
}
/* Returns glyphs in logical order as that favours wrapping. */
static std::vector<ShapedGlyph> lowLevelShape (const String& string,
Range<int64> range,
@ -358,46 +401,32 @@ static std::vector<ShapedGlyph> lowLevelShape (const String& string,
0,
0);
// Adding the converted portion of the text with hb_buffer_add_utf32() or especially with
// hb_buffer_add() gives us control over cluster numbers. hb_buffer_add_utf32() will increment
// cluster numbers by unicode codepoints (as opposed to UTF8 bytes) starting from 0.
auto utf32Span = Span { string.toUTF32().getAddress() + (size_t) range.getStart(),
(size_t) range.getLength() };
const Span utf32Span { string.toUTF32().getAddress() + (size_t) range.getStart(),
(size_t) range.getLength() };
// We're using a word joiner (zero width non-breaking space) followed by a non-breaking space
// for visual representation. This is so that it's not possible to break the glyph representing
// the line breaking glyph on its own.
static constexpr uint32_t crLf[] = { 0x2060, 0x00A0 };
const auto controlChars = findControlCharacters (utf32Span);
auto nextControlChar = controlChars.begin();
const auto numLineEndsToReplace = [&]
for (const auto pair : enumerate (utf32Span, size_t{}))
{
constexpr auto lf = 0x0a;
constexpr auto cr = 0x0d;
if (! utf32Span.empty() && (utf32Span.back() == lf || utf32Span.back() == cr))
const auto charToAdd = [&]
{
if (utf32Span.size() >= 2 && utf32Span[utf32Span.size() - 2] == cr)
return 2;
if (nextControlChar == controlChars.end() || pair.index != nextControlChar->first)
return pair.value;
return 1;
}
constexpr juce_wchar wordJoiner = 0x2060;
constexpr juce_wchar nonBreakingSpace = 0x00a0;
return 0;
}();
const auto replacement = nextControlChar->second == ControlCharacter::crFollowedByLf
? wordJoiner
: nonBreakingSpace;
hb_buffer_add_utf32 (buffer.get(),
(uint32_t*) utf32Span.data(),
(int) range.getLength() - numLineEndsToReplace,
(unsigned int) 0,
(int) range.getLength() - numLineEndsToReplace);
++nextControlChar;
for (int i = 0; i < numLineEndsToReplace; ++i)
{
// The following gets cluster values right, but this does not follow clearly from harfbuzz documentation.
// Add at least a regression test checking the correctness of cluster values.
hb_buffer_add (buffer.get(),
static_cast<hb_codepoint_t> (*(crLf + (2 - numLineEndsToReplace) + i)),
(unsigned int) ((int) range.getLength() - numLineEndsToReplace + i));
return replacement;
}();
hb_buffer_add (buffer.get(), static_cast<hb_codepoint_t> (charToAdd), (unsigned int) pair.index);
}
const auto postContextByteRange = utf8Lookup.getByteRange (Range<int64> { range.getEnd(), (int64) string.length() });