1
0
Fork 0
mirror of https://github.com/juce-framework/JUCE.git synced 2026-01-09 23:34:20 +00:00

String: Fix mangled decoding of UTF-16 strings containing surrogate pairs in createStringFromData()

This commit is contained in:
reuk 2025-11-19 17:54:20 +00:00
parent e3f3d32fe6
commit f0560cefbb
No known key found for this signature in database

View file

@ -1994,24 +1994,25 @@ String String::createStringFromData (const void* const unknownData, int size)
if (size == 1) if (size == 1)
return charToString ((juce_wchar) data[0]); return charToString ((juce_wchar) data[0]);
if (CharPointer_UTF16::isByteOrderMarkBigEndian (data) const auto bigEndianData = CharPointer_UTF16::isByteOrderMarkBigEndian (data);
|| CharPointer_UTF16::isByteOrderMarkLittleEndian (data))
if (bigEndianData || CharPointer_UTF16::isByteOrderMarkLittleEndian (data))
{ {
const int numChars = size / 2 - 1; const auto numUnits = size / 2 - 1;
const auto src = unalignedPointerCast<const uint16*> (data + 2);
const auto swapBytes = bigEndianData ? ByteOrder::swapIfLittleEndian<uint16>
: ByteOrder::swapIfBigEndian<uint16>;
StringCreationHelper builder ((size_t) numChars); StringCreationHelper builder ((size_t) numUnits);
auto src = unalignedPointerCast<const uint16*> (data + 2); for (int i = 0; i < numUnits;)
if (CharPointer_UTF16::isByteOrderMarkBigEndian (data))
{ {
for (int i = 0; i < numChars; ++i) const uint16 wideBuffer[] { swapBytes (src[i]),
builder.write ((juce_wchar) ByteOrder::swapIfLittleEndian (src[i])); swapBytes ((i + 1 == numUnits) ? (uint16) 0 : src[i + 1]) };
} const CharPointer_UTF16 ptr { reinterpret_cast<const CharPointer_UTF16::CharType*> (wideBuffer) };
else
{ builder.write (*ptr);
for (int i = 0; i < numChars; ++i) i += (int) ((ptr + 1).getAddress() - ptr.getAddress());
builder.write ((juce_wchar) ByteOrder::swapIfBigEndian (src[i]));
} }
builder.write (0); builder.write (0);
@ -3011,6 +3012,45 @@ public:
for (auto c : str) for (auto c : str)
expectEquals (c, parts[index++]); expectEquals (c, parts[index++]);
} }
const CharPointer_UTF8 expectedString { "glass \xc2\xbd full" };
const CharPointer_UTF8 emojiExpectedString { "hello JUCE \xf0\x9f\xa7\x83" };
beginTest ("createStringFromData reads LE UTF-16");
{
constexpr char buffer[] = "\xff\xfe\x67\x00\x6c\x00\x61\x00\x73\x00\x73\x00\x20\x00\xbd\x00\x20\x00\x66\x00\x75\x00\x6c\x00\x6c\x00";
expect (expectedString == String::createStringFromData (buffer, sizeof (buffer)));
constexpr char emojiBuffer[] = "\xff\xfe\x68\x00\x65\x00\x6c\x00\x6c\x00\x6f\x00\x20\x00\x4a\x00\x55\x00\x43\x00\x45\x00\x20\x00\x3e\xd8\xc3\xdd";
const auto emojiActualString = String::createStringFromData (emojiBuffer, sizeof (emojiBuffer));
expect (emojiExpectedString == emojiActualString);
}
beginTest ("createStringFromData reads BE UTF-16");
{
constexpr char buffer[] = "\xfe\xff\x00\x67\x00\x6c\x00\x61\x00\x73\x00\x73\x00\x20\x00\xbd\x00\x20\x00\x66\x00\x75\x00\x6c\x00\x6c";
expect (expectedString == String::createStringFromData (buffer, sizeof (buffer)));
constexpr char emojiBuffer[] = "\xfe\xff\x00\x68\x00\x65\x00\x6c\x00\x6c\x00\x6f\x00\x20\x00\x4a\x00\x55\x00\x43\x00\x45\x00\x20\xd8\x3e\xdd\xc3";
const auto emojiActualString = String::createStringFromData (emojiBuffer, sizeof (emojiBuffer));
expect (emojiExpectedString == emojiActualString);
}
beginTest ("createStringFromData reads UTF-8");
{
constexpr char buffer[] = "glass \xc2\xbd full";
expect (expectedString == String::createStringFromData (buffer, sizeof (buffer)));
constexpr char emojiBuffer[] = "hello JUCE \xf0\x9f\xa7\x83";
const auto emojiActualString = String::createStringFromData (emojiBuffer, sizeof (emojiBuffer));
expect (emojiExpectedString == emojiActualString);
}
beginTest ("createStringFromData reads Windows 1252");
{
constexpr char buffer[] = "glass \xBD full";
expect (expectedString == String::createStringFromData (buffer, sizeof (buffer)));
}
} }
}; };