1
0
Fork 0
mirror of https://github.com/juce-framework/JUCE.git synced 2026-01-10 23:44:24 +00:00

JSON: Use UTF8 encoding by default

This commit is contained in:
Anthony Nicholls 2024-07-17 19:59:24 +01:00
parent 379afb1e3f
commit 1e5c88899e
4 changed files with 288 additions and 71 deletions

View file

@ -120,7 +120,7 @@ void DynamicObject::writeAsJSON (OutputStream& out, const JSON::FormatOptions& f
JSONFormatter::writeSpaces (out, format.getIndentLevel() + JSONFormatter::indentSize);
out << '"';
JSONFormatter::writeString (out, properties.getName (i));
JSONFormatter::writeString (out, properties.getName (i), format.getEncoding());
out << "\":";
if (format.getSpacing() != JSON::Spacing::none)

View file

@ -92,6 +92,69 @@ struct JSONParser
return {};
}
int parseHexDigit()
{
const auto digitValue = CharacterFunctions::getHexDigitValue (readChar());
if (digitValue < 0)
throwError ("Invalid hex character", currentLocation - 1);
return digitValue;
}
CharPointer_UTF16::CharType parseCodeUnit()
{
return (CharPointer_UTF16::CharType) ( parseHexDigit() << 12
| (parseHexDigit() << 8)
| (parseHexDigit() << 4)
| (parseHexDigit()));
}
static constexpr juce_wchar asCodePoint (CharPointer_UTF16::CharType codeUnit)
{
return (juce_wchar) (uint32) (uint16) codeUnit;
}
CharPointer_UTF16::CharType parseLowSurrogateCodeUnit()
{
const auto errorLocation = currentLocation;
const auto throwLowSurrogateError = [&]()
{
throwError ("Expected UTF-16 low surrogate", errorLocation);
};
if (readChar() != '\\' || readChar() != 'u')
throwLowSurrogateError();
const auto lowSurrogate = parseCodeUnit();
if (! CharacterFunctions::isLowSurrogate (asCodePoint (lowSurrogate)))
throwLowSurrogateError();
return lowSurrogate;
}
juce_wchar parseEscapeSequence()
{
const auto errorLocation = currentLocation - 2;
const auto codeUnits = [&]() -> std::array<CharPointer_UTF16::CharType, 2>
{
const auto firstCodeUnit = parseCodeUnit();
if (CharacterFunctions::isNonSurrogateCodePoint (asCodePoint (firstCodeUnit)))
return { firstCodeUnit, 0 };
if (! CharacterFunctions::isHighSurrogate (asCodePoint (firstCodeUnit)))
throwError ("Invalid UTF-16 escape sequence", errorLocation);
return { firstCodeUnit, parseLowSurrogateCodeUnit() };
}();
return CharPointer_UTF16 (codeUnits.data()).getAndAdvance();
}
String parseString (const juce_wchar quoteChar)
{
MemoryOutputStream buffer (256);
@ -105,7 +168,6 @@ struct JSONParser
if (c == '\\')
{
auto errorLocation = currentLocation;
c = readChar();
switch (c)
@ -113,33 +175,18 @@ struct JSONParser
case '"':
case '\'':
case '\\':
case '/': break;
case '/': break;
case 'a': c = '\a'; break;
case 'b': c = '\b'; break;
case 'f': c = '\f'; break;
case 'n': c = '\n'; break;
case 'r': c = '\r'; break;
case 't': c = '\t'; break;
case 'a': c = '\a'; break;
case 'b': c = '\b'; break;
case 'f': c = '\f'; break;
case 'n': c = '\n'; break;
case 'r': c = '\r'; break;
case 't': c = '\t'; break;
case 'u':
{
c = 0;
case 'u': c = parseEscapeSequence(); break;
for (int i = 4; --i >= 0;)
{
auto digitValue = CharacterFunctions::getHexDigitValue (readChar());
if (digitValue < 0)
throwError ("Syntax error in unicode escape sequence", errorLocation);
c = (juce_wchar) ((c << 4) + static_cast<juce_wchar> (digitValue));
}
break;
}
default: break;
default: break;
}
}
@ -323,15 +370,15 @@ struct JSONFormatter
out << "\\u" << String::toHexString ((int) value).paddedLeft ('0', 4);
}
static void writeString (OutputStream& out, String::CharPointerType t)
static void writeString (OutputStream& out, String::CharPointerType t, JSON::Encoding encoding)
{
for (;;)
{
auto c = t.getAndAdvance();
const auto c = t.getAndAdvance();
switch (c)
{
case 0: return;
case 0: return;
case '\"': out << "\\\""; break;
case '\\': out << "\\\\"; break;
@ -342,27 +389,42 @@ struct JSONFormatter
case '\n': out << "\\n"; break;
default:
if (c >= 32 && c < 127)
if (CharacterFunctions::isAsciiControlCharacter (c))
{
out << (char) c;
writeEscapedChar (out, (unsigned short) c);
}
else
{
if (CharPointer_UTF16::getBytesRequiredFor (c) > 2)
switch (encoding)
{
CharPointer_UTF16::CharType chars[2];
CharPointer_UTF16 utf16 (chars);
utf16.write (c);
case JSON::Encoding::utf8:
out << String::charToString (c);
break;
for (int i = 0; i < 2; ++i)
writeEscapedChar (out, (unsigned short) chars[i]);
}
else
{
writeEscapedChar (out, (unsigned short) c);
case JSON::Encoding::ascii:
if (CharacterFunctions::isAscii (c))
{
out << String::charToString (c);
}
else if (CharacterFunctions::isPartOfBasicMultilingualPlane (c))
{
if (CharacterFunctions::isNonSurrogateCodePoint (c))
writeEscapedChar (out, (unsigned short) c);
else
jassertfalse; // Illegal unicode character
}
else
{
CharPointer_UTF16::CharType codeUnits[2] = {};
CharPointer_UTF16 utf16 (codeUnits);
utf16.write (c);
for (auto& codeUnit : codeUnits)
writeEscapedChar (out, (unsigned short) codeUnit);
}
break;
}
}
break;
}
}
@ -420,7 +482,7 @@ void JSON::writeToStream (OutputStream& out, const var& v, const FormatOptions&
if (v.isString())
{
out << '"';
JSONFormatter::writeString (out, v.toString().getCharPointer());
JSONFormatter::writeString (out, v.toString().getCharPointer(), opt.getEncoding());
out << '"';
}
else if (v.isVoid())
@ -536,7 +598,7 @@ void JSON::writeToStream (OutputStream& output, const var& data, const bool allO
String JSON::escapeString (StringRef s)
{
MemoryOutputStream mo;
JSONFormatter::writeString (mo, s.text);
JSONFormatter::writeString (mo, s.text, Encoding::ascii);
return mo.toString();
}
@ -650,11 +712,126 @@ public:
}
}
void expectCharacterEncoding (juce_wchar character, const String& expectedOutput, JSON::Encoding encoding)
{
const auto input = String::charToString (character);
const auto quotedOutput = '"' + expectedOutput + '"';
expectEquals (JSON::toString (input, JSON::FormatOptions{}.withEncoding (encoding)), quotedOutput);
expectEquals (JSON::fromString (quotedOutput).toString(), input);
}
void expectNoEscapeSequence (juce_wchar input)
{
const auto inputString = String::charToString (input);
expectCharacterEncoding (input, inputString, JSON::Encoding::ascii);
expectCharacterEncoding (input, inputString, JSON::Encoding::utf8);
}
void expectEscapeSequenceForAllEncodings (juce_wchar input, const String& escapeSequence)
{
expectCharacterEncoding (input, escapeSequence, JSON::Encoding::ascii);
expectCharacterEncoding (input, escapeSequence, JSON::Encoding::utf8);
}
void expectEscapeSequenceForAsciiEncodingOnly (juce_wchar input, const String& escapeSequence)
{
expectCharacterEncoding (input, escapeSequence, JSON::Encoding::ascii);
expectCharacterEncoding (input, String::charToString (input), JSON::Encoding::utf8);
}
void runTest() override
{
beginTest ("Float formatting");
{
beginTest ("JSON");
std::map<double, String> tests;
tests[1] = "1.0";
tests[1.1] = "1.1";
tests[1.01] = "1.01";
tests[0.76378] = "0.76378";
tests[-10] = "-10.0";
tests[10.01] = "10.01";
tests[0.0123] = "0.0123";
tests[-3.7e-27] = "-3.7e-27";
tests[1e+40] = "1.0e40";
tests[-12345678901234567.0] = "-1.234567890123457e16";
tests[192000] = "192000.0";
tests[1234567] = "1.234567e6";
tests[0.00006] = "0.00006";
tests[0.000006] = "6.0e-6";
for (auto& test : tests)
expectEquals (JSON::toString (test.first), test.second);
}
beginTest ("ASCII control characters are always escaped");
{
expectEscapeSequenceForAllEncodings ('\x01', "\\u0001");
expectEscapeSequenceForAllEncodings ('\x02', "\\u0002");
expectEscapeSequenceForAllEncodings ('\x03', "\\u0003");
expectEscapeSequenceForAllEncodings ('\x04', "\\u0004");
expectEscapeSequenceForAllEncodings ('\x05', "\\u0005");
expectEscapeSequenceForAllEncodings ('\x06', "\\u0006");
expectEscapeSequenceForAllEncodings ('\x07', "\\u0007");
expectEscapeSequenceForAllEncodings ('\x08', "\\b");
expectEscapeSequenceForAllEncodings ('\x09', "\\t");
expectEscapeSequenceForAllEncodings ('\x0a', "\\n");
expectEscapeSequenceForAllEncodings ('\x0b', "\\u000b");
expectEscapeSequenceForAllEncodings ('\x0c', "\\f");
expectEscapeSequenceForAllEncodings ('\x0d', "\\r");
expectEscapeSequenceForAllEncodings ('\x0e', "\\u000e");
expectEscapeSequenceForAllEncodings ('\x0f', "\\u000f");
expectEscapeSequenceForAllEncodings ('\x10', "\\u0010");
expectEscapeSequenceForAllEncodings ('\x11', "\\u0011");
expectEscapeSequenceForAllEncodings ('\x12', "\\u0012");
expectEscapeSequenceForAllEncodings ('\x13', "\\u0013");
expectEscapeSequenceForAllEncodings ('\x14', "\\u0014");
expectEscapeSequenceForAllEncodings ('\x15', "\\u0015");
expectEscapeSequenceForAllEncodings ('\x16', "\\u0016");
expectEscapeSequenceForAllEncodings ('\x17', "\\u0017");
expectEscapeSequenceForAllEncodings ('\x18', "\\u0018");
expectEscapeSequenceForAllEncodings ('\x19', "\\u0019");
expectEscapeSequenceForAllEncodings ('\x1a', "\\u001a");
expectEscapeSequenceForAllEncodings ('\x1b', "\\u001b");
expectEscapeSequenceForAllEncodings ('\x1c', "\\u001c");
expectEscapeSequenceForAllEncodings ('\x1d', "\\u001d");
expectEscapeSequenceForAllEncodings ('\x1e', "\\u001e");
expectEscapeSequenceForAllEncodings ('\x1f', "\\u001f");
}
beginTest ("Only special ASCII characters are escaped");
{
for (juce_wchar c = 32; CharacterFunctions::isAscii (c); ++c)
{
if (c != '"')
expectEscapeSequenceForAllEncodings ('"', R"(\")");
else if (c != '\\')
expectEscapeSequenceForAllEncodings ('\\', R"(\\)");
else
expectNoEscapeSequence (c);
}
}
beginTest ("Unicode characters are escaped for ASCII encoding only");
{
// First and last 2 byte UTF-8 code points
expectEscapeSequenceForAsciiEncodingOnly ((juce_wchar) 0x0080, "\\u0080");
expectEscapeSequenceForAsciiEncodingOnly ((juce_wchar) 0x07FF, "\\u07ff");
// First and last 3 byte UTF-8 code points
expectEscapeSequenceForAsciiEncodingOnly ((juce_wchar) 0x0800, "\\u0800");
expectEscapeSequenceForAsciiEncodingOnly ((juce_wchar) 0xffff, "\\uffff");
// Code points at the UTF-16 surrogate boundaries
expectEscapeSequenceForAsciiEncodingOnly ((juce_wchar) 0xd7ff, "\\ud7ff");
expectEscapeSequenceForAsciiEncodingOnly ((juce_wchar) 0xe000, "\\ue000");
// First and last 4 byte UTF-8 code points (also first and last UTF-16 surrogate pairs)
expectEscapeSequenceForAsciiEncodingOnly ((juce_wchar) 0x010000, "\\ud800\\udc00");
expectEscapeSequenceForAsciiEncodingOnly ((juce_wchar) 0x10ffff, "\\udbff\\udfff");
}
beginTest ("Fuzz tests");
{
auto r = getRandom();
expect (JSON::parse (String()) == var());
@ -681,29 +858,6 @@ public:
expect (asString.isNotEmpty() && parsedString == asString);
}
}
{
beginTest ("Float formatting");
std::map<double, String> tests;
tests[1] = "1.0";
tests[1.1] = "1.1";
tests[1.01] = "1.01";
tests[0.76378] = "0.76378";
tests[-10] = "-10.0";
tests[10.01] = "10.01";
tests[0.0123] = "0.0123";
tests[-3.7e-27] = "-3.7e-27";
tests[1e+40] = "1.0e40";
tests[-12345678901234567.0] = "-1.234567890123457e16";
tests[192000] = "192000.0";
tests[1234567] = "1.234567e6";
tests[0.00006] = "0.00006";
tests[0.000006] = "6.0e-6";
for (auto& test : tests)
expectEquals (JSON::toString (test.first), test.second);
}
}
};

View file

@ -107,6 +107,12 @@ public:
multiLine, ///< Newlines and spaces will be included in the output, in order to make it easy to read for humans
};
enum class Encoding
{
utf8, ///< Use UTF-8 avoiding escape sequences for non-ASCII characters, this is the default behaviour
ascii, ///< Use ASCII characters only, unicode characters will be encoded using UTF-16 escape sequences
};
/**
Allows formatting var objects as JSON with various configurable options.
*/
@ -114,17 +120,34 @@ public:
{
public:
/** Returns a copy of this Formatter with the specified spacing. */
FormatOptions withSpacing (Spacing x) const { return withMember (*this, &FormatOptions::spacing, x); }
FormatOptions withSpacing (Spacing x) const
{
return withMember (*this, &FormatOptions::spacing, x);
}
/** Returns a copy of this Formatter with the specified maximum number of decimal places.
This option determines the precision of floating point numbers in scientific notation.
*/
FormatOptions withMaxDecimalPlaces (int x) const { return withMember (*this, &FormatOptions::maxDecimalPlaces, x); }
FormatOptions withMaxDecimalPlaces (int x) const
{
return withMember (*this, &FormatOptions::maxDecimalPlaces, x);
}
/** Returns a copy of this Formatter with the specified indent level.
This should only be necessary when serialising multiline nested types.
*/
FormatOptions withIndentLevel (int x) const { return withMember (*this, &FormatOptions::indent, x); }
FormatOptions withIndentLevel (int x) const
{
return withMember (*this, &FormatOptions::indent, x);
}
/** Returns a copy of this Formatter with the specified encoding.
Use this to force a JSON to be ASCII characters only.
*/
FormatOptions withEncoding (Encoding x) const
{
return withMember (*this, &FormatOptions::encoding, x);
}
/** Returns the spacing used by this Formatter. */
Spacing getSpacing() const { return spacing; }
@ -135,8 +158,12 @@ public:
/** Returns the indent level of this Formatter. */
int getIndentLevel() const { return indent; }
/** Returns the encoding of this Formatter. */
Encoding getEncoding() const { return encoding; }
private:
Spacing spacing = Spacing::multiLine;
Encoding encoding = Encoding::utf8;
int maxDecimalPlaces = 15;
int indent = 0;
};