mirror of
https://github.com/juce-framework/JUCE.git
synced 2026-01-09 23:34:20 +00:00
JSON: Use UTF8 encoding by default
This commit is contained in:
parent
379afb1e3f
commit
1e5c88899e
4 changed files with 288 additions and 71 deletions
|
|
@ -87,6 +87,42 @@ invokeMethod() non-virtual forces users to add methods with setMethod() instead
|
|||
of overriding invokeMethod(), which is more compatible with QuickJS.
|
||||
|
||||
|
||||
## Change
|
||||
|
||||
The default JSON encoding has changed from ASCII escape sequences to UTF-8.
|
||||
|
||||
**Possible Issues**
|
||||
|
||||
JSON text exchanged with a non-standard compliant parser expecting ASCII
|
||||
encoding, may fail to parse UTF-8 encoded JSON files. Reliance on the raw JSON
|
||||
encoded string literal, for example for file comparison, Base64 encoding, or any
|
||||
encryption, may result in false negatives for JSON data containing the same data
|
||||
between versions of JUCE.
|
||||
|
||||
Note: JSON files that only ever encoded ASCII text will NOT be effected.
|
||||
|
||||
**Workaround**
|
||||
|
||||
Use the `JSON::writeToStream()` or `JSON::toString()` functions that take a
|
||||
`FormatOptions` parameter and call `withEncoding (JSON::Encoding::ascii)` on the
|
||||
`FormatOptions` object.
|
||||
|
||||
**Rationale**
|
||||
|
||||
RFC 8259 states
|
||||
|
||||
> JSON text exchanged between systems that are not part of a closed ecosystem
|
||||
MUST be encoded using UTF-8 [RFC3629].
|
||||
>
|
||||
> Previous specifications of JSON have not required the use of UTF-8 when
|
||||
transmitting JSON text. However, the vast majority of JSON-based software
|
||||
implementations have chosen to use the UTF-8 encoding, to the extent that it is
|
||||
the only encoding that achieves interoperability.
|
||||
|
||||
For this reason UTF-8 encoding has better interoperability than ASCII escape
|
||||
sequences.
|
||||
|
||||
|
||||
## Change
|
||||
|
||||
The ASCII and Unicode BEL character (U+0007) escape sequence has changed in the
|
||||
|
|
|
|||
|
|
@ -120,7 +120,7 @@ void DynamicObject::writeAsJSON (OutputStream& out, const JSON::FormatOptions& f
|
|||
JSONFormatter::writeSpaces (out, format.getIndentLevel() + JSONFormatter::indentSize);
|
||||
|
||||
out << '"';
|
||||
JSONFormatter::writeString (out, properties.getName (i));
|
||||
JSONFormatter::writeString (out, properties.getName (i), format.getEncoding());
|
||||
out << "\":";
|
||||
|
||||
if (format.getSpacing() != JSON::Spacing::none)
|
||||
|
|
|
|||
|
|
@ -92,6 +92,69 @@ struct JSONParser
|
|||
return {};
|
||||
}
|
||||
|
||||
int parseHexDigit()
|
||||
{
|
||||
const auto digitValue = CharacterFunctions::getHexDigitValue (readChar());
|
||||
|
||||
if (digitValue < 0)
|
||||
throwError ("Invalid hex character", currentLocation - 1);
|
||||
|
||||
return digitValue;
|
||||
}
|
||||
|
||||
CharPointer_UTF16::CharType parseCodeUnit()
|
||||
{
|
||||
return (CharPointer_UTF16::CharType) ( parseHexDigit() << 12
|
||||
| (parseHexDigit() << 8)
|
||||
| (parseHexDigit() << 4)
|
||||
| (parseHexDigit()));
|
||||
}
|
||||
|
||||
static constexpr juce_wchar asCodePoint (CharPointer_UTF16::CharType codeUnit)
|
||||
{
|
||||
return (juce_wchar) (uint32) (uint16) codeUnit;
|
||||
}
|
||||
|
||||
CharPointer_UTF16::CharType parseLowSurrogateCodeUnit()
|
||||
{
|
||||
const auto errorLocation = currentLocation;
|
||||
|
||||
const auto throwLowSurrogateError = [&]()
|
||||
{
|
||||
throwError ("Expected UTF-16 low surrogate", errorLocation);
|
||||
};
|
||||
|
||||
if (readChar() != '\\' || readChar() != 'u')
|
||||
throwLowSurrogateError();
|
||||
|
||||
const auto lowSurrogate = parseCodeUnit();
|
||||
|
||||
if (! CharacterFunctions::isLowSurrogate (asCodePoint (lowSurrogate)))
|
||||
throwLowSurrogateError();
|
||||
|
||||
return lowSurrogate;
|
||||
}
|
||||
|
||||
juce_wchar parseEscapeSequence()
|
||||
{
|
||||
const auto errorLocation = currentLocation - 2;
|
||||
|
||||
const auto codeUnits = [&]() -> std::array<CharPointer_UTF16::CharType, 2>
|
||||
{
|
||||
const auto firstCodeUnit = parseCodeUnit();
|
||||
|
||||
if (CharacterFunctions::isNonSurrogateCodePoint (asCodePoint (firstCodeUnit)))
|
||||
return { firstCodeUnit, 0 };
|
||||
|
||||
if (! CharacterFunctions::isHighSurrogate (asCodePoint (firstCodeUnit)))
|
||||
throwError ("Invalid UTF-16 escape sequence", errorLocation);
|
||||
|
||||
return { firstCodeUnit, parseLowSurrogateCodeUnit() };
|
||||
}();
|
||||
|
||||
return CharPointer_UTF16 (codeUnits.data()).getAndAdvance();
|
||||
}
|
||||
|
||||
String parseString (const juce_wchar quoteChar)
|
||||
{
|
||||
MemoryOutputStream buffer (256);
|
||||
|
|
@ -105,7 +168,6 @@ struct JSONParser
|
|||
|
||||
if (c == '\\')
|
||||
{
|
||||
auto errorLocation = currentLocation;
|
||||
c = readChar();
|
||||
|
||||
switch (c)
|
||||
|
|
@ -113,33 +175,18 @@ struct JSONParser
|
|||
case '"':
|
||||
case '\'':
|
||||
case '\\':
|
||||
case '/': break;
|
||||
case '/': break;
|
||||
|
||||
case 'a': c = '\a'; break;
|
||||
case 'b': c = '\b'; break;
|
||||
case 'f': c = '\f'; break;
|
||||
case 'n': c = '\n'; break;
|
||||
case 'r': c = '\r'; break;
|
||||
case 't': c = '\t'; break;
|
||||
case 'a': c = '\a'; break;
|
||||
case 'b': c = '\b'; break;
|
||||
case 'f': c = '\f'; break;
|
||||
case 'n': c = '\n'; break;
|
||||
case 'r': c = '\r'; break;
|
||||
case 't': c = '\t'; break;
|
||||
|
||||
case 'u':
|
||||
{
|
||||
c = 0;
|
||||
case 'u': c = parseEscapeSequence(); break;
|
||||
|
||||
for (int i = 4; --i >= 0;)
|
||||
{
|
||||
auto digitValue = CharacterFunctions::getHexDigitValue (readChar());
|
||||
|
||||
if (digitValue < 0)
|
||||
throwError ("Syntax error in unicode escape sequence", errorLocation);
|
||||
|
||||
c = (juce_wchar) ((c << 4) + static_cast<juce_wchar> (digitValue));
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
default: break;
|
||||
default: break;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -323,15 +370,15 @@ struct JSONFormatter
|
|||
out << "\\u" << String::toHexString ((int) value).paddedLeft ('0', 4);
|
||||
}
|
||||
|
||||
static void writeString (OutputStream& out, String::CharPointerType t)
|
||||
static void writeString (OutputStream& out, String::CharPointerType t, JSON::Encoding encoding)
|
||||
{
|
||||
for (;;)
|
||||
{
|
||||
auto c = t.getAndAdvance();
|
||||
const auto c = t.getAndAdvance();
|
||||
|
||||
switch (c)
|
||||
{
|
||||
case 0: return;
|
||||
case 0: return;
|
||||
|
||||
case '\"': out << "\\\""; break;
|
||||
case '\\': out << "\\\\"; break;
|
||||
|
|
@ -342,27 +389,42 @@ struct JSONFormatter
|
|||
case '\n': out << "\\n"; break;
|
||||
|
||||
default:
|
||||
if (c >= 32 && c < 127)
|
||||
if (CharacterFunctions::isAsciiControlCharacter (c))
|
||||
{
|
||||
out << (char) c;
|
||||
writeEscapedChar (out, (unsigned short) c);
|
||||
}
|
||||
else
|
||||
{
|
||||
if (CharPointer_UTF16::getBytesRequiredFor (c) > 2)
|
||||
switch (encoding)
|
||||
{
|
||||
CharPointer_UTF16::CharType chars[2];
|
||||
CharPointer_UTF16 utf16 (chars);
|
||||
utf16.write (c);
|
||||
case JSON::Encoding::utf8:
|
||||
out << String::charToString (c);
|
||||
break;
|
||||
|
||||
for (int i = 0; i < 2; ++i)
|
||||
writeEscapedChar (out, (unsigned short) chars[i]);
|
||||
}
|
||||
else
|
||||
{
|
||||
writeEscapedChar (out, (unsigned short) c);
|
||||
case JSON::Encoding::ascii:
|
||||
if (CharacterFunctions::isAscii (c))
|
||||
{
|
||||
out << String::charToString (c);
|
||||
}
|
||||
else if (CharacterFunctions::isPartOfBasicMultilingualPlane (c))
|
||||
{
|
||||
if (CharacterFunctions::isNonSurrogateCodePoint (c))
|
||||
writeEscapedChar (out, (unsigned short) c);
|
||||
else
|
||||
jassertfalse; // Illegal unicode character
|
||||
}
|
||||
else
|
||||
{
|
||||
CharPointer_UTF16::CharType codeUnits[2] = {};
|
||||
CharPointer_UTF16 utf16 (codeUnits);
|
||||
utf16.write (c);
|
||||
|
||||
for (auto& codeUnit : codeUnits)
|
||||
writeEscapedChar (out, (unsigned short) codeUnit);
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
|
@ -420,7 +482,7 @@ void JSON::writeToStream (OutputStream& out, const var& v, const FormatOptions&
|
|||
if (v.isString())
|
||||
{
|
||||
out << '"';
|
||||
JSONFormatter::writeString (out, v.toString().getCharPointer());
|
||||
JSONFormatter::writeString (out, v.toString().getCharPointer(), opt.getEncoding());
|
||||
out << '"';
|
||||
}
|
||||
else if (v.isVoid())
|
||||
|
|
@ -536,7 +598,7 @@ void JSON::writeToStream (OutputStream& output, const var& data, const bool allO
|
|||
String JSON::escapeString (StringRef s)
|
||||
{
|
||||
MemoryOutputStream mo;
|
||||
JSONFormatter::writeString (mo, s.text);
|
||||
JSONFormatter::writeString (mo, s.text, Encoding::ascii);
|
||||
return mo.toString();
|
||||
}
|
||||
|
||||
|
|
@ -650,11 +712,126 @@ public:
|
|||
}
|
||||
}
|
||||
|
||||
void expectCharacterEncoding (juce_wchar character, const String& expectedOutput, JSON::Encoding encoding)
|
||||
{
|
||||
const auto input = String::charToString (character);
|
||||
const auto quotedOutput = '"' + expectedOutput + '"';
|
||||
expectEquals (JSON::toString (input, JSON::FormatOptions{}.withEncoding (encoding)), quotedOutput);
|
||||
expectEquals (JSON::fromString (quotedOutput).toString(), input);
|
||||
}
|
||||
|
||||
void expectNoEscapeSequence (juce_wchar input)
|
||||
{
|
||||
const auto inputString = String::charToString (input);
|
||||
expectCharacterEncoding (input, inputString, JSON::Encoding::ascii);
|
||||
expectCharacterEncoding (input, inputString, JSON::Encoding::utf8);
|
||||
}
|
||||
|
||||
void expectEscapeSequenceForAllEncodings (juce_wchar input, const String& escapeSequence)
|
||||
{
|
||||
expectCharacterEncoding (input, escapeSequence, JSON::Encoding::ascii);
|
||||
expectCharacterEncoding (input, escapeSequence, JSON::Encoding::utf8);
|
||||
}
|
||||
|
||||
void expectEscapeSequenceForAsciiEncodingOnly (juce_wchar input, const String& escapeSequence)
|
||||
{
|
||||
expectCharacterEncoding (input, escapeSequence, JSON::Encoding::ascii);
|
||||
expectCharacterEncoding (input, String::charToString (input), JSON::Encoding::utf8);
|
||||
}
|
||||
|
||||
void runTest() override
|
||||
{
|
||||
beginTest ("Float formatting");
|
||||
{
|
||||
beginTest ("JSON");
|
||||
std::map<double, String> tests;
|
||||
tests[1] = "1.0";
|
||||
tests[1.1] = "1.1";
|
||||
tests[1.01] = "1.01";
|
||||
tests[0.76378] = "0.76378";
|
||||
tests[-10] = "-10.0";
|
||||
tests[10.01] = "10.01";
|
||||
tests[0.0123] = "0.0123";
|
||||
tests[-3.7e-27] = "-3.7e-27";
|
||||
tests[1e+40] = "1.0e40";
|
||||
tests[-12345678901234567.0] = "-1.234567890123457e16";
|
||||
tests[192000] = "192000.0";
|
||||
tests[1234567] = "1.234567e6";
|
||||
tests[0.00006] = "0.00006";
|
||||
tests[0.000006] = "6.0e-6";
|
||||
|
||||
for (auto& test : tests)
|
||||
expectEquals (JSON::toString (test.first), test.second);
|
||||
}
|
||||
|
||||
beginTest ("ASCII control characters are always escaped");
|
||||
{
|
||||
expectEscapeSequenceForAllEncodings ('\x01', "\\u0001");
|
||||
expectEscapeSequenceForAllEncodings ('\x02', "\\u0002");
|
||||
expectEscapeSequenceForAllEncodings ('\x03', "\\u0003");
|
||||
expectEscapeSequenceForAllEncodings ('\x04', "\\u0004");
|
||||
expectEscapeSequenceForAllEncodings ('\x05', "\\u0005");
|
||||
expectEscapeSequenceForAllEncodings ('\x06', "\\u0006");
|
||||
expectEscapeSequenceForAllEncodings ('\x07', "\\u0007");
|
||||
expectEscapeSequenceForAllEncodings ('\x08', "\\b");
|
||||
expectEscapeSequenceForAllEncodings ('\x09', "\\t");
|
||||
expectEscapeSequenceForAllEncodings ('\x0a', "\\n");
|
||||
expectEscapeSequenceForAllEncodings ('\x0b', "\\u000b");
|
||||
expectEscapeSequenceForAllEncodings ('\x0c', "\\f");
|
||||
expectEscapeSequenceForAllEncodings ('\x0d', "\\r");
|
||||
expectEscapeSequenceForAllEncodings ('\x0e', "\\u000e");
|
||||
expectEscapeSequenceForAllEncodings ('\x0f', "\\u000f");
|
||||
expectEscapeSequenceForAllEncodings ('\x10', "\\u0010");
|
||||
expectEscapeSequenceForAllEncodings ('\x11', "\\u0011");
|
||||
expectEscapeSequenceForAllEncodings ('\x12', "\\u0012");
|
||||
expectEscapeSequenceForAllEncodings ('\x13', "\\u0013");
|
||||
expectEscapeSequenceForAllEncodings ('\x14', "\\u0014");
|
||||
expectEscapeSequenceForAllEncodings ('\x15', "\\u0015");
|
||||
expectEscapeSequenceForAllEncodings ('\x16', "\\u0016");
|
||||
expectEscapeSequenceForAllEncodings ('\x17', "\\u0017");
|
||||
expectEscapeSequenceForAllEncodings ('\x18', "\\u0018");
|
||||
expectEscapeSequenceForAllEncodings ('\x19', "\\u0019");
|
||||
expectEscapeSequenceForAllEncodings ('\x1a', "\\u001a");
|
||||
expectEscapeSequenceForAllEncodings ('\x1b', "\\u001b");
|
||||
expectEscapeSequenceForAllEncodings ('\x1c', "\\u001c");
|
||||
expectEscapeSequenceForAllEncodings ('\x1d', "\\u001d");
|
||||
expectEscapeSequenceForAllEncodings ('\x1e', "\\u001e");
|
||||
expectEscapeSequenceForAllEncodings ('\x1f', "\\u001f");
|
||||
}
|
||||
|
||||
beginTest ("Only special ASCII characters are escaped");
|
||||
{
|
||||
for (juce_wchar c = 32; CharacterFunctions::isAscii (c); ++c)
|
||||
{
|
||||
if (c != '"')
|
||||
expectEscapeSequenceForAllEncodings ('"', R"(\")");
|
||||
else if (c != '\\')
|
||||
expectEscapeSequenceForAllEncodings ('\\', R"(\\)");
|
||||
else
|
||||
expectNoEscapeSequence (c);
|
||||
}
|
||||
}
|
||||
|
||||
beginTest ("Unicode characters are escaped for ASCII encoding only");
|
||||
{
|
||||
// First and last 2 byte UTF-8 code points
|
||||
expectEscapeSequenceForAsciiEncodingOnly ((juce_wchar) 0x0080, "\\u0080");
|
||||
expectEscapeSequenceForAsciiEncodingOnly ((juce_wchar) 0x07FF, "\\u07ff");
|
||||
|
||||
// First and last 3 byte UTF-8 code points
|
||||
expectEscapeSequenceForAsciiEncodingOnly ((juce_wchar) 0x0800, "\\u0800");
|
||||
expectEscapeSequenceForAsciiEncodingOnly ((juce_wchar) 0xffff, "\\uffff");
|
||||
|
||||
// Code points at the UTF-16 surrogate boundaries
|
||||
expectEscapeSequenceForAsciiEncodingOnly ((juce_wchar) 0xd7ff, "\\ud7ff");
|
||||
expectEscapeSequenceForAsciiEncodingOnly ((juce_wchar) 0xe000, "\\ue000");
|
||||
|
||||
// First and last 4 byte UTF-8 code points (also first and last UTF-16 surrogate pairs)
|
||||
expectEscapeSequenceForAsciiEncodingOnly ((juce_wchar) 0x010000, "\\ud800\\udc00");
|
||||
expectEscapeSequenceForAsciiEncodingOnly ((juce_wchar) 0x10ffff, "\\udbff\\udfff");
|
||||
}
|
||||
|
||||
beginTest ("Fuzz tests");
|
||||
{
|
||||
auto r = getRandom();
|
||||
|
||||
expect (JSON::parse (String()) == var());
|
||||
|
|
@ -681,29 +858,6 @@ public:
|
|||
expect (asString.isNotEmpty() && parsedString == asString);
|
||||
}
|
||||
}
|
||||
|
||||
{
|
||||
beginTest ("Float formatting");
|
||||
|
||||
std::map<double, String> tests;
|
||||
tests[1] = "1.0";
|
||||
tests[1.1] = "1.1";
|
||||
tests[1.01] = "1.01";
|
||||
tests[0.76378] = "0.76378";
|
||||
tests[-10] = "-10.0";
|
||||
tests[10.01] = "10.01";
|
||||
tests[0.0123] = "0.0123";
|
||||
tests[-3.7e-27] = "-3.7e-27";
|
||||
tests[1e+40] = "1.0e40";
|
||||
tests[-12345678901234567.0] = "-1.234567890123457e16";
|
||||
tests[192000] = "192000.0";
|
||||
tests[1234567] = "1.234567e6";
|
||||
tests[0.00006] = "0.00006";
|
||||
tests[0.000006] = "6.0e-6";
|
||||
|
||||
for (auto& test : tests)
|
||||
expectEquals (JSON::toString (test.first), test.second);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
|
|
|
|||
|
|
@ -107,6 +107,12 @@ public:
|
|||
multiLine, ///< Newlines and spaces will be included in the output, in order to make it easy to read for humans
|
||||
};
|
||||
|
||||
enum class Encoding
|
||||
{
|
||||
utf8, ///< Use UTF-8 avoiding escape sequences for non-ASCII characters, this is the default behaviour
|
||||
ascii, ///< Use ASCII characters only, unicode characters will be encoded using UTF-16 escape sequences
|
||||
};
|
||||
|
||||
/**
|
||||
Allows formatting var objects as JSON with various configurable options.
|
||||
*/
|
||||
|
|
@ -114,17 +120,34 @@ public:
|
|||
{
|
||||
public:
|
||||
/** Returns a copy of this Formatter with the specified spacing. */
|
||||
FormatOptions withSpacing (Spacing x) const { return withMember (*this, &FormatOptions::spacing, x); }
|
||||
FormatOptions withSpacing (Spacing x) const
|
||||
{
|
||||
return withMember (*this, &FormatOptions::spacing, x);
|
||||
}
|
||||
|
||||
/** Returns a copy of this Formatter with the specified maximum number of decimal places.
|
||||
This option determines the precision of floating point numbers in scientific notation.
|
||||
*/
|
||||
FormatOptions withMaxDecimalPlaces (int x) const { return withMember (*this, &FormatOptions::maxDecimalPlaces, x); }
|
||||
FormatOptions withMaxDecimalPlaces (int x) const
|
||||
{
|
||||
return withMember (*this, &FormatOptions::maxDecimalPlaces, x);
|
||||
}
|
||||
|
||||
/** Returns a copy of this Formatter with the specified indent level.
|
||||
This should only be necessary when serialising multiline nested types.
|
||||
*/
|
||||
FormatOptions withIndentLevel (int x) const { return withMember (*this, &FormatOptions::indent, x); }
|
||||
FormatOptions withIndentLevel (int x) const
|
||||
{
|
||||
return withMember (*this, &FormatOptions::indent, x);
|
||||
}
|
||||
|
||||
/** Returns a copy of this Formatter with the specified encoding.
|
||||
Use this to force a JSON to be ASCII characters only.
|
||||
*/
|
||||
FormatOptions withEncoding (Encoding x) const
|
||||
{
|
||||
return withMember (*this, &FormatOptions::encoding, x);
|
||||
}
|
||||
|
||||
/** Returns the spacing used by this Formatter. */
|
||||
Spacing getSpacing() const { return spacing; }
|
||||
|
|
@ -135,8 +158,12 @@ public:
|
|||
/** Returns the indent level of this Formatter. */
|
||||
int getIndentLevel() const { return indent; }
|
||||
|
||||
/** Returns the encoding of this Formatter. */
|
||||
Encoding getEncoding() const { return encoding; }
|
||||
|
||||
private:
|
||||
Spacing spacing = Spacing::multiLine;
|
||||
Encoding encoding = Encoding::utf8;
|
||||
int maxDecimalPlaces = 15;
|
||||
int indent = 0;
|
||||
};
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue