1
0
Fork 0
mirror of https://github.com/juce-framework/JUCE.git synced 2026-01-27 02:20:05 +00:00

Javascript: Move javascript implementation into a separate module

This commit is contained in:
Anthony Nicholls 2024-11-05 13:50:57 +00:00
parent 637226addc
commit df6f3f8e28
69 changed files with 941 additions and 1351 deletions

View file

@ -0,0 +1,397 @@
//
// ██████ ██  ██  ██████  ██████
// ██      ██  ██ ██    ██ ██       ** Classy Header-Only Classes **
// ██  ███████ ██  ██ ██
// ██  ██   ██ ██  ██ ██ https://github.com/Tracktion/choc
//  ██████ ██  ██  ██████   ██████
//
// CHOC is (C)2022 Tracktion Corporation, and is offered under the terms of the ISC license:
//
// Permission to use, copy, modify, and/or distribute this software for any purpose with or
// without fee is hereby granted, provided that the above copyright notice and this permission
// notice appear in all copies. THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
// WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
// AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR
// CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
// WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
// CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
#ifndef CHOC_FLOAT_TO_STRING_HEADER_INCLUDED
#define CHOC_FLOAT_TO_STRING_HEADER_INCLUDED
#include <cstring>
#include <string>
#include "../math/choc_MathHelpers.h"
namespace choc::text
{
//==============================================================================
/** Converts a 32-bit float to an accurate, round-trip-safe string.
    This is the same as calling floatToString (value, -1, false): the default number
    of decimal places is used and the decimal point is always kept.
    The algorithm used is "Grisu3" from the paper "Printing Floating-Point Numbers
    Quickly and Accurately with Integers" by Florian Loitsch.
*/
std::string floatToString (float value);
/** Converts a 64-bit double to an accurate, round-trip-safe string.
    This is the same as calling floatToString (value, -1, false): the default number
    of decimal places is used and the decimal point is always kept.
    The algorithm used is "Grisu3" from the paper "Printing Floating-Point Numbers
    Quickly and Accurately with Integers" by Florian Loitsch.
*/
std::string floatToString (double value);
//==============================================================================
/** Converts a 32-bit float to an accurate, round-trip-safe string.
    If maxDecimalPlaces is negative (e.g. -1), the default of 324 places is used.
    If omitDecimalPointForRoundNumbers is true, then values such as "2.0" are returned
    without the decimal point, e.g. simply "2". (An exact zero is still written as "0.0".)
    The algorithm used is "Grisu3" from the paper "Printing Floating-Point Numbers
    Quickly and Accurately with Integers" by Florian Loitsch.
*/
std::string floatToString (float value, int maxDecimalPlaces, bool omitDecimalPointForRoundNumbers = false);
/** Converts a 64-bit double to an accurate, round-trip-safe string.
    If maxDecimalPlaces is negative (e.g. -1), the default of 324 places is used.
    If omitDecimalPointForRoundNumbers is true, then values such as "2.0" are returned
    without the decimal point, e.g. simply "2". (An exact zero is still written as "0.0".)
    The algorithm used is "Grisu3" from the paper "Printing Floating-Point Numbers
    Quickly and Accurately with Integers" by Florian Loitsch.
*/
std::string floatToString (double value, int maxDecimalPlaces, bool omitDecimalPointForRoundNumbers = false);
//==============================================================================
/** Helper class containing its own buffer for converting a float or double to a string.

    The algorithm is "Grisu3" from the paper "Printing Floating-Point Numbers
    Quickly and Accurately with Integers" by Florian Loitsch.

    To use, just construct a FloatToStringBuffer with the value, and use its begin()/end()
    methods to iterate the result. Or use the floatToString() functions to just convert a
    value directly to a std::string.

    The characters between begin() and end() are NOT null-terminated.
    Zero is written as "0.0", infinities as "inf" and any NaN as "nan", each with a
    leading '-' if the sign bit is set.
*/
template <typename FloatOrDouble>
struct FloatToStringBuffer
{
    /** Converts the value into the internal buffer.
        @param value                 the number to convert
        @param maxDecimalPlaces      if negative, defaultNumDecimalPlaces is used
        @param omitPointIfPossible   if true, a trailing ".0" is removed from the result
    */
    FloatToStringBuffer (FloatOrDouble value, int maxDecimalPlaces, bool omitPointIfPossible)
        : stringLength (static_cast<size_t> (writeAndGetEnd (storage, value, maxDecimalPlaces, omitPointIfPossible) - storage)) {}

    const char* begin() const     { return storage; }
    const char* end() const       { return storage + stringLength; }
    std::string toString() const  { return std::string (begin(), end()); }

private:
    //==============================================================================
    static_assert (std::is_same<const float, const FloatOrDouble>::value || std::is_same<const double, const FloatOrDouble>::value,
                   "This class can only handle float or double template types");

    char storage[32];

    // The length is stored rather than an end pointer, so that a copied object's
    // end() refers to its own storage and not to the object it was copied from.
    size_t stringLength;

    /** A 64-bit mantissa paired with a base-2 exponent. */
    struct MantissaAndExponent
    {
        uint64_t mantissa;
        int32_t exponent;

        /** Builds a value from the raw bits of a float/double and its significand bits.
            When the stored exponent field is zero, the hidden bit is not added.
        */
        static constexpr MantissaAndExponent create (uint64_t floatBits, uint64_t significand)
        {
            constexpr int exponentBias = (sizeof (FloatOrDouble) == 8 ? 0x3ff : 0x7f) + numSignificandBits;
            auto exponentPlusBias = static_cast<int> ((floatBits & exponentMask) >> numSignificandBits);

            return exponentPlusBias == 0 ? MantissaAndExponent { significand, 1 - exponentBias }
                                         : MantissaAndExponent { significand + hiddenBit, exponentPlusBias - exponentBias };
        }

        /** Multiplies the mantissas as a 128-bit product, keeping the high 64 bits and
            adding the top bit of the low word as a rounding carry.
        */
        constexpr MantissaAndExponent operator* (MantissaAndExponent rhs) const
        {
            auto mantissaProduct = math::multiply128 (mantissa, rhs.mantissa);
            return { mantissaProduct.high + (mantissaProduct.low >> 63), exponent + rhs.exponent + 64 };
        }

        constexpr MantissaAndExponent shiftedUp (int numBits) const   { return { mantissa << numBits, exponent - numBits }; }
        constexpr MantissaAndExponent normalized() const              { return shiftedUp (static_cast<int> (math::countUpperClearBits (mantissa))); }
    };

    /** Writes decimal digits into the buffer (with no point or exponent), adjusting K
        by the number of digits that were not emitted, and returns the number of
        characters written.
    */
    static uint32_t generateDigits (char* buffer, MantissaAndExponent upperBound, uint64_t mantissaDiff, uint64_t delta, int& K)
    {
        uint32_t length = 0;
        const auto one = MantissaAndExponent { 1ull << -upperBound.exponent, upperBound.exponent };
        auto p1 = static_cast<uint32_t> (upperBound.mantissa >> -one.exponent);   // integer part
        auto p2 = upperBound.mantissa & (one.mantissa - 1);                       // fractional part
        auto numDigits = math::getNumDecimalDigits (p1);

        for (;;)
        {
            // Peel the most significant remaining decimal digit off the integer part
            auto digit = p1;

            switch (--numDigits)
            {
                case 0: p1 = 0; break;
                case 1: digit /= powersOf10[1]; p1 %= powersOf10[1]; break;
                case 2: digit /= powersOf10[2]; p1 %= powersOf10[2]; break;
                case 3: digit /= powersOf10[3]; p1 %= powersOf10[3]; break;
                case 4: digit /= powersOf10[4]; p1 %= powersOf10[4]; break;
                case 5: digit /= powersOf10[5]; p1 %= powersOf10[5]; break;
                case 6: digit /= powersOf10[6]; p1 %= powersOf10[6]; break;
                case 7: digit /= powersOf10[7]; p1 %= powersOf10[7]; break;
                case 8: digit /= powersOf10[8]; p1 %= powersOf10[8]; break;
                default: break;
            }

            writeDigitIfNotLeadingZero (buffer, length, digit);
            auto rest = p2 + (static_cast<uint64_t> (p1) << -one.exponent);

            if (rest <= delta)
            {
                K += numDigits;
                roundFinalDigit (buffer, length, delta, rest, static_cast<uint64_t> (powersOf10[numDigits]) << -one.exponent, mantissaDiff);
                return length;
            }

            if (numDigits == 0)
            {
                // The integer part is used up: carry on with digits from the fractional part
                for (;;)
                {
                    delta *= 10;
                    p2 *= 10;
                    --numDigits;
                    writeDigitIfNotLeadingZero (buffer, length, static_cast<uint32_t> (p2 >> -one.exponent));
                    p2 &= one.mantissa - 1;

                    if (p2 < delta)
                    {
                        K += numDigits;
                        roundFinalDigit (buffer, length, delta, p2, one.mantissa, numDigits > -9 ? mantissaDiff * powersOf10[-numDigits] : 0);
                        return length;
                    }
                }
            }
        }
    }

    /** Repeatedly decrements the last digit written while doing so moves the result
        closer to the target value without leaving the permitted delta.
    */
    static void roundFinalDigit (char* buffer, uint32_t length, uint64_t delta, uint64_t rest, uint64_t tenToPowerNumDigits, uint64_t diff)
    {
        while (rest < diff && delta - rest >= tenToPowerNumDigits
                && (rest + tenToPowerNumDigits < diff || diff - rest > rest + tenToPowerNumDigits - diff))
        {
            --(buffer[length - 1]);
            rest += tenToPowerNumDigits;
        }
    }

    // Low-level writers: each returns the position just after the characters it wrote.
    [[nodiscard]] static char* write (char* dest, char c)                                          { *dest = c; return dest + 1; }
    template <typename... Chars> static char* write (char* dest, char first, Chars... others)      { return write (write (dest, first), others...); }
    [[nodiscard]] static char* writeDigit (char* dest, int digit)                                  { return write (dest, static_cast<char> (digit + '0')); }
    template <typename... Chars> static char* writeDigit (char* dest, int d, Chars... others)      { return writeDigit (writeDigit (dest, d), others...); }
    [[nodiscard]] static char* writeZero (char* dest)                                              { return write (dest, '0', '.', '0'); }
    [[nodiscard]] static char* writeExponent (char* dest, int e)                                   { return writeShortInteger (write (dest, 'e'), e); }
    static void writeDigitIfNotLeadingZero (char* dest, uint32_t& length, uint32_t digit)          { if (digit != 0 || length != 0) dest[length++] = static_cast<char> (digit + '0'); }

    /** Writes an integer of up to three decimal digits, with a '-' prefix if negative. */
    [[nodiscard]] static char* writeShortInteger (char* dest, int n)
    {
        if (n < 0)      return writeShortInteger (write (dest, '-'), -n);
        if (n >= 100)   return writeDigit (dest, n / 100, (n / 10) % 10, n % 10);
        if (n >= 10)    return writeDigit (dest, n / 10, n % 10);

        return writeDigit (dest, n);
    }

    /** Moves `length` characters at dest up by numRepetitions places and fills the gap with charToInsert. */
    static void insertChar (char* dest, uint32_t length, char charToInsert, uint32_t numRepetitions)
    {
        std::memmove (dest + numRepetitions, dest, static_cast<size_t> (length));

        for (uint32_t i = 0; i < numRepetitions; ++i)
            dest[i] = charToInsert;
    }

    /** Formats the digits as "d.ddde[-]xx", dropping trailing zeros from the fraction. */
    static char* writeAsExponentNotation (char* dest, uint32_t totalLength, int exponent)
    {
        if (totalLength == 1)
            return writeExponent (dest + 1, exponent);

        insertChar (dest + 1, totalLength - 1, '.', 1);

        while (dest[totalLength] == '0' && totalLength > 2)
            --totalLength;

        return writeExponent (dest + (totalLength + 1), exponent);
    }

    /** Formats the digits as "0.000ddd" (no digits before the point), limited to maxDecimalPlaces. */
    static char* writeWithoutExponentLessThan1 (char* dest, uint32_t length, int mantissaDigits, int maxDecimalPlaces)
    {
        auto numPaddingZeros = static_cast<uint32_t> (2 - mantissaDigits);
        insertChar (dest, length, '0', numPaddingZeros);
        dest[1] = '.';

        if (static_cast<int> (length) > maxDecimalPlaces + mantissaDigits)
        {
            // Too many places: cut at the limit, then skip back over trailing zeros
            for (int i = maxDecimalPlaces + 1; i > 2; --i)
                if (dest[i] != '0')
                    return dest + (i + 1);

            return dest + 3;
        }

        length += numPaddingZeros;

        while (dest[length - 1] == '0' && length > 3)
            --length;

        return dest + length;
    }

    /** Formats the digits as "ddd.ddd", or "ddd000.0" when K >= 0 (no fractional digits). */
    static char* writeWithoutExponentGreaterThan1 (char* dest, uint32_t totalLength, uint32_t mantissaLength, int maxDecimalPlaces, int K)
    {
        if (K >= 0)
        {
            dest += totalLength;

            for (auto i = totalLength; i < mantissaLength; ++i)
                dest = write (dest, '0');

            return write (dest, '.', '0');
        }

        insertChar (dest + mantissaLength, totalLength - mantissaLength, '.', 1);

        if (K + maxDecimalPlaces >= 0)
            return dest + (totalLength + 1);

        // Too many places: cut at the limit, then skip back over trailing zeros
        for (auto i = static_cast<int> (mantissaLength) + maxDecimalPlaces; i > static_cast<int> (mantissaLength + 1); --i)
            if (dest[i] != '0')
                return dest + (i + 1);

        return dest + (mantissaLength + 2);
    }

    /** Computes the lower and upper boundary values around a number, both expressed
        with the same exponent.
    */
    struct Limits
    {
        constexpr Limits (MantissaAndExponent value)
        {
            upper = { (value.mantissa << 1) + 1, value.exponent - 1 };

            while ((upper.mantissa & (hiddenBit << 1)) == 0)
                upper = upper.shiftedUp (1);

            upper = upper.shiftedUp (static_cast<int> (sizeof (upper.mantissa) * 8 - numSignificandBits - 2));

            lower = value.mantissa == hiddenBit ? MantissaAndExponent { (value.mantissa << 2) - 1, value.exponent - 2 }
                                                : MantissaAndExponent { (value.mantissa << 1) - 1, value.exponent - 1 };

            lower.mantissa <<= lower.exponent - upper.exponent;
            lower.exponent = upper.exponent;
        }

        MantissaAndExponent lower, upper;
    };

    /** Writes the complete string for a value at pos, returning the end of the text. */
    static const char* writeAndGetEnd (char* pos, FloatOrDouble value, int maxDecimalPlaces, bool omitPointIfPossible)
    {
        auto startPos = pos;
        auto floatBits = getFloatBits (value);

        if ((floatBits & signMask) != 0)
        {
            pos = write (pos, '-');
            floatBits &= ~signMask;
        }

        // NB: an exact zero returns before the omitPointIfPossible handling below,
        // so it is always written as "0.0" (or "-0.0").
        if (isZero (floatBits))
            return writeZero (pos);

        // All exponent bits set means infinity (zero significand) or a NaN (any non-zero
        // significand). Checking the mask catches every NaN payload, not only the
        // canonical quiet NaN, which would otherwise be formatted as a finite number.
        if ((floatBits & exponentMask) == exponentMask)
            return floatBits == infBits ? write (pos, 'i', 'n', 'f')
                                        : write (pos, 'n', 'a', 'n');

        auto v = MantissaAndExponent::create (floatBits, floatBits & significandMask);
        Limits limits (v);

        int K;
        auto powerOf10 = createPowerOf10 (limits.upper.exponent, K);
        auto w = powerOf10 * v.normalized();
        auto upperBound = powerOf10 * limits.upper;
        upperBound.mantissa--;
        auto lowerBound = powerOf10 * limits.lower;
        lowerBound.mantissa++;

        auto totalLength = generateDigits (pos, upperBound, upperBound.mantissa - w.mantissa, upperBound.mantissa - lowerBound.mantissa, K);
        auto end = addDecimalPointAndExponent (pos, totalLength, K, maxDecimalPlaces < 0 ? defaultNumDecimalPlaces : maxDecimalPlaces);

        if (omitPointIfPossible && end > startPos + 1 && end[-1] == '0' && end[-2] == '.')
            end -= 2;

        return end;
    }

    /** Chooses between plain and exponent notation for the generated digits and
        inserts the decimal point, returning the end of the text.
    */
    static const char* addDecimalPointAndExponent (char* pos, uint32_t totalLength, int K, int maxDecimalPlaces)
    {
        auto mantissaDigits = static_cast<int> (totalLength) + K;

        if (mantissaDigits < -maxDecimalPlaces)          return writeZero (pos);
        if (mantissaDigits <= 0 && mantissaDigits > -6)  return writeWithoutExponentLessThan1 (pos, totalLength, mantissaDigits, maxDecimalPlaces);
        if (mantissaDigits > 0 && mantissaDigits <= 21)  return writeWithoutExponentGreaterThan1 (pos, totalLength, static_cast<uint32_t> (mantissaDigits), maxDecimalPlaces, K);

        return writeAsExponentNotation (pos, totalLength, mantissaDigits - 1);
    }

    // Returns the raw bit pattern of the value (a float's 32 bits are zero-extended).
    static uint64_t getFloatBits (double value)   { uint64_t i; std::memcpy (&i, &value, sizeof (i)); return i; }
    static uint64_t getFloatBits (float value)    { uint32_t i; std::memcpy (&i, &value, sizeof (i)); return i; }

    // True for +0 and -0: the sign bit is ignored.
    static bool isZero (uint64_t floatBits)       { return (floatBits & (exponentMask | significandMask)) == 0; }

    static constexpr int defaultNumDecimalPlaces = 324;
    static constexpr int numSignificandBits  = sizeof (FloatOrDouble) == 8 ? 52 : 23;
    static constexpr uint64_t signMask        = 1ull << (sizeof (FloatOrDouble) * 8 - 1);
    static constexpr uint64_t hiddenBit       = 1ull << numSignificandBits;
    static constexpr uint64_t significandMask = hiddenBit - 1;
    static constexpr uint64_t exponentMask    = sizeof (FloatOrDouble) == 8 ? 0x7ff0000000000000ull : 0x7f800000ull;
    static constexpr uint64_t nanBits         = sizeof (FloatOrDouble) == 8 ? 0x7ff8000000000000ull : 0x7fc00000ull;
    static constexpr uint64_t infBits         = sizeof (FloatOrDouble) == 8 ? 0x7ff0000000000000ull : 0x7f800000ull;
    static constexpr uint32_t powersOf10[]    = { 1, 10, 100, 1000, 10000, 100000, 1000000, 10000000, 100000000, 1000000000 };

    /** Looks up a power of ten from a precomputed table, chosen from the given base-2
        exponent, and sets K to (348 - 8 * index) for the table index that was used.
    */
    static MantissaAndExponent createPowerOf10 (int exponentBase2, int& K)
    {
        static constexpr MantissaAndExponent powerOf10List[] =
        {
            { 0xfa8fd5a0081c0288ull, -1220 }, { 0xbaaee17fa23ebf76ull, -1193 }, { 0x8b16fb203055ac76ull, -1166 }, { 0xcf42894a5dce35eaull, -1140 }, { 0x9a6bb0aa55653b2dull, -1113 },
            { 0xe61acf033d1a45dfull, -1087 }, { 0xab70fe17c79ac6caull, -1060 }, { 0xff77b1fcbebcdc4full, -1034 }, { 0xbe5691ef416bd60cull, -1007 }, { 0x8dd01fad907ffc3cull, -980 },
            { 0xd3515c2831559a83ull, -954 }, { 0x9d71ac8fada6c9b5ull, -927 }, { 0xea9c227723ee8bcbull, -901 }, { 0xaecc49914078536dull, -874 }, { 0x823c12795db6ce57ull, -847 },
            { 0xc21094364dfb5637ull, -821 }, { 0x9096ea6f3848984full, -794 }, { 0xd77485cb25823ac7ull, -768 }, { 0xa086cfcd97bf97f4ull, -741 }, { 0xef340a98172aace5ull, -715 },
            { 0xb23867fb2a35b28eull, -688 }, { 0x84c8d4dfd2c63f3bull, -661 }, { 0xc5dd44271ad3cdbaull, -635 }, { 0x936b9fcebb25c996ull, -608 }, { 0xdbac6c247d62a584ull, -582 },
            { 0xa3ab66580d5fdaf6ull, -555 }, { 0xf3e2f893dec3f126ull, -529 }, { 0xb5b5ada8aaff80b8ull, -502 }, { 0x87625f056c7c4a8bull, -475 }, { 0xc9bcff6034c13053ull, -449 },
            { 0x964e858c91ba2655ull, -422 }, { 0xdff9772470297ebdull, -396 }, { 0xa6dfbd9fb8e5b88full, -369 }, { 0xf8a95fcf88747d94ull, -343 }, { 0xb94470938fa89bcfull, -316 },
            { 0x8a08f0f8bf0f156bull, -289 }, { 0xcdb02555653131b6ull, -263 }, { 0x993fe2c6d07b7facull, -236 }, { 0xe45c10c42a2b3b06ull, -210 }, { 0xaa242499697392d3ull, -183 },
            { 0xfd87b5f28300ca0eull, -157 }, { 0xbce5086492111aebull, -130 }, { 0x8cbccc096f5088ccull, -103 }, { 0xd1b71758e219652cull, -77 }, { 0x9c40000000000000ull, -50 },
            { 0xe8d4a51000000000ull, -24 }, { 0xad78ebc5ac620000ull, 3 }, { 0x813f3978f8940984ull, 30 }, { 0xc097ce7bc90715b3ull, 56 }, { 0x8f7e32ce7bea5c70ull, 83 },
            { 0xd5d238a4abe98068ull, 109 }, { 0x9f4f2726179a2245ull, 136 }, { 0xed63a231d4c4fb27ull, 162 }, { 0xb0de65388cc8ada8ull, 189 }, { 0x83c7088e1aab65dbull, 216 },
            { 0xc45d1df942711d9aull, 242 }, { 0x924d692ca61be758ull, 269 }, { 0xda01ee641a708deaull, 295 }, { 0xa26da3999aef774aull, 322 }, { 0xf209787bb47d6b85ull, 348 },
            { 0xb454e4a179dd1877ull, 375 }, { 0x865b86925b9bc5c2ull, 402 }, { 0xc83553c5c8965d3dull, 428 }, { 0x952ab45cfa97a0b3ull, 455 }, { 0xde469fbd99a05fe3ull, 481 },
            { 0xa59bc234db398c25ull, 508 }, { 0xf6c69a72a3989f5cull, 534 }, { 0xb7dcbf5354e9beceull, 561 }, { 0x88fcf317f22241e2ull, 588 }, { 0xcc20ce9bd35c78a5ull, 614 },
            { 0x98165af37b2153dfull, 641 }, { 0xe2a0b5dc971f303aull, 667 }, { 0xa8d9d1535ce3b396ull, 694 }, { 0xfb9b7cd9a4a7443cull, 720 }, { 0xbb764c4ca7a44410ull, 747 },
            { 0x8bab8eefb6409c1aull, 774 }, { 0xd01fef10a657842cull, 800 }, { 0x9b10a4e5e9913129ull, 827 }, { 0xe7109bfba19c0c9dull, 853 }, { 0xac2820d9623bf429ull, 880 },
            { 0x80444b5e7aa7cf85ull, 907 }, { 0xbf21e44003acdd2dull, 933 }, { 0x8e679c2f5e44ff8full, 960 }, { 0xd433179d9c8cb841ull, 986 }, { 0x9e19db92b4e31ba9ull, 1013 },
            { 0xeb96bf6ebadf77d9ull, 1039 }, { 0xaf87023b9bf0ee6bull, 1066 }
        };

        auto dk = (exponentBase2 + 61) * -0.30102999566398114;   // the constant is log10 (2)
        auto ik = static_cast<int> (dk);
        auto index = ((ik + (dk > ik ? 348 : 347)) >> 3) + 1;
        K = 348 - (index << 3);
        return powerOf10List[index];
    }
};
inline std::string floatToString (float value) { return FloatToStringBuffer<float> (value, -1, false).toString(); }
inline std::string floatToString (double value) { return FloatToStringBuffer<double> (value, -1, false).toString(); }
inline std::string floatToString (float value, int maxDecimals, bool omitPointIfPossible) { return FloatToStringBuffer<float> (value, maxDecimals, omitPointIfPossible).toString(); }
inline std::string floatToString (double value, int maxDecimals, bool omitPointIfPossible) { return FloatToStringBuffer<double> (value, maxDecimals, omitPointIfPossible).toString(); }
} // namespace choc::text
#endif

View file

@ -0,0 +1,561 @@
//
// ██████ ██  ██  ██████  ██████
// ██      ██  ██ ██    ██ ██       ** Classy Header-Only Classes **
// ██  ███████ ██  ██ ██
// ██  ██   ██ ██  ██ ██ https://github.com/Tracktion/choc
//  ██████ ██  ██  ██████   ██████
//
// CHOC is (C)2022 Tracktion Corporation, and is offered under the terms of the ISC license:
//
// Permission to use, copy, modify, and/or distribute this software for any purpose with or
// without fee is hereby granted, provided that the above copyright notice and this permission
// notice appear in all copies. THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
// WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
// AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR
// CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
// WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
// CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
#ifndef CHOC_JSON_HEADER_INCLUDED
#define CHOC_JSON_HEADER_INCLUDED
#include <limits>
#include <sstream>
#include <string_view>
#include <stdexcept>
#include "choc_UTF8.h"
#include "choc_FloatToString.h"
#include "../containers/choc_Value.h"
#undef max // It's never a smart idea to include any C headers before your C++ ones, as it
#undef min // risks polluting your namespace with all kinds of dangerous macros like these ones.
namespace choc::json
{
//==============================================================================
/// A parse exception, thrown by choc::json::parse() as needed.
struct ParseError : public std::runtime_error
{
ParseError (const char* message, choc::text::LineAndColumn lc)
: std::runtime_error (message), lineAndColumn (lc) {}
choc::text::LineAndColumn lineAndColumn;
};
/// Parses some JSON text into a choc::value::Value object.
/// Any errors will result in a ParseError exception being thrown.
[[nodiscard]] value::Value parse (text::UTF8Pointer);
/// Parses some JSON text into a choc::value::Value object.
/// The top-level item must be an object or an array; text containing only
/// whitespace produces a default-constructed Value.
/// Any errors will result in a ParseError exception being thrown.
[[nodiscard]] value::Value parse (std::string_view);
/// Attempts to parse a bare JSON value such as a number, string, object etc.
/// Any errors will result in a ParseError exception being thrown.
[[nodiscard]] value::Value parseValue (std::string_view);
/// A helper function to create a JSON-friendly Value object with a set of properties.
/// The argument list must contain pairs of names and values, e.g.
///
/// auto myObject = choc::json::create ("property1", 1234,
/// "property2", "hello",
/// "property3", 100.0f);
///
/// Essentially, this is a shorthand for calling choc::value::createObject()
/// and passing it an empty type name.
template <typename... Properties>
[[nodiscard]] value::Value create (Properties&&... propertyNamesAndValues);
//==============================================================================
/// Formats a value as a JSON string.
/// If useLineBreaks is true, it'll be formatted as multi-line JSON (indented by
/// two spaces per level), if false it'll just be returned as a single line.
[[nodiscard]] std::string toString (const value::ValueView&, bool useLineBreaks = false);
/// Writes a version of a string to an output stream, with any illegal or non-ascii
/// characters written as their equivalent JSON escape sequences.
/// Characters above 0xffff are written as a pair of \u surrogate escapes.
template <typename OutputStreamType>
void writeWithEscapeCharacters (OutputStreamType&, text::UTF8Pointer sourceString);
/// Returns a version of a string with illegal or non-ascii characters converted
/// into the equivalent JSON escape sequences.
[[nodiscard]] std::string addEscapeCharacters (text::UTF8Pointer sourceString);
/// Returns a version of a string with illegal or non-ascii characters converted
/// into the equivalent JSON escape sequences.
[[nodiscard]] std::string addEscapeCharacters (std::string_view sourceString);
/// Returns a version of a string with illegal or non-ascii characters converted
/// into the equivalent JSON escape sequences, and with a double-quote character
/// added at the start and end.
[[nodiscard]] std::string getEscapedQuotedString (std::string_view sourceString);
/// Converts a double to a JSON-format string representation.
/// Non-finite values are returned as the quoted strings "NaN", "Infinity" or "-Infinity".
std::string doubleToString (double value);
//==============================================================================
// _ _ _ _
// __| | ___ | |_ __ _ (_)| | ___
// / _` | / _ \| __| / _` || || |/ __|
// | (_| || __/| |_ | (_| || || |\__ \ _ _ _
// \__,_| \___| \__| \__,_||_||_||___/(_)(_)(_)
//
// Code beyond this point is implementation detail...
//
//==============================================================================
/// Writes the null-terminated UTF8 string to the stream in a form that is legal inside
/// a JSON string literal:
///  - printable ASCII (32 to 126) is written unchanged
///  - quote, backslash, \n, \r, \t, \b and \f use their two-character escapes
///  - everything else is written as \uXXXX, using a surrogate pair for
///    codepoints of 0x10000 and above
/// Note that the surrounding quotes are not written.
template <typename OutputStreamType>
void writeWithEscapeCharacters (OutputStreamType& out, text::UTF8Pointer source)
{
    // Writes the low 16 bits of a value as a "\uXXXX" escape with lower-case hex digits
    auto writeUnicode = [] (OutputStreamType& o, auto digit)
    {
        auto hexDigit = [] (auto value) -> char { return "0123456789abcdef"[value & 15]; };

        o << "\\u" << hexDigit (digit >> 12) << hexDigit (digit >> 8) << hexDigit (digit >> 4) << hexDigit (digit);
    };

    for (;;)
    {
        auto c = *source;

        switch (c)
        {
            case 0:     return;

            case '\"':  out << "\\\""; break;
            case '\\':  out << "\\\\"; break;
            case '\n':  out << "\\n";  break;
            case '\r':  out << "\\r";  break;
            case '\t':  out << "\\t";  break;
            case '\b':  out << "\\b";  break;
            case '\f':  out << "\\f";  break;

            // NB: there is deliberately no case for '\a' here. "\a" is not one of the
            // escapes defined by JSON, so a bell character goes through the default
            // path and is written as \u0007, which every JSON parser accepts.

            default:
                if (c > 31 && c < 127)
                {
                    out << static_cast<char> (c);
                    break;
                }

                if (c >= 0x10000)
                {
                    auto pair = choc::text::splitCodePointIntoSurrogatePair (c);
                    writeUnicode (out, pair.high);
                    writeUnicode (out, pair.low);
                    break;
                }

                writeUnicode (out, c);
                break;
        }

        ++source;
    }
}
/// Runs writeWithEscapeCharacters() into an in-memory stream and returns the text it produced.
inline std::string addEscapeCharacters (text::UTF8Pointer source)
{
    std::ostringstream escaped (std::ios::binary);
    writeWithEscapeCharacters (escaped, source);

    return escaped.str();
}
/// As the UTF8Pointer overload, but takes a string_view.
inline std::string addEscapeCharacters (std::string_view source)
{
    // A string_view need not be null-terminated, so take a copy that is.
    const std::string nullTerminatedCopy (source);

    return addEscapeCharacters (text::UTF8Pointer (nullTerminatedCopy.c_str()));
}
/// Returns the escaped form of the string, wrapped in a pair of double-quotes.
inline std::string getEscapedQuotedString (std::string_view s)
{
    // A string_view need not be null-terminated, so take a copy that is.
    const std::string nullTerminatedCopy (s);

    std::ostringstream quoted (std::ios::binary);
    quoted << '"';
    writeWithEscapeCharacters (quoted, text::UTF8Pointer (nullTerminatedCopy.c_str()));
    quoted << '"';

    return quoted.str();
}
/// Finite numbers are formatted by choc::text::floatToString() with the ".0" suffix
/// omitted where possible; NaN and the infinities become quoted strings.
inline std::string doubleToString (double value)
{
    if (! std::isfinite (value))
    {
        if (std::isnan (value))
            return "\"NaN\"";

        if (value >= 0)
            return "\"Infinity\"";

        return "\"-Infinity\"";
    }

    return choc::text::floatToString (value, -1, true);
}
//==============================================================================
/// Streams a ValueView as JSON text.
/// An indentSize of zero gives single-line output; otherwise each nesting level
/// of a non-empty array or object goes on new lines, indented by indentSize spaces.
template <typename Stream>
struct Writer
{
    Stream& out;
    uint32_t indentSize;
    uint32_t currentIndent = 0;

    static constexpr const char newLine = '\n';

    std::string getIndent() const
    {
        return std::string (currentIndent, ' ');
    }

    /// Increases the indent level, then begins a new, indented line.
    void startIndent()
    {
        currentIndent += indentSize;
        out << newLine << getIndent();
    }

    /// Decreases the indent level, then begins a new, indented line.
    void endIndent()
    {
        currentIndent -= indentSize;
        out << newLine << getIndent();
    }

    /// Writes any value. Nothing is written for a type that isn't handled below.
    void dump (const value::ValueView& v)
    {
        if (v.isVoid())                         out << "null";
        else if (v.isString())                  out << getEscapedQuotedString (v.getString());
        else if (v.isBool())                    out << (v.getBool() ? "true" : "false");
        else if (v.isFloat())                   out << doubleToString (v.get<double>());
        else if (v.isInt())                     out << v.get<int64_t>();
        else if (v.isObject())                  dumpObject (v);
        else if (v.isArray() || v.isVector())   dumpArrayOrVector (v);
    }

    /// Writes an array or vector as a JSON array.
    void dumpArrayOrVector (const value::ValueView& v)
    {
        out << '[';
        auto numElements = v.size();
        const bool multiLine = indentSize != 0 && numElements != 0;

        if (multiLine)
            startIndent();

        for (uint32_t i = 0; i < numElements; ++i)
        {
            if (i != 0)
            {
                if (multiLine)
                    out << "," << newLine << getIndent();
                else
                    out << ", ";
            }

            dump (v[i]);
        }

        if (multiLine)
            endIndent();

        out << ']';
    }

    /// Writes an object as a JSON object, with its members in index order.
    void dumpObject (const value::ValueView& object)
    {
        out << '{';
        auto numMembers = object.size();
        const bool multiLine = indentSize != 0 && numMembers != 0;

        if (multiLine)
            startIndent();

        for (uint32_t i = 0; i < numMembers; ++i)
        {
            if (i != 0)
            {
                if (multiLine)
                    out << "," << newLine << getIndent();
                else
                    out << ", ";
            }

            auto member = object.getObjectMemberAt (i);
            out << getEscapedQuotedString (member.name) << ": ";
            dump (member.value);
        }

        if (multiLine)
            endIndent();

        out << '}';
    }
};
/// Writes a value to the stream as JSON, using a two-space indent when
/// useMultipleLines is true, or a single line otherwise.
template <typename Stream>
void writeAsJSON (Stream& output, const value::ValueView& value, bool useMultipleLines)
{
    const uint32_t indentSize = useMultipleLines ? 2u : 0u;

    Writer<Stream> writer { output, indentSize };
    writer.dump (value);
}
/// Runs writeAsJSON() into an in-memory stream and returns the text it produced.
inline std::string toString (const value::ValueView& v, bool useLineBreaks)
{
    std::ostringstream json (std::ios::binary);
    writeAsJSON (json, v, useLineBreaks);

    return json.str();
}
//==============================================================================
[[noreturn]] static inline void throwParseError (const char* error, text::UTF8Pointer source, text::UTF8Pointer errorPos)
{
throw ParseError (error, text::findLineAndColumn (source, errorPos));
}
/// Parses null-terminated JSON text.
/// If parseBareValue is true, the text may be any JSON value (number, string, etc);
/// otherwise it must be an object or an array, and text containing only whitespace
/// gives a default-constructed Value.
/// Throws a ParseError if the text is malformed.
inline value::Value parse (text::UTF8Pointer text, bool parseBareValue)
{
    struct Parser
    {
        text::UTF8Pointer source, current;

        bool isEOF() const            { return current.empty(); }
        uint32_t peek() const         { return *current; }
        uint32_t pop()                { return current.popFirstChar(); }
        bool popIf (char c)           { return current.skipIfStartsWith (c); }
        bool popIf (const char* c)    { return current.skipIfStartsWith (c); }

        static bool isWhitespace (uint32_t c)  { return c == ' ' || (c <= 13 && c >= 9); }

        // Works on a copy, so that `current` is only advanced over characters which
        // really are whitespace, and never moves past the end of the text.
        void skipWhitespace()         { auto p = current; while (isWhitespace (p.popFirstChar())) current = p; }

        [[noreturn]] void throwError (const char* error, text::UTF8Pointer errorPos)   { throwParseError (error, source, errorPos); }
        [[noreturn]] void throwError (const char* error)                               { throwError (error, current); }

        value::Value parseTopLevel()
        {
            skipWhitespace();

            if (popIf ('[')) return parseArray();
            if (popIf ('{')) return parseObject();
            if (! isEOF()) throwError ("Expected an object or array");
            return {};
        }

        // Called after the opening '[' has been consumed
        value::Value parseArray()
        {
            auto result = value::createEmptyArray();
            auto arrayStart = current;

            skipWhitespace();
            if (popIf (']')) return result;

            for (;;)
            {
                skipWhitespace();
                if (isEOF()) throwError ("Unexpected EOF in array declaration", arrayStart);

                result.addArrayElement (parseValue());
                skipWhitespace();

                if (popIf (',')) continue;
                if (popIf (']')) break;
                throwError ("Expected ',' or ']'");
            }

            return result;
        }

        // Called after the opening '{' has been consumed
        value::Value parseObject()
        {
            auto result = value::createObject ({});
            auto objectStart = current;

            skipWhitespace();
            if (popIf ('}')) return result;

            for (;;)
            {
                skipWhitespace();
                if (isEOF()) throwError ("Unexpected EOF in object declaration", objectStart);

                if (! popIf ('"')) throwError ("Expected a name");

                auto errorPos = current;
                auto name = parseString();

                if (name.empty())
                    throwError ("Property names cannot be empty", errorPos);

                skipWhitespace();
                if (! popIf (':')) throwError ("Expected ':'");

                result.addMember (std::move (name), parseValue());
                skipWhitespace();

                if (popIf (',')) continue;
                if (popIf ('}')) break;
                throwError ("Expected ',' or '}'");
            }

            return result;
        }

        value::Value parseValue()
        {
            skipWhitespace();
            auto startPos = current;

            switch (pop())
            {
                case '[':    return parseArray();
                case '{':    return parseObject();
                case '"':    return value::createString (parseString());
                case '-':    skipWhitespace(); return parseNumber (true);
                case 'n':    if (popIf ("ull")) return {}; break;
                case 't':    if (popIf ("rue")) return value::createBool (true); break;
                case 'f':    if (popIf ("alse")) return value::createBool (false); break;

                case '0': case '1': case '2': case '3': case '4':
                case '5': case '6': case '7': case '8': case '9':
                    current = startPos;   // un-pop the digit so parseNumber sees the whole number
                    return parseNumber (false);

                default: break;
            }

            throwError ("Syntax error", startPos);
        }

        // Parses an unsigned number: any leading '-' has already been consumed by the
        // caller, which passes negate = true.
        value::Value parseNumber (bool negate)
        {
            auto startPos = current;
            bool hadDot = false, hadExponent = false;

            for (;;)
            {
                auto lastPos = current;
                auto c = pop();

                if (c >= '0' && c <= '9')  continue;
                if (c == '.' && ! hadDot)  { hadDot = true; continue; }

                if (! hadExponent && (c == 'e' || c == 'E'))
                {
                    hadDot = true;
                    hadExponent = true;

                    // JSON allows the exponent to have either sign, e.g. "1e-5" or "1e+5"
                    if (! popIf ('-'))
                        popIf ('+');

                    continue;
                }

                // A terminator straight away means there were no digits at all (e.g. a
                // lone '-'), which must be an error rather than being read as zero.
                const bool hasContent = lastPos.data() != startPos.data();

                if (hasContent && (isWhitespace (c) || c == ',' || c == '}' || c == ']' || c == 0))
                {
                    current = lastPos;   // leave the terminator for the caller
                    char* endOfParsedNumber = nullptr;

                    if (! (hadDot || hadExponent))
                    {
                        auto v = std::strtoll (startPos.data(), &endOfParsedNumber, 10);

                        // strtoll returns the min/max value on overflow, in which case
                        // the number is re-parsed below as a double
                        if (endOfParsedNumber == lastPos.data()
                             && v != std::numeric_limits<long long>::max()
                             && v != std::numeric_limits<long long>::min())
                            return value::createInt64 (static_cast<int64_t> (negate ? -v : v));
                    }

                    auto v = std::strtod (startPos.data(), &endOfParsedNumber);

                    if (endOfParsedNumber == lastPos.data())
                        return value::createFloat64 (negate ? -v : v);
                }

                throwError ("Syntax error in number", lastPos);
            }
        }

        // Called after the opening '"' has been consumed; returns the un-escaped content
        std::string parseString()
        {
            std::ostringstream s (std::ios::binary);

            for (;;)
            {
                auto charPos = current;
                auto c = pop();

                if (c == '"')
                    break;

                // A literal zero can only be the end of the text (an embedded zero has
                // to be written as \u0000), so the string was never closed. Without
                // this check the loop would carry on popping beyond the terminator.
                if (c == 0)
                    throwError ("Unexpected EOF in string constant", charPos);

                if (c == '\\')
                {
                    auto errorPos = current;
                    c = pop();

                    switch (c)
                    {
                        case 'a':  c = '\a'; break;
                        case 'b':  c = '\b'; break;
                        case 'f':  c = '\f'; break;
                        case 'n':  c = '\n'; break;
                        case 'r':  c = '\r'; break;
                        case 't':  c = '\t'; break;
                        case 'u':  c = parseUnicodeCharacterNumber (false); break;
                        case 0:    throwError ("Unexpected EOF in string constant", errorPos);
                        default:   break;   // any other escaped character stands for itself
                    }
                }

                char utf8Bytes[8];
                auto numBytes = text::convertUnicodeCodepointToUTF8 (utf8Bytes, c);

                for (uint32_t i = 0; i < numBytes; ++i)
                    s << utf8Bytes[i];
            }

            return s.str();
        }

        // Reads the four hex digits following a "\u". If they form a high surrogate, a
        // second "\uXXXX" low surrogate must follow, and the combined codepoint is returned.
        uint32_t parseUnicodeCharacterNumber (bool isLowSurrogate)
        {
            uint32_t result = 0;

            for (int i = 4; --i >= 0;)
            {
                auto errorPos = current;
                auto digit = pop();

                if (digit >= '0' && digit <= '9')         digit -= '0';
                else if (digit >= 'a' && digit <= 'f')    digit = 10 + (digit - 'a');
                else if (digit >= 'A' && digit <= 'F')    digit = 10 + (digit - 'A');
                else throwError ("Syntax error in unicode character", errorPos);

                result = (result << 4) + digit;
            }

            if (isLowSurrogate && ! text::isUnicodeLowSurrogate (result))
                throwError ("Expected a unicode low surrogate codepoint");

            if (text::isUnicodeHighSurrogate (result))
            {
                if (! isLowSurrogate && popIf ("\\u"))
                    return text::createUnicodeFromHighAndLowSurrogates ({ result, parseUnicodeCharacterNumber (true) });

                throwError ("Expected a unicode low surrogate codepoint");
            }

            return result;
        }
    };

    Parser p { text, text };
    return parseBareValue ? p.parseValue()
                          : p.parseTopLevel();
}
/// Validates that the given bytes are legal UTF8 and then parses them as JSON.
/// A null text pointer is treated as an empty string.
/// Throws a ParseError if the data isn't valid UTF8 or isn't valid JSON.
inline value::Value parse (const char* text, size_t numbytes, bool parseBareValue)
{
    if (text == nullptr)
    {
        text = "";
        numbytes = 0;
    }

    if (auto error = text::findInvalidUTF8Data (text, numbytes))
        throwParseError ("Illegal UTF8 data", text::UTF8Pointer (text), text::UTF8Pointer (error));

    // The parser reads until it finds a null terminator, but the callers pass the
    // data() of a string_view, which isn't guaranteed to be followed by one. Parsing a
    // null-terminated copy of exactly numbytes bytes stops it reading past the end.
    // (A ParseError stores its line/column by value, so it doesn't refer to the copy.)
    const std::string nullTerminatedCopy (text, numbytes);

    return parse (text::UTF8Pointer (nullTerminatedCopy.c_str()), parseBareValue);
}
// Parses text whose top-level item must be an object or array.
inline value::Value parse (std::string_view text)         { return parse (text.data(), text.length(), false); }

// Parses text which may be any bare JSON value.
inline value::Value parseValue (std::string_view text)    { return parse (text.data(), text.length(), true); }

// The single-argument UTF8Pointer overload is declared in the public section at the
// top of this header but had no definition, so calling it would fail to link.
// NOTE(review): assumes no other header defines this overload - confirm.
inline value::Value parse (text::UTF8Pointer text)        { return parse (text, false); }
/// Creates an object value from a list of arguments, which must be a sequence of
/// name, value pairs. The arguments are forwarded to choc::value::createObject().
template <typename... Properties>
value::Value create (Properties&&... properties)
{
    constexpr bool hasEvenNumberOfArguments = (sizeof...(Properties) % 2) == 0;
    static_assert (hasEvenNumberOfArguments, "The arguments must be a sequence of name, value pairs");

    return choc::value::createObject ({}, std::forward<Properties> (properties)...);
}
} // namespace choc::json
#endif

View file

@ -0,0 +1,600 @@
//
// ██████ ██  ██  ██████  ██████
// ██      ██  ██ ██    ██ ██       ** Classy Header-Only Classes **
// ██  ███████ ██  ██ ██
// ██  ██   ██ ██  ██ ██ https://github.com/Tracktion/choc
//  ██████ ██  ██  ██████   ██████
//
// CHOC is (C)2022 Tracktion Corporation, and is offered under the terms of the ISC license:
//
// Permission to use, copy, modify, and/or distribute this software for any purpose with or
// without fee is hereby granted, provided that the above copyright notice and this permission
// notice appear in all copies. THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
// WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
// AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR
// CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
// WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
// CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
#ifndef CHOC_STRING_UTILS_HEADER_INCLUDED
#define CHOC_STRING_UTILS_HEADER_INCLUDED
#include <cctype>
#include <string>
#include <vector>
#include <cmath>
#include <chrono>
#include <memory>
#include <algorithm>
#include <cwctype>
#include "../platform/choc_Assert.h"
namespace choc::text
{
//==============================================================================
/// Returns true for a space, or for any of the ASCII characters 9 to 13
/// (tab, line-feed, vertical-tab, form-feed, carriage-return).
inline bool isWhitespace (char c)
{
    if (c == ' ')
        return true;

    return c >= 9 && c <= 13;
}
/// Returns true if the character is one of the ASCII digits '0' to '9'.
inline bool isDigit (char c)
{
    return c >= '0' && c <= '9';
}
/// Replaces all occurrences of one or more substrings.
/// The arguments must be a sequence of pairs of strings, where the first of each pair is the string to
/// look for, followed by its replacement.
template <typename StringType, typename... OtherReplacements>
[[nodiscard]] std::string replace (StringType textToSearch,
std::string_view firstSubstringToReplace, std::string_view firstReplacement,
OtherReplacements&&... otherPairsOfStringsToReplace);
/// Returns a string with any whitespace trimmed from its start and end.
[[nodiscard]] std::string trim (std::string textToTrim);
/// Returns a string with any whitespace trimmed from its start and end.
[[nodiscard]] std::string_view trim (std::string_view textToTrim);
/// Returns a string with any whitespace trimmed from its start and end.
[[nodiscard]] std::string_view trim (const char* textToTrim);
/// Returns a string with any whitespace trimmed from its start.
[[nodiscard]] std::string trimStart (std::string textToTrim);
/// Returns a string with any whitespace trimmed from its start.
[[nodiscard]] std::string_view trimStart (std::string_view textToTrim);
/// Returns a string with any whitespace trimmed from its start.
[[nodiscard]] std::string_view trimStart (const char* textToTrim);
/// Returns a string with any whitespace trimmed from its end.
[[nodiscard]] std::string trimEnd (std::string textToTrim);
/// Returns a string with any whitespace trimmed from its end.
[[nodiscard]] std::string_view trimEnd (std::string_view textToTrim);
/// Returns a string with any whitespace trimmed from its end.
[[nodiscard]] std::string_view trimEnd (const char* textToTrim);
/// If the string begins with one or more instances of the given character, this
/// skips past them, returning the remainder of the string.
[[nodiscard]] std::string_view trimCharacterAtStart (std::string_view textToTrim, char characterToSkip);
/// If the given character is at the start and end of the string, it trims it away.
[[nodiscard]] std::string removeOuterCharacter (std::string text, char outerChar);
[[nodiscard]] inline std::string removeDoubleQuotes (std::string text) { return removeOuterCharacter (std::move (text), '"'); }
[[nodiscard]] inline std::string removeSingleQuotes (std::string text) { return removeOuterCharacter (std::move (text), '\''); }
[[nodiscard]] inline std::string addDoubleQuotes (std::string text) { return "\"" + std::move (text) + "\""; }
[[nodiscard]] inline std::string addSingleQuotes (std::string text) { return "'" + std::move (text) + "'"; }
[[nodiscard]] std::string toLowerCase (std::string);
[[nodiscard]] std::string toUpperCase (std::string);
template <typename IsDelimiterChar>
[[nodiscard]] std::vector<std::string> splitString (std::string_view textToSplit,
IsDelimiterChar&& isDelimiterChar,
bool includeDelimitersInResult);
template <typename CharStartsDelimiter, typename CharIsInDelimiterBody>
[[nodiscard]] std::vector<std::string> splitString (std::string_view textToSplit,
CharStartsDelimiter&& isDelimiterStart,
CharIsInDelimiterBody&& isDelimiterBody,
bool includeDelimitersInResult);
[[nodiscard]] std::vector<std::string> splitString (std::string_view textToSplit,
char delimiterCharacter,
bool includeDelimitersInResult);
[[nodiscard]] std::vector<std::string> splitAtWhitespace (std::string_view text,
bool keepDelimiters = false);
/// Splits a string at newline characters, returning an array of strings.
[[nodiscard]] std::vector<std::string> splitIntoLines (std::string_view text,
bool includeNewLinesInResult);
/// Joins some kind of array of strings into a single string, adding the given separator
/// between them (but not adding it at the start or end)
template <typename ArrayOfStrings>
[[nodiscard]] std::string joinStrings (const ArrayOfStrings& strings,
std::string_view separator);
/// Returns true if this text contains the given sub-string.
bool contains (std::string_view text, std::string_view possibleSubstring);
/// Returns true if this text starts with the given character.
bool startsWith (std::string_view text, char possibleStart);
/// Returns true if this text starts with the given sub-string.
bool startsWith (std::string_view text, std::string_view possibleStart);
/// Returns true if this text ends with the given character.
bool endsWith (std::string_view text, char possibleEnd);
/// Returns true if this text ends with the given sub-string.
bool endsWith (std::string_view text, std::string_view possibleEnd);
/// Calculates the Levenshtein distance between two strings.
template <typename StringType>
size_t getLevenshteinDistance (const StringType& string1,
const StringType& string2);
/// Converts a hex character to a number 0-15, or -1 if it's not a valid hex digit.
int hexDigitToInt (uint32_t unicodeChar);
/// Returns a hex string for the given value.
/// If the minimum number of digits is non-zero, it will be zero-padded to fill this length.
template <typename IntegerType>
std::string createHexString (IntegerType value, int minNumDigits = 0);
/// Returns a truncated, easy-to-read version of a time as hours, seconds or milliseconds,
/// depending on its magnitude. The use-cases include things like logging or console app output.
std::string getDurationDescription (std::chrono::duration<double, std::micro>);
/// Returns an easy-to-read description of a size in bytes. Depending on the magnitude,
/// it might choose different units such as GB, MB, KB or just bytes.
std::string getByteSizeDescription (uint64_t sizeInBytes);
/// Encodes a string as a legal URI, using percent-encoding (aka URL encoding)
std::string percentEncodeURI (std::string_view text);
//==============================================================================
// _ _ _ _
// __| | ___ | |_ __ _ (_)| | ___
// / _` | / _ \| __| / _` || || |/ __|
// | (_| || __/| |_ | (_| || || |\__ \ _ _ _
// \__,_| \___| \__| \__,_||_||_||___/(_)(_)(_)
//
// Code beyond this point is implementation detail...
//
//==============================================================================
/// Converts a hex character ('0'-'9', 'a'-'f' or 'A'-'F') to its value 0-15.
/// Any other codepoint returns -1.
inline int hexDigitToInt (uint32_t c)
{
    constexpr uint32_t zero = '0', lowerA = 'a', upperA = 'A';

    if (c >= zero && c <= zero + 9u)
        return static_cast<int> (c - zero);

    if (c >= lowerA && c <= lowerA + 5u)
        return static_cast<int> (c - lowerA) + 10;

    if (c >= upperA && c <= upperA + 5u)
        return static_cast<int> (c - upperA) + 10;

    return -1;
}
/// Returns a lower-case hex string for the given integer (signed values are
/// reinterpreted as their unsigned equivalent).
/// @param minNumDigits  if greater than the number of digits needed, the result is
///                      zero-padded on the left to this length. Must not exceed 32.
template <typename IntegerType>
std::string createHexString (IntegerType v, int minNumDigits)
{
    static_assert (std::is_integral<IntegerType>::value, "Need to pass integers into this method");
    using UnsignedType = typename std::make_unsigned<IntegerType>::type;

    auto remaining = static_cast<UnsignedType> (v);
    CHOC_ASSERT (minNumDigits <= 32);

    // Digits are written backwards from the end of a fixed buffer, which is big
    // enough for 32 padding digits plus the terminator.
    char buffer[40];
    const auto bufferEnd = buffer + sizeof (buffer) - 1;
    auto writePos = bufferEnd;
    *writePos = 0;

    int numDigitsWritten = 0;

    do
    {
        *--writePos = "0123456789abcdef"[static_cast<uint32_t> (remaining) & 15u];
        remaining = static_cast<UnsignedType> (remaining >> 4);
        ++numDigitsWritten;
    }
    while (remaining != 0 || numDigitsWritten < minNumDigits);

    return std::string (writePos, bufferEnd);
}
/// Replaces all occurrences of one or more substrings.
/// The arguments after the text must be a sequence of pairs of strings, where the first of
/// each pair is the string to look for, followed by its replacement. The pairs are applied
/// one after another, each to the result of the previous one.
/// Text that has just been inserted by a replacement is not re-scanned for the same pair.
/// A pair whose search string is empty is ignored, because an empty string matches at every
/// position and could never be "replaced" in a terminating way.
template <typename StringType, typename... OtherReplacements>
std::string replace (StringType textToSearch, std::string_view firstToReplace, std::string_view firstReplacement,
                     OtherReplacements&&... otherPairsOfStringsToReplace)
{
    static_assert ((sizeof... (otherPairsOfStringsToReplace) & 1u) == 0,
                   "This function expects a list of pairs of strings as its arguments");

    if constexpr (std::is_same<const StringType, const std::string_view>::value || std::is_same<const StringType, const char* const>::value)
    {
        // Non-owning inputs are copied into a std::string which can then be edited in place.
        return replace (std::string (textToSearch), firstToReplace, firstReplacement,
                        std::forward<OtherReplacements> (otherPairsOfStringsToReplace)...);
    }
    else if constexpr (sizeof... (otherPairsOfStringsToReplace) == 0)
    {
        // find() with an empty needle succeeds at every index, so without this
        // check the loop below would never reach npos.
        if (firstToReplace.empty())
            return textToSearch;

        size_t pos = 0;

        for (;;)
        {
            pos = textToSearch.find (firstToReplace, pos);

            if (pos == std::string::npos)
                return textToSearch;

            textToSearch.replace (pos, firstToReplace.length(), firstReplacement);
            pos += firstReplacement.length(); // skip the inserted text so it isn't matched again
        }
    }
    else
    {
        // Handle the first pair, then recurse to deal with the remaining pairs.
        return replace (replace (std::move (textToSearch), firstToReplace, firstReplacement),
                        std::forward<OtherReplacements> (otherPairsOfStringsToReplace)...);
    }
}
/// Trims whitespace from both ends of an owned string.
inline std::string trim (std::string text)
{
    auto withoutTrailingSpace = trimEnd (std::move (text));
    return trimStart (std::move (withoutTrailingSpace));
}

/// Trims whitespace from both ends of a string view.
inline std::string_view trim (std::string_view text)
{
    const auto withoutTrailingSpace = trimEnd (text);
    return trimStart (withoutTrailingSpace);
}

/// The const char* overloads simply forward to the string_view versions.
inline std::string_view trim (const char* text)
{
    return trim (std::string_view (text));
}

inline std::string_view trimStart (const char* text)
{
    return trimStart (std::string_view (text));
}

inline std::string_view trimEnd (const char* text)
{
    return trimEnd (std::string_view (text));
}
/// Returns a copy of the string with any leading whitespace removed.
/// If the string is empty or entirely whitespace, the result is empty.
inline std::string trimStart (std::string text)
{
    const auto firstNonSpace = std::find_if (text.begin(), text.end(),
                                             [] (char c) { return ! isWhitespace (c); });

    // Nothing to remove (this also covers an empty string)
    if (firstNonSpace == text.begin())
        return text;

    if (firstNonSpace == text.end())
        return {};

    return { firstNonSpace, text.end() };
}
/// Returns the sub-section of the view that starts at its first non-whitespace character.
/// If there is no such character, an empty view is returned.
inline std::string_view trimStart (std::string_view text)
{
    for (size_t index = 0; index < text.length(); ++index)
        if (! isWhitespace (text[index]))
            return text.substr (index);

    return {};
}
/// Returns a copy of the string with any trailing whitespace removed.
/// If the string is empty or entirely whitespace, the result is empty.
inline std::string trimEnd (std::string text)
{
    auto lengthToKeep = text.length();

    while (lengthToKeep != 0 && isWhitespace (text[lengthToKeep - 1]))
        --lengthToKeep;

    text.resize (lengthToKeep);
    return text;
}
/// Returns the sub-section of the view that ends at its last non-whitespace character.
/// If there is no such character, an empty view is returned.
inline std::string_view trimEnd (std::string_view text)
{
    auto lengthToKeep = text.length();

    while (lengthToKeep != 0 && isWhitespace (text[lengthToKeep - 1]))
        --lengthToKeep;

    if (lengthToKeep == 0)
        return {};

    return text.substr (0, lengthToKeep);
}
/// Skips past any run of the given character at the start of the text and returns the
/// remainder. If the text consists of nothing but that character, an empty view is returned.
inline std::string_view trimCharacterAtStart (std::string_view textToTrim, char characterToSkip)
{
    const auto firstOtherChar = textToTrim.find_first_not_of (characterToSkip);

    if (firstOtherChar == std::string_view::npos)
        return {};

    return textToTrim.substr (firstOtherChar);
}
/// If the string is at least two characters long and both its first and last characters
/// are the given one, this returns the text between them. Otherwise the string is
/// returned unchanged.
inline std::string removeOuterCharacter (std::string t, char outerChar)
{
    const bool isWrapped = t.size() > 1
                            && t.front() == outerChar
                            && t.back() == outerChar;

    if (! isWrapped)
        return t;

    return t.substr (1, t.size() - 2);
}
/// Returns the string with each char passed through std::tolower().
inline std::string toLowerCase (std::string s)
{
    // The char goes via unsigned char because std::tolower() must not be given negative values.
    for (auto& c : s)
        c = static_cast<char> (std::tolower (static_cast<unsigned char> (c)));

    return s;
}
/// Returns the string with each char passed through std::toupper().
inline std::string toUpperCase (std::string s)
{
    // The char goes via unsigned char because std::toupper() must not be given negative values.
    for (auto& c : s)
        c = static_cast<char> (std::toupper (static_cast<unsigned char> (c)));

    return s;
}
/// Splits a string at delimiters which may be more than one character long.
/// A delimiter begins at a character for which isDelimiterStart returns true, and extends
/// over all the immediately following characters for which isDelimiterBody returns true.
/// If keepDelimiters is true, each delimiter is left on the end of the token preceding it.
/// An empty source produces no tokens; otherwise the text after the last delimiter is always
/// added as a final token, even when it is empty.
template <typename CharStartsDelimiter, typename CharIsInDelimiterBody>
std::vector<std::string> splitString (std::string_view source,
                                      CharStartsDelimiter&& isDelimiterStart,
                                      CharIsInDelimiterBody&& isDelimiterBody,
                                      bool keepDelimiters)
{
    std::vector<std::string> tokens;
    const auto sourceLength = source.length();
    size_t tokenStart = 0, index = 0;

    while (index < sourceLength)
    {
        if (! isDelimiterStart (source[index]))
        {
            ++index;
            continue;
        }

        const auto delimiterStart = index++;

        while (index < sourceLength && isDelimiterBody (source[index]))
            ++index;

        const auto tokenEnd = keepDelimiters ? index : delimiterStart;
        tokens.emplace_back (source.substr (tokenStart, tokenEnd - tokenStart));
        tokenStart = index;
    }

    if (sourceLength != 0)
        tokens.emplace_back (source.substr (tokenStart));

    return tokens;
}
/// Splits a string at every character for which isDelimiterChar returns true.
/// If keepDelimiters is true, each delimiter character is left on the end of the token
/// preceding it. An empty source produces no tokens; otherwise the text after the last
/// delimiter is always added as a final token, even when it is empty.
template <typename IsDelimiterChar>
std::vector<std::string> splitString (std::string_view source, IsDelimiterChar&& isDelimiterChar, bool keepDelimiters)
{
    std::vector<std::string> tokens;
    size_t tokenStart = 0;

    for (size_t index = 0; index < source.length(); ++index)
    {
        if (! isDelimiterChar (source[index]))
            continue;

        const auto tokenEnd = keepDelimiters ? index + 1 : index;
        tokens.emplace_back (source.substr (tokenStart, tokenEnd - tokenStart));
        tokenStart = index + 1;
    }

    if (! source.empty())
        tokens.emplace_back (source.substr (tokenStart));

    return tokens;
}
inline std::vector<std::string> splitString (std::string_view text, char delimiterCharacter, bool keepDelimiters)
{
return splitString (text, [=] (char c) { return c == delimiterCharacter; }, keepDelimiters);
}
/// Splits a string at runs of whitespace: a whitespace character starts a delimiter, and
/// any whitespace characters that follow it are treated as part of the same delimiter.
inline std::vector<std::string> splitAtWhitespace (std::string_view text, bool keepDelimiters)
{
    const auto isSpace = [] (char c) { return isWhitespace (c); };

    return splitString (text, isSpace, isSpace, keepDelimiters);
}
/// Splits the text at each '\n' character by forwarding to the splitString() overload
/// that takes a single delimiter character.
/// If includeNewLinesInResult is true, it is passed on as that function's flag for
/// keeping the delimiters in the returned strings.
inline std::vector<std::string> splitIntoLines (std::string_view text, bool includeNewLinesInResult)
{
return splitString (text, '\n', includeNewLinesInResult);
}
/// Joins a container of strings into a single string, adding the given separator
/// between them (but not adding it at the start or end).
/// The container only needs to provide empty(), size() and begin()/end(), so as well as
/// random-access containers such as std::vector, things like std::list or std::set of
/// strings can be used. Its elements need a length() method, and must be appendable to a
/// std::string with operator+=.
template <typename ArrayOfStrings>
inline std::string joinStrings (const ArrayOfStrings& strings, std::string_view sep)
{
    if (strings.empty())
        return {};

    // This slightly over-estimates (by one separator), but lets the result be
    // built with a single allocation.
    auto spaceNeeded = sep.length() * strings.size();

    for (auto& s : strings)
        spaceNeeded += s.length();

    std::string result;
    result.reserve (spaceNeeded);

    bool isFirst = true;

    for (auto& s : strings)
    {
        if (! isFirst)
            result += sep;

        result += s;
        isFirst = false;
    }

    return result;
}
/// Returns true if the text t contains the sub-string s.
inline bool contains (std::string_view t, std::string_view s)
{
    const auto position = t.find (s);
    return position != std::string_view::npos;
}
/// Returns true if the text is non-empty and its first character is s.
inline bool startsWith (std::string_view t, char s)
{
    if (t.empty())
        return false;

    return t[0] == s;
}
/// Returns true if the text is non-empty and its last character is s.
inline bool endsWith (std::string_view t, char s)
{
    if (t.empty())
        return false;

    return t[t.length() - 1] == s;
}
/// Returns true if the text t begins with the sub-string s.
/// An empty s matches any text.
inline bool startsWith (std::string_view t, std::string_view s)
{
    if (s.length() > t.length())
        return false;

    return t.compare (0, s.length(), s) == 0;
}
/// Returns true if the text t finishes with the sub-string s.
/// An empty s matches any text.
inline bool endsWith (std::string_view t, std::string_view s)
{
    if (s.length() > t.length())
        return false;

    return t.compare (t.length() - s.length(), s.length(), s) == 0;
}
/// Returns a truncated, easy-to-read version of a time, choosing its units by magnitude:
///   - an hour or more:    "H hours M min"  (rounded to the nearest minute)
///   - a minute or more:   "M min S sec"    (rounded to the nearest second)
///   - otherwise:          a single value in sec, ms or microseconds, with up to 2 decimal places.
/// Negative durations are described as their absolute value with a '-' prefix, and
/// anything that truncates to zero microseconds is reported as "0 sec".
inline std::string getDurationDescription (std::chrono::duration<double, std::micro> d)
{
    const int64_t microseconds = std::chrono::duration_cast<std::chrono::microseconds> (d).count();

    if (microseconds < 0)    return "-" + getDurationDescription (-d);
    if (microseconds == 0)   return "0 sec";

    constexpr int64_t usPerMs   = 1000;
    constexpr int64_t usPerSec  = 1000 * usPerMs;
    constexpr int64_t usPerMin  = 60 * usPerSec;
    constexpr int64_t usPerHour = 60 * usPerMin;

    std::string result;

    // Appends "<whole>[.<hundredths>]<units>", with a space before it if the result
    // already has some content. Trailing zeros in the fraction are dropped.
    auto append = [&result] (int64_t whole, int64_t hundredths, std::string_view units)
    {
        if (! result.empty())
            result += ' ';

        result += std::to_string (whole);

        if (hundredths != 0)
        {
            result += '.';
            result += static_cast<char> ('0' + (hundredths / 10));

            if (hundredths % 10 != 0)
                result += static_cast<char> ('0' + (hundredths % 10));
        }

        // Long unit names ending in 's' (" hours", " microseconds") lose it when the value is 1
        const bool useSingular = whole == 1 && units.length() > 3 && units.back() == 's';
        result += useSingular ? units.substr (0, units.length() - 1) : units;
    };

    auto divideRounded = [] (int64_t numerator, int64_t divisor) { return (numerator + divisor / 2) / divisor; };

    // For the two-part formats, the total is rounded once to the smaller unit and then split,
    // so that the larger unit can never be rounded up independently of the smaller one
    // (which would describe e.g. 100 minutes as "2 hours 40 min").
    if (microseconds >= usPerHour)
    {
        const auto totalMinutes = divideRounded (microseconds, usPerMin);
        append (totalMinutes / 60, 0, " hours");
        append (totalMinutes % 60, 0, " min");
    }
    else if (microseconds >= usPerMin)
    {
        const auto totalSeconds = divideRounded (microseconds, usPerSec);
        append (totalSeconds / 60, 0, " min");
        append (totalSeconds % 60, 0, " sec");
    }
    else
    {
        int64_t unitSize = 1;
        std::string_view units = " microseconds";

        if (microseconds >= usPerSec)      { unitSize = usPerSec; units = " sec"; }
        else if (microseconds >= usPerMs)  { unitSize = usPerMs;  units = " ms"; }

        const auto hundredthsOfUnit = divideRounded (microseconds * 100, unitSize);
        append (hundredthsOfUnit / 100, hundredthsOfUnit % 100, units);
    }

    return result;
}
/// Calculates the Levenshtein distance between two strings.
/// Only a single row of costs (one entry per character of string2, plus one) is kept in
/// memory. For short strings this row lives on the stack; longer ones use the heap.
template <typename StringType>
size_t getLevenshteinDistance (const StringType& string1, const StringType& string2)
{
    if (string1.empty())  return string2.length();
    if (string2.empty())  return string1.length();

    const auto rowLength = string2.length() + 1;

    auto calculateUsingRow = [&string1, &string2, rowLength] (size_t* row) -> size_t
    {
        for (size_t column = 0; column < rowLength; ++column)
            row[column] = column;

        size_t rowIndex = 0;

        for (auto char1 : string1)
        {
            // 'diagonal' always holds the previous row's value from one column to the left
            auto diagonal = rowIndex++;
            row[0] = rowIndex;
            auto cell = row;

            for (auto char2 : string2)
            {
                const auto above = cell[1];

                cell[1] = (char1 == char2) ? diagonal
                                           : std::min ({ cell[0], above, diagonal }) + 1;
                diagonal = above;
                ++cell;
            }
        }

        return row[rowLength - 1];
    };

    constexpr size_t maxStackSize = 96;

    if (rowLength > maxStackSize)
    {
        std::unique_ptr<size_t[]> heapRow (new size_t[rowLength]);
        return calculateUsingRow (heapRow.get());
    }

    size_t stackRow[maxStackSize];
    return calculateUsingRow (stackRow);
}
/// Returns an easy-to-read description of a size in bytes.
/// Sizes of 1024 bytes or more are given in KB, MB or GB (using powers of 1024), rounded
/// to one decimal place, with the fraction omitted if it is zero. Smaller sizes are given
/// as a whole number of bytes.
inline std::string getByteSizeDescription (uint64_t size)
{
    constexpr uint64_t KB = 0x400, MB = 0x100000, GB = 0x40000000;

    if (size < KB)
    {
        if (size == 1)
            return "1 byte";

        return std::to_string (size) + " bytes";
    }

    auto describe = [] (uint64_t numBytes, uint64_t unitSize, const char* suffix) -> std::string
    {
        // work in tenths of a unit, rounding to the nearest tenth
        const auto tenths = (numBytes * 10 + unitSize / 2) / unitSize;
        auto text = std::to_string (tenths / 10);

        if (const auto fraction = tenths % 10)
        {
            text += '.';
            text += static_cast<char> ('0' + fraction);
        }

        return text + suffix;
    };

    if (size < MB)  return describe (size, KB, " KB");
    if (size < GB)  return describe (size, MB, " MB");

    // The value is clamped so that multiplying it by 10 can't overflow
    static constexpr uint64_t largestScalableValue = std::numeric_limits<uint64_t>::max() / 10;
    return describe (std::min (largestScalableValue, size), GB, " GB");
}
/// Encodes a string using percent-encoding (aka URL encoding).
/// ASCII letters, digits and the characters "_-.~" are copied unchanged; every other byte
/// is written as a '%' followed by two lower-case hex digits.
inline std::string percentEncodeURI (std::string_view text)
{
    static constexpr std::string_view unreservedChars ("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789_-.~");
    static constexpr char hexDigits[] = "0123456789abcdef";

    std::string encoded;
    encoded.reserve (text.length());

    for (const char c : text)
    {
        if (unreservedChars.find (c) != std::string_view::npos)
        {
            encoded += c;
            continue;
        }

        const auto byte = static_cast<uint8_t> (c);
        encoded += '%';
        encoded += hexDigits[byte >> 4];
        encoded += hexDigits[byte & 15u];
    }

    return encoded;
}
} // namespace choc::text
#endif

View file

@ -0,0 +1,655 @@
//
// ██████ ██  ██  ██████  ██████
// ██      ██  ██ ██    ██ ██       ** Classy Header-Only Classes **
// ██  ███████ ██  ██ ██
// ██  ██   ██ ██  ██ ██ https://github.com/Tracktion/choc
//  ██████ ██  ██  ██████   ██████
//
// CHOC is (C)2022 Tracktion Corporation, and is offered under the terms of the ISC license:
//
// Permission to use, copy, modify, and/or distribute this software for any purpose with or
// without fee is hereby granted, provided that the above copyright notice and this permission
// notice appear in all copies. THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
// WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
// AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR
// CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
// WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
// CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
#ifndef CHOC_UTF8_HEADER_INCLUDED
#define CHOC_UTF8_HEADER_INCLUDED
#include <cstddef>
#include "choc_StringUtilities.h"
namespace choc::text
{
/// An integer type to represent a unicode code-point.
using UnicodeChar = uint32_t;
//==============================================================================
/** A non-owning pointer which can iterate over a chunk of null-terminated UTF-8 text
and read it as wide unicode characters.
The class does not validate the text that it reads, so the source data should be checked
(e.g. with findInvalidUTF8Data()) before a UTF8Pointer is used to iterate over it.
*/
struct UTF8Pointer
{
explicit constexpr UTF8Pointer (const char* utf8Text) noexcept : text (utf8Text) {}
/// Creates a null pointer.
UTF8Pointer() = default;
UTF8Pointer (const UTF8Pointer&) = default;
UTF8Pointer& operator= (const UTF8Pointer&) = default;
/// Returns the raw data that this points to.
const char* data() const noexcept { return text; }
/// Returns true if the pointer is not null.
operator bool() const noexcept { return text != nullptr; }
/// Returns true if the pointer is either null or points to a null terminator char.
bool empty() const { return text == nullptr || *text == 0; }
/// Returns the length by iterating all unicode chars and counting them.
/// Note that this is slow, and is not a count of the number of bytes in the string!
size_t length() const;
//==============================================================================
/// Returns the first unicode character in the string.
UnicodeChar operator*() const;
/// Skips past the first unicode character.
/// Moving beyond the end of the string is undefined behaviour and will trigger an assertion.
UTF8Pointer& operator++();
/// Skips past the first unicode character.
/// Moving beyond the end of the string is undefined behaviour and will trigger an assertion.
UTF8Pointer operator++ (int);
/// Moves backwards to the previous unicode character.
/// Moving back beyond the start of the string is undefined behaviour.
/// Note that this returns a copy of the pointer rather than a reference to it.
UTF8Pointer operator--();
/// Skips past the given number of unicode characters.
/// Moving beyond the end of the string is undefined behaviour and will trigger an assertion.
UTF8Pointer& operator+= (size_t numCharsToSkip);
/// Returns a pointer which points to the n-th unicode character in the text
/// Reading beyond the end of the string is undefined behaviour and may trigger an assertion.
UTF8Pointer operator+ (size_t numCharsToSkip) const;
/// Returns a pointer which points to the n-th unicode character in the text.
/// Reading beyond the end of the string is undefined behaviour and may trigger an assertion.
UTF8Pointer operator+ (int numCharsToSkip) const;
/// Skips past the first unicode character and returns it as a code-point.
/// Calling this when the current character is the terminator will leave the pointer in an
/// invalid state.
UnicodeChar popFirstChar();
/// Finds the next occurrence of the given string. If it isn't found, the pointer that is
/// returned points to the null terminator at the end of the text (or is null if this
/// pointer is null).
UTF8Pointer find (const char* textToFind) const;
/// Returns true if the text starts with this string
bool startsWith (const char* textToMatch) const;
/// If the first character matches the given one, this will advance the pointer and return true.
bool skipIfStartsWith (char charToMatch);
/// If the start of the text matches the given string, this will advance this pointer to skip
/// past it, and return true. If not, it will return false without modifying this pointer.
bool skipIfStartsWith (const char* textToMatch);
/// Returns a pointer to the first non-whitespace character in the given string (which may
/// be the terminating null character if it's all whitespace).
[[nodiscard]] UTF8Pointer findEndOfWhitespace() const;
/// Iterates backwards from this position to find the first character that follows
/// a new-line. The pointer provided marks the furthest back that the function should search
[[nodiscard]] UTF8Pointer findStartOfLine (UTF8Pointer startOfValidText) const;
/// Searches forwards for the next new-line character ('\r' or '\n') and returns the position
/// immediately after it, or the position of the null-terminator if no new-line is found.
[[nodiscard]] UTF8Pointer findEndOfLine() const;
//==============================================================================
/// A sentinel type returned by end(): an Iterator compares equal to it when the
/// iterator has reached the null terminator.
struct EndIterator {};
/// A forward iterator over the unicode characters of the text, which allows a
/// UTF8Pointer to be used in a range-based for loop.
struct Iterator
{
explicit constexpr Iterator (const char* t) : text (t) {}
Iterator (const Iterator&) = default;
Iterator& operator= (const Iterator&) = default;
UnicodeChar operator*() const { return *UTF8Pointer (text); }
Iterator& operator++() { UTF8Pointer p (text); ++p; text = p.text; return *this; }
Iterator operator++ (int) { auto old = *this; ++*this; return old; }
// These don't compare positions: they just test whether the iterator is at the terminator.
bool operator== (EndIterator) const { return *text == 0; }
bool operator!= (EndIterator) const { return *text != 0; }
private:
const char* text;
};
Iterator begin() const;
EndIterator end() const;
//==============================================================================
/// This does a pointer comparison, NOT a comparison of the text itself!
bool operator== (UTF8Pointer other) const noexcept { return text == other.text; }
/// This does a pointer comparison, NOT a comparison of the text itself!
bool operator!= (UTF8Pointer other) const noexcept { return text != other.text; }
/// This does a pointer comparison, NOT a comparison of the text itself!
bool operator< (UTF8Pointer other) const noexcept { return text < other.text; }
/// This does a pointer comparison, NOT a comparison of the text itself!
bool operator> (UTF8Pointer other) const noexcept { return text > other.text; }
/// This does a pointer comparison, NOT a comparison of the text itself!
bool operator<= (UTF8Pointer other) const noexcept { return text <= other.text; }
/// This does a pointer comparison, NOT a comparison of the text itself!
bool operator>= (UTF8Pointer other) const noexcept { return text >= other.text; }
/// Returns true if this pointer is null.
bool operator== (decltype(nullptr)) const noexcept { return text == nullptr; }
/// Returns true if this pointer is not null.
bool operator!= (decltype(nullptr)) const noexcept { return text != nullptr; }
private:
const char* text = nullptr;
};
//==============================================================================
/// Checks a given chunk of data to see whether it's valid UTF-8.
/// If no errors are found, this returns nullptr. If an error is found, it returns the address
/// of the offending byte. Note that zero bytes in the data are considered to be valid UTF-8.
const char* findInvalidUTF8Data (const void* dataToCheck, size_t numBytesToRead);
/// Writes the bytes for a unicode character, and returns the number of bytes that were needed.
/// The buffer passed in needs to have at least 4 bytes capacity.
uint32_t convertUnicodeCodepointToUTF8 (char* dest, UnicodeChar codepoint);
/// Appends a unicode codepoint to a std::string as a sequence of UTF-8 bytes.
void appendUTF8 (std::string& target, UnicodeChar codepoint);
/// Checks whether a given codepoint is a high-surrogate
bool isUnicodeHighSurrogate (UnicodeChar codepoint);
/// Checks whether a given codepoint is a low-surrogate
bool isUnicodeLowSurrogate (UnicodeChar codepoint);
/// Holds the two halves of a surrogate pair.
/// Both members default to zero.
struct SurrogatePair
{
UnicodeChar high = 0, low = 0;
};
/// For a codepoint >= 0x10000, this will return a surrogate pair to represent it.
SurrogatePair splitCodePointIntoSurrogatePair (UnicodeChar fullCodePoint);
/// Combines a high and low surrogate into a single codepoint.
UnicodeChar createUnicodeFromHighAndLowSurrogates (SurrogatePair);
/// Checks a UTF-8/CESU-8 string to see if it contains any surrogate pairs.
/// If it does, then to use it as UTF-8 you'll probably need to run it through
/// convertSurrogatePairsToUTF8().
bool containsSurrogatePairs (UTF8Pointer);
/// Returns a string where any surrogate pairs have been converted to UTF-8 codepoints.
std::string convertSurrogatePairsToUTF8 (UTF8Pointer);
/// Returns true if the given UTF-8 string can be used as CESU-8 without conversion. If not,
/// you'll need to run it through convertUTF8ToCESU8() to convert the 32-bit code-points
/// to surrogate pairs.
bool isValidCESU8 (std::string_view utf8);
/// Converts any 32-bit characters in this UTF-8 string to surrogate pairs, which makes
/// the resulting string suitable for use as CESU-8.
[[nodiscard]] std::string convertUTF8ToCESU8 (UTF8Pointer);
//==============================================================================
/// Represents a line and column index within a block of text.
/// A default-constructed object has both values set to 0, so isValid() returns false for it.
struct LineAndColumn
{
/// Valid line and column values start at 1.
/// If either is 0, it means that the LineAndColumn object is uninitialised.
size_t line = 0, column = 0;
/// Returns true if neither the line nor column is zero.
bool isValid() const noexcept { return line != 0 && column != 0; }
/// Turns this location into a [line]:[col] string suitable for use in a
/// standard compiler error message format.
std::string toString() const;
};
/// Given a block of text and a position within it, this will work out the
/// line and column of that position.
LineAndColumn findLineAndColumn (UTF8Pointer fullText,
UTF8Pointer targetPosition);
//==============================================================================
// _ _ _ _
// __| | ___ | |_ __ _ (_)| | ___
// / _` | / _ \| __| / _` || || |/ __|
// | (_| || __/| |_ | (_| || || |\__ \ _ _ _
// \__,_| \___| \__| \__,_||_||_||___/(_)(_)(_)
//
// Code beyond this point is implementation detail...
//
//==============================================================================
/// Counts the unicode characters in the text by stepping through it up to the
/// null terminator. A null pointer has a length of zero.
inline size_t UTF8Pointer::length() const
{
    if (text == nullptr)
        return 0;

    size_t numChars = 0;

    for (auto p = *this; ! p.empty(); ++p)
        ++numChars;

    return numChars;
}
/// Checks a given chunk of data to see whether it's valid UTF-8.
/// If no errors are found, this returns nullptr. If an error is found, it returns the
/// address of the offending byte:
///   - for a byte that can't start a sequence (a stray continuation byte, or a lead byte
///     announcing more than 3 extra bytes), the address of that byte;
///   - for a sequence that runs past the end of the data, or that encodes a value above
///     0x10ffff, the address of its lead byte;
///   - for a byte that should be a continuation byte (10xxxxxx) but isn't, the address
///     of that byte.
/// Zero bytes in the data are considered to be valid. Only the bytes inside the given range
/// are ever read, and nothing is decoded until its continuation bytes have been checked, so
/// this is safe to call on arbitrary untrusted data.
inline const char* findInvalidUTF8Data (const void* dataToCheck, size_t numBytes)
{
    CHOC_ASSERT (dataToCheck != nullptr);
    auto source = static_cast<const char*> (dataToCheck);
    const auto end = source + numBytes;

    while (source < end)
    {
        const auto leadByte = static_cast<unsigned char> (*source);

        if (leadByte < 0x80)
        {
            ++source;
            continue;
        }

        size_t numExtraBytes = 0;
        uint32_t codepoint = 0;

        if ((leadByte & 0xe0) == 0xc0)       { numExtraBytes = 1; codepoint = leadByte & 0x1fu; }
        else if ((leadByte & 0xf0) == 0xe0)  { numExtraBytes = 2; codepoint = leadByte & 0x0fu; }
        else if ((leadByte & 0xf8) == 0xf0)  { numExtraBytes = 3; codepoint = leadByte & 0x07u; }
        else                                 return source; // not a legal lead byte

        // The whole sequence must fit inside the data that was provided
        if (numExtraBytes >= static_cast<size_t> (end - source))
            return source;

        for (size_t i = 1; i <= numExtraBytes; ++i)
        {
            const auto nextByte = static_cast<unsigned char> (source[i]);

            if ((nextByte & 0xc0) != 0x80)
                return source + i;

            codepoint = (codepoint << 6) | (nextByte & 0x3fu);
        }

        // Only a 4-byte sequence is able to hold a value beyond the unicode range
        if (codepoint > 0x10ffff)
            return source;

        source += numExtraBytes + 1;
    }

    return nullptr;
}
/// Returns the first unicode character without moving this pointer, by popping
/// the character from a temporary copy.
inline UnicodeChar UTF8Pointer::operator*() const
{
    auto copy = *this;
    return copy.popFirstChar();
}
/// Advances past the first unicode character, using the top bits of its lead
/// byte to work out how many extra bytes (at most three) belong to it.
inline UTF8Pointer& UTF8Pointer::operator++()
{
    CHOC_ASSERT (! empty()); // can't advance past the zero-terminator

    const uint32_t leadByte = static_cast<unsigned char> (*text);
    ++text;

    if (leadByte < 0x80)
        return *this;

    // Each consecutive set bit below the top one (down to bit 0x10) means one more byte to skip
    for (uint32_t testBit = 0x40; testBit > 8 && (leadByte & testBit) != 0; testBit >>= 1)
        ++text;

    return *this;
}
/// Post-increment: advances this pointer by one unicode character and returns
/// a copy of its previous position.
inline UTF8Pointer UTF8Pointer::operator++ (int)
{
    const auto previous = *this;
    ++*this;
    return previous;
}
/// Moves this pointer back to the start of the previous unicode character, and
/// returns a copy of the updated pointer.
/// It steps back one byte at a time, continuing for as long as the byte it lands on is a
/// UTF-8 continuation byte (i.e. has the bit pattern 10xxxxxx).
inline UTF8Pointer UTF8Pointer::operator--()
{
CHOC_ASSERT (text != nullptr); // mustn't use this on nullptrs
uint32_t bytesSkipped = 0;
// The pointer is decremented before each test, so when the loop ends it is left
// pointing at the first byte reached that isn't a continuation byte.
while ((*--text & 0xc0) == 0x80)
{
// Three continuation bytes have already been stepped over and this is a fourth one,
// so the assertion fires, and the loop gives up rather than going back any further.
if (bytesSkipped > 2)
{
CHOC_ASSERT (bytesSkipped <= 2);
break;
}
++bytesSkipped;
}
return *this;
}
/// Advances this pointer by the given number of unicode characters, one at a time.
inline UTF8Pointer& UTF8Pointer::operator+= (size_t numCharsToSkip)
{
    for (; numCharsToSkip != 0; --numCharsToSkip)
        ++*this;

    return *this;
}
/// Returns a copy of this pointer which has been advanced by the given number
/// of unicode characters.
inline UTF8Pointer UTF8Pointer::operator+ (size_t numCharsToSkip) const
{
    UTF8Pointer advanced (*this);
    advanced += numCharsToSkip;
    return advanced;
}
/// Returns a copy of this pointer which has been advanced by the given number
/// of unicode characters. The number must not be negative.
inline UTF8Pointer UTF8Pointer::operator+ (int numCharsToSkip) const
{
    CHOC_ASSERT (numCharsToSkip >= 0);

    const auto numToSkip = static_cast<size_t> (numCharsToSkip);
    return *this + numToSkip;
}
/// Decodes the unicode character at the current position, advances the pointer past all of
/// its bytes, and returns the code-point.
/// A byte with its top bit clear is returned as-is. Otherwise the number of consecutive set
/// bits following the top bit of the first byte (at most three are counted) gives the number
/// of extra bytes to read, each of which contributes its low 6 bits to the result.
inline UnicodeChar UTF8Pointer::popFirstChar()
{
CHOC_ASSERT (text != nullptr); // mustn't use this on nullptrs
auto firstByte = static_cast<signed char> (*text++);
UnicodeChar unicodeChar = static_cast<unsigned char> (firstByte);
// As a signed char, a negative value means the top bit is set, i.e. this is a multi-byte character
if (firstByte < 0)
{
uint32_t bitMask = 0x7f, numExtraBytes = 0;
// For each set bit found (testing 0x40, then 0x20, then 0x10), one more byte will
// be read, and one more bit is removed from the mask for the first byte's payload.
for (uint32_t testBit = 0x40; (unicodeChar & testBit) != 0 && testBit > 8; ++numExtraBytes)
{
bitMask >>= 1;
testBit >>= 1;
}
// Strip the length-marker bits, leaving just the payload bits of the first byte
unicodeChar &= bitMask;
for (uint32_t i = 0; i < numExtraBytes; ++i)
{
uint32_t nextByte = static_cast<unsigned char> (*text);
CHOC_ASSERT ((nextByte & 0xc0) == 0x80); // error in the data - you should always make sure the source
// gets validated before iterating a UTF8Pointer over it
// Append the 6 payload bits of this continuation byte
unicodeChar = (unicodeChar << 6) | (nextByte & 0x3f);
++text;
}
}
return unicodeChar;
}
// Returns true if the bytes at the current position begin with the given
// zero-terminated string. A null pointer never matches.
inline bool UTF8Pointer::startsWith (const char* textToMatch) const
{
    CHOC_ASSERT (textToMatch != nullptr);

    if (text == nullptr)
        return false;

    // Byte-wise comparison: hitting our own terminator early shows up as a mismatch.
    for (auto cursor = text; *textToMatch != 0; ++textToMatch, ++cursor)
        if (*textToMatch != *cursor)
            return false;

    return true;
}
// Scans forward one character at a time and returns the first position that starts
// with textToFind, or the first position that reports empty() if there is no match.
inline UTF8Pointer UTF8Pointer::find (const char* textToFind) const
{
    CHOC_ASSERT (textToFind != nullptr);

    auto candidate = *this;

    while (! candidate.startsWith (textToFind) && ! candidate.empty())
        ++candidate;

    return candidate;
}
// If the current byte equals charToMatch (and charToMatch is not zero), steps over
// that one byte and returns true; otherwise leaves the pointer alone and returns false.
inline bool UTF8Pointer::skipIfStartsWith (char charToMatch)
{
    if (text == nullptr || *text != charToMatch || charToMatch == 0)
        return false;

    ++text;
    return true;
}
// If the bytes at the current position begin with textToMatch, moves the pointer to
// just after the match and returns true; otherwise leaves it alone and returns false.
inline bool UTF8Pointer::skipIfStartsWith (const char* textToMatch)
{
    CHOC_ASSERT (textToMatch != nullptr);

    if (text == nullptr)
        return false;

    auto cursor = text;

    for (; *textToMatch != 0; ++textToMatch, ++cursor)
        if (*textToMatch != *cursor)
            return false;

    // Only commit the new position once the whole string has matched.
    text = cursor;
    return true;
}
// Returns the first position at or after this one whose byte is not classed as
// whitespace by choc::text::isWhitespace. A null pointer is returned unchanged.
inline UTF8Pointer UTF8Pointer::findEndOfWhitespace() const
{
    auto result = *this;

    if (result.text == nullptr)
        return result;

    while (choc::text::isWhitespace (*result.text))
        ++result;

    return result;
}
// Walks backwards from this position, never going before `start`, and returns the
// position just after the nearest preceding '\r' or '\n' (or `start` if none is found).
// Returns a default-constructed pointer if this pointer is null.
inline UTF8Pointer UTF8Pointer::findStartOfLine (UTF8Pointer start) const
{
    if (text == nullptr)
        return {};

    auto l = *this;
    CHOC_ASSERT (l.text >= start.text && start.text != nullptr);

    while (l.text > start.text)
    {
        // Peek at the preceding character before committing to the step back.
        auto previous = l;
        --previous;
        const auto ch = *previous;

        if (ch == '\r' || ch == '\n')
            return l;

        l = previous;
    }

    return l;
}
// Reads forward until a '\r' or '\n' has been consumed, or the position reports empty().
// The returned position is therefore just past the line-break character when one is
// found. Returns a default-constructed pointer if this pointer is null.
inline UTF8Pointer UTF8Pointer::findEndOfLine() const
{
    if (text == nullptr)
        return {};

    auto cursor = *this;

    while (! cursor.empty())
    {
        const auto ch = cursor.popFirstChar();

        if (ch == '\r' || ch == '\n')
            return cursor;
    }

    return cursor;
}
// Returns an iterator positioned at this pointer's current location; the pointer must not be null.
inline UTF8Pointer::Iterator UTF8Pointer::begin() const
{
    CHOC_ASSERT (text != nullptr);
    return Iterator (text);
}
// Returns a value-initialised EndIterator to pair with begin().
inline UTF8Pointer::EndIterator UTF8Pointer::end() const
{
    return EndIterator {};
}
// Computes the 1-based line and column of targetPosition by walking character by
// character from start. Returns a default-constructed LineAndColumn if either
// pointer is null.
inline LineAndColumn findLineAndColumn (UTF8Pointer start, UTF8Pointer targetPosition)
{
    if (start == nullptr || targetPosition == nullptr)
        return {};

    CHOC_ASSERT (start <= targetPosition);

    LineAndColumn position { 1, 1 };

    for (; start < targetPosition && ! start.empty(); ++start)
    {
        if (*start == '\n')
        {
            // A line-feed starts a new line and resets the column.
            ++position.line;
            position.column = 1;
        }
        else
        {
            ++position.column;
        }
    }

    return position;
}
// Formats the position as "line:column".
inline std::string LineAndColumn::toString() const
{
    auto description = std::to_string (line);
    description += ':';
    description += std::to_string (column);
    return description;
}
//==============================================================================
// Writes the UTF-8 encoding of unicodeChar to dest and returns the number of bytes
// written (between 1 and 4). No terminator is written.
inline uint32_t convertUnicodeCodepointToUTF8 (char* dest, UnicodeChar unicodeChar)
{
    if (unicodeChar < 0x80)
    {
        dest[0] = static_cast<char> (unicodeChar);
        return 1;
    }

    const uint32_t extraBytes = unicodeChar >= 0x10000 ? 3u
                              : unicodeChar >= 0x800   ? 2u
                                                       : 1u;

    auto shift = extraBytes * 6;
    auto* out = dest;

    // Lead byte: (extraBytes + 1) leading one-bits followed by the topmost payload bits.
    *out++ = static_cast<char> ((0xffu << (7 - extraBytes)) | (unicodeChar >> shift));

    // Trailing bytes: 10xxxxxx, six payload bits each, most significant group first.
    while (shift != 0)
    {
        shift -= 6;
        *out++ = static_cast<char> (0x80u | (0x3fu & (unicodeChar >> shift)));
    }

    return extraBytes + 1;
}
// Appends the UTF-8 encoding of unicodeChar to the end of target.
inline void appendUTF8 (std::string& target, UnicodeChar unicodeChar)
{
    char encoded[4];
    const auto length = convertUnicodeCodepointToUTF8 (encoded, unicodeChar);
    target.append (encoded, encoded + length);
}
// True for values in the high (leading) surrogate range 0xd800 to 0xdbff inclusive.
inline bool isUnicodeHighSurrogate (UnicodeChar codepoint)
{
    return ! (codepoint < 0xd800 || codepoint > 0xdbff);
}
// True for values in the low (trailing) surrogate range 0xdc00 to 0xdfff inclusive.
inline bool isUnicodeLowSurrogate (UnicodeChar codepoint)
{
    return ! (codepoint < 0xdc00 || codepoint > 0xdfff);
}
// Combines a surrogate pair into a single code-point.
// If pair.high is not a high surrogate it is returned unchanged; if it is, but
// pair.low is not a low surrogate, the result is 0.
inline UnicodeChar createUnicodeFromHighAndLowSurrogates (SurrogatePair pair)
{
    if (isUnicodeHighSurrogate (pair.high))
    {
        if (isUnicodeLowSurrogate (pair.low))
        {
            // 0x35fdc00 == (0xd800 << 10) + 0xdc00 - 0x10000, i.e. both surrogate
            // bases removed and the supplementary-plane offset added in one step.
            return (pair.high << 10) + pair.low - 0x35fdc00u;
        }

        return 0;
    }

    return pair.high;
}
// Returns true if a high-surrogate value is decoded anywhere before the zero terminator.
inline bool containsSurrogatePairs (UTF8Pointer text)
{
    for (auto c = text.popFirstChar(); c != 0; c = text.popFirstChar())
        if (isUnicodeHighSurrogate (c))
            return true;

    return false;
}
// Re-encodes the text as UTF-8, merging each high surrogate with the character that
// follows it into one code-point. Conversion stops at the first zero code-point, which
// includes the zero produced when a high surrogate is not followed by a low surrogate.
inline std::string convertSurrogatePairsToUTF8 (UTF8Pointer text)
{
    std::string converted;

    while (true)
    {
        const auto first = text.popFirstChar();

        const auto codepoint = choc::text::isUnicodeHighSurrogate (first)
                                 ? createUnicodeFromHighAndLowSurrogates ({ first, text.popFirstChar() })
                                 : first;

        if (codepoint == 0)
            break;

        appendUTF8 (converted, codepoint);
    }

    return converted;
}
// Splits a code-point of 0x10000 or above into its high and low surrogate values.
inline SurrogatePair splitCodePointIntoSurrogatePair (UnicodeChar fullCodePoint)
{
    CHOC_ASSERT (fullCodePoint >= 0x10000);

    // High surrogate carries the bits above the low ten of the offset from 0x10000.
    const auto highPart = static_cast<UnicodeChar> (0xd800u + ((fullCodePoint - 0x10000u) >> 10));

    // Low surrogate carries the low ten bits (unaffected by subtracting 0x10000).
    const auto lowPart = static_cast<UnicodeChar> (0xdc00u + (fullCodePoint & 0x3ffu));

    return { highPart, lowPart };
}
// Quick screening test for whether a UTF-8 string can be used as CESU-8 unchanged.
//
// CESU-8 encodes every character as UTF-8 sequences of at most three bytes
// (supplementary characters become two three-byte surrogate sequences), so the
// only bytes that can never occur are the four-byte lead bytes 0xf0..0xf7 and the
// values above them. Three-byte lead bytes 0xe0..0xef - including 0xed, which
// introduces an encoded surrogate - are legal and must be accepted.
//
// This only looks for forbidden byte values; it does not check that the
// sequences themselves are well-formed.
inline bool isValidCESU8 (std::string_view utf8)
{
    for (auto c : utf8)
        if (static_cast<uint8_t> (c) >= 0xf0)
            return false;

    return true;
}
// Converts UTF-8 text to CESU-8: code-points of 0x10000 and above are written as a
// surrogate pair (each half encoded separately); everything else is written as-is.
// Conversion stops at the first zero code-point.
inline std::string convertUTF8ToCESU8 (UTF8Pointer utf8)
{
    std::string cesu8;

    for (auto codepoint = utf8.popFirstChar(); codepoint != 0; codepoint = utf8.popFirstChar())
    {
        if (codepoint < 128)
        {
            // Single-byte characters can be appended directly.
            cesu8 += (char) codepoint;
        }
        else if (codepoint < 0x10000)
        {
            appendUTF8 (cesu8, codepoint);
        }
        else
        {
            const auto surrogates = splitCodePointIntoSurrogatePair (codepoint);
            appendUTF8 (cesu8, surrogates.high);
            appendUTF8 (cesu8, surrogates.low);
        }
    }

    return cesu8;
}
} // namespace choc::text
#endif