Javascript: Move javascript implementation into a separate module

2026-01-27 02:20:05 +00:00 · 2024-11-05 13:50:57 +00:00 · 2024-11-05 13:50:57 +00:00 · df6f3f8e28
commit df6f3f8e28
parent 637226addc
69 changed files with 941 additions and 1351 deletions
--- a/modules/juce_javascript/choc/text/choc_FloatToString.h
+++ b/modules/juce_javascript/choc/text/choc_FloatToString.h
@ -0,0 +1,397 @@
+//
+//    ██████ ██   ██  ██████   ██████
+//   ██      ██   ██ ██    ██ ██            ** Classy Header-Only Classes **
+//   ██      ███████ ██    ██ ██
+//   ██      ██   ██ ██    ██ ██           https://github.com/Tracktion/choc
+//    ██████ ██   ██  ██████   ██████
+//
+//   CHOC is (C)2022 Tracktion Corporation, and is offered under the terms of the ISC license:
+//
+//   Permission to use, copy, modify, and/or distribute this software for any purpose with or
+//   without fee is hereby granted, provided that the above copyright notice and this permission
+//   notice appear in all copies. THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
+//   WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
+//   AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR
+//   CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+//   WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
+//   CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+
+#ifndef CHOC_FLOAT_TO_STRING_HEADER_INCLUDED
+#define CHOC_FLOAT_TO_STRING_HEADER_INCLUDED
+
+#include <cstring>
+#include <string>
+#include "../math/choc_MathHelpers.h"
+
+namespace choc::text
+{
+
+//==============================================================================
+/** Converts a 32-bit float to an accurate, round-trip-safe string.
+
+    The algorithm used is "Grisu3" from the paper "Printing Floating-Point Numbers
+    Quickly and Accurately with Integers" by Florian Loitsch.
+*/
+std::string floatToString (float value);
+
+/** Converts a 64-bit double to an accurate, round-trip-safe string.
+
+    The algorithm used is "Grisu3" from the paper "Printing Floating-Point Numbers
+    Quickly and Accurately with Integers" by Florian Loitsch.
+*/
+std::string floatToString (double value);
+
+//==============================================================================
+/** Converts a 32-bit float to an accurate, round-trip-safe string.
+    If maxDecimalPlaces is -1, a default is used.
+    If omitDecimalPointForRoundNumbers is true, then values such as "2.0" are returned
+    without the decimal point, e.g. simply "2".
+
+    The algorithm used is "Grisu3" from the paper "Printing Floating-Point Numbers
+    Quickly and Accurately with Integers" by Florian Loitsch.
+*/
+std::string floatToString (float value, int maxDecimalPlaces, bool omitDecimalPointForRoundNumbers = false);
+
+/** Converts a 64-bit double to an accurate, round-trip-safe string.
+    If maxDecimalPlaces is -1, a default is used.
+    If omitDecimalPointForRoundNumbers is true, then values such as "2.0" are returned
+    without the decimal point, e.g. simply "2".
+
+    The algorithm used is "Grisu3" from the paper "Printing Floating-Point Numbers
+    Quickly and Accurately with Integers" by Florian Loitsch.
+*/
+std::string floatToString (double value, int maxDecimalPlaces, bool omitDecimalPointForRoundNumbers = false);
+
+
+//==============================================================================
+/** Helper class containing its own buffer for converting a float or double to a string.
+
+    The algorithm is "Grisu3" from the paper "Printing Floating-Point Numbers
+    Quickly and Accurately with Integers" by Florian Loitsch.
+
+    To use, just construct a FloatToStringBuffer with the value, and use its begin()/end()
+    methods to iterate the result. Or use the floatToString() functions to just convert a
+    value directly to a std::string.
+*/
+template <typename FloatOrDouble>
+struct FloatToStringBuffer
+{
+    /** Converts the value straight away, leaving the resulting characters in this object's
+        internal buffer. The text is NOT null-terminated: always use the begin()/end() range.
+
+        @param value                the number to convert
+        @param maxDecimalPlaces     maximum number of digits after the decimal point; a negative
+                                    value selects the built-in default
+        @param omitPointIfPossible  if true, a trailing ".0" is dropped from the result
+    */
+    FloatToStringBuffer (FloatOrDouble value, int maxDecimalPlaces, bool omitPointIfPossible)
+       : length (static_cast<size_t> (writeAndGetEnd (storage, value, maxDecimalPlaces, omitPointIfPossible) - storage)) {}
+
+    /// Returns a pointer to the first character of the converted text.
+    const char* begin() const       { return storage; }
+    /// Returns a pointer one past the last character of the converted text.
+    const char* end() const         { return storage + length; }
+
+    /// Returns the converted text as a std::string.
+    std::string toString() const    { return std::string (begin(), end()); }
+
+private:
+    //==============================================================================
+    static_assert (std::is_same<const float, const FloatOrDouble>::value || std::is_same<const double, const FloatOrDouble>::value,
+                   "This class can only handle float or double template types");
+
+    // Zero-initialised so that copying the object never reads indeterminate bytes.
+    char storage[32] = {};
+
+    // The text's size is stored rather than a pointer into `storage`: a pointer would
+    // still refer to the source object's buffer after an implicit copy of this struct.
+    size_t length;
+
+    /** An unsigned 64-bit mantissa paired with a base-2 exponent, i.e. the value
+        (mantissa * 2^exponent).
+    */
+    struct MantissaAndExponent
+    {
+        uint64_t mantissa;
+        int32_t exponent;
+
+        /** Builds the pair from the raw bits of a float/double (sign already removed) and its
+            significand field. A zero exponent field gets no hidden bit and a fixed exponent.
+        */
+        static constexpr MantissaAndExponent create (uint64_t floatBits, uint64_t significand)
+        {
+            constexpr int bias = (sizeof (FloatOrDouble) == 8 ? 0x3ff : 0x7f) + numSignificandBits;
+            const auto biasedExponent = static_cast<int> ((floatBits & exponentMask) >> numSignificandBits);
+
+            if (biasedExponent == 0)
+                return MantissaAndExponent { significand, 1 - bias };
+
+            return MantissaAndExponent { significand + hiddenBit, biasedExponent - bias };
+        }
+
+        /** Multiplies two values, keeping the upper 64 bits of the 128-bit mantissa product
+            and rounding up when the top bit of the discarded lower half is set.
+        */
+        constexpr MantissaAndExponent operator* (MantissaAndExponent rhs) const
+        {
+            auto product = math::multiply128 (mantissa, rhs.mantissa);
+            auto roundingCarry = product.low >> 63;
+
+            return { product.high + roundingCarry, exponent + rhs.exponent + 64 };
+        }
+
+        /// Returns the same value with the mantissa shifted left by numBits.
+        constexpr MantissaAndExponent shiftedUp (int numBits) const
+        {
+            return { mantissa << numBits, exponent - numBits };
+        }
+
+        /// Returns the same value shifted up by the number of clear upper bits in the mantissa.
+        constexpr MantissaAndExponent normalized() const
+        {
+            return shiftedUp (static_cast<int> (math::countUpperClearBits (mantissa)));
+        }
+    };
+
+    /** Writes the decimal digits for a scaled value into `buffer` and returns the number of
+        characters written. Only digits are produced here - no sign, point or exponent.
+
+        @param upperBound    the scaled upper boundary whose digits are generated
+        @param mantissaDiff  forwarded (suitably scaled) to roundFinalDigit() as its `diff`
+        @param delta         generation stops as soon as the part of upperBound that has not
+                             yet been written is no bigger than this
+        @param K             decimal exponent, adjusted by the number of digit positions that
+                             were not written (this adjustment is negative once fractional
+                             digits have been written)
+
+        NOTE(review): the shifts by -upperBound.exponent assume the caller has scaled
+        upperBound so that its exponent is negative - confirm against createPowerOf10().
+    */
+    static uint32_t generateDigits (char* buffer, MantissaAndExponent upperBound, uint64_t mantissaDiff, uint64_t delta, int& K)
+    {
+        uint32_t length = 0;
+        // "one" is the value 1 in upperBound's fixed-point format: p1 is the integer part
+        // of upperBound and p2 holds the fractional bits.
+        const auto one = MantissaAndExponent { 1ull << -upperBound.exponent, upperBound.exponent };
+        auto p1 = static_cast<uint32_t> (upperBound.mantissa >> -one.exponent);
+        auto p2 = upperBound.mantissa & (one.mantissa - 1);
+        auto numDigits = math::getNumDecimalDigits (p1);
+
+        for (;;)
+        {
+            auto digit = p1;
+
+            // Peel the most significant remaining decimal digit off the integer part.
+            switch (--numDigits)
+            {
+                case 0:                           p1 = 0;              break;
+                case 1:  digit /= powersOf10[1];  p1 %= powersOf10[1]; break;
+                case 2:  digit /= powersOf10[2];  p1 %= powersOf10[2]; break;
+                case 3:  digit /= powersOf10[3];  p1 %= powersOf10[3]; break;
+                case 4:  digit /= powersOf10[4];  p1 %= powersOf10[4]; break;
+                case 5:  digit /= powersOf10[5];  p1 %= powersOf10[5]; break;
+                case 6:  digit /= powersOf10[6];  p1 %= powersOf10[6]; break;
+                case 7:  digit /= powersOf10[7];  p1 %= powersOf10[7]; break;
+                case 8:  digit /= powersOf10[8];  p1 %= powersOf10[8]; break;
+                default: break;
+            }
+
+            writeDigitIfNotLeadingZero (buffer, length, digit);
+            // Everything that has not been written yet, in the same fixed-point format.
+            auto rest = p2 + (static_cast<uint64_t> (p1) << -one.exponent);
+
+            if (rest <= delta)
+            {
+                K += numDigits;
+                roundFinalDigit (buffer, length, delta, rest, static_cast<uint64_t> (powersOf10[numDigits]) << -one.exponent, mantissaDiff);
+                return length;
+            }
+
+            // The integer part is used up: carry on with the fractional part, scaling it
+            // (and delta) by 10 for each digit. numDigits goes negative from here on.
+            if (numDigits == 0)
+            {
+                for (;;)
+                {
+                    delta *= 10;
+                    p2 *= 10;
+                    --numDigits;
+                    writeDigitIfNotLeadingZero (buffer, length, static_cast<uint32_t> (p2 >> -one.exponent));
+                    p2 &= one.mantissa - 1;
+
+                    if (p2 < delta)
+                    {
+                        K += numDigits;
+                        // powersOf10 only has entries up to 10^9, so a diff of 0 is passed
+                        // once 9 or more fractional digits have been produced.
+                        roundFinalDigit (buffer, length, delta, p2, one.mantissa, numDigits > -9 ? mantissaDiff * powersOf10[-numDigits] : 0);
+                        return length;
+                    }
+                }
+            }
+        }
+    }
+
+    /** Adjusts the last digit in the buffer downwards, one step at a time.
+
+        Each step decrements buffer[length - 1] and adds tenToPowerNumDigits (the weight of
+        one unit of that digit) to `rest`. Stepping continues only while `rest` is below
+        `diff`, another step still fits within `delta`, and the step would bring `rest`
+        closer to `diff` than it currently is.
+    */
+    static void roundFinalDigit (char* buffer, uint32_t length, uint64_t delta, uint64_t rest, uint64_t tenToPowerNumDigits, uint64_t diff)
+    {
+        while (rest < diff && delta - rest >= tenToPowerNumDigits
+                && (rest + tenToPowerNumDigits < diff || diff - rest > rest + tenToPowerNumDigits - diff))
+        {
+            --(buffer[length - 1]);
+            rest += tenToPowerNumDigits;
+        }
+    }
+
+    // Low-level output helpers. Each one stores characters at `dest` and returns the
+    // position just after the last character written; none of them adds a terminator.
+    //   write()         - stores one or more raw characters
+    //   writeDigit()    - stores one or more values as the character (value + '0')
+    //   writeZero()     - stores the three characters "0.0"
+    //   writeExponent() - stores 'e' followed by the exponent as a short integer
+    // writeDigitIfNotLeadingZero() is index-based instead: it appends the digit at
+    // dest[length] and bumps `length`, unless the digit is 0 and nothing has been written yet.
+    [[nodiscard]] static char* write (char* dest, char c)                                     { *dest = c; return dest + 1; }
+    template <typename... Chars> static char* write (char* dest, char first, Chars... others) { return write (write (dest, first), others...); }
+    [[nodiscard]] static char* writeDigit (char* dest, int digit)                             { return write (dest, static_cast<char> (digit + '0')); }
+    template <typename... Chars> static char* writeDigit (char* dest, int d, Chars... others) { return writeDigit (writeDigit (dest, d), others...); }
+    [[nodiscard]] static char* writeZero (char* dest)                                         { return write (dest, '0', '.', '0'); }
+    [[nodiscard]] static char* writeExponent (char* dest, int e)                              { return writeShortInteger (write (dest, 'e'), e); }
+    static void writeDigitIfNotLeadingZero (char* dest, uint32_t& length, uint32_t digit)     { if (digit != 0 || length != 0) dest[length++] = static_cast<char> (digit + '0'); }
+
+    /** Writes a small integer (as used for exponents) in decimal, with a leading '-' when
+        negative, and returns the position after the last character written.
+        At most three digit characters are produced, so the magnitude is expected to be
+        below 1000.
+    */
+    [[nodiscard]] static char* writeShortInteger (char* dest, int n)
+    {
+        if (n < 0)
+        {
+            dest = write (dest, '-');
+            n = -n;
+        }
+
+        if (n >= 100)
+            dest = writeDigit (dest, n / 100);
+
+        if (n >= 10)
+            dest = writeDigit (dest, (n / 10) % 10);
+
+        return writeDigit (dest, n % 10);
+    }
+
+    /** Opens a gap of numRepetitions characters at `dest` by moving the `length` characters
+        that start there further along, then fills the gap with charToInsert.
+    */
+    static void insertChar (char* dest, uint32_t length, char charToInsert, uint32_t numRepetitions)
+    {
+        // The source and destination ranges may overlap, hence memmove rather than memcpy.
+        std::memmove (dest + numRepetitions, dest, static_cast<size_t> (length));
+        std::memset (dest, charToInsert, static_cast<size_t> (numRepetitions));
+    }
+
+    /** Turns the totalLength digits at `dest` into exponent notation ("d.ddde<exponent>")
+        and returns the position after the last character written.
+        A single digit is written without a decimal point, e.g. "5e30".
+    */
+    static char* writeAsExponentNotation (char* dest, uint32_t totalLength, int exponent)
+    {
+        if (totalLength == 1)
+            return writeExponent (dest + 1, exponent);
+
+        // Put a '.' after the first digit; the text now occupies dest[0 ... totalLength].
+        insertChar (dest + 1, totalLength - 1, '.', 1);
+
+        // Drop trailing zeros, but always keep at least one digit after the point.
+        while (dest[totalLength] == '0' && totalLength > 2)
+            --totalLength;
+
+        return writeExponent (dest + (totalLength + 1), exponent);
+    }
+
+    /** Formats the `length` digits at `dest` as a plain decimal starting with "0."
+        (the caller only uses this when mantissaDigits is in the range -5 to 0), and returns
+        the position after the last character.
+
+        If there are more decimals than maxDecimalPlaces allows, the extra digits are simply
+        cut off (no rounding is done here). Trailing zeros are removed, but at least one
+        digit is always left after the point.
+    */
+    static char* writeWithoutExponentLessThan1 (char* dest, uint32_t length, int mantissaDigits, int maxDecimalPlaces)
+    {
+        // Make room for "0." plus (-mantissaDigits) leading zeros, then patch in the point.
+        auto numPaddingZeros = static_cast<uint32_t> (2 - mantissaDigits);
+        insertChar (dest, length, '0', numPaddingZeros);
+        dest[1] = '.';
+
+        if (static_cast<int> (length) > maxDecimalPlaces + mantissaDigits)
+        {
+            // Too many decimals: search backwards from the last permitted one for a non-zero digit.
+            for (int i = maxDecimalPlaces + 1; i > 2; --i)
+                if (dest[i] != '0')
+                    return dest + (i + 1);
+
+            return dest + 3;
+        }
+
+        length += numPaddingZeros;
+
+        while (dest[length - 1] == '0' && length > 3)
+            --length;
+
+        return dest + length;
+    }
+
+    /** Formats the totalLength digits at `dest` as a plain decimal with mantissaLength
+        digits before the point, returning the position after the last character.
+
+        When K >= 0 the number has no fractional digits: it is padded with zeros up to
+        mantissaLength digits and ".0" is appended. Otherwise a '.' is inserted and, if there
+        are more than maxDecimalPlaces decimals, the extra ones are cut off (no rounding is
+        done here) and trailing zeros removed, always leaving at least one decimal digit.
+    */
+    static char* writeWithoutExponentGreaterThan1 (char* dest, uint32_t totalLength, uint32_t mantissaLength, int maxDecimalPlaces, int K)
+    {
+        if (K >= 0)
+        {
+            dest += totalLength;
+
+            for (auto i = totalLength; i < mantissaLength; ++i)
+                dest = write (dest, '0');
+
+            return write (dest, '.', '0');
+        }
+
+        insertChar (dest + mantissaLength, totalLength - mantissaLength, '.', 1);
+
+        // -K decimals were generated, so this is the case where they all fit.
+        if (K + maxDecimalPlaces >= 0)
+            return dest + (totalLength + 1);
+
+        // Search backwards from the last permitted decimal for a non-zero digit.
+        for (auto i = static_cast<int> (mantissaLength) + maxDecimalPlaces; i > static_cast<int> (mantissaLength + 1); --i)
+            if (dest[i] != '0')
+                return dest + (i + 1);
+
+        return dest + (mantissaLength + 2);
+    }
+
+    /** Holds the lower and upper boundaries around a value, both expressed with the same
+        exponent so that their mantissas can be compared and subtracted directly.
+    */
+    struct Limits
+    {
+        constexpr Limits (MantissaAndExponent value)
+        {
+            // (2m + 1) * 2^(e - 1): half a mantissa step above the value.
+            upper = { (value.mantissa << 1) + 1, value.exponent - 1 };
+
+            // Shift up until the bit above the hidden-bit position is set...
+            while ((upper.mantissa & (hiddenBit << 1)) == 0)
+                upper = upper.shiftedUp (1);
+
+            // ...then move that bit to the very top of the 64-bit mantissa.
+            upper = upper.shiftedUp (static_cast<int> (sizeof (upper.mantissa) * 8 - numSignificandBits - 2));
+
+            // Half a step below the value - or only a quarter of a step when the mantissa
+            // consists of just the hidden bit.
+            lower = value.mantissa == hiddenBit ? MantissaAndExponent { (value.mantissa << 2) - 1, value.exponent - 2 }
+                                                : MantissaAndExponent { (value.mantissa << 1) - 1, value.exponent - 1 };
+            // Re-express the lower boundary using the upper boundary's exponent.
+            lower.mantissa <<= lower.exponent - upper.exponent;
+            lower.exponent = upper.exponent;
+        }
+
+        MantissaAndExponent lower, upper;
+    };
+
+    /** Writes the text for `value` at `pos` and returns the position just after the last
+        character written. No null terminator is added.
+
+        Special cases: zero is written as "0.0" (or "-0.0"), any NaN bit pattern as "nan"
+        and infinity as "inf", each with a leading '-' if the sign bit is set.
+
+        @param maxDecimalPlaces     maximum number of digits after the decimal point; a negative
+                                    value selects defaultNumDecimalPlaces
+        @param omitPointIfPossible  if true, a trailing ".0" is removed from the result
+    */
+    static const char* writeAndGetEnd (char* pos, FloatOrDouble value, int maxDecimalPlaces, bool omitPointIfPossible)
+    {
+        auto startPos = pos;
+        auto floatBits = getFloatBits (value);
+
+        if ((floatBits & signMask) != 0)
+        {
+            pos = write (pos, '-');
+            floatBits &= ~signMask;
+        }
+
+        // NOTE(review): this early return bypasses the omitPointIfPossible handling below, so
+        // zero always comes out as "0.0". Left as-is to preserve the existing output.
+        if (isZero (floatBits))  return writeZero (pos);
+
+        // An exponent field of all ones means the value isn't finite. It must be classified
+        // by looking at the significand rather than by comparing against one particular NaN
+        // bit pattern, otherwise NaNs with other payloads would be formatted as numbers.
+        if ((floatBits & exponentMask) == exponentMask)
+            return (floatBits & significandMask) != 0 ? write (pos, 'n', 'a', 'n')
+                                                      : write (pos, 'i', 'n', 'f');
+
+        auto v = MantissaAndExponent::create (floatBits, floatBits & significandMask);
+        Limits limits (v);
+
+        // Scale the value and its two boundaries by the same power of 10; K receives the
+        // corresponding decimal exponent.
+        int K = 0;
+        auto powerOf10 = createPowerOf10 (limits.upper.exponent, K);
+        auto w = powerOf10 * v.normalized();
+        auto upperBound = powerOf10 * limits.upper;
+        upperBound.mantissa--;
+        auto lowerBound = powerOf10 * limits.lower;
+        lowerBound.mantissa++;
+
+        auto totalLength = generateDigits (pos, upperBound, upperBound.mantissa - w.mantissa, upperBound.mantissa - lowerBound.mantissa, K);
+        auto end = addDecimalPointAndExponent (pos, totalLength, K, maxDecimalPlaces < 0 ? defaultNumDecimalPlaces : maxDecimalPlaces);
+
+        if (omitPointIfPossible && end > startPos + 1 && end[-1] == '0' && end[-2] == '.')
+            end -= 2;
+
+        return end;
+    }
+
+    /** Takes the totalLength raw digits at `pos` (with decimal exponent K) and lays them out
+        in their final form, returning the position after the last character.
+
+        Values too small to show within maxDecimalPlaces become "0.0"; values with between
+        -5 and 21 digits before the decimal point are written in plain notation; everything
+        else uses exponent notation.
+    */
+    static const char* addDecimalPointAndExponent (char* pos, uint32_t totalLength, int K, int maxDecimalPlaces)
+    {
+        const int digitsBeforePoint = static_cast<int> (totalLength) + K;
+
+        if (digitsBeforePoint < -maxDecimalPlaces)
+            return writeZero (pos);
+
+        if (digitsBeforePoint > 0)
+        {
+            if (digitsBeforePoint <= 21)
+                return writeWithoutExponentGreaterThan1 (pos, totalLength, static_cast<uint32_t> (digitsBeforePoint), maxDecimalPlaces, K);
+        }
+        else if (digitsBeforePoint > -6)
+        {
+            return writeWithoutExponentLessThan1 (pos, totalLength, digitsBeforePoint, maxDecimalPlaces);
+        }
+
+        return writeAsExponentNotation (pos, totalLength, digitsBeforePoint - 1);
+    }
+
+    // getFloatBits() returns the raw bits of the value (a float's 32 bits are zero-extended
+    // to 64). std::memcpy is the qualified name that <cstring> is guaranteed to provide,
+    // matching the std::memmove call in insertChar().
+    // isZero() is true for both +0 and -0, because the sign bit is outside the two masks.
+    static uint64_t getFloatBits (double value)    { uint64_t i; std::memcpy (&i, &value, sizeof (i)); return i; }
+    static uint64_t getFloatBits (float value)     { uint32_t i; std::memcpy (&i, &value, sizeof (i)); return i; }
+    static bool isZero (uint64_t floatBits)        { return (floatBits & (exponentMask | significandMask)) == 0; }
+
+    // Format constants, selected by the size of FloatOrDouble (8 bytes = double, else float):
+    //   defaultNumDecimalPlaces - used when the caller passes a negative maxDecimalPlaces
+    //   numSignificandBits      - width of the stored significand field
+    //   signMask                - the topmost bit of the value
+    //   hiddenBit               - the bit just above the stored significand
+    //   significandMask         - all of the stored significand bits
+    //   exponentMask            - all of the exponent field bits
+    //   nanBits                 - exponent field all ones plus the top significand bit
+    //   infBits                 - exponent field all ones, significand zero
+    //   powersOf10              - 10^0 to 10^9, indexed by the power
+    static constexpr int       defaultNumDecimalPlaces  = 324;
+    static constexpr int       numSignificandBits       = sizeof (FloatOrDouble) == 8 ? 52 : 23;
+    static constexpr uint64_t  signMask                 = 1ull << (sizeof (FloatOrDouble) * 8 - 1);
+    static constexpr uint64_t  hiddenBit                = 1ull << numSignificandBits;
+    static constexpr uint64_t  significandMask          = hiddenBit - 1;
+    static constexpr uint64_t  exponentMask             = sizeof (FloatOrDouble) == 8 ? 0x7ff0000000000000ull : 0x7f800000ull;
+    static constexpr uint64_t  nanBits                  = sizeof (FloatOrDouble) == 8 ? 0x7ff8000000000000ull : 0x7fc00000ull;
+    static constexpr uint64_t  infBits                  = sizeof (FloatOrDouble) == 8 ? 0x7ff0000000000000ull : 0x7f800000ull;
+    static constexpr uint32_t  powersOf10[]             = { 1, 10, 100, 1000, 10000, 100000, 1000000, 10000000, 100000000, 1000000000 };
+
+    /** Picks a pre-computed power of 10 to scale a value with the given base-2 exponent,
+        and sets K to the negated decimal exponent of the chosen power.
+
+        The table entry at index i holds 10^(8i - 348) as a 64-bit mantissa and base-2
+        exponent, so the entries run from 10^-348 up to 10^340 in steps of 10^8.
+    */
+    static MantissaAndExponent createPowerOf10 (int exponentBase2, int& K)
+    {
+        static constexpr MantissaAndExponent powerOf10List[] =
+        {
+            { 0xfa8fd5a0081c0288ull, -1220 }, { 0xbaaee17fa23ebf76ull, -1193 }, { 0x8b16fb203055ac76ull, -1166 }, { 0xcf42894a5dce35eaull, -1140 }, { 0x9a6bb0aa55653b2dull, -1113 },
+            { 0xe61acf033d1a45dfull, -1087 }, { 0xab70fe17c79ac6caull, -1060 }, { 0xff77b1fcbebcdc4full, -1034 }, { 0xbe5691ef416bd60cull, -1007 }, { 0x8dd01fad907ffc3cull,  -980 },
+            { 0xd3515c2831559a83ull,  -954 }, { 0x9d71ac8fada6c9b5ull,  -927 }, { 0xea9c227723ee8bcbull,  -901 }, { 0xaecc49914078536dull,  -874 }, { 0x823c12795db6ce57ull,  -847 },
+            { 0xc21094364dfb5637ull,  -821 }, { 0x9096ea6f3848984full,  -794 }, { 0xd77485cb25823ac7ull,  -768 }, { 0xa086cfcd97bf97f4ull,  -741 }, { 0xef340a98172aace5ull,  -715 },
+            { 0xb23867fb2a35b28eull,  -688 }, { 0x84c8d4dfd2c63f3bull,  -661 }, { 0xc5dd44271ad3cdbaull,  -635 }, { 0x936b9fcebb25c996ull,  -608 }, { 0xdbac6c247d62a584ull,  -582 },
+            { 0xa3ab66580d5fdaf6ull,  -555 }, { 0xf3e2f893dec3f126ull,  -529 }, { 0xb5b5ada8aaff80b8ull,  -502 }, { 0x87625f056c7c4a8bull,  -475 }, { 0xc9bcff6034c13053ull,  -449 },
+            { 0x964e858c91ba2655ull,  -422 }, { 0xdff9772470297ebdull,  -396 }, { 0xa6dfbd9fb8e5b88full,  -369 }, { 0xf8a95fcf88747d94ull,  -343 }, { 0xb94470938fa89bcfull,  -316 },
+            { 0x8a08f0f8bf0f156bull,  -289 }, { 0xcdb02555653131b6ull,  -263 }, { 0x993fe2c6d07b7facull,  -236 }, { 0xe45c10c42a2b3b06ull,  -210 }, { 0xaa242499697392d3ull,  -183 },
+            { 0xfd87b5f28300ca0eull,  -157 }, { 0xbce5086492111aebull,  -130 }, { 0x8cbccc096f5088ccull,  -103 }, { 0xd1b71758e219652cull,   -77 }, { 0x9c40000000000000ull,   -50 },
+            { 0xe8d4a51000000000ull,   -24 }, { 0xad78ebc5ac620000ull,     3 }, { 0x813f3978f8940984ull,    30 }, { 0xc097ce7bc90715b3ull,    56 }, { 0x8f7e32ce7bea5c70ull,    83 },
+            { 0xd5d238a4abe98068ull,   109 }, { 0x9f4f2726179a2245ull,   136 }, { 0xed63a231d4c4fb27ull,   162 }, { 0xb0de65388cc8ada8ull,   189 }, { 0x83c7088e1aab65dbull,   216 },
+            { 0xc45d1df942711d9aull,   242 }, { 0x924d692ca61be758ull,   269 }, { 0xda01ee641a708deaull,   295 }, { 0xa26da3999aef774aull,   322 }, { 0xf209787bb47d6b85ull,   348 },
+            { 0xb454e4a179dd1877ull,   375 }, { 0x865b86925b9bc5c2ull,   402 }, { 0xc83553c5c8965d3dull,   428 }, { 0x952ab45cfa97a0b3ull,   455 }, { 0xde469fbd99a05fe3ull,   481 },
+            { 0xa59bc234db398c25ull,   508 }, { 0xf6c69a72a3989f5cull,   534 }, { 0xb7dcbf5354e9beceull,   561 }, { 0x88fcf317f22241e2ull,   588 }, { 0xcc20ce9bd35c78a5ull,   614 },
+            { 0x98165af37b2153dfull,   641 }, { 0xe2a0b5dc971f303aull,   667 }, { 0xa8d9d1535ce3b396ull,   694 }, { 0xfb9b7cd9a4a7443cull,   720 }, { 0xbb764c4ca7a44410ull,   747 },
+            { 0x8bab8eefb6409c1aull,   774 }, { 0xd01fef10a657842cull,   800 }, { 0x9b10a4e5e9913129ull,   827 }, { 0xe7109bfba19c0c9dull,   853 }, { 0xac2820d9623bf429ull,   880 },
+            { 0x80444b5e7aa7cf85ull,   907 }, { 0xbf21e44003acdd2dull,   933 }, { 0x8e679c2f5e44ff8full,   960 }, { 0xd433179d9c8cb841ull,   986 }, { 0x9e19db92b4e31ba9ull,  1013 },
+            { 0xeb96bf6ebadf77d9ull,  1039 }, { 0xaf87023b9bf0ee6bull,  1066 }
+        };
+
+        // 0.30102999566398114 is log10(2): dk is the base-2 exponent (plus an offset of 61)
+        // converted to a negated decimal exponent.
+        auto dk = (exponentBase2 + 61) * -0.30102999566398114;
+        auto ik = static_cast<int> (dk);
+        // Round dk up to a whole number, offset it by 347 so that it is relative to the
+        // table, then divide by the table's step of 8.
+        auto index = ((ik + (dk > ik ? 348 : 347)) >> 3) + 1;
+        K = 348 - (index << 3);
+        return powerOf10List[index];
+    }
+};
+
+// Implementations of the floatToString() overloads declared at the top of this file: each
+// one builds a temporary FloatToStringBuffer and copies its characters into a std::string.
+inline std::string floatToString (float value)
+{
+    const FloatToStringBuffer<float> converted (value, -1, false);
+    return converted.toString();
+}
+
+inline std::string floatToString (double value)
+{
+    const FloatToStringBuffer<double> converted (value, -1, false);
+    return converted.toString();
+}
+
+inline std::string floatToString (float value, int maxDecimals, bool omitPointIfPossible)
+{
+    const FloatToStringBuffer<float> converted (value, maxDecimals, omitPointIfPossible);
+    return converted.toString();
+}
+
+inline std::string floatToString (double value, int maxDecimals, bool omitPointIfPossible)
+{
+    const FloatToStringBuffer<double> converted (value, maxDecimals, omitPointIfPossible);
+    return converted.toString();
+}
+
+} // namespace choc::text
+
+#endif
--- a/modules/juce_javascript/choc/text/choc_JSON.h
+++ b/modules/juce_javascript/choc/text/choc_JSON.h
@ -0,0 +1,561 @@
+//
+//    ██████ ██   ██  ██████   ██████
+//   ██      ██   ██ ██    ██ ██            ** Classy Header-Only Classes **
+//   ██      ███████ ██    ██ ██
+//   ██      ██   ██ ██    ██ ██           https://github.com/Tracktion/choc
+//    ██████ ██   ██  ██████   ██████
+//
+//   CHOC is (C)2022 Tracktion Corporation, and is offered under the terms of the ISC license:
+//
+//   Permission to use, copy, modify, and/or distribute this software for any purpose with or
+//   without fee is hereby granted, provided that the above copyright notice and this permission
+//   notice appear in all copies. THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
+//   WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
+//   AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR
+//   CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+//   WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
+//   CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+
+#ifndef CHOC_JSON_HEADER_INCLUDED
+#define CHOC_JSON_HEADER_INCLUDED
+
+#include <limits>
+#include <sstream>
+#include <string_view>
+#include <stdexcept>
+
+#include "choc_UTF8.h"
+#include "choc_FloatToString.h"
+#include "../containers/choc_Value.h"
+
+#undef max   // It's never a smart idea to include any C headers before your C++ ones, as it
+#undef min   // risks polluting your namespace with all kinds of dangerous macros like these ones.
+
+namespace choc::json
+{
+
+//==============================================================================
+/// A parse exception, thrown by choc::json::parse() as needed.
+struct ParseError  : public std::runtime_error
+{
+    /// Creates an error with a description (available via what()) and the position in the
+    /// source text that it refers to.
+    ParseError (const char* message, choc::text::LineAndColumn lc)
+        : std::runtime_error (message), lineAndColumn (lc) {}
+
+    /// The position in the source text that this error refers to.
+    choc::text::LineAndColumn lineAndColumn;
+};
+
+/// Parses some JSON text into a choc::value::Value object.
+/// Any errors will result in a ParseError exception being thrown.
+[[nodiscard]] value::Value parse (text::UTF8Pointer);
+
+/// Parses some JSON text into a choc::value::Value object.
+/// Any errors will result in a ParseError exception being thrown.
+[[nodiscard]] value::Value parse (std::string_view);
+
+/// Attempts to parse a bare JSON value such as a number, string, object etc
+[[nodiscard]] value::Value parseValue (std::string_view);
+
+/// A helper function to create a JSON-friendly Value object with a set of properties.
+/// The argument list must contain pairs of names and values, e.g.
+///
+///  auto myObject = choc::json::create ("property1", 1234,
+///                                      "property2", "hello",
+///                                      "property3", 100.0f);
+///
+/// Essentially, this is a shorthand for calling choc::value::createObject()
+/// and passing it an empty type name.
+template <typename... Properties>
+[[nodiscard]] value::Value create (Properties&&... propertyNamesAndValues);
+
+//==============================================================================
+/// Formats a value as a JSON string.
+/// If useLineBreaks is true, it'll be formatted as multi-line JSON, if false it'll
+/// just be returned as a single line.
+[[nodiscard]] std::string toString (const value::ValueView&, bool useLineBreaks = false);
+
+/// Writes a version of a string to an output stream, with any illegal or non-ascii
+/// characters written as their equivalent JSON escape sequences.
+template <typename OutputStreamType>
+void writeWithEscapeCharacters (OutputStreamType&, text::UTF8Pointer sourceString);
+
+/// Returns a version of a string with any illegal or non-ascii characters converted
+/// into the equivalent JSON escape sequences.
+[[nodiscard]] std::string addEscapeCharacters (text::UTF8Pointer sourceString);
+
+/// Returns a version of a string with any illegal or non-ascii characters converted
+/// into the equivalent JSON escape sequences.
+[[nodiscard]] std::string addEscapeCharacters (std::string_view sourceString);
+
+/// Returns a version of a string wrapped in double-quotes, with any illegal or non-ascii
+/// characters converted into the equivalent JSON escape sequences.
+[[nodiscard]] std::string getEscapedQuotedString (std::string_view sourceString);
+
+/// Converts a double to a JSON-format string representation.
+std::string doubleToString (double value);
+
+
+
+//==============================================================================
+//        _        _           _  _
+//     __| |  ___ | |_   __ _ (_)| | ___
+//    / _` | / _ \| __| / _` || || |/ __|
+//   | (_| ||  __/| |_ | (_| || || |\__ \ _  _  _
+//    \__,_| \___| \__| \__,_||_||_||___/(_)(_)(_)
+//
+//   Code beyond this point is implementation detail...
+//
+//==============================================================================
+
+/// Writes the characters of `source` to `out`, stopping at the null terminator.
+/// Printable ASCII is written as-is; the quote, backslash and the control characters which
+/// have a short JSON escape are written as such; everything else is written as a \uXXXX
+/// sequence, using a surrogate pair for code points of 0x10000 and above.
+template <typename OutputStreamType>
+void writeWithEscapeCharacters (OutputStreamType& out, text::UTF8Pointer source)
+{
+    // Writes a 16-bit value as "\u" followed by four lower-case hex digits.
+    auto writeUnicode = [] (OutputStreamType& o, auto digit)
+    {
+        auto hexDigit = [] (auto value) -> char { return "0123456789abcdef"[value & 15]; };
+
+        o << "\\u" << hexDigit (digit >> 12) << hexDigit (digit >> 8) << hexDigit (digit >> 4) << hexDigit (digit);
+    };
+
+    for (;;)
+    {
+        auto c = *source;
+
+        switch (c)
+        {
+            case 0:  return;
+
+            // These are the only two-character escapes that JSON defines (other than "\/").
+            // In particular there is no "\a", so a bell character (7) must take the default
+            // route and be written as \u0007 to keep the output valid JSON.
+            case '\"':   out << "\\\""; break;
+            case '\\':   out << "\\\\"; break;
+            case '\n':   out << "\\n";  break;
+            case '\r':   out << "\\r";  break;
+            case '\t':   out << "\\t";  break;
+            case '\b':   out << "\\b";  break;
+            case '\f':   out << "\\f";  break;
+
+            default:
+                if (c > 31 && c < 127)
+                {
+                    out << static_cast<char> (c);
+                    break;
+                }
+
+                if (c >= 0x10000)
+                {
+                    auto pair = choc::text::splitCodePointIntoSurrogatePair (c);
+                    writeUnicode (out, pair.high);
+                    writeUnicode (out, pair.low);
+                    break;
+                }
+
+                writeUnicode (out, c);
+                break;
+        }
+
+        ++source;
+    }
+}
+
+/// Runs writeWithEscapeCharacters() into an in-memory stream and returns what it wrote.
+inline std::string addEscapeCharacters (text::UTF8Pointer source)
+{
+    std::ostringstream escaped (std::ios::binary);
+    writeWithEscapeCharacters (escaped, source);
+
+    return escaped.str();
+}
+
+/// As the UTF8Pointer overload, but taking a string_view.
+inline std::string addEscapeCharacters (std::string_view source)
+{
+    // A string_view needn't be null-terminated, so a terminated copy is made first.
+    const std::string terminatedCopy (source);
+
+    return addEscapeCharacters (text::UTF8Pointer (terminatedCopy.c_str()));
+}
+
+/// Returns the escaped form of the string, with a double-quote added at each end.
+inline std::string getEscapedQuotedString (std::string_view s)
+{
+    // A string_view needn't be null-terminated, so a terminated copy is made first.
+    const std::string terminatedCopy (s);
+
+    std::ostringstream quoted (std::ios::binary);
+    quoted << '"';
+    writeWithEscapeCharacters (quoted, text::UTF8Pointer (terminatedCopy.c_str()));
+    quoted << '"';
+
+    return quoted.str();
+}
+
+/// Finite values are formatted with floatToString(), dropping the ".0" from round numbers.
+/// Non-finite values are returned as the quoted strings "NaN", "Infinity" or "-Infinity".
+inline std::string doubleToString (double value)
+{
+    if (! std::isfinite (value))
+    {
+        if (std::isnan (value))
+            return "\"NaN\"";
+
+        return value < 0 ? "\"-Infinity\""
+                         : "\"Infinity\"";
+    }
+
+    return choc::text::floatToString (value, -1, true);
+}
+
+//==============================================================================
+/** Writes a value to a stream as JSON text.
+
+    If indentSize is zero, everything goes on a single line with ", " between items.
+    Otherwise each element of a non-empty array or object gets its own line, indented by
+    indentSize spaces per nesting level.
+*/
+template <typename Stream>
+struct Writer
+{
+    Stream& out;
+    uint32_t indentSize;
+    uint32_t currentIndent = 0;
+    static constexpr const char newLine = '\n';
+
+    std::string getIndent() const
+    {
+        return std::string (currentIndent, ' ');
+    }
+
+    /// Goes one nesting level deeper and begins a new line at that level.
+    void startIndent()
+    {
+        currentIndent += indentSize;
+        out << newLine << getIndent();
+    }
+
+    /// Comes back out by one nesting level and begins a new line at that level.
+    void endIndent()
+    {
+        currentIndent -= indentSize;
+        out << newLine << getIndent();
+    }
+
+    /// Writes the separator that goes between two consecutive elements or members.
+    void writeSeparator (bool multiLine)
+    {
+        if (multiLine)
+            out << "," << newLine << getIndent();
+        else
+            out << ", ";
+    }
+
+    /// Writes any value, recursing into objects, arrays and vectors.
+    /// A value of any other kind produces no output.
+    void dump (const value::ValueView& v)
+    {
+        if (v.isVoid())                         out << "null";
+        else if (v.isString())                  out << getEscapedQuotedString (v.getString());
+        else if (v.isBool())                    out << (v.getBool() ? "true" : "false");
+        else if (v.isFloat())                   out << doubleToString (v.get<double>());
+        else if (v.isInt())                     out << v.get<int64_t>();
+        else if (v.isObject())                  dumpObject (v);
+        else if (v.isArray() || v.isVector())   dumpArrayOrVector (v);
+    }
+
+    /// Writes the elements in square brackets.
+    void dumpArrayOrVector (const value::ValueView& v)
+    {
+        out << '[';
+        const auto numElements = v.size();
+        const bool multiLine = indentSize != 0 && numElements != 0;
+
+        if (multiLine)
+            startIndent();
+
+        for (uint32_t i = 0; i < numElements; ++i)
+        {
+            if (i != 0)
+                writeSeparator (multiLine);
+
+            dump (v[i]);
+        }
+
+        if (multiLine)
+            endIndent();
+
+        out << ']';
+    }
+
+    /// Writes the members as "name": value pairs in curly brackets.
+    void dumpObject (const value::ValueView& object)
+    {
+        out << '{';
+        const auto numMembers = object.size();
+        const bool multiLine = indentSize != 0 && numMembers != 0;
+
+        if (multiLine)
+            startIndent();
+
+        for (uint32_t i = 0; i < numMembers; ++i)
+        {
+            if (i != 0)
+                writeSeparator (multiLine);
+
+            auto member = object.getObjectMemberAt (i);
+            out << getEscapedQuotedString (member.name) << ": ";
+            dump (member.value);
+        }
+
+        if (multiLine)
+            endIndent();
+
+        out << '}';
+    }
+};
+
+/// Writes a value to the stream as JSON, using a 2-space indent if useMultipleLines is
+/// true, or a single line if it's false.
+template <typename Stream>
+void writeAsJSON (Stream& output, const value::ValueView& value, bool useMultipleLines)
+{
+    Writer<Stream> writer { output, useMultipleLines ? 2u : 0u };
+    writer.dump (value);
+}
+
+/// Runs writeAsJSON() into an in-memory stream and returns the text that it wrote.
+inline std::string toString (const value::ValueView& v, bool useLineBreaks)
+{
+    std::ostringstream json (std::ios::binary);
+    writeAsJSON (json, v, useLineBreaks);
+
+    return json.str();
+}
+
+//==============================================================================
+/// Throws a ParseError built from the given message and the position that
+/// text::findLineAndColumn reports for errorPos within source.
+[[noreturn]] static inline void throwParseError (const char* error, text::UTF8Pointer source, text::UTF8Pointer errorPos)
+{
+    auto location = text::findLineAndColumn (source, errorPos);
+    throw ParseError (error, location);
+}
+
+/// Parses JSON from a UTF8Pointer (which iterates null-terminated UTF-8 text).
+///
+/// If parseBareValue is true, a single JSON value of any kind is parsed. Otherwise
+/// the text must start (after whitespace) with an object or an array; text that
+/// holds nothing but whitespace yields a default-constructed Value.
+///
+/// Malformed input is reported by throwing a ParseError via throwParseError().
+inline value::Value parse (text::UTF8Pointer text, bool parseBareValue)
+{
+    struct Parser
+    {
+        // 'source' is the start of the whole text (used to work out error positions),
+        // 'current' is the read position.
+        text::UTF8Pointer source, current;
+
+        bool isEOF() const            { return current.empty(); }
+        uint32_t peek() const         { return *current; }
+        uint32_t pop()                { return current.popFirstChar(); }
+        bool popIf (char c)           { return current.skipIfStartsWith (c); }
+        bool popIf (const char* c)    { return current.skipIfStartsWith (c); }
+
+        static bool isWhitespace (uint32_t c)   { return c == ' ' || (c <= 13 && c >= 9); }
+
+        // Advances a copy of the position and only commits it while the popped character is whitespace.
+        void skipWhitespace()                   { auto p = current; while (isWhitespace (p.popFirstChar())) current = p; }
+
+        [[noreturn]] void throwError (const char* error, text::UTF8Pointer errorPos)    { throwParseError (error, source, errorPos); }
+        [[noreturn]] void throwError (const char* error)                                { throwError (error, current); }
+
+        /// Parses a document whose top-level item must be an array or an object.
+        value::Value parseTopLevel()
+        {
+            skipWhitespace();
+
+            if (popIf ('[')) return parseArray();
+            if (popIf ('{')) return parseObject();
+            if (! isEOF()) throwError ("Expected an object or array");
+            return {};
+        }
+
+        /// Parses the rest of an array, the opening '[' having already been consumed.
+        value::Value parseArray()
+        {
+            auto result = value::createEmptyArray();
+            auto arrayStart = current;
+
+            skipWhitespace();
+            if (popIf (']')) return result;
+
+            for (;;)
+            {
+                skipWhitespace();
+                if (isEOF())  throwError ("Unexpected EOF in array declaration", arrayStart);
+
+                result.addArrayElement (parseValue());
+                skipWhitespace();
+
+                if (popIf (',')) continue;
+                if (popIf (']')) break;
+                throwError ("Expected ',' or ']'");
+            }
+
+            return result;
+        }
+
+        /// Parses the rest of an object, the opening '{' having already been consumed.
+        value::Value parseObject()
+        {
+            auto result = value::createObject ({});
+            auto objectStart = current;
+
+            skipWhitespace();
+            if (popIf ('}')) return result;
+
+            for (;;)
+            {
+                skipWhitespace();
+                if (isEOF())  throwError ("Unexpected EOF in object declaration", objectStart);
+
+                if (! popIf ('"')) throwError ("Expected a name");
+                auto nameStart = current;
+                auto name = parseString();
+
+                if (name.empty())
+                    throwError ("Property names cannot be empty", nameStart);
+
+                skipWhitespace();
+                if (! popIf (':')) throwError ("Expected ':'");
+                result.addMember (std::move (name), parseValue());
+                skipWhitespace();
+
+                if (popIf (',')) continue;
+                if (popIf ('}')) break;
+                throwError ("Expected ',' or '}'");
+            }
+
+            return result;
+        }
+
+        /// Parses a single value of any kind, choosing the parser from its first character.
+        value::Value parseValue()
+        {
+            skipWhitespace();
+            auto startPos = current;
+
+            switch (pop())
+            {
+                case '[':    return parseArray();
+                case '{':    return parseObject();
+                case '"':    return value::createString (parseString());
+                case '-':    skipWhitespace(); return parseNumber (true);
+                case 'n':    if (popIf ("ull")) return {}; break;
+                case 't':    if (popIf ("rue"))  return value::createBool (true); break;
+                case 'f':    if (popIf ("alse")) return value::createBool (false); break;
+
+                case '0': case '1': case '2': case '3': case '4':
+                case '5': case '6': case '7': case '8': case '9':
+                    // The digit is part of the number, so rewind before handing over.
+                    current = startPos;
+                    return parseNumber (false);
+
+                default: break;
+            }
+
+            throwError ("Syntax error", startPos);
+        }
+
+        /// Parses an unsigned number starting at the current position, negating the
+        /// result if 'negate' is true (the caller has already consumed the '-').
+        /// Numbers with no dot or exponent that fit strictly inside the long long
+        /// range become int64 values; everything else becomes a float64.
+        value::Value parseNumber (bool negate)
+        {
+            auto startPos = current;
+            bool hadDot = false, hadExponent = false;
+
+            for (;;)
+            {
+                auto lastPos = current;
+                auto c = pop();
+
+                if (c >= '0' && c <= '9')  continue;
+                if (c == '.' && ! hadDot)  { hadDot = true; continue; }
+
+                if (! hadExponent && (c == 'e' || c == 'E'))
+                {
+                    hadDot = true;
+                    hadExponent = true;
+
+                    // JSON allows the exponent to carry either sign, e.g. "1e+5" or "1e-5".
+                    if (! popIf ('-'))
+                        popIf ('+');
+
+                    continue;
+                }
+
+                // With nothing consumed (e.g. a lone '-'), strtoll would convert nothing and
+                // leave its end pointer at the start, which must not be mistaken for success.
+                const bool hasContent = lastPos.data() != startPos.data();
+
+                if (hasContent && (isWhitespace (c) || c == ',' || c == '}' || c == ']' || c == 0))
+                {
+                    current = lastPos;
+                    char* endOfParsedNumber = nullptr;
+
+                    if (! (hadDot || hadExponent))
+                    {
+                        auto v = std::strtoll (startPos.data(), &endOfParsedNumber, 10);
+
+                        // The limit values are what strtoll returns on overflow, so those
+                        // fall through and are parsed as floating point instead.
+                        if (endOfParsedNumber == lastPos.data()
+                             && v != std::numeric_limits<long long>::max()
+                             && v != std::numeric_limits<long long>::min())
+                            return value::createInt64 (static_cast<int64_t> (negate ? -v : v));
+                    }
+
+                    auto v = std::strtod (startPos.data(), &endOfParsedNumber);
+
+                    if (endOfParsedNumber == lastPos.data())
+                        return value::createFloat64 (negate ? -v : v);
+                }
+
+                throwError ("Syntax error in number", lastPos);
+            }
+        }
+
+        /// Parses the rest of a string, the opening quote having already been consumed,
+        /// and returns its unescaped content as UTF-8.
+        std::string parseString()
+        {
+            std::ostringstream s (std::ios::binary);
+
+            for (;;)
+            {
+                auto charPos = current;
+                auto c = pop();
+
+                if (c == '"')
+                    break;
+
+                // An unescaped null means the text ran out before the closing quote;
+                // without this check the loop would carry on popping past the end.
+                if (c == 0)
+                    throwError ("Unexpected EOF in string constant", charPos);
+
+                if (c == '\\')
+                {
+                    auto errorPos = current;
+                    c = pop();
+
+                    switch (c)
+                    {
+                        case 'a':  c = '\a'; break;
+                        case 'b':  c = '\b'; break;
+                        case 'f':  c = '\f'; break;
+                        case 'n':  c = '\n'; break;
+                        case 'r':  c = '\r'; break;
+                        case 't':  c = '\t'; break;
+                        case 'u':  c = parseUnicodeCharacterNumber (false); break;
+                        case 0:    throwError ("Unexpected EOF in string constant", errorPos);
+                        default:   break;  // any other escaped character stands for itself
+                    }
+                }
+
+                char utf8Bytes[8];
+                auto numBytes = text::convertUnicodeCodepointToUTF8 (utf8Bytes, c);
+
+                for (uint32_t i = 0; i < numBytes; ++i)
+                    s << utf8Bytes[i];
+            }
+
+            return s.str();
+        }
+
+        /// Parses the four hex digits that follow a "\u". A high surrogate must be
+        /// followed by a second "\u" escape holding a low surrogate, and the pair is
+        /// combined into one codepoint. isLowSurrogate is true for that second call.
+        uint32_t parseUnicodeCharacterNumber (bool isLowSurrogate)
+        {
+            uint32_t result = 0;
+
+            for (int i = 4; --i >= 0;)
+            {
+                auto errorPos = current;
+                auto digit = pop();
+
+                if (digit >= '0' && digit <= '9')         digit -= '0';
+                else if (digit >= 'a' && digit <= 'f')    digit = 10 + (digit - 'a');
+                else if (digit >= 'A' && digit <= 'F')    digit = 10 + (digit - 'A');
+                else throwError ("Syntax error in unicode character", errorPos);
+
+                result = (result << 4) + digit;
+            }
+
+            if (isLowSurrogate && ! text::isUnicodeLowSurrogate (result))
+                throwError ("Expected a unicode low surrogate codepoint");
+
+            if (text::isUnicodeHighSurrogate (result))
+            {
+                if (! isLowSurrogate && popIf ("\\u"))
+                    return text::createUnicodeFromHighAndLowSurrogates ({ result, parseUnicodeCharacterNumber (true) });
+
+                throwError ("Expected a unicode low surrogate codepoint");
+            }
+
+            return result;
+        }
+    };
+
+    Parser p { text, text };
+    return parseBareValue ? p.parseValue()
+                          : p.parseTopLevel();
+}
+
+/// Parses JSON from a buffer of numbytes bytes, which need not be null-terminated.
+/// A null text pointer is treated as empty text. If parseBareValue is true any
+/// single value is accepted, otherwise the text must be an object or array.
+/// Throws a ParseError if the bytes are not valid UTF-8 or the JSON is malformed.
+inline value::Value parse (const char* text, size_t numbytes, bool parseBareValue)
+{
+    if (text == nullptr)
+    {
+        text = "";
+        numbytes = 0;
+    }
+
+    if (auto error = text::findInvalidUTF8Data (text, numbytes))
+        throwParseError ("Illegal UTF8 data", text::UTF8Pointer (text), text::UTF8Pointer (error));
+
+    // The UTF8Pointer overload expects null-terminated text and is never told numbytes,
+    // so a buffer that isn't terminated at exactly that length (e.g. a std::string_view
+    // onto part of a larger string) would be read beyond its end. Parsing a terminated
+    // copy keeps every read inside the range the caller supplied.
+    const std::string terminatedCopy (text, numbytes);
+    return parse (text::UTF8Pointer (terminatedCopy.c_str()), parseBareValue);
+}
+
+/// Parses the given text as JSON whose top-level item is an object or array.
+inline value::Value parse (std::string_view text)
+{
+    constexpr bool allowBareValue = false;
+    return parse (text.data(), text.length(), allowBareValue);
+}
+
+/// Parses the given text as a single JSON value of any kind.
+inline value::Value parseValue (std::string_view text)
+{
+    constexpr bool allowBareValue = true;
+    return parse (text.data(), text.length(), allowBareValue);
+}
+
+/// Creates an object value from a flat list of alternating names and values,
+/// which are forwarded to choc::value::createObject with an empty first argument.
+/// The number of arguments must be even.
+template <typename... Properties>
+value::Value create (Properties&&... properties)
+{
+    constexpr bool hasEvenNumberOfArguments = (sizeof...(Properties) % 2) == 0;
+    static_assert (hasEvenNumberOfArguments, "The arguments must be a sequence of name, value pairs");
+
+    return choc::value::createObject ({}, std::forward<Properties> (properties)...);
+}
+
+
+} // namespace choc::json
+
+#endif
--- a/modules/juce_javascript/choc/text/choc_StringUtilities.h
+++ b/modules/juce_javascript/choc/text/choc_StringUtilities.h
@ -0,0 +1,600 @@
+//
+//    ██████ ██   ██  ██████   ██████
+//   ██      ██   ██ ██    ██ ██            ** Classy Header-Only Classes **
+//   ██      ███████ ██    ██ ██
+//   ██      ██   ██ ██    ██ ██           https://github.com/Tracktion/choc
+//    ██████ ██   ██  ██████   ██████
+//
+//   CHOC is (C)2022 Tracktion Corporation, and is offered under the terms of the ISC license:
+//
+//   Permission to use, copy, modify, and/or distribute this software for any purpose with or
+//   without fee is hereby granted, provided that the above copyright notice and this permission
+//   notice appear in all copies. THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
+//   WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
+//   AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR
+//   CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+//   WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
+//   CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+
+#ifndef CHOC_STRING_UTILS_HEADER_INCLUDED
+#define CHOC_STRING_UTILS_HEADER_INCLUDED
+
+#include <cctype>
+#include <string>
+#include <vector>
+#include <cmath>
+#include <chrono>
+#include <memory>
+#include <algorithm>
+#include <cwctype>
+#include "../platform/choc_Assert.h"
+
+namespace choc::text
+{
+
+//==============================================================================
+/// Returns true for a space, or for any character from '\t' (9) to '\r' (13).
+inline bool isWhitespace (char c)                               { return c == ' ' || (c >= '\t' && c <= '\r'); }
+/// Returns true for the characters '0' to '9'.
+inline bool isDigit (char c)                                    { return c >= '0' && c <= '9'; }
+
+/// Replaces all occurrences of one or more substrings.
+/// The arguments must be a sequence of pairs of strings, where the first of each pair is the string to
+/// look for, followed by its replacement.
+template <typename StringType, typename... OtherReplacements>
+[[nodiscard]] std::string replace (StringType textToSearch,
+                                   std::string_view firstSubstringToReplace, std::string_view firstReplacement,
+                                   OtherReplacements&&... otherPairsOfStringsToReplace);
+
+/// Returns a string with any whitespace trimmed from its start and end.
+[[nodiscard]] std::string trim (std::string textToTrim);
+
+/// Returns a string with any whitespace trimmed from its start and end.
+[[nodiscard]] std::string_view trim (std::string_view textToTrim);
+
+/// Returns a string with any whitespace trimmed from its start and end.
+[[nodiscard]] std::string_view trim (const char* textToTrim);
+
+/// Returns a string with any whitespace trimmed from its start.
+[[nodiscard]] std::string trimStart (std::string textToTrim);
+
+/// Returns a string with any whitespace trimmed from its start.
+[[nodiscard]] std::string_view trimStart (std::string_view textToTrim);
+
+/// Returns a string with any whitespace trimmed from its start.
+[[nodiscard]] std::string_view trimStart (const char* textToTrim);
+
+/// Returns a string with any whitespace trimmed from its end.
+[[nodiscard]] std::string trimEnd (std::string textToTrim);
+
+/// Returns a string with any whitespace trimmed from its end.
+[[nodiscard]] std::string_view trimEnd (std::string_view textToTrim);
+
+/// Returns a string with any whitespace trimmed from its end.
+[[nodiscard]] std::string_view trimEnd (const char* textToTrim);
+
+/// If the string begins with one or more instances of the given character, this
+/// skips past them, returning the remainder of the string.
+[[nodiscard]] std::string_view trimCharacterAtStart (std::string_view textToTrim, char characterToSkip);
+
+/// If the given character is at the start and end of the string, it trims it away.
+[[nodiscard]] std::string removeOuterCharacter (std::string text, char outerChar);
+
+/// If the text both starts and ends with a double-quote, returns it without them.
+[[nodiscard]] inline std::string removeDoubleQuotes (std::string text)
+{
+    constexpr char doubleQuote = '"';
+    return removeOuterCharacter (std::move (text), doubleQuote);
+}
+
+/// If the text both starts and ends with a single-quote, returns it without them.
+[[nodiscard]] inline std::string removeSingleQuotes (std::string text)
+{
+    constexpr char singleQuote = '\'';
+    return removeOuterCharacter (std::move (text), singleQuote);
+}
+
+/// Returns the text with a double-quote character added at each end.
+[[nodiscard]] inline std::string addDoubleQuotes (std::string text)
+{
+    text.insert (text.begin(), '"');
+    text.push_back ('"');
+    return text;
+}
+
+/// Returns the text with a single-quote character added at each end.
+[[nodiscard]] inline std::string addSingleQuotes (std::string text)
+{
+    text.insert (text.begin(), '\'');
+    text.push_back ('\'');
+    return text;
+}
+
+/// Returns a copy of the string with std::tolower applied to each of its chars.
+[[nodiscard]] std::string toLowerCase (std::string);
+/// Returns a copy of the string with std::toupper applied to each of its chars.
+[[nodiscard]] std::string toUpperCase (std::string);
+
+/// Splits a string at every character for which the predicate returns true.
+/// If includeDelimitersInResult is true, each delimiter character is kept at the
+/// end of the token that precedes it; otherwise delimiters are dropped.
+/// An empty input produces no tokens; any other input always ends with a final
+/// token holding whatever follows the last delimiter (which may be empty).
+template <typename IsDelimiterChar>
+[[nodiscard]] std::vector<std::string> splitString (std::string_view textToSplit,
+                                                    IsDelimiterChar&& isDelimiterChar,
+                                                    bool includeDelimitersInResult);
+
+/// Splits a string at multi-character delimiters: a delimiter begins at a character
+/// for which isDelimiterStart returns true, and extends over all the following
+/// characters for which isDelimiterBody returns true.
+/// If includeDelimitersInResult is true, each delimiter is kept at the end of the
+/// token that precedes it; otherwise delimiters are dropped.
+template <typename CharStartsDelimiter, typename CharIsInDelimiterBody>
+[[nodiscard]] std::vector<std::string> splitString (std::string_view textToSplit,
+                                                    CharStartsDelimiter&& isDelimiterStart,
+                                                    CharIsInDelimiterBody&& isDelimiterBody,
+                                                    bool includeDelimitersInResult);
+
+/// Splits a string at every occurrence of the given delimiter character.
+[[nodiscard]] std::vector<std::string> splitString (std::string_view textToSplit,
+                                                    char delimiterCharacter,
+                                                    bool includeDelimitersInResult);
+
+/// Splits a string at runs of whitespace characters (as defined by isWhitespace()),
+/// optionally keeping each run at the end of the token that precedes it.
+[[nodiscard]] std::vector<std::string> splitAtWhitespace (std::string_view text,
+                                                          bool keepDelimiters = false);
+
+/// Splits a string at newline characters, returning an array of strings.
+[[nodiscard]] std::vector<std::string> splitIntoLines (std::string_view text,
+                                                       bool includeNewLinesInResult);
+
+/// Joins some kind of array of strings into a single string, adding the given separator
+/// between them (but not adding it at the start or end)
+template <typename ArrayOfStrings>
+[[nodiscard]] std::string joinStrings (const ArrayOfStrings& strings,
+                                       std::string_view separator);
+
+/// Returns true if this text contains the given sub-string.
+bool contains (std::string_view text, std::string_view possibleSubstring);
+/// Returns true if this text starts with the given character.
+bool startsWith (std::string_view text, char possibleStart);
+/// Returns true if this text starts with the given sub-string.
+bool startsWith (std::string_view text, std::string_view possibleStart);
+/// Returns true if this text ends with the given character.
+bool endsWith (std::string_view text, char possibleEnd);
+/// Returns true if this text ends with the given sub-string.
+bool endsWith (std::string_view text, std::string_view possibleEnd);
+
+/// Calculates the Levenshtein distance between two strings.
+template <typename StringType>
+size_t getLevenshteinDistance (const StringType& string1,
+                               const StringType& string2);
+
+/// Converts a hex character to a number 0-15, or -1 if it's not a valid hex digit.
+int hexDigitToInt (uint32_t unicodeChar);
+
+/// Returns a hex string for the given value.
+/// If the minimum number of digits is non-zero, it will be zero-padded to fill this length.
+template <typename IntegerType>
+std::string createHexString (IntegerType value, int minNumDigits = 0);
+
+/// Returns a truncated, easy-to-read version of a time as hours, seconds or milliseconds,
+/// depending on its magnitude. The use-cases include things like logging or console app output.
+std::string getDurationDescription (std::chrono::duration<double, std::micro>);
+
+/// Returns an easy-to-read description of a size in bytes. Depending on the magnitude,
+/// it might choose different units such as GB, MB, KB or just bytes.
+std::string getByteSizeDescription (uint64_t sizeInBytes);
+
+/// Encodes a string as a legal URI, using percent-encoding (aka URL encoding)
+std::string percentEncodeURI (std::string_view text);
+
+
+//==============================================================================
+//        _        _           _  _
+//     __| |  ___ | |_   __ _ (_)| | ___
+//    / _` | / _ \| __| / _` || || |/ __|
+//   | (_| ||  __/| |_ | (_| || || |\__ \ _  _  _
+//    \__,_| \___| \__| \__,_||_||_||___/(_)(_)(_)
+//
+//   Code beyond this point is implementation detail...
+//
+//==============================================================================
+
+/// Maps '0'-'9' to 0-9 and 'a'-'f' or 'A'-'F' to 10-15; any other character gives -1.
+inline int hexDigitToInt (uint32_t c)
+{
+    if (c >= U'0' && c <= U'9')   return static_cast<int> (c - U'0');
+    if (c >= U'a' && c <= U'f')   return static_cast<int> (c - U'a') + 10;
+    if (c >= U'A' && c <= U'F')   return static_cast<int> (c - U'A') + 10;
+
+    return -1;
+}
+
+/// Returns the value as lower-case hex, treating it as unsigned and zero-padding
+/// on the left until it has at least minNumDigits digits (which must be <= 32).
+template <typename IntegerType>
+std::string createHexString (IntegerType v, int minNumDigits)
+{
+    static_assert (std::is_integral<IntegerType>::value, "Need to pass integers into this method");
+    using UnsignedType = typename std::make_unsigned<IntegerType>::type;
+
+    auto remaining = static_cast<UnsignedType> (v);
+    CHOC_ASSERT (minNumDigits <= 32);
+
+    // The digits are produced least-significant first, so they're written
+    // backwards from the end of the buffer.
+    char buffer[40];
+    char* const bufferEnd = buffer + sizeof (buffer) - 1;
+    char* firstDigit = bufferEnd;
+    *bufferEnd = 0;
+
+    do
+    {
+        *--firstDigit = "0123456789abcdef"[static_cast<uint32_t> (remaining) & 15u];
+        remaining = static_cast<UnsignedType> (remaining >> 4);
+        --minNumDigits;
+    }
+    while (remaining != 0 || minNumDigits > 0);
+
+    return std::string (firstDigit, bufferEnd);
+}
+
+/// Replaces all occurrences of one or more substrings.
+/// After the text to search, the arguments are pairs of strings: the string to look
+/// for followed by its replacement. The pairs are applied one after another, each
+/// one working on the result of the previous pair.
+/// A pair whose search string is empty is ignored.
+template <typename StringType, typename... OtherReplacements>
+std::string replace (StringType textToSearch, std::string_view firstToReplace, std::string_view firstReplacement,
+                     OtherReplacements&&... otherPairsOfStringsToReplace)
+{
+    static_assert ((sizeof... (otherPairsOfStringsToReplace) & 1u) == 0,
+                   "This function expects a list of pairs of strings as its arguments");
+
+    if constexpr (std::is_same<const StringType, const std::string_view>::value || std::is_same<const StringType, const char* const>::value)
+    {
+        // Non-owning inputs are copied into a std::string which can then be edited in place.
+        return replace (std::string (textToSearch), firstToReplace, firstReplacement,
+                        std::forward<OtherReplacements> (otherPairsOfStringsToReplace)...);
+    }
+    else if constexpr (sizeof... (otherPairsOfStringsToReplace) == 0)
+    {
+        // An empty search string matches at every position, so the loop below would
+        // never reach npos and would never terminate.
+        if (firstToReplace.empty())
+            return textToSearch;
+
+        size_t pos = 0;
+
+        for (;;)
+        {
+            pos = textToSearch.find (firstToReplace, pos);
+
+            if (pos == std::string::npos)
+                return textToSearch;
+
+            textToSearch.replace (pos, firstToReplace.length(), firstReplacement);
+
+            // Skip over the inserted text so that it is never itself searched.
+            pos += firstReplacement.length();
+        }
+    }
+    else
+    {
+        return replace (replace (std::move (textToSearch), firstToReplace, firstReplacement),
+                        std::forward<OtherReplacements> (otherPairsOfStringsToReplace)...);
+    }
+}
+
+/// Trims whitespace from both ends of an owned string: the end first, then the start.
+inline std::string trim (std::string text)
+{
+    auto withoutTrailingSpace = trimEnd (std::move (text));
+    return trimStart (std::move (withoutTrailingSpace));
+}
+
+/// Trims whitespace from both ends of a string view: the end first, then the start.
+inline std::string_view trim (std::string_view text)
+{
+    const auto withoutTrailingSpace = trimEnd (text);
+    return trimStart (withoutTrailingSpace);
+}
+
+// The C-string overloads wrap the pointer in a view and defer to the string_view versions.
+inline std::string_view trim (const char* text)
+{
+    const std::string_view view (text);
+    return trim (view);
+}
+
+inline std::string_view trimStart (const char* text)
+{
+    const std::string_view view (text);
+    return trimStart (view);
+}
+
+inline std::string_view trimEnd (const char* text)
+{
+    const std::string_view view (text);
+    return trimEnd (view);
+}
+
+/// Returns the string without its leading whitespace (as defined by isWhitespace()).
+inline std::string trimStart (std::string text)
+{
+    const auto firstKept = std::find_if (text.begin(), text.end(),
+                                         [] (char c) { return ! isWhitespace (c); });
+
+    // Nothing to remove: this covers both an empty string and one with no leading whitespace.
+    if (firstKept == text.begin())
+        return text;
+
+    return { firstKept, text.end() };
+}
+
+/// Returns the part of the view that starts at its first non-whitespace character,
+/// or an empty view if there is no such character.
+inline std::string_view trimStart (std::string_view text)
+{
+    for (size_t i = 0; i < text.length(); ++i)
+        if (! isWhitespace (text[i]))
+            return text.substr (i);
+
+    return {};
+}
+
+/// Returns the string without its trailing whitespace (as defined by isWhitespace()).
+inline std::string trimEnd (std::string text)
+{
+    auto keptLength = text.length();
+
+    while (keptLength != 0 && isWhitespace (text[keptLength - 1]))
+        --keptLength;
+
+    text.resize (keptLength);
+    return text;
+}
+
+/// Returns the part of the view that ends at its last non-whitespace character,
+/// or an empty view if there is no such character.
+inline std::string_view trimEnd (std::string_view text)
+{
+    auto keptLength = text.length();
+
+    while (keptLength != 0 && isWhitespace (text[keptLength - 1]))
+        --keptLength;
+
+    if (keptLength == 0)
+        return {};
+
+    return text.substr (0, keptLength);
+}
+
+/// Skips any run of characterToSkip at the start of the text and returns the rest,
+/// or an empty view if the text contains nothing else.
+inline std::string_view trimCharacterAtStart (std::string_view textToTrim, char characterToSkip)
+{
+    const auto firstKept = textToTrim.find_first_not_of (characterToSkip);
+
+    if (firstKept == std::string_view::npos)
+        return {};
+
+    return textToTrim.substr (firstKept);
+}
+
+/// If the string is at least two characters long and both its first and last
+/// characters are outerChar, returns it without them; otherwise returns it unchanged.
+inline std::string removeOuterCharacter (std::string t, char outerChar)
+{
+    const bool isWrapped = t.length() >= 2
+                             && t.front() == outerChar
+                             && t.back() == outerChar;
+
+    if (! isWrapped)
+        return t;
+
+    return t.substr (1, t.length() - 2);
+}
+
+/// Returns the string with std::tolower applied to each char.
+inline std::string toLowerCase (std::string s)
+{
+    // std::tolower needs a value that fits in an unsigned char, hence the casts.
+    for (auto& c : s)
+        c = static_cast<char> (std::tolower (static_cast<unsigned char> (c)));
+
+    return s;
+}
+
+/// Returns the string with std::toupper applied to each char.
+inline std::string toUpperCase (std::string s)
+{
+    // std::toupper needs a value that fits in an unsigned char, hence the casts.
+    for (auto& c : s)
+        c = static_cast<char> (std::toupper (static_cast<unsigned char> (c)));
+
+    return s;
+}
+
+/// Splits the text at multi-character delimiters. A delimiter starts at a character
+/// accepted by isDelimiterStart and continues over the following characters accepted
+/// by isDelimiterBody. If keepDelimiters is true each delimiter stays attached to the
+/// end of the token before it. Non-empty text always yields a final token for whatever
+/// follows the last delimiter, even if that is empty; empty text yields no tokens.
+template <typename CharStartsDelimiter, typename CharIsInDelimiterBody>
+std::vector<std::string> splitString (std::string_view source,
+                                      CharStartsDelimiter&& isDelimiterStart,
+                                      CharIsInDelimiterBody&& isDelimiterBody,
+                                      bool keepDelimiters)
+{
+    std::vector<std::string> tokens;
+    const auto length = source.length();
+    size_t tokenStart = 0, pos = 0;
+
+    while (pos < length)
+    {
+        if (! isDelimiterStart (source[pos]))
+        {
+            ++pos;
+            continue;
+        }
+
+        const auto delimiterStart = pos++;
+
+        while (pos < length && isDelimiterBody (source[pos]))
+            ++pos;
+
+        const auto tokenEnd = keepDelimiters ? pos : delimiterStart;
+        tokens.emplace_back (source.substr (tokenStart, tokenEnd - tokenStart));
+        tokenStart = pos;
+    }
+
+    if (length != 0)
+        tokens.emplace_back (source.substr (tokenStart));
+
+    return tokens;
+}
+
+/// Splits the text at every character accepted by isDelimiterChar. If keepDelimiters
+/// is true each delimiter stays attached to the end of the token before it. Non-empty
+/// text always yields a final token for whatever follows the last delimiter, even if
+/// that is empty; empty text yields no tokens.
+template <typename IsDelimiterChar>
+std::vector<std::string> splitString (std::string_view source, IsDelimiterChar&& isDelimiterChar, bool keepDelimiters)
+{
+    std::vector<std::string> tokens;
+    size_t tokenStart = 0;
+
+    for (size_t i = 0; i < source.length(); ++i)
+    {
+        if (isDelimiterChar (source[i]))
+        {
+            const auto tokenEnd = keepDelimiters ? i + 1 : i;
+            tokens.emplace_back (source.substr (tokenStart, tokenEnd - tokenStart));
+            tokenStart = i + 1;
+        }
+    }
+
+    if (! source.empty())
+        tokens.emplace_back (source.substr (tokenStart));
+
+    return tokens;
+}
+
+/// Splits the text at every occurrence of delimiterCharacter, by handing an
+/// equality predicate to the predicate-based splitString().
+inline std::vector<std::string> splitString (std::string_view text, char delimiterCharacter, bool keepDelimiters)
+{
+    const auto isDelimiter = [delimiterCharacter] (char c) { return c == delimiterCharacter; };
+    return splitString (text, isDelimiter, keepDelimiters);
+}
+
+/// Splits the text at runs of whitespace: the same isWhitespace() test is used both
+/// to start a delimiter and to extend it.
+inline std::vector<std::string> splitAtWhitespace (std::string_view text, bool keepDelimiters)
+{
+    const auto isSpace = [] (char c) { return isWhitespace (c); };
+    return splitString (text, isSpace, isSpace, keepDelimiters);
+}
+
+/// Splits the text at each '\n' character, optionally keeping the '\n' at the end
+/// of each line that it terminates.
+inline std::vector<std::string> splitIntoLines (std::string_view text, bool includeNewLinesInResult)
+{
+    constexpr char newLine = '\n';
+    return splitString (text, newLine, includeNewLinesInResult);
+}
+
+/// Concatenates the strings in the array, putting the separator between each
+/// adjacent pair but not at the start or end. An empty array gives an empty string.
+template <typename ArrayOfStrings>
+inline std::string joinStrings (const ArrayOfStrings& strings, std::string_view sep)
+{
+    if (strings.empty())
+        return {};
+
+    // Work out an upper bound for the final length so that the result is allocated once.
+    auto totalLength = sep.length() * strings.size();
+
+    for (const auto& item : strings)
+        totalLength += item.length();
+
+    std::string joined (strings.front());
+    joined.reserve (totalLength);
+
+    const auto numStrings = strings.size();
+
+    for (size_t index = 1; index < numStrings; ++index)
+    {
+        joined += sep;
+        joined += strings[index];
+    }
+
+    return joined;
+}
+
+/// Returns true if the sub-string s occurs anywhere in t.
+inline bool contains (std::string_view t, std::string_view s)
+{
+    const auto foundAt = t.find (s);
+    return foundAt != std::string::npos;
+}
+
+/// Returns true if t is non-empty and its first character is s.
+inline bool startsWith (std::string_view t, char s)
+{
+    if (t.empty())
+        return false;
+
+    return t.front() == s;
+}
+
+/// Returns true if t is non-empty and its last character is s.
+inline bool endsWith (std::string_view t, char s)
+{
+    if (t.empty())
+        return false;
+
+    return t.back() == s;
+}
+
+/// Returns true if t is at least as long as s and its first s.length() characters equal s.
+inline bool startsWith (std::string_view t, std::string_view s)
+{
+    if (t.length() < s.length())
+        return false;
+
+    return t.compare (0, s.length(), s) == 0;
+}
+
+/// Returns true if t is at least as long as s and its last s.length() characters equal s.
+inline bool endsWith (std::string_view t, std::string_view s)
+{
+    if (t.length() < s.length())
+        return false;
+
+    const auto tailStart = t.length() - s.length();
+    return t.compare (tailStart, s.length(), s) == 0;
+}
+
+/// Returns a short, human-readable description of a duration.
+///
+/// The form depends on the magnitude (after truncating to whole microseconds):
+///   - an hour or more:       "H hours M min", rounded to the nearest minute
+///   - a minute or more:      "M min S sec", rounded to the nearest second
+///   - a second or more:      seconds with up to 2 decimal places, e.g. "1.5 sec"
+///   - a millisecond or more: milliseconds with up to 2 decimal places, e.g. "2.25 ms"
+///   - anything smaller:      a whole number of microseconds
+///
+/// Zero gives "0 sec", and a negative duration gives the description of its
+/// magnitude with a leading '-'.
+inline std::string getDurationDescription (std::chrono::duration<double, std::micro> d)
+{
+    const auto microseconds = std::chrono::duration_cast<std::chrono::microseconds> (d).count();
+
+    if (microseconds < 0)    return "-" + getDurationDescription (-d);
+    if (microseconds == 0)   return "0 sec";
+
+    constexpr int64_t microsPerMs   = 1000;
+    constexpr int64_t microsPerSec  = 1000 * microsPerMs;
+    constexpr int64_t microsPerMin  = 60 * microsPerSec;
+    constexpr int64_t microsPerHour = 60 * microsPerMin;
+
+    // For the two-unit forms, the total is rounded once to the smaller unit and then
+    // divided exactly. Rounding the larger unit on its own would contradict the
+    // remainder shown next to it (e.g. 100 seconds would become "2 min 40 sec").
+    if (microseconds >= microsPerHour)
+    {
+        const auto totalMinutes = (microseconds + microsPerMin / 2) / microsPerMin;
+        const auto hours = totalMinutes / 60;
+
+        return std::to_string (hours) + (hours == 1 ? " hour " : " hours ")
+                + std::to_string (totalMinutes % 60) + " min";
+    }
+
+    if (microseconds >= microsPerMin)
+    {
+        const auto totalSeconds = (microseconds + microsPerSec / 2) / microsPerSec;
+
+        return std::to_string (totalSeconds / 60) + " min "
+                + std::to_string (totalSeconds % 60) + " sec";
+    }
+
+    // Describes the duration in a single unit, rounded to 2 decimal places, with
+    // trailing zeros (and an all-zero fraction) left out.
+    const auto describeInUnit = [microseconds] (int64_t unitSize, std::string_view units) -> std::string
+    {
+        constexpr int64_t decimalScale = 100;
+        const auto scaled = (microseconds * decimalScale + unitSize / 2) / unitSize;
+        const auto whole = scaled / decimalScale;
+
+        auto text = std::to_string (whole);
+
+        if (const auto fraction = scaled % decimalScale)
+        {
+            text += '.';
+            text += static_cast<char> ('0' + (fraction / 10));
+
+            if (fraction % 10 != 0)
+                text += static_cast<char> ('0' + (fraction % 10));
+        }
+
+        // A unit name that is spelled out in full and ends in 's' loses it when the whole part is 1.
+        const bool useSingular = whole == 1 && units.length() > 3 && units.back() == 's';
+        text += useSingular ? units.substr (0, units.length() - 1) : units;
+        return text;
+    };
+
+    if (microseconds >= microsPerSec)  return describeInUnit (microsPerSec, " sec");
+    if (microseconds >= microsPerMs)   return describeInUnit (microsPerMs,  " ms");
+
+    return describeInUnit (1, " microseconds");
+}
+
+/// Calculates the Levenshtein distance between two strings, using a single row of
+/// costs that is updated in place for each character of string1.
+template <typename StringType>
+size_t getLevenshteinDistance (const StringType& string1, const StringType& string2)
+{
+    if (string1.empty())  return string2.length();
+    if (string2.empty())  return string1.length();
+
+    const auto numCosts = string2.length() + 1;
+    constexpr size_t maxStackSize = 96;
+
+    // Rows that are short enough live on the stack; longer ones get a heap buffer.
+    size_t stackCosts[maxStackSize];
+    std::unique_ptr<size_t[]> heapCosts;
+    size_t* costs = stackCosts;
+
+    if (numCosts > maxStackSize)
+    {
+        heapCosts.reset (new size_t[numCosts]);
+        costs = heapCosts.get();
+    }
+
+    for (size_t i = 0; i < numCosts; ++i)
+        costs[i] = i;
+
+    size_t row = 0;
+
+    for (auto c1 : string1)
+    {
+        // 'diagonal' holds the previous row's cost for the column to the left.
+        auto diagonal = row;
+        costs[0] = ++row;
+        size_t column = 0;
+
+        for (auto c2 : string2)
+        {
+            ++column;
+            const auto above = costs[column];
+
+            if (c1 == c2)
+                costs[column] = diagonal;
+            else
+                costs[column] = std::min (costs[column - 1], std::min (above, diagonal)) + 1;
+
+            diagonal = above;
+        }
+    }
+
+    return costs[numCosts - 1];
+}
+
+/// Returns an easy-to-read description of a size in bytes.
+/// Sizes of 1 GB (0x40000000), 1 MB (0x100000) or 1 KB (0x400) and above are shown
+/// in that unit, rounded to one decimal place (which is left out if zero). Smaller
+/// sizes are shown as a whole number of bytes.
+inline std::string getByteSizeDescription (uint64_t size)
+{
+    // The whole part and the remainder are scaled separately, so that nothing
+    // can overflow even for sizes close to the largest 64-bit value.
+    const auto describeInUnit = [] (uint64_t numBytes, uint64_t unitSize) -> std::string
+    {
+        auto whole = numBytes / unitSize;
+        auto tenths = ((numBytes % unitSize) * 10 + unitSize / 2) / unitSize;
+
+        // The remainder can round up to a complete unit.
+        if (tenths == 10)
+        {
+            ++whole;
+            tenths = 0;
+        }
+
+        auto text = std::to_string (whole);
+
+        if (tenths != 0)
+        {
+            text += '.';
+            text += static_cast<char> ('0' + tenths);
+        }
+
+        return text;
+    };
+
+    if (size >= 0x40000000)  return describeInUnit (size, 0x40000000) + " GB";
+    if (size >= 0x100000)    return describeInUnit (size, 0x100000) + " MB";
+    if (size >= 0x400)       return describeInUnit (size, 0x400)    + " KB";
+    if (size != 1)           return std::to_string (size) + " bytes";
+
+    return "1 byte";
+}
+
+/// Percent-encodes a string: ASCII letters, digits and the characters "_-.~" are
+/// copied unchanged, and every other byte becomes '%' followed by its value as two
+/// lower-case hex digits.
+inline std::string percentEncodeURI (std::string_view text)
+{
+    constexpr std::string_view unreservedChars ("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789_-.~");
+    constexpr char hexDigits[] = "0123456789abcdef";
+
+    std::string encoded;
+    encoded.reserve (text.length());
+
+    for (const char c : text)
+    {
+        if (unreservedChars.find (c) != std::string_view::npos)
+        {
+            encoded += c;
+            continue;
+        }
+
+        const auto byte = static_cast<uint8_t> (c);
+        encoded += '%';
+        encoded += hexDigits[byte >> 4];
+        encoded += hexDigits[byte & 15u];
+    }
+
+    return encoded;
+}
+
+
+} // namespace choc::text
+
+#endif
--- a/modules/juce_javascript/choc/text/choc_UTF8.h
+++ b/modules/juce_javascript/choc/text/choc_UTF8.h
@ -0,0 +1,655 @@
+//
+//    ██████ ██   ██  ██████   ██████
+//   ██      ██   ██ ██    ██ ██            ** Classy Header-Only Classes **
+//   ██      ███████ ██    ██ ██
+//   ██      ██   ██ ██    ██ ██           https://github.com/Tracktion/choc
+//    ██████ ██   ██  ██████   ██████
+//
+//   CHOC is (C)2022 Tracktion Corporation, and is offered under the terms of the ISC license:
+//
+//   Permission to use, copy, modify, and/or distribute this software for any purpose with or
+//   without fee is hereby granted, provided that the above copyright notice and this permission
+//   notice appear in all copies. THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
+//   WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
+//   AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR
+//   CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+//   WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
+//   CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+
+#ifndef CHOC_UTF8_HEADER_INCLUDED
+#define CHOC_UTF8_HEADER_INCLUDED
+
+#include <cstddef>
+#include "choc_StringUtilities.h"
+
+namespace choc::text
+{
+
+/// An integer type to represent a unicode code-point.
+/// This is a plain 32-bit unsigned value, so it is also able to hold UTF-16 surrogate
+/// values and numbers that lie outside the valid unicode range.
+using UnicodeChar = uint32_t;
+
+//==============================================================================
+/** A non-owning pointer which can iterate over a chunk of null-terminated UTF-8 text
+    and read it as wide unicode characters.
+
+    Only a raw const char* is stored, and a default-constructed UTF8Pointer holds a
+    null pointer.
+*/
+struct UTF8Pointer
+{
+    /// Creates a pointer that refers to the given text. The text is not copied.
+    explicit constexpr UTF8Pointer (const char* utf8Text) noexcept  : text (utf8Text) {}
+
+    UTF8Pointer() = default;
+    UTF8Pointer (const UTF8Pointer&) = default;
+    UTF8Pointer& operator= (const UTF8Pointer&) = default;
+
+    /// Returns the raw data that this points to.
+    const char* data() const noexcept                   { return text; }
+
+    /// Returns true if the pointer is not null.
+    operator bool() const noexcept                      { return text != nullptr; }
+
+    /// Returns true if the pointer is either null or points to a null terminator char.
+    bool empty() const                                  { return text == nullptr || *text == 0; }
+
+    /// Returns the length by iterating all unicode chars and counting them.
+    /// Note that this is slow, and is not a count of the number of bytes in the string!
+    /// A null pointer has a length of zero.
+    size_t length() const;
+
+    //==============================================================================
+    /// Returns the first unicode character in the string.
+    /// The pointer itself is not moved.
+    UnicodeChar operator*() const;
+
+    /// Skips past the first unicode character.
+    /// Moving beyond the end of the string is undefined behaviour and will trigger an assertion.
+    UTF8Pointer& operator++();
+
+    /// Skips past the first unicode character, returning a copy of the pointer as it was
+    /// before being moved.
+    /// Moving beyond the end of the string is undefined behaviour and will trigger an assertion.
+    UTF8Pointer operator++ (int);
+
+    /// Moves backwards to the previous unicode character.
+    /// Moving back before the start of the string is undefined behaviour.
+    /// Note that this returns a copy of the updated pointer rather than a reference.
+    UTF8Pointer operator--();
+
+    /// Skips past the given number of unicode characters.
+    /// Moving beyond the end of the string is undefined behaviour and will trigger an assertion.
+    UTF8Pointer& operator+= (size_t numCharsToSkip);
+
+    /// Returns a pointer which points to the n-th unicode character in the text.
+    /// Reading beyond the end of the string is undefined behaviour and may trigger an assertion.
+    UTF8Pointer operator+ (size_t numCharsToSkip) const;
+
+    /// Returns a pointer which points to the n-th unicode character in the text.
+    /// The number must not be negative.
+    /// Reading beyond the end of the string is undefined behaviour and may trigger an assertion.
+    UTF8Pointer operator+ (int numCharsToSkip) const;
+
+    /// Skips past the first unicode character and returns it as a code-point.
+    /// Calling this when the current character is the terminator will leave the pointer in an
+    /// invalid state.
+    UnicodeChar popFirstChar();
+
+    /// Finds the next occurrence of the given string, returning a pointer to where it starts.
+    /// If it isn't found, the result points to the null terminator at the end of the text
+    /// (so empty() is true for it), or is a null pointer if this pointer is itself null.
+    UTF8Pointer find (const char* textToFind) const;
+
+    /// Returns true if the text starts with this string.
+    /// The comparison is done byte-by-byte, and is always false if this pointer is null.
+    bool startsWith (const char* textToMatch) const;
+
+    /// If the first character matches the given one, this will advance the pointer and return true.
+    /// A null pointer never matches, and neither does a charToMatch of zero.
+    bool skipIfStartsWith (char charToMatch);
+
+    /// If the start of the text matches the given string, this will advance this pointer to skip
+    /// past it, and return true. If not, it will return false without modifying this pointer.
+    bool skipIfStartsWith (const char* textToMatch);
+
+    /// Returns a pointer to the first non-whitespace character in the given string (which may
+    /// be the terminating null character if it's all whitespace).
+    [[nodiscard]] UTF8Pointer findEndOfWhitespace() const;
+
+    /// Iterates backwards from this position to find the first character that follows
+    /// a new-line. The pointer provided marks the furthest back that the function should search
+    [[nodiscard]] UTF8Pointer findStartOfLine (UTF8Pointer startOfValidText) const;
+
+    /// Searches forwards for the next '\r' or '\n' character, and returns a pointer to the
+    /// position just after it. If there isn't one, the result points to the null-terminator.
+    /// Calling this on a null pointer returns a null pointer.
+    [[nodiscard]] UTF8Pointer findEndOfLine() const;
+
+    //==============================================================================
+    /// An empty tag type which is returned by end(), and which an Iterator can be compared with.
+    struct EndIterator {};
+
+    /// A forward iterator over the unicode characters of the text, as returned by begin().
+    struct Iterator
+    {
+        explicit constexpr Iterator (const char* t) : text (t) {}
+        Iterator (const Iterator&) = default;
+        Iterator& operator= (const Iterator&) = default;
+
+        // Decodes the character at the current position
+        UnicodeChar operator*() const           { return *UTF8Pointer (text); }
+        // Advances by one unicode character, using UTF8Pointer to do the stepping
+        Iterator& operator++()                  { UTF8Pointer p (text); ++p; text = p.text; return *this; }
+        Iterator operator++ (int)               { auto old = *this; ++*this; return old; }
+        // An iterator compares equal to the end when it has reached the null terminator
+        bool operator== (EndIterator) const     { return *text == 0; }
+        bool operator!= (EndIterator) const     { return *text != 0; }
+
+    private:
+        const char* text;
+    };
+
+    /// Returns an iterator for the current position.
+    /// This must not be called on a null pointer, and will trigger an assertion if it is.
+    Iterator begin() const;
+    /// Returns the end marker that an Iterator can be compared against.
+    EndIterator end() const;
+
+    //==============================================================================
+    /// This does a pointer comparison, NOT a comparison of the text itself!
+    bool operator== (UTF8Pointer other) const noexcept      { return text == other.text; }
+    /// This does a pointer comparison, NOT a comparison of the text itself!
+    bool operator!= (UTF8Pointer other) const noexcept      { return text != other.text; }
+    /// This does a pointer comparison, NOT a comparison of the text itself!
+    bool operator<  (UTF8Pointer other) const noexcept      { return text <  other.text; }
+    /// This does a pointer comparison, NOT a comparison of the text itself!
+    bool operator>  (UTF8Pointer other) const noexcept      { return text >  other.text; }
+    /// This does a pointer comparison, NOT a comparison of the text itself!
+    bool operator<= (UTF8Pointer other) const noexcept      { return text <= other.text; }
+    /// This does a pointer comparison, NOT a comparison of the text itself!
+    bool operator>= (UTF8Pointer other) const noexcept      { return text >= other.text; }
+
+    /// Returns true if this is a null pointer.
+    bool operator== (decltype(nullptr)) const noexcept      { return text == nullptr; }
+    /// Returns true if this is not a null pointer.
+    bool operator!= (decltype(nullptr)) const noexcept      { return text != nullptr; }
+
+private:
+    // The raw address of the current position in the text (may be null)
+    const char* text = nullptr;
+};
+
+//==============================================================================
+/// Checks a given chunk of data to see whether it's valid UTF-8.
+/// If no errors are found, this returns nullptr. If an error is found, it returns the address
+/// of the offending byte. Note that zero bytes in the data are considered to be valid UTF-8.
+/// The data pointer must not be null.
+const char* findInvalidUTF8Data (const void* dataToCheck, size_t numBytesToRead);
+
+/// Writes the bytes for a unicode character, and returns the number of bytes that were needed.
+/// The buffer passed in needs to have at least 4 bytes capacity. No null terminator is written.
+uint32_t convertUnicodeCodepointToUTF8 (char* dest, UnicodeChar codepoint);
+
+/// Appends a unicode codepoint to a std::string as a sequence of UTF-8 bytes.
+void appendUTF8 (std::string& target, UnicodeChar codepoint);
+
+/// Checks whether a given codepoint is a high-surrogate (0xd800 to 0xdbff inclusive).
+bool isUnicodeHighSurrogate (UnicodeChar codepoint);
+
+/// Checks whether a given codepoint is a low-surrogate (0xdc00 to 0xdfff inclusive).
+bool isUnicodeLowSurrogate (UnicodeChar codepoint);
+
+/// Holds the high and low halves of a surrogate pair.
+/// Both values are zero in a default-constructed object.
+struct SurrogatePair
+{
+    UnicodeChar high = 0, low = 0;
+};
+
+/// For a codepoint >= 0x10000, this will return a surrogate pair to represent it.
+/// Passing a smaller value will trigger an assertion.
+SurrogatePair splitCodePointIntoSurrogatePair (UnicodeChar fullCodePoint);
+
+/// Combines a high and low surrogate into a single codepoint.
+/// If the 'high' value isn't a high-surrogate, it is returned unchanged. Otherwise, if the
+/// 'low' value isn't a low-surrogate, the result is 0.
+UnicodeChar createUnicodeFromHighAndLowSurrogates (SurrogatePair);
+
+/// Checks a UTF-8/CESU-8 string to see if it contains any surrogate pairs.
+/// If it does, then to use it as UTF-8 you'll probably need to run it through
+/// convertSurrogatePairsToUTF8(). Note that the check only looks for high-surrogates.
+bool containsSurrogatePairs (UTF8Pointer);
+
+/// Returns a string where any surrogate pairs have been converted to UTF-8 codepoints.
+std::string convertSurrogatePairsToUTF8 (UTF8Pointer);
+
+/// Returns true if the given UTF-8 string can be used as CESU-8 without conversion. If not,
+/// you'll need to run it through convertUTF8ToCESU8() to convert the 32-bit code-points
+/// to surrogate pairs.
+bool isValidCESU8 (std::string_view utf8);
+
+/// Converts any code-points of 0x10000 or above in this UTF-8 string to surrogate pairs,
+/// which makes the resulting string suitable for use as CESU-8.
+[[nodiscard]] std::string convertUTF8ToCESU8 (UTF8Pointer);
+
+
+//==============================================================================
+/// Represents a line and column index within a block of text.
+/// A default-constructed object has both values set to 0, and so is not valid.
+struct LineAndColumn
+{
+    /// Valid line and column values start at 1.
+    /// If either is 0, it means that the LineAndColumn object is uninitialised.
+    size_t line = 0, column = 0;
+
+    /// Returns true if neither the line nor column is zero.
+    bool isValid() const noexcept          { return line != 0 && column != 0; }
+
+    /// Turns this location into a [line]:[col] string suitable for use in a
+    /// standard compiler error message format.
+    /// No validity check is made: the two numbers are printed whatever their values.
+    std::string toString() const;
+};
+
+/// Given a block of text and a position within it, this will work out the
+/// line and column of that position.
+/// Lines and columns are counted from 1, columns are counted in unicode characters rather
+/// than bytes, and only a '\n' character begins a new line.
+/// If either pointer is null, an uninitialised (invalid) LineAndColumn is returned.
+/// The target position must not lie before the start of the text.
+LineAndColumn findLineAndColumn (UTF8Pointer fullText,
+                                 UTF8Pointer targetPosition);
+
+
+//==============================================================================
+//        _        _           _  _
+//     __| |  ___ | |_   __ _ (_)| | ___
+//    / _` | / _ \| __| / _` || || |/ __|
+//   | (_| ||  __/| |_ | (_| || || |\__ \ _  _  _
+//    \__,_| \___| \__| \__,_||_||_||___/(_)(_)(_)
+//
+//   Code beyond this point is implementation detail...
+//
+//==============================================================================
+
+/// Counts the unicode characters (not the bytes) that come before the null terminator,
+/// by stepping through the whole string. A null pointer has a length of zero.
+inline size_t UTF8Pointer::length() const
+{
+    if (text == nullptr)
+        return 0;
+
+    size_t numChars = 0;
+    auto pos = *this;
+
+    while (*pos.text != 0)
+    {
+        ++numChars;
+        ++pos;
+    }
+
+    return numChars;
+}
+
+/// Scans a block of bytes, and returns the address of the first byte that stops it being
+/// valid UTF-8, or nullptr if no problem is found. Zero bytes are treated as valid.
+///
+/// The byte that gets reported is one of:
+///  - a continuation byte (10xxxxxx) that appears where a character should start
+///  - a lead byte that announces more than 3 continuation bytes
+///  - a lead byte whose sequence would run past the end of the data
+///  - a byte which should be a continuation byte, but isn't
+///  - the lead byte of a 4-byte sequence that decodes to a value above 0x10ffff
+///
+/// Overlong encodings and encoded surrogates are not rejected.
+/// The data pointer must not be null.
+inline const char* findInvalidUTF8Data (const void* dataToCheck, size_t numBytes)
+{
+    CHOC_ASSERT (dataToCheck != nullptr);
+    auto source = static_cast<const char*> (dataToCheck);
+    const auto end = source + numBytes;
+
+    while (source < end)
+    {
+        const auto leadByte = static_cast<unsigned char> (*source);
+
+        if (leadByte < 0x80)    // plain ASCII, including zero
+        {
+            ++source;
+            continue;
+        }
+
+        size_t numExtraBytes = 0;
+
+        if      ((leadByte & 0xe0) == 0xc0)  numExtraBytes = 1;
+        else if ((leadByte & 0xf0) == 0xe0)  numExtraBytes = 2;
+        else if ((leadByte & 0xf8) == 0xf0)  numExtraBytes = 3;
+        else                                 return source;   // stray continuation byte, or a lead byte of 0xf8 or above
+
+        if (static_cast<size_t> (end - source) <= numExtraBytes)
+            return source;  // the sequence is cut short by the end of the data
+
+        // The continuation bytes are checked while the value is being decoded here, rather
+        // than handing the data to UTF8Pointer: its decoder asserts that the continuation
+        // bytes are well-formed, so it mustn't be used on data that hasn't been validated yet.
+        uint32_t codepoint = leadByte & (0x3fu >> numExtraBytes);
+
+        for (size_t i = 1; i <= numExtraBytes; ++i)
+        {
+            const auto nextByte = static_cast<unsigned char> (source[i]);
+
+            if ((nextByte & 0xc0) != 0x80)
+                return source + i;
+
+            codepoint = (codepoint << 6) | (nextByte & 0x3fu);
+        }
+
+        if (codepoint > 0x10ffff)   // only a 4-byte sequence can get this high
+            return source;
+
+        source += numExtraBytes + 1;
+    }
+
+    return nullptr;
+}
+
+/// Decodes and returns the character at the current position. The decoding is done
+/// on a copy, so this pointer isn't moved.
+inline UnicodeChar UTF8Pointer::operator*() const
+{
+    auto copy = *this;
+    return copy.popFirstChar();
+}
+
+/// Moves on to the next unicode character.
+/// The number of bytes to step over is taken from the lead byte alone: the continuation
+/// bytes are skipped without being examined.
+inline UTF8Pointer& UTF8Pointer::operator++()
+{
+    CHOC_ASSERT (! empty());  // can't advance past the zero-terminator
+    const auto leadByte = static_cast<unsigned char> (*text);
+    ++text;
+
+    if (leadByte >= 0x80)
+    {
+        // each set bit below the top one means one more byte to skip, up to a maximum of 3
+        for (uint32_t testBit = 0x40; testBit > 8 && (leadByte & testBit) != 0; testBit >>= 1)
+            ++text;
+    }
+
+    return *this;
+}
+
+/// Moves on to the next unicode character, returning the position that the pointer
+/// had before it was moved.
+inline UTF8Pointer UTF8Pointer::operator++ (int)
+{
+    const UTF8Pointer positionBeforeIncrement (*this);
+    ++*this;
+    return positionBeforeIncrement;
+}
+
+/// Steps backwards to the start of the previous unicode character, by moving back over
+/// any continuation bytes (10xxxxxx) until a byte of another kind is reached.
+/// Finding more than 3 continuation bytes in a row means the data is bad: this triggers
+/// an assertion and stops the search.
+inline UTF8Pointer UTF8Pointer::operator--()
+{
+    CHOC_ASSERT (text != nullptr); // mustn't use this on nullptrs
+
+    for (uint32_t bytesSkipped = 0; (*--text & 0xc0) == 0x80; ++bytesSkipped)
+    {
+        if (bytesSkipped > 2)
+        {
+            CHOC_ASSERT (bytesSkipped <= 2);
+            break;
+        }
+    }
+
+    return *this;
+}
+
+/// Moves forward by the given number of unicode characters, one at a time.
+inline UTF8Pointer& UTF8Pointer::operator+= (size_t numCharsToSkip)
+{
+    for (auto remaining = numCharsToSkip; remaining != 0; --remaining)
+        operator++();
+
+    return *this;
+}
+
+/// Returns a copy of this pointer which has been moved forward by the given number
+/// of unicode characters. This pointer itself isn't changed.
+inline UTF8Pointer UTF8Pointer::operator+ (size_t numCharsToSkip) const
+{
+    return UTF8Pointer (*this) += numCharsToSkip;
+}
+
+/// As the size_t version of this operator, but taking a signed number (which must
+/// not be negative).
+inline UTF8Pointer UTF8Pointer::operator+ (int numCharsToSkip) const
+{
+    CHOC_ASSERT (numCharsToSkip >= 0);
+    const auto count = static_cast<size_t> (numCharsToSkip);
+    return *this + count;
+}
+
+/// Decodes the unicode character at the current position, moves the pointer past it,
+/// and returns the code-point.
+/// The length of the sequence is taken from the lead byte, and each continuation byte
+/// contributes its low 6 bits to the result.
+inline UnicodeChar UTF8Pointer::popFirstChar()
+{
+    CHOC_ASSERT (text != nullptr); // mustn't use this on nullptrs
+    const auto leadByte = static_cast<unsigned char> (*text);
+    ++text;
+
+    UnicodeChar codepoint = leadByte;
+
+    if (leadByte < 0x80)
+        return codepoint;   // a single-byte character
+
+    // Each set bit below the top one announces one continuation byte (up to a maximum
+    // of 3), and leaves one bit less of payload in the lead byte.
+    uint32_t payloadMask = 0x7f, numContinuationBytes = 0;
+
+    for (uint32_t testBit = 0x40; testBit > 8 && (codepoint & testBit) != 0; testBit >>= 1)
+    {
+        payloadMask >>= 1;
+        ++numContinuationBytes;
+    }
+
+    codepoint &= payloadMask;
+
+    while (numContinuationBytes != 0)
+    {
+        uint32_t nextByte = static_cast<unsigned char> (*text);
+
+        CHOC_ASSERT ((nextByte & 0xc0) == 0x80); // error in the data - you should always make sure the source
+                                                    // gets validated before iterating a UTF8Pointer over it
+
+        codepoint = (codepoint << 6) | (nextByte & 0x3f);
+        ++text;
+        --numContinuationBytes;
+    }
+
+    return codepoint;
+}
+
+/// Returns true if the bytes at the current position match the whole of the given
+/// null-terminated string. A null pointer never matches anything.
+inline bool UTF8Pointer::startsWith (const char* textToMatch) const
+{
+    CHOC_ASSERT (textToMatch != nullptr);
+
+    if (text == nullptr)
+        return false;
+
+    for (auto source = text; *textToMatch != 0; ++source, ++textToMatch)
+        if (*source != *textToMatch)
+            return false;
+
+    return true;
+}
+
+/// Steps forward a character at a time until the text at that position starts with
+/// the string being searched for, and returns that position.
+/// If the end of the text is reached first, the position returned is the one where the
+/// search stopped, i.e. one for which empty() is true.
+inline UTF8Pointer UTF8Pointer::find (const char* textToFind) const
+{
+    CHOC_ASSERT (textToFind != nullptr);
+
+    auto position = *this;
+
+    while (! (position.startsWith (textToFind) || position.empty()))
+        ++position;
+
+    return position;
+}
+
+/// If the byte at the current position is the given character, this steps over it and
+/// returns true. A null pointer never matches, and neither does a charToMatch of zero,
+/// so this can't be used to step over the terminator.
+inline bool UTF8Pointer::skipIfStartsWith (char charToMatch)
+{
+    if (text == nullptr || charToMatch == 0 || *text != charToMatch)
+        return false;
+
+    ++text;
+    return true;
+}
+
+/// If the bytes at the current position match the whole of the given null-terminated
+/// string, this moves the pointer past them and returns true. Otherwise the pointer is
+/// left where it was, and the result is false. A null pointer never matches.
+inline bool UTF8Pointer::skipIfStartsWith (const char* textToMatch)
+{
+    CHOC_ASSERT (textToMatch != nullptr);
+
+    if (text == nullptr)
+        return false;
+
+    auto source = text;
+
+    for (; *textToMatch != 0; ++source, ++textToMatch)
+        if (*source != *textToMatch)
+            return false;
+
+    text = source;  // only gets moved once the whole string has matched
+    return true;
+}
+
+/// Returns the position of the first character from here onwards for which
+/// choc::text::isWhitespace() returns false. A null pointer is returned unchanged.
+inline UTF8Pointer UTF8Pointer::findEndOfWhitespace() const
+{
+    if (text == nullptr)
+        return *this;
+
+    auto result = *this;
+
+    while (choc::text::isWhitespace (*result.text))
+        ++result;
+
+    return result;
+}
+
+/// Walks backwards from this position, stopping as soon as the preceding character is
+/// a '\r' or '\n', or when the given start position is reached, and returns the place
+/// where it stopped. Calling this on a null pointer returns a null pointer.
+inline UTF8Pointer UTF8Pointer::findStartOfLine (UTF8Pointer start) const
+{
+    if (text == nullptr)
+        return {};
+
+    auto l = *this;
+    CHOC_ASSERT (l.text >= start.text && start.text != nullptr);
+
+    for (;;)
+    {
+        if (l.text <= start.text)
+            return l;   // can't go back any further
+
+        auto previous = l;
+        --previous;
+        const auto c = *previous;
+
+        if (c == '\r' || c == '\n')
+            return l;   // the previous character ends the line before this one
+
+        l = previous;
+    }
+}
+
+/// Reads characters forwards from this position until a '\r' or '\n' has been consumed,
+/// or the end of the text is reached, and returns the resulting position. So the result
+/// is either just after the first line-break character, or at the terminator.
+/// Calling this on a null pointer returns a null pointer.
+inline UTF8Pointer UTF8Pointer::findEndOfLine() const
+{
+    if (text == nullptr)
+        return {};
+
+    auto position = *this;
+    bool foundLineBreak = false;
+
+    while (! (foundLineBreak || position.empty()))
+    {
+        const auto c = position.popFirstChar();
+        foundLineBreak = (c == '\r' || c == '\n');
+    }
+
+    return position;
+}
+
+/// Returns an iterator for the current position. The pointer must not be null.
+inline UTF8Pointer::Iterator UTF8Pointer::begin() const
+{
+    CHOC_ASSERT (text != nullptr);
+    return Iterator (text);
+}
+
+/// Returns the end marker, which an Iterator compares equal to when it reaches the terminator.
+inline UTF8Pointer::EndIterator UTF8Pointer::end() const
+{
+    return {};
+}
+
+/// Works out the line and column of targetPosition, by reading the characters from the
+/// start of the text up to that position (or up to the terminator, if that comes first).
+/// Both numbers start at 1. A '\n' moves on to the next line and resets the column to 1,
+/// and any other character moves along one column.
+/// If either pointer is null, an uninitialised LineAndColumn is returned.
+inline LineAndColumn findLineAndColumn (UTF8Pointer start, UTF8Pointer targetPosition)
+{
+    if (start == nullptr || targetPosition == nullptr)
+        return {};
+
+    CHOC_ASSERT (start <= targetPosition);
+    size_t line = 1, column = 1;
+
+    while (start < targetPosition && ! start.empty())
+    {
+        if (start.popFirstChar() == '\n')
+        {
+            ++line;
+            column = 1;
+        }
+        else
+        {
+            ++column;
+        }
+    }
+
+    return { line, column };
+}
+
+/// Returns the line and column numbers as decimal text, separated by a colon.
+inline std::string LineAndColumn::toString() const
+{
+    auto description = std::to_string (line);
+    description += ':';
+    description += std::to_string (column);
+    return description;
+}
+
+//==============================================================================
+/// Writes a code-point to the destination buffer as UTF-8, and returns the number of
+/// bytes written, which is between 1 and 4. No terminator is added.
+/// Values below 0x80 take 1 byte, below 0x800 take 2, below 0x10000 take 3, and
+/// anything else is written as 4 bytes.
+inline uint32_t convertUnicodeCodepointToUTF8 (char* dest, UnicodeChar unicodeChar)
+{
+    if (unicodeChar < 0x80)
+    {
+        dest[0] = static_cast<char> (unicodeChar);
+        return 1;
+    }
+
+    const uint32_t numContinuationBytes = unicodeChar < 0x800 ? 1u
+                                                              : (unicodeChar < 0x10000 ? 2u : 3u);
+    auto shift = numContinuationBytes * 6;
+    auto out = dest;
+
+    // The lead byte holds the length marker in its top bits, plus the highest bits of the value
+    *out++ = static_cast<char> ((0xffu << (7 - numContinuationBytes)) | (unicodeChar >> shift));
+
+    // ...and each continuation byte holds the next 6 bits, most significant first
+    while (shift != 0)
+    {
+        shift -= 6;
+        *out++ = static_cast<char> (0x80u | (0x3fu & (unicodeChar >> shift)));
+    }
+
+    return numContinuationBytes + 1;
+}
+
+/// Encodes a code-point as UTF-8 in a small local buffer, and appends the resulting
+/// bytes to the end of the target string.
+inline void appendUTF8 (std::string& target, UnicodeChar unicodeChar)
+{
+    char utf8Bytes[4];
+    const auto numBytes = convertUnicodeCodepointToUTF8 (utf8Bytes, unicodeChar);
+    target.append (utf8Bytes, utf8Bytes + numBytes);
+}
+
+/// True for values in the range 0xd800 to 0xdbff, i.e. those whose bits above the
+/// bottom 10 are equal to 0xd800.
+inline bool isUnicodeHighSurrogate (UnicodeChar codepoint)
+{
+    return (codepoint & ~0x3ffu) == 0xd800u;
+}
+
+/// True for values in the range 0xdc00 to 0xdfff, i.e. those whose bits above the
+/// bottom 10 are equal to 0xdc00.
+inline bool isUnicodeLowSurrogate (UnicodeChar codepoint)
+{
+    return (codepoint & ~0x3ffu) == 0xdc00u;
+}
+
+/// Combines the two halves of a surrogate pair into the code-point they stand for.
+/// If the 'high' value isn't a high-surrogate, it is returned as it is. If it is one,
+/// but the 'low' value isn't a low-surrogate, the result is 0.
+inline UnicodeChar createUnicodeFromHighAndLowSurrogates (SurrogatePair pair)
+{
+    const auto high = pair.high, low = pair.low;
+
+    if (isUnicodeHighSurrogate (high))
+    {
+        if (isUnicodeLowSurrogate (low))
+            return (high << 10) + low - 0x35fdc00u;   // the constant is (0xd800 << 10) + 0xdc00 - 0x10000
+
+        return 0;
+    }
+
+    return high;
+}
+
+/// Reads the characters of the text up to its terminator, and returns true as soon as
+/// one of them is found to be a high-surrogate.
+inline bool containsSurrogatePairs (UTF8Pointer text)
+{
+    for (auto c = text.popFirstChar(); c != 0; c = text.popFirstChar())
+        if (isUnicodeHighSurrogate (c))
+            return true;
+
+    return false;
+}
+
+/// Re-encodes the text as UTF-8, merging each high-surrogate with the character that
+/// follows it into a single code-point.
+/// The conversion ends when a character of 0 is produced: this happens at the terminator,
+/// and also when a high-surrogate is followed by something that isn't a low-surrogate.
+inline std::string convertSurrogatePairsToUTF8 (UTF8Pointer text)
+{
+    std::string converted;
+
+    for (;;)
+    {
+        auto codepoint = text.popFirstChar();
+
+        if (isUnicodeHighSurrogate (codepoint))
+        {
+            const auto nextChar = text.popFirstChar();
+            codepoint = createUnicodeFromHighAndLowSurrogates ({ codepoint, nextChar });
+        }
+
+        if (codepoint == 0)
+            break;
+
+        appendUTF8 (converted, codepoint);
+    }
+
+    return converted;
+}
+
+/// Splits a code-point of 0x10000 or above into a surrogate pair.
+/// The high half is 0xd800 plus the bits above the bottom 10 of (codepoint - 0x10000),
+/// and the low half is 0xdc00 plus the bottom 10 bits of the code-point.
+inline SurrogatePair splitCodePointIntoSurrogatePair (UnicodeChar fullCodePoint)
+{
+    CHOC_ASSERT (fullCodePoint >= 0x10000);
+
+    SurrogatePair pair;
+    pair.high = static_cast<UnicodeChar> (0xd800u + ((fullCodePoint - 0x10000u) >> 10));
+    pair.low  = static_cast<UnicodeChar> (0xdc00u + (fullCodePoint & 0x3ffu));
+    return pair;
+}
+
+/// Returns true if the given UTF-8 string contains no 4-byte sequences, which means it
+/// can be used as CESU-8 without being converted.
+/// Only a 4-byte sequence (a code-point of 0x10000 or above) is encoded differently in
+/// CESU-8, and every such sequence starts with a byte of 0xf0 or above, so the check is
+/// simply that no byte is that high. Sequences of up to 3 bytes, whose lead bytes go up
+/// to 0xef, are the same in both encodings.
+inline bool isValidCESU8 (std::string_view utf8)
+{
+    for (auto c : utf8)
+        if (static_cast<uint8_t> (c) >= 0xf0)
+            return false;
+
+    return true;
+}
+
+/// Re-encodes a UTF-8 string as CESU-8.
+/// Each code-point of 0x10000 or above is written as a surrogate pair, with each half
+/// being encoded separately. All other characters are written as normal UTF-8.
+/// The conversion stops at the first character of 0.
+inline std::string convertUTF8ToCESU8 (UTF8Pointer utf8)
+{
+    std::string cesu8;
+
+    for (auto c = utf8.popFirstChar(); c != 0; c = utf8.popFirstChar())
+    {
+        if (c >= 0x10000)
+        {
+            const auto surrogates = splitCodePointIntoSurrogatePair (c);
+            appendUTF8 (cesu8, surrogates.high);
+            appendUTF8 (cesu8, surrogates.low);
+        }
+        else if (c < 128)
+        {
+            cesu8 += static_cast<char> (c);
+        }
+        else
+        {
+            appendUTF8 (cesu8, c);
+        }
+    }
+
+    return cesu8;
+}
+
+
+} // namespace choc::text
+
+#endif