Added support for NEON vector instructions, and some unit-tests for FloatVectorOperations.

2026-01-10 23:44:24 +00:00 · 2013-12-02 11:25:35 +00:00 · 2013-12-02 11:25:35 +00:00 · e53235741f
commit e53235741f
parent 53cbc74986
3 changed files with 350 additions and 81 deletions
--- a/modules/juce_audio_basics/buffers/juce_FloatVectorOperations.cpp
+++ b/modules/juce_audio_basics/buffers/juce_FloatVectorOperations.cpp
@ -22,10 +22,13 @@
  ==============================================================================
 */

-#if JUCE_USE_SSE_INTRINSICS
-
 namespace FloatVectorHelpers
 {
+
+    #define JUCE_INCREMENT_SRC_DEST    dest += 4; src += 4;
+    #define JUCE_INCREMENT_DEST        dest += 4;
+
+   #if JUCE_USE_SSE_INTRINSICS
    static bool sse2Present = false;

    static bool isSSE2Available() noexcept
@ -44,7 +47,6 @@ namespace FloatVectorHelpers

    static inline float findMinimumOrMaximum (const float* src, int num, const bool isMinimum) noexcept
    {
-       #if JUCE_USE_SSE_INTRINSICS
        const int numLongOps = num / 4;

        if (numLongOps > 1 && FloatVectorHelpers::isSSE2Available())
@ -90,66 +92,144 @@ namespace FloatVectorHelpers

            return localVal;
        }
-       #endif

        return isMinimum ? juce::findMinimum (src, num)
                         : juce::findMaximum (src, num);
    }
-}

-#define JUCE_BEGIN_SSE_OP \
-    if (FloatVectorHelpers::isSSE2Available()) \
-    { \
+    #define JUCE_BEGIN_SSE_OP \
+        if (FloatVectorHelpers::isSSE2Available()) \
+        { \
+            const int numLongOps = num / 4;
+
+    #define JUCE_FINISH_SSE_OP(normalOp) \
+            num &= 3; \
+            if (num == 0) return; \
+        } \
+        for (int i = 0; i < num; ++i) normalOp;
+
+    #define JUCE_SSE_LOOP(sseOp, srcLoad, dstLoad, dstStore, locals, increment) \
+        for (int i = 0; i < numLongOps; ++i) \
+        { \
+            locals (srcLoad, dstLoad); \
+            dstStore (dest, sseOp); \
+            increment; \
+        }
+
+    #define JUCE_LOAD_NONE(srcLoad, dstLoad)
+    #define JUCE_LOAD_DEST(srcLoad, dstLoad)     const __m128 d = dstLoad (dest);
+    #define JUCE_LOAD_SRC(srcLoad, dstLoad)      const __m128 s = srcLoad (src);
+    #define JUCE_LOAD_SRC_DEST(srcLoad, dstLoad) const __m128 d = dstLoad (dest); const __m128 s = srcLoad (src);
+
+    #define JUCE_PERFORM_SSE_OP_DEST(normalOp, sseOp, locals) \
+        JUCE_BEGIN_SSE_OP \
+        if (FloatVectorHelpers::isAligned (dest))   JUCE_SSE_LOOP (sseOp, dummy, _mm_load_ps,  _mm_store_ps,  locals, JUCE_INCREMENT_DEST) \
+        else                                        JUCE_SSE_LOOP (sseOp, dummy, _mm_loadu_ps, _mm_storeu_ps, locals, JUCE_INCREMENT_DEST) \
+        JUCE_FINISH_SSE_OP (normalOp)
+
+    #define JUCE_PERFORM_SSE_OP_SRC_DEST(normalOp, sseOp, locals, increment) \
+        JUCE_BEGIN_SSE_OP \
+        if (FloatVectorHelpers::isAligned (dest)) \
+        { \
+            if (FloatVectorHelpers::isAligned (src)) JUCE_SSE_LOOP (sseOp, _mm_load_ps,  _mm_load_ps, _mm_store_ps, locals, increment) \
+            else                                     JUCE_SSE_LOOP (sseOp, _mm_loadu_ps, _mm_load_ps, _mm_store_ps, locals, increment) \
+        }\
+        else \
+        { \
+            if (FloatVectorHelpers::isAligned (src)) JUCE_SSE_LOOP (sseOp, _mm_load_ps,  _mm_loadu_ps, _mm_storeu_ps, locals, increment) \
+            else                                     JUCE_SSE_LOOP (sseOp, _mm_loadu_ps, _mm_loadu_ps, _mm_storeu_ps, locals, increment) \
+        } \
+        JUCE_FINISH_SSE_OP (normalOp)
+
+
+    //==============================================================================
+   #elif JUCE_USE_ARM_NEON
+
+    static inline float findMinimumOrMaximum (const float* src, int num, const bool isMinimum) noexcept
+    {
        const int numLongOps = num / 4;

-#define JUCE_FINISH_SSE_OP(normalOp) \
-        num &= 3; \
-        if (num == 0) return; \
-    } \
-    for (int i = 0; i < num; ++i) normalOp;
+        if (numLongOps > 1)
+        {
+            float32x4_t val;

-#define JUCE_SSE_LOOP(sseOp, srcLoad, dstLoad, dstStore, locals, increment) \
-    for (int i = 0; i < numLongOps; ++i) \
-    { \
-        locals (srcLoad, dstLoad); \
-        dstStore (dest, sseOp); \
-        increment; \
+            #define JUCE_MINIMUMMAXIMUM_NEON_LOOP(loadOp, minMaxOp) \
+                val = loadOp (src); \
+                src += 4; \
+                for (int i = 1; i < numLongOps; ++i) \
+                { \
+                    const float32x4_t s = loadOp (src); \
+                    val = minMaxOp (val, s); \
+                    src += 4; \
+                }
+
+            if (isMinimum)
+                JUCE_MINIMUMMAXIMUM_NEON_LOOP (vld1q_f32, vminq_f32)
+            else
+                JUCE_MINIMUMMAXIMUM_NEON_LOOP (vld1q_f32, vmaxq_f32)
+
+            float localVal;
+
+            {
+                float vals[4];
+                vst1q_f32 (vals, val);
+
+                localVal = isMinimum ? jmin (vals[0], vals[1], vals[2], vals[3])
+                                     : jmax (vals[0], vals[1], vals[2], vals[3]);
+            }
+
+            num &= 3;
+
+            for (int i = 0; i < num; ++i)
+                localVal = isMinimum ? jmin (localVal, src[i])
+                                     : jmax (localVal, src[i]);
+
+            return localVal;
+        }
+
+        return isMinimum ? juce::findMinimum (src, num)
+                         : juce::findMaximum (src, num);
    }

-#define JUCE_INCREMENT_SRC_DEST    dest += 4; src += 4;
-#define JUCE_INCREMENT_DEST        dest += 4;
+    #define JUCE_BEGIN_NEON_OP \
+        const int numLongOps = num / 4;

-#define JUCE_LOAD_NONE(srcLoad, dstLoad)
-#define JUCE_LOAD_DEST(srcLoad, dstLoad)     const __m128 d = dstLoad (dest);
-#define JUCE_LOAD_SRC(srcLoad, dstLoad)      const __m128 s = srcLoad (src);
-#define JUCE_LOAD_SRC_DEST(srcLoad, dstLoad) const __m128 d = dstLoad (dest); const __m128 s = srcLoad (src);
+    #define JUCE_FINISH_NEON_OP(normalOp) \
+        num &= 3; \
+        if (num == 0) return; \
+        for (int i = 0; i < num; ++i) normalOp;

-#define JUCE_PERFORM_SSE_OP_DEST(normalOp, sseOp, locals) \
-    JUCE_BEGIN_SSE_OP \
-    if (FloatVectorHelpers::isAligned (dest))   JUCE_SSE_LOOP (sseOp, dummy, _mm_load_ps,  _mm_store_ps,  locals, JUCE_INCREMENT_DEST) \
-    else                                        JUCE_SSE_LOOP (sseOp, dummy, _mm_loadu_ps, _mm_storeu_ps, locals, JUCE_INCREMENT_DEST) \
-    JUCE_FINISH_SSE_OP (normalOp)
+    #define JUCE_NEON_LOOP(neonOp, srcLoad, dstLoad, dstStore, locals, increment) \
+        for (int i = 0; i < numLongOps; ++i) \
+        { \
+            locals (srcLoad, dstLoad); \
+            dstStore (dest, neonOp); \
+            increment; \
+        }

-#define JUCE_PERFORM_SSE_OP_SRC_DEST(normalOp, sseOp, locals, increment) \
-    JUCE_BEGIN_SSE_OP \
-    if (FloatVectorHelpers::isAligned (dest)) \
-    { \
-        if (FloatVectorHelpers::isAligned (src)) JUCE_SSE_LOOP (sseOp, _mm_load_ps,  _mm_load_ps, _mm_store_ps, locals, increment) \
-        else                                     JUCE_SSE_LOOP (sseOp, _mm_loadu_ps, _mm_load_ps, _mm_store_ps, locals, increment) \
-    }\
-    else \
-    { \
-        if (FloatVectorHelpers::isAligned (src)) JUCE_SSE_LOOP (sseOp, _mm_load_ps,  _mm_loadu_ps, _mm_storeu_ps, locals, increment) \
-        else                                     JUCE_SSE_LOOP (sseOp, _mm_loadu_ps, _mm_loadu_ps, _mm_storeu_ps, locals, increment) \
-    } \
-    JUCE_FINISH_SSE_OP (normalOp)
+    #define JUCE_LOAD_NONE(srcLoad, dstLoad)
+    #define JUCE_LOAD_DEST(srcLoad, dstLoad)     const float32x4_t d = dstLoad (dest);
+    #define JUCE_LOAD_SRC(srcLoad, dstLoad)      const float32x4_t s = srcLoad (src);
+    #define JUCE_LOAD_SRC_DEST(srcLoad, dstLoad) const float32x4_t d = dstLoad (dest); const float32x4_t s = srcLoad (src);

+    #define JUCE_PERFORM_NEON_OP_DEST(normalOp, neonOp, locals) \
+        JUCE_BEGIN_NEON_OP \
+        JUCE_NEON_LOOP (neonOp, dummy, vld1q_f32,  vst1q_f32,  locals, JUCE_INCREMENT_DEST) \
+        JUCE_FINISH_NEON_OP (normalOp)

-#else
- #define JUCE_PERFORM_SSE_OP_DEST(normalOp, unused1, unused2)              for (int i = 0; i < num; ++i) normalOp;
- #define JUCE_PERFORM_SSE_OP_SRC_DEST(normalOp, sseOp, locals, increment)  for (int i = 0; i < num; ++i) normalOp;
-#endif
+    #define JUCE_PERFORM_NEON_OP_SRC_DEST(normalOp, neonOp, locals) \
+        JUCE_BEGIN_NEON_OP \
+        JUCE_NEON_LOOP (neonOp, vld1q_f32, vld1q_f32,  vst1q_f32,  locals, JUCE_INCREMENT_SRC_DEST) \
+        JUCE_FINISH_NEON_OP (normalOp)

+    //==============================================================================
+    #else
+     #define JUCE_PERFORM_SSE_OP_DEST(normalOp, unused1, unused2)              for (int i = 0; i < num; ++i) normalOp;
+     #define JUCE_PERFORM_SSE_OP_SRC_DEST(normalOp, sseOp, locals, increment)  for (int i = 0; i < num; ++i) normalOp;
+    #endif
+}
+
+//==============================================================================
 void JUCE_CALLTYPE FloatVectorOperations::clear (float* dest, int num) noexcept
 {
   #if JUCE_USE_VDSP_FRAMEWORK
@ -163,11 +243,13 @@ void JUCE_CALLTYPE FloatVectorOperations::fill (float* dest, float valueToFill,
 {
   #if JUCE_USE_VDSP_FRAMEWORK
    vDSP_vfill (&valueToFill, dest, 1, (size_t) num);
+   #elif JUCE_USE_ARM_NEON
+    const float32x4_t val = vld1q_dup_f32 (&valueToFill);
+    JUCE_PERFORM_NEON_OP_DEST (dest[i] = valueToFill, val, JUCE_LOAD_NONE)
   #else
    #if JUCE_USE_SSE_INTRINSICS
     const __m128 val = _mm_load1_ps (&valueToFill);
    #endif
-
    JUCE_PERFORM_SSE_OP_DEST (dest[i] = valueToFill, val, JUCE_LOAD_NONE)
   #endif
 }
@ -181,13 +263,13 @@ void JUCE_CALLTYPE FloatVectorOperations::copyWithMultiply (float* dest, const f
 {
   #if JUCE_USE_VDSP_FRAMEWORK
    vDSP_vsmul (src, 1, &multiplier, dest, 1, num);
+   #elif JUCE_USE_ARM_NEON
+    JUCE_PERFORM_NEON_OP_SRC_DEST (dest[i] += src[i], vmulq_n_f32(s, multiplier), JUCE_LOAD_SRC)
   #else
    #if JUCE_USE_SSE_INTRINSICS
     const __m128 mult = _mm_load1_ps (&multiplier);
    #endif
-
-    JUCE_PERFORM_SSE_OP_SRC_DEST (dest[i] = src[i] * multiplier,
-                                  _mm_mul_ps (mult, s),
+    JUCE_PERFORM_SSE_OP_SRC_DEST (dest[i] = src[i] * multiplier, _mm_mul_ps (mult, s),
                                  JUCE_LOAD_SRC, JUCE_INCREMENT_SRC_DEST)
   #endif
 }
@ -196,43 +278,52 @@ void JUCE_CALLTYPE FloatVectorOperations::add (float* dest, const float* src, in
 {
   #if JUCE_USE_VDSP_FRAMEWORK
    vDSP_vadd (src, 1, dest, 1, dest, 1, num);
+   #elif JUCE_USE_ARM_NEON
+    JUCE_PERFORM_NEON_OP_SRC_DEST (dest[i] += src[i], vaddq_f32 (d, s), JUCE_LOAD_SRC_DEST)
   #else
-    JUCE_PERFORM_SSE_OP_SRC_DEST (dest[i] += src[i],
-                                  _mm_add_ps (d, s),
-                                  JUCE_LOAD_SRC_DEST, JUCE_INCREMENT_SRC_DEST)
+    JUCE_PERFORM_SSE_OP_SRC_DEST (dest[i] += src[i], _mm_add_ps (d, s), JUCE_LOAD_SRC_DEST, JUCE_INCREMENT_SRC_DEST)
   #endif
 }

 void JUCE_CALLTYPE FloatVectorOperations::add (float* dest, float amount, int num) noexcept
 {
-   #if JUCE_USE_SSE_INTRINSICS
-    const __m128 amountToAdd = _mm_load1_ps (&amount);
+   #if JUCE_USE_ARM_NEON
+    const float32x4_t amountToAdd = vld1q_dup_f32(&amount);
+    JUCE_PERFORM_NEON_OP_DEST (dest[i] += amount, vaddq_f32 (d, amountToAdd), JUCE_LOAD_DEST)
+   #else
+    #if JUCE_USE_SSE_INTRINSICS
+     const __m128 amountToAdd = _mm_load1_ps (&amount);
+    #endif
+     JUCE_PERFORM_SSE_OP_DEST (dest[i] += amount, _mm_add_ps (d, amountToAdd), JUCE_LOAD_DEST)
   #endif
-
-    JUCE_PERFORM_SSE_OP_DEST (dest[i] += amount,
-                              _mm_add_ps (d, amountToAdd),
-                              JUCE_LOAD_DEST)
 }

 void JUCE_CALLTYPE FloatVectorOperations::addWithMultiply (float* dest, const float* src, float multiplier, int num) noexcept
 {
-   #if JUCE_USE_SSE_INTRINSICS
-    const __m128 mult = _mm_load1_ps (&multiplier);
+   #if JUCE_USE_ARM_NEON
+    JUCE_PERFORM_NEON_OP_SRC_DEST (dest[i] += src[i] * multiplier,
+                                   vmlaq_n_f32 (d, s, multiplier),
+                                   JUCE_LOAD_SRC_DEST)
+   #else
+    #if JUCE_USE_SSE_INTRINSICS
+     const __m128 mult = _mm_load1_ps (&multiplier);
+    #endif
+
+     JUCE_PERFORM_SSE_OP_SRC_DEST (dest[i] += src[i] * multiplier,
+                                   _mm_add_ps (d, _mm_mul_ps (mult, s)),
+                                   JUCE_LOAD_SRC_DEST, JUCE_INCREMENT_SRC_DEST)
   #endif

-    JUCE_PERFORM_SSE_OP_SRC_DEST (dest[i] += src[i] * multiplier,
-                                  _mm_add_ps (d, _mm_mul_ps (mult, s)),
-                                  JUCE_LOAD_SRC_DEST, JUCE_INCREMENT_SRC_DEST)
 }

 void JUCE_CALLTYPE FloatVectorOperations::multiply (float* dest, const float* src, int num) noexcept
 {
   #if JUCE_USE_VDSP_FRAMEWORK
    vDSP_vmul (src, 1, dest, 1, dest, 1, num);
+   #elif JUCE_USE_ARM_NEON
+    JUCE_PERFORM_NEON_OP_SRC_DEST (dest[i] *= src[i], vmulq_f32 (d, s), JUCE_LOAD_SRC_DEST)
   #else
-    JUCE_PERFORM_SSE_OP_SRC_DEST (dest[i] *= src[i],
-                                  _mm_mul_ps (d, s),
-                                  JUCE_LOAD_SRC_DEST, JUCE_INCREMENT_SRC_DEST)
+    JUCE_PERFORM_SSE_OP_SRC_DEST  (dest[i] *= src[i], _mm_mul_ps (d, s), JUCE_LOAD_SRC_DEST, JUCE_INCREMENT_SRC_DEST)
   #endif
 }

@ -240,14 +331,13 @@ void JUCE_CALLTYPE FloatVectorOperations::multiply (float* dest, float multiplie
 {
   #if JUCE_USE_VDSP_FRAMEWORK
    vDSP_vsmul (dest, 1, &multiplier, dest, 1, num);
+   #elif JUCE_USE_ARM_NEON
+    JUCE_PERFORM_NEON_OP_DEST (dest[i] *= multiplier, vmulq_n_f32 (d, multiplier), JUCE_LOAD_DEST)
   #else
    #if JUCE_USE_SSE_INTRINSICS
     const __m128 mult = _mm_load1_ps (&multiplier);
    #endif
-
-    JUCE_PERFORM_SSE_OP_DEST (dest[i] *= multiplier,
-                              _mm_mul_ps (d, mult),
-                              JUCE_LOAD_DEST)
+    JUCE_PERFORM_SSE_OP_DEST (dest[i] *= multiplier, _mm_mul_ps (d, mult), JUCE_LOAD_DEST)
   #endif
 }

@ -262,13 +352,19 @@ void FloatVectorOperations::negate (float* dest, const float* src, int num) noex

 void JUCE_CALLTYPE FloatVectorOperations::convertFixedToFloat (float* dest, const int* src, float multiplier, int num) noexcept
 {
-   #if JUCE_USE_SSE_INTRINSICS
-    const __m128 mult = _mm_load1_ps (&multiplier);
-   #endif
+   #if JUCE_USE_ARM_NEON
+    JUCE_PERFORM_NEON_OP_SRC_DEST (dest[i] = src[i] * multiplier,
+                                   vmulq_n_f32 (vcvtq_f32_s32 (vld1q_s32 (src)), multiplier),
+                                   JUCE_LOAD_NONE)
+   #else
+    #if JUCE_USE_SSE_INTRINSICS
+     const __m128 mult = _mm_load1_ps (&multiplier);
+    #endif

-    JUCE_PERFORM_SSE_OP_SRC_DEST (dest[i] = src[i] * multiplier,
-                                  _mm_mul_ps (mult, _mm_cvtepi32_ps (_mm_loadu_si128 ((const __m128i*) src))),
-                                  JUCE_LOAD_NONE, JUCE_INCREMENT_SRC_DEST)
+     JUCE_PERFORM_SSE_OP_SRC_DEST (dest[i] = src[i] * multiplier,
+                                   _mm_mul_ps (mult, _mm_cvtepi32_ps (_mm_loadu_si128 ((const __m128i*) src))),
+                                   JUCE_LOAD_NONE, JUCE_INCREMENT_SRC_DEST)
+   #endif
 }

 void JUCE_CALLTYPE FloatVectorOperations::findMinAndMax (const float* src, int num, float& minResult, float& maxResult) noexcept
@ -315,6 +411,51 @@ void JUCE_CALLTYPE FloatVectorOperations::findMinAndMax (const float* src, int n
            localMax = jmax (localMax, s);
        }

+        minResult = localMin;
+        maxResult = localMax;
+        return;
+    }
+   #elif JUCE_USE_ARM_NEON
+    const int numLongOps = num / 4;
+
+    if (numLongOps > 1)
+    {
+        float32x4_t mn, mx;
+
+        #define JUCE_MINMAX_NEON_LOOP(loadOp) \
+            mn = loadOp (src); \
+            mx = mn; \
+            src += 4; \
+            for (int i = 1; i < numLongOps; ++i) \
+            { \
+                const float32x4_t s = loadOp (src); \
+                mn = vminq_f32 (mn, s); \
+                mx = vmaxq_f32 (mx, s); \
+                src += 4; \
+            }
+
+        JUCE_MINMAX_NEON_LOOP (vld1q_f32);
+
+        float localMin, localMax;
+
+        {
+            float mns[4], mxs[4];
+            vst1q_f32 (mns, mn);
+            vst1q_f32 (mxs, mx);
+
+            localMin = jmin (mns[0], mns[1], mns[2], mns[3]);
+            localMax = jmax (mxs[0], mxs[1], mxs[2], mxs[3]);
+        }
+
+        num &= 3;
+
+        for (int i = 0; i < num; ++i)
+        {
+            const float s = src[i];
+            localMin = jmin (localMin, s);
+            localMax = jmax (localMax, s);
+        }
+
        minResult = localMin;
        maxResult = localMax;
        return;
@ -326,7 +467,7 @@ void JUCE_CALLTYPE FloatVectorOperations::findMinAndMax (const float* src, int n

 float JUCE_CALLTYPE FloatVectorOperations::findMinimum (const float* src, int num) noexcept
 {
-   #if JUCE_USE_SSE_INTRINSICS
+   #if JUCE_USE_SSE_INTRINSICS || JUCE_USE_ARM_NEON
    return FloatVectorHelpers::findMinimumOrMaximum (src, num, true);
   #else
    return juce::findMinimum (src, num);
@ -335,7 +476,7 @@ float JUCE_CALLTYPE FloatVectorOperations::findMinimum (const float* src, int nu

 float JUCE_CALLTYPE FloatVectorOperations::findMaximum (const float* src, int num) noexcept
 {
-   #if JUCE_USE_SSE_INTRINSICS
+   #if JUCE_USE_SSE_INTRINSICS || JUCE_USE_ARM_NEON
    return FloatVectorHelpers::findMinimumOrMaximum (src, num, false);
   #else
    return juce::findMaximum (src, num);
@ -350,3 +491,126 @@ void JUCE_CALLTYPE FloatVectorOperations::enableFlushToZeroMode (bool shouldEnab
   #endif
    (void) shouldEnable;
 }
+
+//==============================================================================
+//==============================================================================
+#if JUCE_UNIT_TESTS
+
+class FloatVectorOperationsTests  : public UnitTest
+{
+public:
+    FloatVectorOperationsTests() : UnitTest ("FloatVectorOperations") {}
+
+    void runTest()
+    {
+        beginTest ("FloatVectorOperations");
+
+        for (int i = 100; --i >= 0;)
+        {
+            const int num = getRandom().nextInt (500) + 1;
+
+            HeapBlock<float> buffer1 (num + 16), buffer2 (num + 16);
+            HeapBlock<int> buffer3 (num + 16);
+
+           #if JUCE_ARM
+            float* const data1 = buffer1;
+            float* const data2 = buffer2;
+            int* const int1 = buffer3;
+           #else
+            float* const data1 = addBytesToPointer (buffer1.getData(), getRandom().nextInt (16));
+            float* const data2 = addBytesToPointer (buffer2.getData(), getRandom().nextInt (16));
+            int* const int1 = addBytesToPointer (buffer3.getData(), getRandom().nextInt (16));
+           #endif
+
+            fillRandomly (data1, num);
+            fillRandomly (data2, num);
+
+            float mn1, mx1, mn2, mx2;
+            FloatVectorOperations::findMinAndMax (data1, num, mn1, mx1);
+            juce::findMinAndMax (data1, num, mn2, mx2);
+            expect (mn1 == mn2);
+            expect (mx1 == mx2);
+
+            expect (FloatVectorOperations::findMinimum (data1, num) == juce::findMinimum (data1, num));
+            expect (FloatVectorOperations::findMaximum (data1, num) == juce::findMaximum (data1, num));
+
+            expect (FloatVectorOperations::findMinimum (data2, num) == juce::findMinimum (data2, num));
+            expect (FloatVectorOperations::findMaximum (data2, num) == juce::findMaximum (data2, num));
+
+            FloatVectorOperations::clear (data1, num);
+            expect (areAllValuesEqual (data1, num, 0));
+
+            FloatVectorOperations::fill (data1, 2.0f, num);
+            expect (areAllValuesEqual (data1, num, 2.0f));
+
+            FloatVectorOperations::add (data1, 2.0f, num);
+            expect (areAllValuesEqual (data1, num, 4.0f));
+
+            FloatVectorOperations::copy (data2, data1, num);
+            expect (areAllValuesEqual (data2, num, 4.0f));
+
+            FloatVectorOperations::add (data2, data1, num);
+            expect (areAllValuesEqual (data2, num, 8.0f));
+
+            FloatVectorOperations::copyWithMultiply (data2, data1, 4.0f, num);
+            expect (areAllValuesEqual (data2, num, 16.0f));
+
+            FloatVectorOperations::addWithMultiply (data2, data1, 4.0f, num);
+            expect (areAllValuesEqual (data2, num, 32.0f));
+
+            FloatVectorOperations::multiply (data1, 2.0f, num);
+            expect (areAllValuesEqual (data1, num, 8.0f));
+
+            FloatVectorOperations::multiply (data1, data2, num);
+            expect (areAllValuesEqual (data1, num, 256.0f));
+
+            FloatVectorOperations::negate (data2, data1, num);
+            expect (areAllValuesEqual (data2, num, -256.0f));
+
+            fillRandomly (int1, num);
+            FloatVectorOperations::convertFixedToFloat (data1, int1, 2.0f, num);
+            convertFixed (data2, int1, 2.0f, num);
+            expect (buffersMatch (data1, data2, num));
+        }
+    }
+
+    void fillRandomly (float* d, int num)
+    {
+        while (--num >= 0)
+            *d++ = getRandom().nextFloat() * 1000.0f;
+    }
+
+    void fillRandomly (int* d, int num)
+    {
+        while (--num >= 0)
+            *d++ = getRandom().nextInt();
+    }
+
+    static void convertFixed (float* d, const int* s, float multiplier, int num)
+    {
+        while (--num >= 0)
+            *d++ = *s++ * multiplier;
+    }
+
+    static bool areAllValuesEqual (const float* d, int num, float target)
+    {
+        while (--num >= 0)
+            if (*d++ != target)
+                return false;
+
+        return true;
+    }
+
+    static bool buffersMatch (const float* d1, const float* d2, int num)
+    {
+        while (--num >= 0)
+            if (std::abs (*d1++ - *d2++) > std::numeric_limits<float>::epsilon())
+                return false;
+
+        return true;
+    }
+};
+
+static FloatVectorOperationsTests vectorOpTests;
+
+#endif
--- a/modules/juce_audio_basics/juce_audio_basics.cpp
+++ b/modules/juce_audio_basics/juce_audio_basics.cpp
@ -58,6 +58,11 @@
 #undef JUCE_USE_VDSP_FRAMEWORK
 #endif

+#if __ARM_NEON__ && ! (JUCE_USE_VDSP_FRAMEWORK || defined (JUCE_USE_ARM_NEON))
+ #define JUCE_USE_ARM_NEON 1
+ #include <arm_neon.h>
+#endif
+
 namespace juce
 {

--- a/modules/juce_audio_devices/native/juce_android_OpenSL.cpp
+++ b/modules/juce_audio_devices/native/juce_android_OpenSL.cpp
@ -46,7 +46,7 @@ public:
    {
        // OpenSL has piss-poor support for determining latency, so the only way I can find to
        // get a number for this is by asking the AudioTrack/AudioRecord classes..
-        AndroidAudioIODevice javaDevice (String());
+        AndroidAudioIODevice javaDevice (String::empty);

        // this is a total guess about how to calculate the latency, but seems to vaguely agree
        // with the devices I've tested.. YMMV