mirror of
https://github.com/juce-framework/JUCE.git
synced 2026-01-10 23:44:24 +00:00
Added support for NEON vector instructions, and some unit-tests for FloatVectorOperations.
This commit is contained in:
parent
53cbc74986
commit
e53235741f
3 changed files with 350 additions and 81 deletions
|
|
@ -22,10 +22,13 @@
|
||||||
==============================================================================
|
==============================================================================
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#if JUCE_USE_SSE_INTRINSICS
|
|
||||||
|
|
||||||
namespace FloatVectorHelpers
|
namespace FloatVectorHelpers
|
||||||
{
|
{
|
||||||
|
|
||||||
|
#define JUCE_INCREMENT_SRC_DEST dest += 4; src += 4;
|
||||||
|
#define JUCE_INCREMENT_DEST dest += 4;
|
||||||
|
|
||||||
|
#if JUCE_USE_SSE_INTRINSICS
|
||||||
static bool sse2Present = false;
|
static bool sse2Present = false;
|
||||||
|
|
||||||
static bool isSSE2Available() noexcept
|
static bool isSSE2Available() noexcept
|
||||||
|
|
@ -44,7 +47,6 @@ namespace FloatVectorHelpers
|
||||||
|
|
||||||
static inline float findMinimumOrMaximum (const float* src, int num, const bool isMinimum) noexcept
|
static inline float findMinimumOrMaximum (const float* src, int num, const bool isMinimum) noexcept
|
||||||
{
|
{
|
||||||
#if JUCE_USE_SSE_INTRINSICS
|
|
||||||
const int numLongOps = num / 4;
|
const int numLongOps = num / 4;
|
||||||
|
|
||||||
if (numLongOps > 1 && FloatVectorHelpers::isSSE2Available())
|
if (numLongOps > 1 && FloatVectorHelpers::isSSE2Available())
|
||||||
|
|
@ -90,66 +92,144 @@ namespace FloatVectorHelpers
|
||||||
|
|
||||||
return localVal;
|
return localVal;
|
||||||
}
|
}
|
||||||
#endif
|
|
||||||
|
|
||||||
return isMinimum ? juce::findMinimum (src, num)
|
return isMinimum ? juce::findMinimum (src, num)
|
||||||
: juce::findMaximum (src, num);
|
: juce::findMaximum (src, num);
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
#define JUCE_BEGIN_SSE_OP \
|
#define JUCE_BEGIN_SSE_OP \
|
||||||
if (FloatVectorHelpers::isSSE2Available()) \
|
if (FloatVectorHelpers::isSSE2Available()) \
|
||||||
{ \
|
{ \
|
||||||
|
const int numLongOps = num / 4;
|
||||||
|
|
||||||
|
#define JUCE_FINISH_SSE_OP(normalOp) \
|
||||||
|
num &= 3; \
|
||||||
|
if (num == 0) return; \
|
||||||
|
} \
|
||||||
|
for (int i = 0; i < num; ++i) normalOp;
|
||||||
|
|
||||||
|
#define JUCE_SSE_LOOP(sseOp, srcLoad, dstLoad, dstStore, locals, increment) \
|
||||||
|
for (int i = 0; i < numLongOps; ++i) \
|
||||||
|
{ \
|
||||||
|
locals (srcLoad, dstLoad); \
|
||||||
|
dstStore (dest, sseOp); \
|
||||||
|
increment; \
|
||||||
|
}
|
||||||
|
|
||||||
|
#define JUCE_LOAD_NONE(srcLoad, dstLoad)
|
||||||
|
#define JUCE_LOAD_DEST(srcLoad, dstLoad) const __m128 d = dstLoad (dest);
|
||||||
|
#define JUCE_LOAD_SRC(srcLoad, dstLoad) const __m128 s = srcLoad (src);
|
||||||
|
#define JUCE_LOAD_SRC_DEST(srcLoad, dstLoad) const __m128 d = dstLoad (dest); const __m128 s = srcLoad (src);
|
||||||
|
|
||||||
|
#define JUCE_PERFORM_SSE_OP_DEST(normalOp, sseOp, locals) \
|
||||||
|
JUCE_BEGIN_SSE_OP \
|
||||||
|
if (FloatVectorHelpers::isAligned (dest)) JUCE_SSE_LOOP (sseOp, dummy, _mm_load_ps, _mm_store_ps, locals, JUCE_INCREMENT_DEST) \
|
||||||
|
else JUCE_SSE_LOOP (sseOp, dummy, _mm_loadu_ps, _mm_storeu_ps, locals, JUCE_INCREMENT_DEST) \
|
||||||
|
JUCE_FINISH_SSE_OP (normalOp)
|
||||||
|
|
||||||
|
#define JUCE_PERFORM_SSE_OP_SRC_DEST(normalOp, sseOp, locals, increment) \
|
||||||
|
JUCE_BEGIN_SSE_OP \
|
||||||
|
if (FloatVectorHelpers::isAligned (dest)) \
|
||||||
|
{ \
|
||||||
|
if (FloatVectorHelpers::isAligned (src)) JUCE_SSE_LOOP (sseOp, _mm_load_ps, _mm_load_ps, _mm_store_ps, locals, increment) \
|
||||||
|
else JUCE_SSE_LOOP (sseOp, _mm_loadu_ps, _mm_load_ps, _mm_store_ps, locals, increment) \
|
||||||
|
}\
|
||||||
|
else \
|
||||||
|
{ \
|
||||||
|
if (FloatVectorHelpers::isAligned (src)) JUCE_SSE_LOOP (sseOp, _mm_load_ps, _mm_loadu_ps, _mm_storeu_ps, locals, increment) \
|
||||||
|
else JUCE_SSE_LOOP (sseOp, _mm_loadu_ps, _mm_loadu_ps, _mm_storeu_ps, locals, increment) \
|
||||||
|
} \
|
||||||
|
JUCE_FINISH_SSE_OP (normalOp)
|
||||||
|
|
||||||
|
|
||||||
|
//==============================================================================
|
||||||
|
#elif JUCE_USE_ARM_NEON
|
||||||
|
|
||||||
|
static inline float findMinimumOrMaximum (const float* src, int num, const bool isMinimum) noexcept
|
||||||
|
{
|
||||||
const int numLongOps = num / 4;
|
const int numLongOps = num / 4;
|
||||||
|
|
||||||
#define JUCE_FINISH_SSE_OP(normalOp) \
|
if (numLongOps > 1)
|
||||||
num &= 3; \
|
{
|
||||||
if (num == 0) return; \
|
float32x4_t val;
|
||||||
} \
|
|
||||||
for (int i = 0; i < num; ++i) normalOp;
|
|
||||||
|
|
||||||
#define JUCE_SSE_LOOP(sseOp, srcLoad, dstLoad, dstStore, locals, increment) \
|
#define JUCE_MINIMUMMAXIMUM_NEON_LOOP(loadOp, minMaxOp) \
|
||||||
for (int i = 0; i < numLongOps; ++i) \
|
val = loadOp (src); \
|
||||||
{ \
|
src += 4; \
|
||||||
locals (srcLoad, dstLoad); \
|
for (int i = 1; i < numLongOps; ++i) \
|
||||||
dstStore (dest, sseOp); \
|
{ \
|
||||||
increment; \
|
const float32x4_t s = loadOp (src); \
|
||||||
|
val = minMaxOp (val, s); \
|
||||||
|
src += 4; \
|
||||||
|
}
|
||||||
|
|
||||||
|
if (isMinimum)
|
||||||
|
JUCE_MINIMUMMAXIMUM_NEON_LOOP (vld1q_f32, vminq_f32)
|
||||||
|
else
|
||||||
|
JUCE_MINIMUMMAXIMUM_NEON_LOOP (vld1q_f32, vmaxq_f32)
|
||||||
|
|
||||||
|
float localVal;
|
||||||
|
|
||||||
|
{
|
||||||
|
float vals[4];
|
||||||
|
vst1q_f32 (vals, val);
|
||||||
|
|
||||||
|
localVal = isMinimum ? jmin (vals[0], vals[1], vals[2], vals[3])
|
||||||
|
: jmax (vals[0], vals[1], vals[2], vals[3]);
|
||||||
|
}
|
||||||
|
|
||||||
|
num &= 3;
|
||||||
|
|
||||||
|
for (int i = 0; i < num; ++i)
|
||||||
|
localVal = isMinimum ? jmin (localVal, src[i])
|
||||||
|
: jmax (localVal, src[i]);
|
||||||
|
|
||||||
|
return localVal;
|
||||||
|
}
|
||||||
|
|
||||||
|
return isMinimum ? juce::findMinimum (src, num)
|
||||||
|
: juce::findMaximum (src, num);
|
||||||
}
|
}
|
||||||
|
|
||||||
#define JUCE_INCREMENT_SRC_DEST dest += 4; src += 4;
|
#define JUCE_BEGIN_NEON_OP \
|
||||||
#define JUCE_INCREMENT_DEST dest += 4;
|
const int numLongOps = num / 4;
|
||||||
|
|
||||||
#define JUCE_LOAD_NONE(srcLoad, dstLoad)
|
#define JUCE_FINISH_NEON_OP(normalOp) \
|
||||||
#define JUCE_LOAD_DEST(srcLoad, dstLoad) const __m128 d = dstLoad (dest);
|
num &= 3; \
|
||||||
#define JUCE_LOAD_SRC(srcLoad, dstLoad) const __m128 s = srcLoad (src);
|
if (num == 0) return; \
|
||||||
#define JUCE_LOAD_SRC_DEST(srcLoad, dstLoad) const __m128 d = dstLoad (dest); const __m128 s = srcLoad (src);
|
for (int i = 0; i < num; ++i) normalOp;
|
||||||
|
|
||||||
#define JUCE_PERFORM_SSE_OP_DEST(normalOp, sseOp, locals) \
|
#define JUCE_NEON_LOOP(neonOp, srcLoad, dstLoad, dstStore, locals, increment) \
|
||||||
JUCE_BEGIN_SSE_OP \
|
for (int i = 0; i < numLongOps; ++i) \
|
||||||
if (FloatVectorHelpers::isAligned (dest)) JUCE_SSE_LOOP (sseOp, dummy, _mm_load_ps, _mm_store_ps, locals, JUCE_INCREMENT_DEST) \
|
{ \
|
||||||
else JUCE_SSE_LOOP (sseOp, dummy, _mm_loadu_ps, _mm_storeu_ps, locals, JUCE_INCREMENT_DEST) \
|
locals (srcLoad, dstLoad); \
|
||||||
JUCE_FINISH_SSE_OP (normalOp)
|
dstStore (dest, neonOp); \
|
||||||
|
increment; \
|
||||||
|
}
|
||||||
|
|
||||||
#define JUCE_PERFORM_SSE_OP_SRC_DEST(normalOp, sseOp, locals, increment) \
|
#define JUCE_LOAD_NONE(srcLoad, dstLoad)
|
||||||
JUCE_BEGIN_SSE_OP \
|
#define JUCE_LOAD_DEST(srcLoad, dstLoad) const float32x4_t d = dstLoad (dest);
|
||||||
if (FloatVectorHelpers::isAligned (dest)) \
|
#define JUCE_LOAD_SRC(srcLoad, dstLoad) const float32x4_t s = srcLoad (src);
|
||||||
{ \
|
#define JUCE_LOAD_SRC_DEST(srcLoad, dstLoad) const float32x4_t d = dstLoad (dest); const float32x4_t s = srcLoad (src);
|
||||||
if (FloatVectorHelpers::isAligned (src)) JUCE_SSE_LOOP (sseOp, _mm_load_ps, _mm_load_ps, _mm_store_ps, locals, increment) \
|
|
||||||
else JUCE_SSE_LOOP (sseOp, _mm_loadu_ps, _mm_load_ps, _mm_store_ps, locals, increment) \
|
|
||||||
}\
|
|
||||||
else \
|
|
||||||
{ \
|
|
||||||
if (FloatVectorHelpers::isAligned (src)) JUCE_SSE_LOOP (sseOp, _mm_load_ps, _mm_loadu_ps, _mm_storeu_ps, locals, increment) \
|
|
||||||
else JUCE_SSE_LOOP (sseOp, _mm_loadu_ps, _mm_loadu_ps, _mm_storeu_ps, locals, increment) \
|
|
||||||
} \
|
|
||||||
JUCE_FINISH_SSE_OP (normalOp)
|
|
||||||
|
|
||||||
|
#define JUCE_PERFORM_NEON_OP_DEST(normalOp, neonOp, locals) \
|
||||||
|
JUCE_BEGIN_NEON_OP \
|
||||||
|
JUCE_NEON_LOOP (neonOp, dummy, vld1q_f32, vst1q_f32, locals, JUCE_INCREMENT_DEST) \
|
||||||
|
JUCE_FINISH_NEON_OP (normalOp)
|
||||||
|
|
||||||
#else
|
#define JUCE_PERFORM_NEON_OP_SRC_DEST(normalOp, neonOp, locals) \
|
||||||
#define JUCE_PERFORM_SSE_OP_DEST(normalOp, unused1, unused2) for (int i = 0; i < num; ++i) normalOp;
|
JUCE_BEGIN_NEON_OP \
|
||||||
#define JUCE_PERFORM_SSE_OP_SRC_DEST(normalOp, sseOp, locals, increment) for (int i = 0; i < num; ++i) normalOp;
|
JUCE_NEON_LOOP (neonOp, vld1q_f32, vld1q_f32, vst1q_f32, locals, JUCE_INCREMENT_SRC_DEST) \
|
||||||
#endif
|
JUCE_FINISH_NEON_OP (normalOp)
|
||||||
|
|
||||||
|
//==============================================================================
|
||||||
|
#else
|
||||||
|
#define JUCE_PERFORM_SSE_OP_DEST(normalOp, unused1, unused2) for (int i = 0; i < num; ++i) normalOp;
|
||||||
|
#define JUCE_PERFORM_SSE_OP_SRC_DEST(normalOp, sseOp, locals, increment) for (int i = 0; i < num; ++i) normalOp;
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
//==============================================================================
|
||||||
void JUCE_CALLTYPE FloatVectorOperations::clear (float* dest, int num) noexcept
|
void JUCE_CALLTYPE FloatVectorOperations::clear (float* dest, int num) noexcept
|
||||||
{
|
{
|
||||||
#if JUCE_USE_VDSP_FRAMEWORK
|
#if JUCE_USE_VDSP_FRAMEWORK
|
||||||
|
|
@ -163,11 +243,13 @@ void JUCE_CALLTYPE FloatVectorOperations::fill (float* dest, float valueToFill,
|
||||||
{
|
{
|
||||||
#if JUCE_USE_VDSP_FRAMEWORK
|
#if JUCE_USE_VDSP_FRAMEWORK
|
||||||
vDSP_vfill (&valueToFill, dest, 1, (size_t) num);
|
vDSP_vfill (&valueToFill, dest, 1, (size_t) num);
|
||||||
|
#elif JUCE_USE_ARM_NEON
|
||||||
|
const float32x4_t val = vld1q_dup_f32 (&valueToFill);
|
||||||
|
JUCE_PERFORM_NEON_OP_DEST (dest[i] = valueToFill, val, JUCE_LOAD_NONE)
|
||||||
#else
|
#else
|
||||||
#if JUCE_USE_SSE_INTRINSICS
|
#if JUCE_USE_SSE_INTRINSICS
|
||||||
const __m128 val = _mm_load1_ps (&valueToFill);
|
const __m128 val = _mm_load1_ps (&valueToFill);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
JUCE_PERFORM_SSE_OP_DEST (dest[i] = valueToFill, val, JUCE_LOAD_NONE)
|
JUCE_PERFORM_SSE_OP_DEST (dest[i] = valueToFill, val, JUCE_LOAD_NONE)
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
@ -181,13 +263,13 @@ void JUCE_CALLTYPE FloatVectorOperations::copyWithMultiply (float* dest, const f
|
||||||
{
|
{
|
||||||
#if JUCE_USE_VDSP_FRAMEWORK
|
#if JUCE_USE_VDSP_FRAMEWORK
|
||||||
vDSP_vsmul (src, 1, &multiplier, dest, 1, num);
|
vDSP_vsmul (src, 1, &multiplier, dest, 1, num);
|
||||||
|
#elif JUCE_USE_ARM_NEON
|
||||||
|
JUCE_PERFORM_NEON_OP_SRC_DEST (dest[i] += src[i], vmulq_n_f32(s, multiplier), JUCE_LOAD_SRC)
|
||||||
#else
|
#else
|
||||||
#if JUCE_USE_SSE_INTRINSICS
|
#if JUCE_USE_SSE_INTRINSICS
|
||||||
const __m128 mult = _mm_load1_ps (&multiplier);
|
const __m128 mult = _mm_load1_ps (&multiplier);
|
||||||
#endif
|
#endif
|
||||||
|
JUCE_PERFORM_SSE_OP_SRC_DEST (dest[i] = src[i] * multiplier, _mm_mul_ps (mult, s),
|
||||||
JUCE_PERFORM_SSE_OP_SRC_DEST (dest[i] = src[i] * multiplier,
|
|
||||||
_mm_mul_ps (mult, s),
|
|
||||||
JUCE_LOAD_SRC, JUCE_INCREMENT_SRC_DEST)
|
JUCE_LOAD_SRC, JUCE_INCREMENT_SRC_DEST)
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
@ -196,43 +278,52 @@ void JUCE_CALLTYPE FloatVectorOperations::add (float* dest, const float* src, in
|
||||||
{
|
{
|
||||||
#if JUCE_USE_VDSP_FRAMEWORK
|
#if JUCE_USE_VDSP_FRAMEWORK
|
||||||
vDSP_vadd (src, 1, dest, 1, dest, 1, num);
|
vDSP_vadd (src, 1, dest, 1, dest, 1, num);
|
||||||
|
#elif JUCE_USE_ARM_NEON
|
||||||
|
JUCE_PERFORM_NEON_OP_SRC_DEST (dest[i] += src[i], vaddq_f32 (d, s), JUCE_LOAD_SRC_DEST)
|
||||||
#else
|
#else
|
||||||
JUCE_PERFORM_SSE_OP_SRC_DEST (dest[i] += src[i],
|
JUCE_PERFORM_SSE_OP_SRC_DEST (dest[i] += src[i], _mm_add_ps (d, s), JUCE_LOAD_SRC_DEST, JUCE_INCREMENT_SRC_DEST)
|
||||||
_mm_add_ps (d, s),
|
|
||||||
JUCE_LOAD_SRC_DEST, JUCE_INCREMENT_SRC_DEST)
|
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
void JUCE_CALLTYPE FloatVectorOperations::add (float* dest, float amount, int num) noexcept
|
void JUCE_CALLTYPE FloatVectorOperations::add (float* dest, float amount, int num) noexcept
|
||||||
{
|
{
|
||||||
#if JUCE_USE_SSE_INTRINSICS
|
#if JUCE_USE_ARM_NEON
|
||||||
const __m128 amountToAdd = _mm_load1_ps (&amount);
|
const float32x4_t amountToAdd = vld1q_dup_f32(&amount);
|
||||||
|
JUCE_PERFORM_NEON_OP_DEST (dest[i] += amount, vaddq_f32 (d, amountToAdd), JUCE_LOAD_DEST)
|
||||||
|
#else
|
||||||
|
#if JUCE_USE_SSE_INTRINSICS
|
||||||
|
const __m128 amountToAdd = _mm_load1_ps (&amount);
|
||||||
|
#endif
|
||||||
|
JUCE_PERFORM_SSE_OP_DEST (dest[i] += amount, _mm_add_ps (d, amountToAdd), JUCE_LOAD_DEST)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
JUCE_PERFORM_SSE_OP_DEST (dest[i] += amount,
|
|
||||||
_mm_add_ps (d, amountToAdd),
|
|
||||||
JUCE_LOAD_DEST)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void JUCE_CALLTYPE FloatVectorOperations::addWithMultiply (float* dest, const float* src, float multiplier, int num) noexcept
|
void JUCE_CALLTYPE FloatVectorOperations::addWithMultiply (float* dest, const float* src, float multiplier, int num) noexcept
|
||||||
{
|
{
|
||||||
#if JUCE_USE_SSE_INTRINSICS
|
#if JUCE_USE_ARM_NEON
|
||||||
const __m128 mult = _mm_load1_ps (&multiplier);
|
JUCE_PERFORM_NEON_OP_SRC_DEST (dest[i] += src[i] * multiplier,
|
||||||
|
vmlaq_n_f32 (d, s, multiplier),
|
||||||
|
JUCE_LOAD_SRC_DEST)
|
||||||
|
#else
|
||||||
|
#if JUCE_USE_SSE_INTRINSICS
|
||||||
|
const __m128 mult = _mm_load1_ps (&multiplier);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
JUCE_PERFORM_SSE_OP_SRC_DEST (dest[i] += src[i] * multiplier,
|
||||||
|
_mm_add_ps (d, _mm_mul_ps (mult, s)),
|
||||||
|
JUCE_LOAD_SRC_DEST, JUCE_INCREMENT_SRC_DEST)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
JUCE_PERFORM_SSE_OP_SRC_DEST (dest[i] += src[i] * multiplier,
|
|
||||||
_mm_add_ps (d, _mm_mul_ps (mult, s)),
|
|
||||||
JUCE_LOAD_SRC_DEST, JUCE_INCREMENT_SRC_DEST)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void JUCE_CALLTYPE FloatVectorOperations::multiply (float* dest, const float* src, int num) noexcept
|
void JUCE_CALLTYPE FloatVectorOperations::multiply (float* dest, const float* src, int num) noexcept
|
||||||
{
|
{
|
||||||
#if JUCE_USE_VDSP_FRAMEWORK
|
#if JUCE_USE_VDSP_FRAMEWORK
|
||||||
vDSP_vmul (src, 1, dest, 1, dest, 1, num);
|
vDSP_vmul (src, 1, dest, 1, dest, 1, num);
|
||||||
|
#elif JUCE_USE_ARM_NEON
|
||||||
|
JUCE_PERFORM_NEON_OP_SRC_DEST (dest[i] *= src[i], vmulq_f32 (d, s), JUCE_LOAD_SRC_DEST)
|
||||||
#else
|
#else
|
||||||
JUCE_PERFORM_SSE_OP_SRC_DEST (dest[i] *= src[i],
|
JUCE_PERFORM_SSE_OP_SRC_DEST (dest[i] *= src[i], _mm_mul_ps (d, s), JUCE_LOAD_SRC_DEST, JUCE_INCREMENT_SRC_DEST)
|
||||||
_mm_mul_ps (d, s),
|
|
||||||
JUCE_LOAD_SRC_DEST, JUCE_INCREMENT_SRC_DEST)
|
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -240,14 +331,13 @@ void JUCE_CALLTYPE FloatVectorOperations::multiply (float* dest, float multiplie
|
||||||
{
|
{
|
||||||
#if JUCE_USE_VDSP_FRAMEWORK
|
#if JUCE_USE_VDSP_FRAMEWORK
|
||||||
vDSP_vsmul (dest, 1, &multiplier, dest, 1, num);
|
vDSP_vsmul (dest, 1, &multiplier, dest, 1, num);
|
||||||
|
#elif JUCE_USE_ARM_NEON
|
||||||
|
JUCE_PERFORM_NEON_OP_DEST (dest[i] *= multiplier, vmulq_n_f32 (d, multiplier), JUCE_LOAD_DEST)
|
||||||
#else
|
#else
|
||||||
#if JUCE_USE_SSE_INTRINSICS
|
#if JUCE_USE_SSE_INTRINSICS
|
||||||
const __m128 mult = _mm_load1_ps (&multiplier);
|
const __m128 mult = _mm_load1_ps (&multiplier);
|
||||||
#endif
|
#endif
|
||||||
|
JUCE_PERFORM_SSE_OP_DEST (dest[i] *= multiplier, _mm_mul_ps (d, mult), JUCE_LOAD_DEST)
|
||||||
JUCE_PERFORM_SSE_OP_DEST (dest[i] *= multiplier,
|
|
||||||
_mm_mul_ps (d, mult),
|
|
||||||
JUCE_LOAD_DEST)
|
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -262,13 +352,19 @@ void FloatVectorOperations::negate (float* dest, const float* src, int num) noex
|
||||||
|
|
||||||
void JUCE_CALLTYPE FloatVectorOperations::convertFixedToFloat (float* dest, const int* src, float multiplier, int num) noexcept
|
void JUCE_CALLTYPE FloatVectorOperations::convertFixedToFloat (float* dest, const int* src, float multiplier, int num) noexcept
|
||||||
{
|
{
|
||||||
#if JUCE_USE_SSE_INTRINSICS
|
#if JUCE_USE_ARM_NEON
|
||||||
const __m128 mult = _mm_load1_ps (&multiplier);
|
JUCE_PERFORM_NEON_OP_SRC_DEST (dest[i] = src[i] * multiplier,
|
||||||
#endif
|
vmulq_n_f32 (vcvtq_f32_s32 (vld1q_s32 (src)), multiplier),
|
||||||
|
JUCE_LOAD_NONE)
|
||||||
|
#else
|
||||||
|
#if JUCE_USE_SSE_INTRINSICS
|
||||||
|
const __m128 mult = _mm_load1_ps (&multiplier);
|
||||||
|
#endif
|
||||||
|
|
||||||
JUCE_PERFORM_SSE_OP_SRC_DEST (dest[i] = src[i] * multiplier,
|
JUCE_PERFORM_SSE_OP_SRC_DEST (dest[i] = src[i] * multiplier,
|
||||||
_mm_mul_ps (mult, _mm_cvtepi32_ps (_mm_loadu_si128 ((const __m128i*) src))),
|
_mm_mul_ps (mult, _mm_cvtepi32_ps (_mm_loadu_si128 ((const __m128i*) src))),
|
||||||
JUCE_LOAD_NONE, JUCE_INCREMENT_SRC_DEST)
|
JUCE_LOAD_NONE, JUCE_INCREMENT_SRC_DEST)
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
void JUCE_CALLTYPE FloatVectorOperations::findMinAndMax (const float* src, int num, float& minResult, float& maxResult) noexcept
|
void JUCE_CALLTYPE FloatVectorOperations::findMinAndMax (const float* src, int num, float& minResult, float& maxResult) noexcept
|
||||||
|
|
@ -315,6 +411,51 @@ void JUCE_CALLTYPE FloatVectorOperations::findMinAndMax (const float* src, int n
|
||||||
localMax = jmax (localMax, s);
|
localMax = jmax (localMax, s);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
minResult = localMin;
|
||||||
|
maxResult = localMax;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
#elif JUCE_USE_ARM_NEON
|
||||||
|
const int numLongOps = num / 4;
|
||||||
|
|
||||||
|
if (numLongOps > 1)
|
||||||
|
{
|
||||||
|
float32x4_t mn, mx;
|
||||||
|
|
||||||
|
#define JUCE_MINMAX_NEON_LOOP(loadOp) \
|
||||||
|
mn = loadOp (src); \
|
||||||
|
mx = mn; \
|
||||||
|
src += 4; \
|
||||||
|
for (int i = 1; i < numLongOps; ++i) \
|
||||||
|
{ \
|
||||||
|
const float32x4_t s = loadOp (src); \
|
||||||
|
mn = vminq_f32 (mn, s); \
|
||||||
|
mx = vmaxq_f32 (mx, s); \
|
||||||
|
src += 4; \
|
||||||
|
}
|
||||||
|
|
||||||
|
JUCE_MINMAX_NEON_LOOP (vld1q_f32);
|
||||||
|
|
||||||
|
float localMin, localMax;
|
||||||
|
|
||||||
|
{
|
||||||
|
float mns[4], mxs[4];
|
||||||
|
vst1q_f32 (mns, mn);
|
||||||
|
vst1q_f32 (mxs, mx);
|
||||||
|
|
||||||
|
localMin = jmin (mns[0], mns[1], mns[2], mns[3]);
|
||||||
|
localMax = jmax (mxs[0], mxs[1], mxs[2], mxs[3]);
|
||||||
|
}
|
||||||
|
|
||||||
|
num &= 3;
|
||||||
|
|
||||||
|
for (int i = 0; i < num; ++i)
|
||||||
|
{
|
||||||
|
const float s = src[i];
|
||||||
|
localMin = jmin (localMin, s);
|
||||||
|
localMax = jmax (localMax, s);
|
||||||
|
}
|
||||||
|
|
||||||
minResult = localMin;
|
minResult = localMin;
|
||||||
maxResult = localMax;
|
maxResult = localMax;
|
||||||
return;
|
return;
|
||||||
|
|
@ -326,7 +467,7 @@ void JUCE_CALLTYPE FloatVectorOperations::findMinAndMax (const float* src, int n
|
||||||
|
|
||||||
float JUCE_CALLTYPE FloatVectorOperations::findMinimum (const float* src, int num) noexcept
|
float JUCE_CALLTYPE FloatVectorOperations::findMinimum (const float* src, int num) noexcept
|
||||||
{
|
{
|
||||||
#if JUCE_USE_SSE_INTRINSICS
|
#if JUCE_USE_SSE_INTRINSICS || JUCE_USE_ARM_NEON
|
||||||
return FloatVectorHelpers::findMinimumOrMaximum (src, num, true);
|
return FloatVectorHelpers::findMinimumOrMaximum (src, num, true);
|
||||||
#else
|
#else
|
||||||
return juce::findMinimum (src, num);
|
return juce::findMinimum (src, num);
|
||||||
|
|
@ -335,7 +476,7 @@ float JUCE_CALLTYPE FloatVectorOperations::findMinimum (const float* src, int nu
|
||||||
|
|
||||||
float JUCE_CALLTYPE FloatVectorOperations::findMaximum (const float* src, int num) noexcept
|
float JUCE_CALLTYPE FloatVectorOperations::findMaximum (const float* src, int num) noexcept
|
||||||
{
|
{
|
||||||
#if JUCE_USE_SSE_INTRINSICS
|
#if JUCE_USE_SSE_INTRINSICS || JUCE_USE_ARM_NEON
|
||||||
return FloatVectorHelpers::findMinimumOrMaximum (src, num, false);
|
return FloatVectorHelpers::findMinimumOrMaximum (src, num, false);
|
||||||
#else
|
#else
|
||||||
return juce::findMaximum (src, num);
|
return juce::findMaximum (src, num);
|
||||||
|
|
@ -350,3 +491,126 @@ void JUCE_CALLTYPE FloatVectorOperations::enableFlushToZeroMode (bool shouldEnab
|
||||||
#endif
|
#endif
|
||||||
(void) shouldEnable;
|
(void) shouldEnable;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
//==============================================================================
|
||||||
|
//==============================================================================
|
||||||
|
#if JUCE_UNIT_TESTS
|
||||||
|
|
||||||
|
class FloatVectorOperationsTests : public UnitTest
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
FloatVectorOperationsTests() : UnitTest ("FloatVectorOperations") {}
|
||||||
|
|
||||||
|
void runTest()
|
||||||
|
{
|
||||||
|
beginTest ("FloatVectorOperations");
|
||||||
|
|
||||||
|
for (int i = 100; --i >= 0;)
|
||||||
|
{
|
||||||
|
const int num = getRandom().nextInt (500) + 1;
|
||||||
|
|
||||||
|
HeapBlock<float> buffer1 (num + 16), buffer2 (num + 16);
|
||||||
|
HeapBlock<int> buffer3 (num + 16);
|
||||||
|
|
||||||
|
#if JUCE_ARM
|
||||||
|
float* const data1 = buffer1;
|
||||||
|
float* const data2 = buffer2;
|
||||||
|
int* const int1 = buffer3;
|
||||||
|
#else
|
||||||
|
float* const data1 = addBytesToPointer (buffer1.getData(), getRandom().nextInt (16));
|
||||||
|
float* const data2 = addBytesToPointer (buffer2.getData(), getRandom().nextInt (16));
|
||||||
|
int* const int1 = addBytesToPointer (buffer3.getData(), getRandom().nextInt (16));
|
||||||
|
#endif
|
||||||
|
|
||||||
|
fillRandomly (data1, num);
|
||||||
|
fillRandomly (data2, num);
|
||||||
|
|
||||||
|
float mn1, mx1, mn2, mx2;
|
||||||
|
FloatVectorOperations::findMinAndMax (data1, num, mn1, mx1);
|
||||||
|
juce::findMinAndMax (data1, num, mn2, mx2);
|
||||||
|
expect (mn1 == mn2);
|
||||||
|
expect (mx1 == mx2);
|
||||||
|
|
||||||
|
expect (FloatVectorOperations::findMinimum (data1, num) == juce::findMinimum (data1, num));
|
||||||
|
expect (FloatVectorOperations::findMaximum (data1, num) == juce::findMaximum (data1, num));
|
||||||
|
|
||||||
|
expect (FloatVectorOperations::findMinimum (data2, num) == juce::findMinimum (data2, num));
|
||||||
|
expect (FloatVectorOperations::findMaximum (data2, num) == juce::findMaximum (data2, num));
|
||||||
|
|
||||||
|
FloatVectorOperations::clear (data1, num);
|
||||||
|
expect (areAllValuesEqual (data1, num, 0));
|
||||||
|
|
||||||
|
FloatVectorOperations::fill (data1, 2.0f, num);
|
||||||
|
expect (areAllValuesEqual (data1, num, 2.0f));
|
||||||
|
|
||||||
|
FloatVectorOperations::add (data1, 2.0f, num);
|
||||||
|
expect (areAllValuesEqual (data1, num, 4.0f));
|
||||||
|
|
||||||
|
FloatVectorOperations::copy (data2, data1, num);
|
||||||
|
expect (areAllValuesEqual (data2, num, 4.0f));
|
||||||
|
|
||||||
|
FloatVectorOperations::add (data2, data1, num);
|
||||||
|
expect (areAllValuesEqual (data2, num, 8.0f));
|
||||||
|
|
||||||
|
FloatVectorOperations::copyWithMultiply (data2, data1, 4.0f, num);
|
||||||
|
expect (areAllValuesEqual (data2, num, 16.0f));
|
||||||
|
|
||||||
|
FloatVectorOperations::addWithMultiply (data2, data1, 4.0f, num);
|
||||||
|
expect (areAllValuesEqual (data2, num, 32.0f));
|
||||||
|
|
||||||
|
FloatVectorOperations::multiply (data1, 2.0f, num);
|
||||||
|
expect (areAllValuesEqual (data1, num, 8.0f));
|
||||||
|
|
||||||
|
FloatVectorOperations::multiply (data1, data2, num);
|
||||||
|
expect (areAllValuesEqual (data1, num, 256.0f));
|
||||||
|
|
||||||
|
FloatVectorOperations::negate (data2, data1, num);
|
||||||
|
expect (areAllValuesEqual (data2, num, -256.0f));
|
||||||
|
|
||||||
|
fillRandomly (int1, num);
|
||||||
|
FloatVectorOperations::convertFixedToFloat (data1, int1, 2.0f, num);
|
||||||
|
convertFixed (data2, int1, 2.0f, num);
|
||||||
|
expect (buffersMatch (data1, data2, num));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void fillRandomly (float* d, int num)
|
||||||
|
{
|
||||||
|
while (--num >= 0)
|
||||||
|
*d++ = getRandom().nextFloat() * 1000.0f;
|
||||||
|
}
|
||||||
|
|
||||||
|
void fillRandomly (int* d, int num)
|
||||||
|
{
|
||||||
|
while (--num >= 0)
|
||||||
|
*d++ = getRandom().nextInt();
|
||||||
|
}
|
||||||
|
|
||||||
|
static void convertFixed (float* d, const int* s, float multiplier, int num)
|
||||||
|
{
|
||||||
|
while (--num >= 0)
|
||||||
|
*d++ = *s++ * multiplier;
|
||||||
|
}
|
||||||
|
|
||||||
|
static bool areAllValuesEqual (const float* d, int num, float target)
|
||||||
|
{
|
||||||
|
while (--num >= 0)
|
||||||
|
if (*d++ != target)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
static bool buffersMatch (const float* d1, const float* d2, int num)
|
||||||
|
{
|
||||||
|
while (--num >= 0)
|
||||||
|
if (std::abs (*d1++ - *d2++) > std::numeric_limits<float>::epsilon())
|
||||||
|
return false;
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
static FloatVectorOperationsTests vectorOpTests;
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
|
||||||
|
|
@ -58,6 +58,11 @@
|
||||||
#undef JUCE_USE_VDSP_FRAMEWORK
|
#undef JUCE_USE_VDSP_FRAMEWORK
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#if __ARM_NEON__ && ! (JUCE_USE_VDSP_FRAMEWORK || defined (JUCE_USE_ARM_NEON))
|
||||||
|
#define JUCE_USE_ARM_NEON 1
|
||||||
|
#include <arm_neon.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
namespace juce
|
namespace juce
|
||||||
{
|
{
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -46,7 +46,7 @@ public:
|
||||||
{
|
{
|
||||||
// OpenSL has piss-poor support for determining latency, so the only way I can find to
|
// OpenSL has piss-poor support for determining latency, so the only way I can find to
|
||||||
// get a number for this is by asking the AudioTrack/AudioRecord classes..
|
// get a number for this is by asking the AudioTrack/AudioRecord classes..
|
||||||
AndroidAudioIODevice javaDevice (String());
|
AndroidAudioIODevice javaDevice (String::empty);
|
||||||
|
|
||||||
// this is a total guess about how to calculate the latency, but seems to vaguely agree
|
// this is a total guess about how to calculate the latency, but seems to vaguely agree
|
||||||
// with the devices I've tested.. YMMV
|
// with the devices I've tested.. YMMV
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue