From 8f02179bbf9a6a3726bc03bd4306042d222c0931 Mon Sep 17 00:00:00 2001 From: Zsolt Garamvolgyi Date: Thu, 30 Nov 2017 13:06:31 +0100 Subject: [PATCH] Various additions to SIMDRegister --- .../juce_dsp/containers/juce_SIMDRegister.h | 25 ++ .../containers/juce_SIMDRegister_test.cpp | 37 ++- .../juce_dsp/native/juce_avx_SIMDNativeOps.h | 9 + .../native/juce_fallback_SIMDNativeOps.h | 13 + .../juce_dsp/native/juce_neon_SIMDNativeOps.h | 287 ++++++++++-------- .../juce_dsp/native/juce_sse_SIMDNativeOps.h | 10 + modules/juce_dsp/processors/juce_FIRFilter.h | 2 +- .../processors/juce_FIRFilter_test.cpp | 2 +- 8 files changed, 250 insertions(+), 135 deletions(-) diff --git a/modules/juce_dsp/containers/juce_SIMDRegister.h b/modules/juce_dsp/containers/juce_SIMDRegister.h index 8850a4f111..fa038887e3 100644 --- a/modules/juce_dsp/containers/juce_SIMDRegister.h +++ b/modules/juce_dsp/containers/juce_SIMDRegister.h @@ -102,6 +102,18 @@ struct SIMDRegister vSIMDType value; + /** Default constructor. */ + inline JUCE_VECTOR_CALLTYPE SIMDRegister() noexcept {} + + /** Constructs an object from the native SIMD type. */ + inline JUCE_VECTOR_CALLTYPE SIMDRegister (vSIMDType a) noexcept : value (a) {} + + /** Constructs an object from a scalar type by broadcasting it to all elements. */ + inline JUCE_VECTOR_CALLTYPE SIMDRegister (Type s) noexcept { *this = s; } + + /** Destructor. */ + inline JUCE_VECTOR_CALLTYPE ~SIMDRegister() noexcept {} + //============================================================================== /** Returns the number of elements in this vector. */ static constexpr size_t size() noexcept { return SIMDNumElements; } @@ -232,6 +244,19 @@ struct SIMDRegister /** Returns a vector where each element is the bit-xor'd value of the corresponding element in the receiver and the scalar s.*/ inline SIMDRegister JUCE_VECTOR_CALLTYPE operator^ (MaskType s) const noexcept { return { NativeOps::bit_xor (value, toVecType (s)) }; } + //============================================================================== + /** Returns true if all element-wise comparisons return true. */ + inline bool JUCE_VECTOR_CALLTYPE operator== (SIMDRegister other) const noexcept { return NativeOps::allEqual (value, other.value); } + + /** Returns true if any element-wise comparisons return false. */ + inline bool JUCE_VECTOR_CALLTYPE operator!= (SIMDRegister other) const noexcept { return ! (*this == other); } + + /** Returns true if all elements are equal to the scalar. */ + inline bool JUCE_VECTOR_CALLTYPE operator== (Type s) const noexcept { return *this == SIMDRegister::expand (s); } + + /** Returns true if any elements are not equal to the scalar. */ + inline bool JUCE_VECTOR_CALLTYPE operator!= (Type s) const noexcept { return ! (*this == s); } + //============================================================================== /** Returns a SIMDRegister of the corresponding integral type where each element has each bit set if the corresponding element of a is equal to the corresponding element of b, or zero otherwise.
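Note: the constructors and comparison operators added above give SIMDRegister value-style semantics. A minimal usage sketch (illustrative only, not part of the patch; it assumes the juce_dsp module header is available on the include path, and the helper name is made up for this example):

    // Illustrative usage only (not part of the patch). Assumes the juce_dsp
    // module is available; the include below is one common way to pull it in.
    #include <juce_dsp/juce_dsp.h>

    static bool simdComparisonExample()        // hypothetical helper, named for this sketch
    {
        using Reg = juce::dsp::SIMDRegister<float>;

        Reg a (1.0f);                          // new scalar constructor: broadcasts 1.0f to every element
        auto b = Reg::expand (1.0f);           // equivalent spelling via the existing expand()

        const bool sameRegister = (a == b);    // true only if allEqual succeeds for every element
        const bool sameScalar   = (a == 1.0f); // compares against Reg::expand (1.0f) internally
        const bool anyDiffer    = (a != b);    // negation of operator==

        return sameRegister && sameScalar && ! anyDiffer;
    }
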
diff --git a/modules/juce_dsp/containers/juce_SIMDRegister_test.cpp b/modules/juce_dsp/containers/juce_SIMDRegister_test.cpp index c23ca22c82..c84e34ed23 100644 --- a/modules/juce_dsp/containers/juce_SIMDRegister_test.cpp +++ b/modules/juce_dsp/containers/juce_SIMDRegister_test.cpp @@ -370,8 +370,10 @@ public: { type array_a [SIMDRegister::SIMDNumElements]; - union + union ConversionUnion { + inline ConversionUnion() {} + inline ~ConversionUnion() {} SIMDRegister floatVersion; vMaskType intVersion; } a, b; @@ -512,6 +514,39 @@ public: u.expect (vecEqualToArray (le, array_le )); u.expect (vecEqualToArray (gt, array_gt )); u.expect (vecEqualToArray (ge, array_ge )); + + do + { + SIMDRegister_test_internal::fillRandom (array_a, SIMDRegister::SIMDNumElements, random); + SIMDRegister_test_internal::fillRandom (array_b, SIMDRegister::SIMDNumElements, random); + } while (std::equal (array_a, array_a + SIMDRegister::SIMDNumElements, array_b)); + + copy (a, array_a); + copy (b, array_b); + u.expect (a != b); + u.expect (b != a); + u.expect (! (a == b)); + u.expect (! (b == a)); + + SIMDRegister_test_internal::fillRandom (array_a, SIMDRegister::SIMDNumElements, random); + copy (a, array_a); + copy (b, array_a); + + u.expect (a == b); + u.expect (b == a); + u.expect (! (a != b)); + u.expect (! (b != a)); + + auto scalar = a[0]; + a = SIMDRegister::expand (scalar); + + u.expect (a == scalar); + u.expect (! (a != scalar)); + + scalar--; + + u.expect (a != scalar); + u.expect (! (a == scalar)); } } }; diff --git a/modules/juce_dsp/native/juce_avx_SIMDNativeOps.h b/modules/juce_dsp/native/juce_avx_SIMDNativeOps.h index 935c83866b..db5decf191 100644 --- a/modules/juce_dsp/native/juce_avx_SIMDNativeOps.h +++ b/modules/juce_dsp/native/juce_avx_SIMDNativeOps.h @@ -82,6 +82,7 @@ struct SIMDNativeOps static forcedinline __m256 JUCE_VECTOR_CALLTYPE notEqual (__m256 a, __m256 b) noexcept { return _mm256_cmp_ps (a, b, _CMP_NEQ_OQ); } static forcedinline __m256 JUCE_VECTOR_CALLTYPE greaterThan (__m256 a, __m256 b) noexcept { return _mm256_cmp_ps (a, b, _CMP_GT_OQ); } static forcedinline __m256 JUCE_VECTOR_CALLTYPE greaterThanOrEqual (__m256 a, __m256 b) noexcept { return _mm256_cmp_ps (a, b, _CMP_GE_OQ); } + static forcedinline bool JUCE_VECTOR_CALLTYPE allEqual (__m256 a, __m256 b) noexcept { return (_mm256_movemask_ps (equal (a, b)) == 0xff); } static forcedinline __m256 JUCE_VECTOR_CALLTYPE multiplyAdd (__m256 a, __m256 b, __m256 c) noexcept { return _mm256_fmadd_ps (b, c, a); } static forcedinline __m256 JUCE_VECTOR_CALLTYPE dupeven (__m256 a) noexcept { return _mm256_shuffle_ps (a, a, _MM_SHUFFLE (2, 2, 0, 0)); } static forcedinline __m256 JUCE_VECTOR_CALLTYPE dupodd (__m256 a) noexcept { return _mm256_shuffle_ps (a, a, _MM_SHUFFLE (3, 3, 1, 1)); } @@ -141,6 +142,7 @@ struct SIMDNativeOps static forcedinline __m256d JUCE_VECTOR_CALLTYPE notEqual (__m256d a, __m256d b) noexcept { return _mm256_cmp_pd (a, b, _CMP_NEQ_OQ); } static forcedinline __m256d JUCE_VECTOR_CALLTYPE greaterThan (__m256d a, __m256d b) noexcept { return _mm256_cmp_pd (a, b, _CMP_GT_OQ); } static forcedinline __m256d JUCE_VECTOR_CALLTYPE greaterThanOrEqual (__m256d a, __m256d b) noexcept { return _mm256_cmp_pd (a, b, _CMP_GE_OQ); } + static forcedinline bool JUCE_VECTOR_CALLTYPE allEqual (__m256d a, __m256d b) noexcept { return (_mm256_movemask_pd (equal (a, b)) == 0xf); } static forcedinline __m256d JUCE_VECTOR_CALLTYPE multiplyAdd (__m256d a, __m256d b, __m256d c) noexcept { return _mm256_add_pd (a, _mm256_mul_pd (b, c)); } static 
forcedinline __m256d JUCE_VECTOR_CALLTYPE dupeven (__m256d a) noexcept { return _mm256_shuffle_pd (a, a, 0); } static forcedinline __m256d JUCE_VECTOR_CALLTYPE dupodd (__m256d a) noexcept { return _mm256_shuffle_pd (a, a, (1 << 0) | (1 << 1) | (1 << 2) | (1 << 3)); } @@ -261,6 +263,7 @@ struct SIMDNativeOps static forcedinline __m256i JUCE_VECTOR_CALLTYPE equal (__m256i a, __m256i b) noexcept { return _mm256_cmpeq_epi8 (a, b); } static forcedinline __m256i JUCE_VECTOR_CALLTYPE greaterThan (__m256i a, __m256i b) noexcept { return _mm256_cmpgt_epi8 (ssign (a), ssign (b)); } static forcedinline __m256i JUCE_VECTOR_CALLTYPE greaterThanOrEqual (__m256i a, __m256i b) noexcept { return bit_or (greaterThan (a, b), equal (a,b)); } + static forcedinline bool JUCE_VECTOR_CALLTYPE allEqual (__m256i a, __m256i b) noexcept { return (_mm256_movemask_epi8 (equal (a, b)) == -1); } static forcedinline __m256i JUCE_VECTOR_CALLTYPE multiplyAdd (__m256i a, __m256i b, __m256i c) noexcept { return add (a, mul (b, c)); } static forcedinline __m256i JUCE_VECTOR_CALLTYPE notEqual (__m256i a, __m256i b) noexcept { return bit_not (equal (a, b)); } @@ -336,6 +339,7 @@ struct SIMDNativeOps static forcedinline __m256i JUCE_VECTOR_CALLTYPE greaterThanOrEqual (__m256i a, __m256i b) noexcept { return bit_or (greaterThan (a, b), equal (a,b)); } static forcedinline __m256i JUCE_VECTOR_CALLTYPE multiplyAdd (__m256i a, __m256i b, __m256i c) noexcept { return add (a, mul (b, c)); } static forcedinline __m256i JUCE_VECTOR_CALLTYPE notEqual (__m256i a, __m256i b) noexcept { return bit_not (equal (a, b)); } + static forcedinline bool JUCE_VECTOR_CALLTYPE allEqual (__m256i a, __m256i b) noexcept { return (_mm256_movemask_epi8 (equal (a, b)) == -1); } //============================================================================== static forcedinline __m256i JUCE_VECTOR_CALLTYPE load (const int16_t* a) noexcept @@ -390,6 +394,7 @@ struct SIMDNativeOps static forcedinline __m256i JUCE_VECTOR_CALLTYPE greaterThanOrEqual (__m256i a, __m256i b) noexcept { return bit_or (greaterThan (a, b), equal (a,b)); } static forcedinline __m256i JUCE_VECTOR_CALLTYPE multiplyAdd (__m256i a, __m256i b, __m256i c) noexcept { return add (a, mul (b, c)); } static forcedinline __m256i JUCE_VECTOR_CALLTYPE notEqual (__m256i a, __m256i b) noexcept { return bit_not (equal (a, b)); } + static forcedinline bool JUCE_VECTOR_CALLTYPE allEqual (__m256i a, __m256i b) noexcept { return (_mm256_movemask_epi8 (equal (a, b)) == -1); } //============================================================================== static forcedinline __m256i JUCE_VECTOR_CALLTYPE load (const uint16_t* a) noexcept @@ -443,6 +448,7 @@ struct SIMDNativeOps static forcedinline __m256i JUCE_VECTOR_CALLTYPE greaterThanOrEqual (__m256i a, __m256i b) noexcept { return bit_or (greaterThan (a, b), equal (a,b)); } static forcedinline __m256i JUCE_VECTOR_CALLTYPE multiplyAdd (__m256i a, __m256i b, __m256i c) noexcept { return add (a, mul (b, c)); } static forcedinline __m256i JUCE_VECTOR_CALLTYPE notEqual (__m256i a, __m256i b) noexcept { return bit_not (equal (a, b)); } + static forcedinline bool JUCE_VECTOR_CALLTYPE allEqual (__m256i a, __m256i b) noexcept { return (_mm256_movemask_epi8 (equal (a, b)) == -1); } //============================================================================== static forcedinline __m256i JUCE_VECTOR_CALLTYPE load (const int32_t* a) noexcept @@ -495,6 +501,7 @@ struct SIMDNativeOps static forcedinline __m256i JUCE_VECTOR_CALLTYPE greaterThanOrEqual (__m256i a, 
__m256i b) noexcept { return bit_or (greaterThan (a, b), equal (a,b)); } static forcedinline __m256i JUCE_VECTOR_CALLTYPE multiplyAdd (__m256i a, __m256i b, __m256i c) noexcept { return add (a, mul (b, c)); } static forcedinline __m256i JUCE_VECTOR_CALLTYPE notEqual (__m256i a, __m256i b) noexcept { return bit_not (equal (a, b)); } + static forcedinline bool JUCE_VECTOR_CALLTYPE allEqual (__m256i a, __m256i b) noexcept { return (_mm256_movemask_epi8 (equal (a, b)) == -1); } //============================================================================== static forcedinline __m256i JUCE_VECTOR_CALLTYPE load (const uint32_t* a) noexcept @@ -543,6 +550,7 @@ struct SIMDNativeOps static forcedinline __m256i JUCE_VECTOR_CALLTYPE greaterThanOrEqual (__m256i a, __m256i b) noexcept { return bit_or (greaterThan (a, b), equal (a,b)); } static forcedinline __m256i JUCE_VECTOR_CALLTYPE multiplyAdd (__m256i a, __m256i b, __m256i c) noexcept { return add (a, mul (b, c)); } static forcedinline __m256i JUCE_VECTOR_CALLTYPE notEqual (__m256i a, __m256i b) noexcept { return bit_not (equal (a, b)); } + static forcedinline bool JUCE_VECTOR_CALLTYPE allEqual (__m256i a, __m256i b) noexcept { return (_mm256_movemask_epi8 (equal (a, b)) == -1); } //============================================================================== static forcedinline __m256i JUCE_VECTOR_CALLTYPE load (const int64_t* a) noexcept @@ -614,6 +622,7 @@ struct SIMDNativeOps static forcedinline __m256i JUCE_VECTOR_CALLTYPE greaterThanOrEqual (__m256i a, __m256i b) noexcept { return bit_or (greaterThan (a, b), equal (a,b)); } static forcedinline __m256i JUCE_VECTOR_CALLTYPE multiplyAdd (__m256i a, __m256i b, __m256i c) noexcept { return add (a, mul (b, c)); } static forcedinline __m256i JUCE_VECTOR_CALLTYPE notEqual (__m256i a, __m256i b) noexcept { return bit_not (equal (a, b)); } + static forcedinline bool JUCE_VECTOR_CALLTYPE allEqual (__m256i a, __m256i b) noexcept { return (_mm256_movemask_epi8 (equal (a, b)) == -1); } //============================================================================== static forcedinline __m256i JUCE_VECTOR_CALLTYPE load (const uint64_t* a) noexcept diff --git a/modules/juce_dsp/native/juce_fallback_SIMDNativeOps.h b/modules/juce_dsp/native/juce_fallback_SIMDNativeOps.h index d88ee12fcb..3d5325a9a4 100644 --- a/modules/juce_dsp/native/juce_fallback_SIMDNativeOps.h +++ b/modules/juce_dsp/native/juce_fallback_SIMDNativeOps.h @@ -117,6 +117,19 @@ struct SIMDFallbackOps return retval; } + //============================================================================== + static forcedinline bool allEqual (vSIMDType a, vSIMDType b) noexcept + { + auto* aSrc = reinterpret_cast (&a); + auto* bSrc = reinterpret_cast (&b); + + for (size_t i = 0; i < n; ++i) + if (aSrc[i] != bSrc[i]) + return false; + + return true; + } + //============================================================================== static forcedinline vSIMDType cmplxmul (vSIMDType a, vSIMDType b) noexcept { diff --git a/modules/juce_dsp/native/juce_neon_SIMDNativeOps.h b/modules/juce_dsp/native/juce_neon_SIMDNativeOps.h index 0f67564476..ddced57ec6 100644 --- a/modules/juce_dsp/native/juce_neon_SIMDNativeOps.h +++ b/modules/juce_dsp/native/juce_neon_SIMDNativeOps.h @@ -51,86 +51,82 @@ template struct SIMDNativeOps; //============================================================================== -/** Single-precision floating point NEON intrinsics. */ +/** Unsigned 32-bit integer NEON intrinsics. 
*/ template <> -struct SIMDNativeOps +struct SIMDNativeOps { //============================================================================== - typedef float32x4_t vSIMDType; - typedef uint32x4_t vMaskType; - typedef SIMDFallbackOps fb; + typedef uint32x4_t vSIMDType; + typedef SIMDFallbackOps fb; //============================================================================== - DECLARE_NEON_SIMD_CONST (int32_t, kAllBitsSet); - DECLARE_NEON_SIMD_CONST (int32_t, kEvenHighBit); - DECLARE_NEON_SIMD_CONST (float, kOne); + DECLARE_NEON_SIMD_CONST (uint32_t, kAllBitsSet); //============================================================================== - static forcedinline vSIMDType expand (float s) noexcept { return vdupq_n_f32 (s); } - static forcedinline vSIMDType load (const float* a) noexcept { return vld1q_f32 (a); } - static forcedinline void store (vSIMDType value, float* a) noexcept { vst1q_f32 (a, value); } - static forcedinline vSIMDType add (vSIMDType a, vSIMDType b) noexcept { return vaddq_f32 (a, b); } - static forcedinline vSIMDType sub (vSIMDType a, vSIMDType b) noexcept { return vsubq_f32 (a, b); } - static forcedinline vSIMDType mul (vSIMDType a, vSIMDType b) noexcept { return vmulq_f32 (a, b); } - static forcedinline vSIMDType bit_and (vSIMDType a, vSIMDType b) noexcept { return (vSIMDType) vandq_u32 ((vMaskType) a, (vMaskType) b); } - static forcedinline vSIMDType bit_or (vSIMDType a, vSIMDType b) noexcept { return (vSIMDType) vorrq_u32 ((vMaskType) a, (vMaskType) b); } - static forcedinline vSIMDType bit_xor (vSIMDType a, vSIMDType b) noexcept { return (vSIMDType) veorq_u32 ((vMaskType) a, (vMaskType) b); } - static forcedinline vSIMDType bit_notand (vSIMDType a, vSIMDType b) noexcept { return (vSIMDType) vbicq_u32 ((vMaskType) b, (vMaskType) a); } - static forcedinline vSIMDType bit_not (vSIMDType a) noexcept { return bit_notand (a, vld1q_f32 ((float*) kAllBitsSet)); } - static forcedinline vSIMDType min (vSIMDType a, vSIMDType b) noexcept { return vminq_f32 (a, b); } - static forcedinline vSIMDType max (vSIMDType a, vSIMDType b) noexcept { return vmaxq_f32 (a, b); } - static forcedinline vSIMDType equal (vSIMDType a, vSIMDType b) noexcept { return (vSIMDType) vceqq_f32 (a, b); } + static forcedinline vSIMDType expand (uint32_t s) noexcept { return vdupq_n_u32 (s); } + static forcedinline vSIMDType load (const uint32_t* a) noexcept { return vld1q_u32 (a); } + static forcedinline void store (vSIMDType value, uint32_t* a) noexcept { vst1q_u32 (a, value); } + static forcedinline vSIMDType add (vSIMDType a, vSIMDType b) noexcept { return vaddq_u32 (a, b); } + static forcedinline vSIMDType sub (vSIMDType a, vSIMDType b) noexcept { return vsubq_u32 (a, b); } + static forcedinline vSIMDType mul (vSIMDType a, vSIMDType b) noexcept { return vmulq_u32 (a, b); } + static forcedinline vSIMDType bit_and (vSIMDType a, vSIMDType b) noexcept { return vandq_u32 (a, b); } + static forcedinline vSIMDType bit_or (vSIMDType a, vSIMDType b) noexcept { return vorrq_u32 (a, b); } + static forcedinline vSIMDType bit_xor (vSIMDType a, vSIMDType b) noexcept { return veorq_u32 (a, b); } + static forcedinline vSIMDType bit_notand (vSIMDType a, vSIMDType b) noexcept { return vbicq_u32 (b, a); } + static forcedinline vSIMDType bit_not (vSIMDType a) noexcept { return bit_notand (a, vld1q_u32 ((uint32_t*) kAllBitsSet)); } + static forcedinline vSIMDType min (vSIMDType a, vSIMDType b) noexcept { return vminq_u32 (a, b); } + static forcedinline vSIMDType max (vSIMDType a, vSIMDType b) noexcept { return 
vmaxq_u32 (a, b); } + static forcedinline vSIMDType equal (vSIMDType a, vSIMDType b) noexcept { return (vSIMDType) vceqq_u32 (a, b); } + static forcedinline bool allEqual (vSIMDType a, vSIMDType b) noexcept { return (sum (notEqual (a, b)) == 0); } static forcedinline vSIMDType notEqual (vSIMDType a, vSIMDType b) noexcept { return bit_not (equal (a, b)); } - static forcedinline vSIMDType greaterThan (vSIMDType a, vSIMDType b) noexcept { return (vSIMDType) vcgtq_f32 (a, b); } - static forcedinline vSIMDType greaterThanOrEqual (vSIMDType a, vSIMDType b) noexcept { return (vSIMDType) vcgeq_f32 (a, b); } - static forcedinline vSIMDType multiplyAdd (vSIMDType a, vSIMDType b, vSIMDType c) noexcept { return vmlaq_f32 (a, b, c); } - static forcedinline vSIMDType dupeven (vSIMDType a) noexcept { return fb::shuffle<(0 << 0) | (0 << 2) | (2 << 4) | (2 << 6)> (a); } - static forcedinline vSIMDType dupodd (vSIMDType a) noexcept { return fb::shuffle<(1 << 0) | (1 << 2) | (3 << 4) | (3 << 6)> (a); } - static forcedinline vSIMDType swapevenodd (vSIMDType a) noexcept { return fb::shuffle<(1 << 0) | (0 << 2) | (3 << 4) | (2 << 6)> (a); } - static forcedinline float sum (vSIMDType a) noexcept { return fb::sum (a); } - static forcedinline vSIMDType oddevensum (vSIMDType a) noexcept { return add (fb::shuffle<(2 << 0) | (3 << 2) | (0 << 4) | (1 << 6)> (a), a); } - - //============================================================================== - static forcedinline vSIMDType cmplxmul (vSIMDType a, vSIMDType b) noexcept + static forcedinline vSIMDType greaterThan (vSIMDType a, vSIMDType b) noexcept { return (vSIMDType) vcgtq_u32 (a, b); } + static forcedinline vSIMDType greaterThanOrEqual (vSIMDType a, vSIMDType b) noexcept { return (vSIMDType) vcgeq_u32 (a, b); } + static forcedinline vSIMDType multiplyAdd (vSIMDType a, vSIMDType b, vSIMDType c) noexcept { return vmlaq_u32 (a, b, c); } + static forcedinline uint32_t sum (vSIMDType a) noexcept { - vSIMDType rr_ir = mul (a, dupeven (b)); - vSIMDType ii_ri = mul (swapevenodd (a), dupodd (b)); - return add (rr_ir, bit_xor (ii_ri, vld1q_f32 ((float*) kEvenHighBit))); + auto rr = vadd_u32 (vget_high_u32 (a), vget_low_u32 (a)); + return vget_lane_u32 (vpadd_u32 (rr, rr), 0); } }; //============================================================================== -/** Double-precision floating point NEON intrinsics does not exist in NEON - so we need to emulate this. -*/ +/** Signed 32-bit integer NEON intrinsics. 
*/ template <> -struct SIMDNativeOps +struct SIMDNativeOps { //============================================================================== - typedef struct { double values [2]; } vSIMDType; - typedef SIMDFallbackOps fb; + typedef int32x4_t vSIMDType; + typedef SIMDFallbackOps fb; - static forcedinline vSIMDType expand (double s) noexcept { return fb::expand (s); } - static forcedinline vSIMDType load (const double* a) noexcept { return fb::load (a); } - static forcedinline void store (vSIMDType value, double* a) noexcept { fb::store (value, a); } - static forcedinline vSIMDType add (vSIMDType a, vSIMDType b) noexcept { return fb::add (a, b); } - static forcedinline vSIMDType sub (vSIMDType a, vSIMDType b) noexcept { return fb::sub (a, b); } - static forcedinline vSIMDType mul (vSIMDType a, vSIMDType b) noexcept { return fb::mul (a, b); } - static forcedinline vSIMDType bit_and (vSIMDType a, vSIMDType b) noexcept { return fb::bit_and (a, b); } - static forcedinline vSIMDType bit_or (vSIMDType a, vSIMDType b) noexcept { return fb::bit_or (a, b); } - static forcedinline vSIMDType bit_xor (vSIMDType a, vSIMDType b) noexcept { return fb::bit_xor (a, b); } - static forcedinline vSIMDType bit_notand (vSIMDType a, vSIMDType b) noexcept { return fb::bit_notand (a, b); } - static forcedinline vSIMDType bit_not (vSIMDType a) noexcept { return fb::bit_not (a); } - static forcedinline vSIMDType min (vSIMDType a, vSIMDType b) noexcept { return fb::min (a, b); } - static forcedinline vSIMDType max (vSIMDType a, vSIMDType b) noexcept { return fb::max (a, b); } - static forcedinline vSIMDType equal (vSIMDType a, vSIMDType b) noexcept { return fb::equal (a, b); } - static forcedinline vSIMDType notEqual (vSIMDType a, vSIMDType b) noexcept { return fb::notEqual (a, b); } - static forcedinline vSIMDType greaterThan (vSIMDType a, vSIMDType b) noexcept { return fb::greaterThan (a, b); } - static forcedinline vSIMDType greaterThanOrEqual (vSIMDType a, vSIMDType b) noexcept { return fb::greaterThanOrEqual (a, b); } - static forcedinline vSIMDType multiplyAdd (vSIMDType a, vSIMDType b, vSIMDType c) noexcept { return fb::multiplyAdd (a, b, c); } - static forcedinline vSIMDType cmplxmul (vSIMDType a, vSIMDType b) noexcept { return fb::cmplxmul (a, b); } - static forcedinline double sum (vSIMDType a) noexcept { return fb::sum (a); } - static forcedinline vSIMDType oddevensum (vSIMDType a) noexcept { return a; } + //============================================================================== + DECLARE_NEON_SIMD_CONST (int32_t, kAllBitsSet); + + //============================================================================== + static forcedinline vSIMDType expand (int32_t s) noexcept { return vdupq_n_s32 (s); } + static forcedinline vSIMDType load (const int32_t* a) noexcept { return vld1q_s32 (a); } + static forcedinline void store (vSIMDType value, int32_t* a) noexcept { vst1q_s32 (a, value); } + static forcedinline vSIMDType add (vSIMDType a, vSIMDType b) noexcept { return vaddq_s32 (a, b); } + static forcedinline vSIMDType sub (vSIMDType a, vSIMDType b) noexcept { return vsubq_s32 (a, b); } + static forcedinline vSIMDType mul (vSIMDType a, vSIMDType b) noexcept { return vmulq_s32 (a, b); } + static forcedinline vSIMDType bit_and (vSIMDType a, vSIMDType b) noexcept { return vandq_s32 (a, b); } + static forcedinline vSIMDType bit_or (vSIMDType a, vSIMDType b) noexcept { return vorrq_s32 (a, b); } + static forcedinline vSIMDType bit_xor (vSIMDType a, vSIMDType b) noexcept { return veorq_s32 (a, b); } + static 
forcedinline vSIMDType bit_notand (vSIMDType a, vSIMDType b) noexcept { return vbicq_s32 (b, a); } + static forcedinline vSIMDType bit_not (vSIMDType a) noexcept { return bit_notand (a, vld1q_s32 ((int32_t*) kAllBitsSet)); } + static forcedinline vSIMDType min (vSIMDType a, vSIMDType b) noexcept { return vminq_s32 (a, b); } + static forcedinline vSIMDType max (vSIMDType a, vSIMDType b) noexcept { return vmaxq_s32 (a, b); } + static forcedinline vSIMDType equal (vSIMDType a, vSIMDType b) noexcept { return (vSIMDType) vceqq_s32 (a, b); } + static forcedinline bool allEqual (vSIMDType a, vSIMDType b) noexcept { return (sum (notEqual (a, b)) == 0); } + static forcedinline vSIMDType notEqual (vSIMDType a, vSIMDType b) noexcept { return bit_not (equal (a, b)); } + static forcedinline vSIMDType greaterThan (vSIMDType a, vSIMDType b) noexcept { return (vSIMDType) vcgtq_s32 (a, b); } + static forcedinline vSIMDType greaterThanOrEqual (vSIMDType a, vSIMDType b) noexcept { return (vSIMDType) vcgeq_s32 (a, b); } + static forcedinline vSIMDType multiplyAdd (vSIMDType a, vSIMDType b, vSIMDType c) noexcept { return vmlaq_s32 (a, b, c); } + static forcedinline int32_t sum (vSIMDType a) noexcept + { + auto rr = vadd_s32 (vget_high_s32 (a), vget_low_s32 (a)); + rr = vpadd_s32 (rr, rr); + return vget_lane_s32 (rr, 0); + } }; //============================================================================== @@ -163,6 +159,7 @@ struct SIMDNativeOps static forcedinline vSIMDType notEqual (vSIMDType a, vSIMDType b) noexcept { return bit_not (equal (a, b)); } static forcedinline vSIMDType greaterThan (vSIMDType a, vSIMDType b) noexcept { return (vSIMDType) vcgtq_s8 (a, b); } static forcedinline vSIMDType greaterThanOrEqual (vSIMDType a, vSIMDType b) noexcept { return (vSIMDType) vcgeq_s8 (a, b); } + static forcedinline bool allEqual (vSIMDType a, vSIMDType b) noexcept { return (SIMDNativeOps::sum (notEqual (a, b)) == 0); } static forcedinline vSIMDType multiplyAdd (vSIMDType a, vSIMDType b, vSIMDType c) noexcept { return vmlaq_s8 (a, b, c); } static forcedinline int8_t sum (vSIMDType a) noexcept { return fb::sum (a); } }; @@ -197,6 +194,7 @@ struct SIMDNativeOps static forcedinline vSIMDType notEqual (vSIMDType a, vSIMDType b) noexcept { return bit_not (equal (a, b)); } static forcedinline vSIMDType greaterThan (vSIMDType a, vSIMDType b) noexcept { return (vSIMDType) vcgtq_u8 (a, b); } static forcedinline vSIMDType greaterThanOrEqual (vSIMDType a, vSIMDType b) noexcept { return (vSIMDType) vcgeq_u8 (a, b); } + static forcedinline bool allEqual (vSIMDType a, vSIMDType b) noexcept { return (SIMDNativeOps::sum (notEqual (a, b)) == 0); } static forcedinline vSIMDType multiplyAdd (vSIMDType a, vSIMDType b, vSIMDType c) noexcept { return vmlaq_u8 (a, b, c); } static forcedinline uint8_t sum (vSIMDType a) noexcept { return fb::sum (a); } }; @@ -231,6 +229,7 @@ struct SIMDNativeOps static forcedinline vSIMDType notEqual (vSIMDType a, vSIMDType b) noexcept { return bit_not (equal (a, b)); } static forcedinline vSIMDType greaterThan (vSIMDType a, vSIMDType b) noexcept { return (vSIMDType) vcgtq_s16 (a, b); } static forcedinline vSIMDType greaterThanOrEqual (vSIMDType a, vSIMDType b) noexcept { return (vSIMDType) vcgeq_s16 (a, b); } + static forcedinline bool allEqual (vSIMDType a, vSIMDType b) noexcept { return (SIMDNativeOps::sum (notEqual (a, b)) == 0); } static forcedinline vSIMDType multiplyAdd (vSIMDType a, vSIMDType b, vSIMDType c) noexcept { return vmlaq_s16 (a, b, c); } static forcedinline int16_t sum (vSIMDType a) 
noexcept { return fb::sum (a); } }; @@ -266,79 +265,11 @@ struct SIMDNativeOps static forcedinline vSIMDType notEqual (vSIMDType a, vSIMDType b) noexcept { return bit_not (equal (a, b)); } static forcedinline vSIMDType greaterThan (vSIMDType a, vSIMDType b) noexcept { return (vSIMDType) vcgtq_u16 (a, b); } static forcedinline vSIMDType greaterThanOrEqual (vSIMDType a, vSIMDType b) noexcept { return (vSIMDType) vcgeq_u16 (a, b); } + static forcedinline bool allEqual (vSIMDType a, vSIMDType b) noexcept { return (SIMDNativeOps::sum (notEqual (a, b)) == 0); } static forcedinline vSIMDType multiplyAdd (vSIMDType a, vSIMDType b, vSIMDType c) noexcept { return vmlaq_u16 (a, b, c); } static forcedinline uint16_t sum (vSIMDType a) noexcept { return fb::sum (a); } }; -//============================================================================== -/** Signed 32-bit integer NEON intrinsics. */ -template <> -struct SIMDNativeOps -{ - //============================================================================== - typedef int32x4_t vSIMDType; - typedef SIMDFallbackOps fb; - - //============================================================================== - DECLARE_NEON_SIMD_CONST (int32_t, kAllBitsSet); - - //============================================================================== - static forcedinline vSIMDType expand (int32_t s) noexcept { return vdupq_n_s32 (s); } - static forcedinline vSIMDType load (const int32_t* a) noexcept { return vld1q_s32 (a); } - static forcedinline void store (vSIMDType value, int32_t* a) noexcept { vst1q_s32 (a, value); } - static forcedinline vSIMDType add (vSIMDType a, vSIMDType b) noexcept { return vaddq_s32 (a, b); } - static forcedinline vSIMDType sub (vSIMDType a, vSIMDType b) noexcept { return vsubq_s32 (a, b); } - static forcedinline vSIMDType mul (vSIMDType a, vSIMDType b) noexcept { return vmulq_s32 (a, b); } - static forcedinline vSIMDType bit_and (vSIMDType a, vSIMDType b) noexcept { return vandq_s32 (a, b); } - static forcedinline vSIMDType bit_or (vSIMDType a, vSIMDType b) noexcept { return vorrq_s32 (a, b); } - static forcedinline vSIMDType bit_xor (vSIMDType a, vSIMDType b) noexcept { return veorq_s32 (a, b); } - static forcedinline vSIMDType bit_notand (vSIMDType a, vSIMDType b) noexcept { return vbicq_s32 (b, a); } - static forcedinline vSIMDType bit_not (vSIMDType a) noexcept { return bit_notand (a, vld1q_s32 ((int32_t*) kAllBitsSet)); } - static forcedinline vSIMDType min (vSIMDType a, vSIMDType b) noexcept { return vminq_s32 (a, b); } - static forcedinline vSIMDType max (vSIMDType a, vSIMDType b) noexcept { return vmaxq_s32 (a, b); } - static forcedinline vSIMDType equal (vSIMDType a, vSIMDType b) noexcept { return (vSIMDType) vceqq_s32 (a, b); } - static forcedinline vSIMDType notEqual (vSIMDType a, vSIMDType b) noexcept { return bit_not (equal (a, b)); } - static forcedinline vSIMDType greaterThan (vSIMDType a, vSIMDType b) noexcept { return (vSIMDType) vcgtq_s32 (a, b); } - static forcedinline vSIMDType greaterThanOrEqual (vSIMDType a, vSIMDType b) noexcept { return (vSIMDType) vcgeq_s32 (a, b); } - static forcedinline vSIMDType multiplyAdd (vSIMDType a, vSIMDType b, vSIMDType c) noexcept { return vmlaq_s32 (a, b, c); } - static forcedinline int32_t sum (vSIMDType a) noexcept { return fb::sum (a); } -}; - - -//============================================================================== -/** Unsigned 32-bit integer NEON intrinsics. 
*/ -template <> -struct SIMDNativeOps -{ - //============================================================================== - typedef uint32x4_t vSIMDType; - typedef SIMDFallbackOps fb; - - //============================================================================== - DECLARE_NEON_SIMD_CONST (uint32_t, kAllBitsSet); - - //============================================================================== - static forcedinline vSIMDType expand (uint32_t s) noexcept { return vdupq_n_u32 (s); } - static forcedinline vSIMDType load (const uint32_t* a) noexcept { return vld1q_u32 (a); } - static forcedinline void store (vSIMDType value, uint32_t* a) noexcept { vst1q_u32 (a, value); } - static forcedinline vSIMDType add (vSIMDType a, vSIMDType b) noexcept { return vaddq_u32 (a, b); } - static forcedinline vSIMDType sub (vSIMDType a, vSIMDType b) noexcept { return vsubq_u32 (a, b); } - static forcedinline vSIMDType mul (vSIMDType a, vSIMDType b) noexcept { return vmulq_u32 (a, b); } - static forcedinline vSIMDType bit_and (vSIMDType a, vSIMDType b) noexcept { return vandq_u32 (a, b); } - static forcedinline vSIMDType bit_or (vSIMDType a, vSIMDType b) noexcept { return vorrq_u32 (a, b); } - static forcedinline vSIMDType bit_xor (vSIMDType a, vSIMDType b) noexcept { return veorq_u32 (a, b); } - static forcedinline vSIMDType bit_notand (vSIMDType a, vSIMDType b) noexcept { return vbicq_u32 (b, a); } - static forcedinline vSIMDType bit_not (vSIMDType a) noexcept { return bit_notand (a, vld1q_u32 ((uint32_t*) kAllBitsSet)); } - static forcedinline vSIMDType min (vSIMDType a, vSIMDType b) noexcept { return vminq_u32 (a, b); } - static forcedinline vSIMDType max (vSIMDType a, vSIMDType b) noexcept { return vmaxq_u32 (a, b); } - static forcedinline vSIMDType equal (vSIMDType a, vSIMDType b) noexcept { return (vSIMDType) vceqq_u32 (a, b); } - static forcedinline vSIMDType notEqual (vSIMDType a, vSIMDType b) noexcept { return bit_not (equal (a, b)); } - static forcedinline vSIMDType greaterThan (vSIMDType a, vSIMDType b) noexcept { return (vSIMDType) vcgtq_u32 (a, b); } - static forcedinline vSIMDType greaterThanOrEqual (vSIMDType a, vSIMDType b) noexcept { return (vSIMDType) vcgeq_u32 (a, b); } - static forcedinline vSIMDType multiplyAdd (vSIMDType a, vSIMDType b, vSIMDType c) noexcept { return vmlaq_u32 (a, b, c); } - static forcedinline uint32_t sum (vSIMDType a) noexcept { return fb::sum (a); } -}; - //============================================================================== /** Signed 64-bit integer NEON intrinsics. 
*/ template <> @@ -369,6 +300,7 @@ struct SIMDNativeOps static forcedinline vSIMDType notEqual (vSIMDType a, vSIMDType b) noexcept { return fb::notEqual (a, b); } static forcedinline vSIMDType greaterThan (vSIMDType a, vSIMDType b) noexcept { return fb::greaterThan (a, b); } static forcedinline vSIMDType greaterThanOrEqual (vSIMDType a, vSIMDType b) noexcept { return fb::greaterThanOrEqual (a, b); } + static forcedinline bool allEqual (vSIMDType a, vSIMDType b) noexcept { return (SIMDNativeOps::sum (notEqual (a, b)) == 0); } static forcedinline vSIMDType multiplyAdd (vSIMDType a, vSIMDType b, vSIMDType c) noexcept { return fb::multiplyAdd (a, b, c); } static forcedinline int64_t sum (vSIMDType a) noexcept { return fb::sum (a); } }; @@ -404,10 +336,101 @@ struct SIMDNativeOps static forcedinline vSIMDType notEqual (vSIMDType a, vSIMDType b) noexcept { return fb::notEqual (a, b); } static forcedinline vSIMDType greaterThan (vSIMDType a, vSIMDType b) noexcept { return fb::greaterThan (a, b); } static forcedinline vSIMDType greaterThanOrEqual (vSIMDType a, vSIMDType b) noexcept { return fb::greaterThanOrEqual (a, b); } + static forcedinline bool allEqual (vSIMDType a, vSIMDType b) noexcept { return (SIMDNativeOps::sum (notEqual (a, b)) == 0); } static forcedinline vSIMDType multiplyAdd (vSIMDType a, vSIMDType b, vSIMDType c) noexcept { return fb::multiplyAdd (a, b, c); } static forcedinline uint64_t sum (vSIMDType a) noexcept { return fb::sum (a); } }; + //============================================================================== +/** Single-precision floating point NEON intrinsics. */ +template <> +struct SIMDNativeOps +{ + //============================================================================== + typedef float32x4_t vSIMDType; + typedef uint32x4_t vMaskType; + typedef SIMDFallbackOps fb; + + //============================================================================== + DECLARE_NEON_SIMD_CONST (int32_t, kAllBitsSet); + DECLARE_NEON_SIMD_CONST (int32_t, kEvenHighBit); + DECLARE_NEON_SIMD_CONST (float, kOne); + + //============================================================================== + static forcedinline vSIMDType expand (float s) noexcept { return vdupq_n_f32 (s); } + static forcedinline vSIMDType load (const float* a) noexcept { return vld1q_f32 (a); } + static forcedinline void store (vSIMDType value, float* a) noexcept { vst1q_f32 (a, value); } + static forcedinline vSIMDType add (vSIMDType a, vSIMDType b) noexcept { return vaddq_f32 (a, b); } + static forcedinline vSIMDType sub (vSIMDType a, vSIMDType b) noexcept { return vsubq_f32 (a, b); } + static forcedinline vSIMDType mul (vSIMDType a, vSIMDType b) noexcept { return vmulq_f32 (a, b); } + static forcedinline vSIMDType bit_and (vSIMDType a, vSIMDType b) noexcept { return (vSIMDType) vandq_u32 ((vMaskType) a, (vMaskType) b); } + static forcedinline vSIMDType bit_or (vSIMDType a, vSIMDType b) noexcept { return (vSIMDType) vorrq_u32 ((vMaskType) a, (vMaskType) b); } + static forcedinline vSIMDType bit_xor (vSIMDType a, vSIMDType b) noexcept { return (vSIMDType) veorq_u32 ((vMaskType) a, (vMaskType) b); } + static forcedinline vSIMDType bit_notand (vSIMDType a, vSIMDType b) noexcept { return (vSIMDType) vbicq_u32 ((vMaskType) b, (vMaskType) a); } + static forcedinline vSIMDType bit_not (vSIMDType a) noexcept { return bit_notand (a, vld1q_f32 ((float*) kAllBitsSet)); } + static forcedinline vSIMDType min (vSIMDType a, vSIMDType b) noexcept { return vminq_f32 (a, b); } + static forcedinline vSIMDType max (vSIMDType 
a, vSIMDType b) noexcept { return vmaxq_f32 (a, b); } + static forcedinline vSIMDType equal (vSIMDType a, vSIMDType b) noexcept { return (vSIMDType) vceqq_f32 (a, b); } + static forcedinline vSIMDType notEqual (vSIMDType a, vSIMDType b) noexcept { return bit_not (equal (a, b)); } + static forcedinline vSIMDType greaterThan (vSIMDType a, vSIMDType b) noexcept { return (vSIMDType) vcgtq_f32 (a, b); } + static forcedinline vSIMDType greaterThanOrEqual (vSIMDType a, vSIMDType b) noexcept { return (vSIMDType) vcgeq_f32 (a, b); } + static forcedinline bool allEqual (vSIMDType a, vSIMDType b) noexcept { return (SIMDNativeOps::sum (notEqual (a, b)) == 0); } + static forcedinline vSIMDType multiplyAdd (vSIMDType a, vSIMDType b, vSIMDType c) noexcept { return vmlaq_f32 (a, b, c); } + static forcedinline vSIMDType dupeven (vSIMDType a) noexcept { return fb::shuffle<(0 << 0) | (0 << 2) | (2 << 4) | (2 << 6)> (a); } + static forcedinline vSIMDType dupodd (vSIMDType a) noexcept { return fb::shuffle<(1 << 0) | (1 << 2) | (3 << 4) | (3 << 6)> (a); } + static forcedinline vSIMDType swapevenodd (vSIMDType a) noexcept { return fb::shuffle<(1 << 0) | (0 << 2) | (3 << 4) | (2 << 6)> (a); } + static forcedinline vSIMDType oddevensum (vSIMDType a) noexcept { return add (fb::shuffle<(2 << 0) | (3 << 2) | (0 << 4) | (1 << 6)> (a), a); } + + //============================================================================== + static forcedinline vSIMDType cmplxmul (vSIMDType a, vSIMDType b) noexcept + { + vSIMDType rr_ir = mul (a, dupeven (b)); + vSIMDType ii_ri = mul (swapevenodd (a), dupodd (b)); + return add (rr_ir, bit_xor (ii_ri, vld1q_f32 ((float*) kEvenHighBit))); + } + + static forcedinline float sum (vSIMDType a) noexcept + { + auto rr = vadd_f32 (vget_high_f32 (a), vget_low_f32 (a)); + return vget_lane_f32 (vpadd_f32 (rr, rr), 0); + } +}; + +//============================================================================== +/** Double-precision floating point intrinsics do not exist in NEON, + so we need to emulate them.
+*/ +template <> +struct SIMDNativeOps +{ + //============================================================================== + typedef struct { double values [2]; } vSIMDType; + typedef SIMDFallbackOps fb; + + static forcedinline vSIMDType expand (double s) noexcept { return fb::expand (s); } + static forcedinline vSIMDType load (const double* a) noexcept { return fb::load (a); } + static forcedinline void store (vSIMDType value, double* a) noexcept { fb::store (value, a); } + static forcedinline vSIMDType add (vSIMDType a, vSIMDType b) noexcept { return fb::add (a, b); } + static forcedinline vSIMDType sub (vSIMDType a, vSIMDType b) noexcept { return fb::sub (a, b); } + static forcedinline vSIMDType mul (vSIMDType a, vSIMDType b) noexcept { return fb::mul (a, b); } + static forcedinline vSIMDType bit_and (vSIMDType a, vSIMDType b) noexcept { return fb::bit_and (a, b); } + static forcedinline vSIMDType bit_or (vSIMDType a, vSIMDType b) noexcept { return fb::bit_or (a, b); } + static forcedinline vSIMDType bit_xor (vSIMDType a, vSIMDType b) noexcept { return fb::bit_xor (a, b); } + static forcedinline vSIMDType bit_notand (vSIMDType a, vSIMDType b) noexcept { return fb::bit_notand (a, b); } + static forcedinline vSIMDType bit_not (vSIMDType a) noexcept { return fb::bit_not (a); } + static forcedinline vSIMDType min (vSIMDType a, vSIMDType b) noexcept { return fb::min (a, b); } + static forcedinline vSIMDType max (vSIMDType a, vSIMDType b) noexcept { return fb::max (a, b); } + static forcedinline vSIMDType equal (vSIMDType a, vSIMDType b) noexcept { return fb::equal (a, b); } + static forcedinline vSIMDType notEqual (vSIMDType a, vSIMDType b) noexcept { return fb::notEqual (a, b); } + static forcedinline vSIMDType greaterThan (vSIMDType a, vSIMDType b) noexcept { return fb::greaterThan (a, b); } + static forcedinline vSIMDType greaterThanOrEqual (vSIMDType a, vSIMDType b) noexcept { return fb::greaterThanOrEqual (a, b); } + static forcedinline bool allEqual (vSIMDType a, vSIMDType b) noexcept { return fb::allEqual (a, b); } + static forcedinline vSIMDType multiplyAdd (vSIMDType a, vSIMDType b, vSIMDType c) noexcept { return fb::multiplyAdd (a, b, c); } + static forcedinline vSIMDType cmplxmul (vSIMDType a, vSIMDType b) noexcept { return fb::cmplxmul (a, b); } + static forcedinline double sum (vSIMDType a) noexcept { return fb::sum (a); } + static forcedinline vSIMDType oddevensum (vSIMDType a) noexcept { return a; } +}; + #endif } // namespace dsp diff --git a/modules/juce_dsp/native/juce_sse_SIMDNativeOps.h b/modules/juce_dsp/native/juce_sse_SIMDNativeOps.h index ce3bd819f6..a60d5d41ef 100644 --- a/modules/juce_dsp/native/juce_sse_SIMDNativeOps.h +++ b/modules/juce_dsp/native/juce_sse_SIMDNativeOps.h @@ -81,6 +81,7 @@ struct SIMDNativeOps static forcedinline __m128 JUCE_VECTOR_CALLTYPE notEqual (__m128 a, __m128 b) noexcept { return _mm_cmpneq_ps (a, b); } static forcedinline __m128 JUCE_VECTOR_CALLTYPE greaterThan (__m128 a, __m128 b) noexcept { return _mm_cmpgt_ps (a, b); } static forcedinline __m128 JUCE_VECTOR_CALLTYPE greaterThanOrEqual (__m128 a, __m128 b) noexcept { return _mm_cmpge_ps (a, b); } + static forcedinline bool JUCE_VECTOR_CALLTYPE allEqual (__m128 a, __m128 b ) noexcept { return (_mm_movemask_ps (equal (a, b)) == 0xf); } static forcedinline __m128 JUCE_VECTOR_CALLTYPE multiplyAdd (__m128 a, __m128 b, __m128 c) noexcept { return _mm_add_ps (a, _mm_mul_ps (b, c)); } static forcedinline __m128 JUCE_VECTOR_CALLTYPE dupeven (__m128 a) noexcept { return _mm_shuffle_ps (a, a, 
_MM_SHUFFLE (2, 2, 0, 0)); } static forcedinline __m128 JUCE_VECTOR_CALLTYPE dupodd (__m128 a) noexcept { return _mm_shuffle_ps (a, a, _MM_SHUFFLE (3, 3, 1, 1)); } @@ -142,6 +143,7 @@ struct SIMDNativeOps static forcedinline __m128d JUCE_VECTOR_CALLTYPE notEqual (__m128d a, __m128d b) noexcept { return _mm_cmpneq_pd (a, b); } static forcedinline __m128d JUCE_VECTOR_CALLTYPE greaterThan (__m128d a, __m128d b) noexcept { return _mm_cmpgt_pd (a, b); } static forcedinline __m128d JUCE_VECTOR_CALLTYPE greaterThanOrEqual (__m128d a, __m128d b) noexcept { return _mm_cmpge_pd (a, b); } + static forcedinline bool JUCE_VECTOR_CALLTYPE allEqual (__m128d a, __m128d b ) noexcept { return (_mm_movemask_pd (equal (a, b)) == 0x3); } static forcedinline __m128d JUCE_VECTOR_CALLTYPE multiplyAdd (__m128d a, __m128d b, __m128d c) noexcept { return _mm_add_pd (a, _mm_mul_pd (b, c)); } static forcedinline __m128d JUCE_VECTOR_CALLTYPE dupeven (__m128d a) noexcept { return _mm_shuffle_pd (a, a, _MM_SHUFFLE2 (0, 0)); } static forcedinline __m128d JUCE_VECTOR_CALLTYPE dupodd (__m128d a) noexcept { return _mm_shuffle_pd (a, a, _MM_SHUFFLE2 (1, 1)); } @@ -201,6 +203,7 @@ struct SIMDNativeOps static forcedinline __m128i JUCE_VECTOR_CALLTYPE greaterThanOrEqual (__m128i a, __m128i b) noexcept { return bit_or (greaterThan (a, b), equal (a,b)); } static forcedinline __m128i JUCE_VECTOR_CALLTYPE multiplyAdd (__m128i a, __m128i b, __m128i c) noexcept { return add (a, mul (b, c)); } static forcedinline __m128i JUCE_VECTOR_CALLTYPE notEqual (__m128i a, __m128i b) noexcept { return bit_not (equal (a, b)); } + static forcedinline bool JUCE_VECTOR_CALLTYPE allEqual (__m128i a, __m128i b) noexcept { return (_mm_movemask_epi8 (equal (a, b)) == 0xffff); } //============================================================================== static forcedinline __m128i JUCE_VECTOR_CALLTYPE load (const int8_t* a) noexcept @@ -282,6 +285,7 @@ struct SIMDNativeOps static forcedinline __m128i JUCE_VECTOR_CALLTYPE greaterThanOrEqual (__m128i a, __m128i b) noexcept { return bit_or (greaterThan (a, b), equal (a,b)); } static forcedinline __m128i JUCE_VECTOR_CALLTYPE multiplyAdd (__m128i a, __m128i b, __m128i c) noexcept { return add (a, mul (b, c)); } static forcedinline __m128i JUCE_VECTOR_CALLTYPE notEqual (__m128i a, __m128i b) noexcept { return bit_not (equal (a, b)); } + static forcedinline bool JUCE_VECTOR_CALLTYPE allEqual (__m128i a, __m128i b) noexcept { return (_mm_movemask_epi8 (equal (a, b)) == 0xffff); } //============================================================================== static forcedinline __m128i JUCE_VECTOR_CALLTYPE load (const uint8_t* a) noexcept @@ -363,6 +367,7 @@ struct SIMDNativeOps static forcedinline __m128i JUCE_VECTOR_CALLTYPE greaterThanOrEqual (__m128i a, __m128i b) noexcept { return bit_or (greaterThan (a, b), equal (a,b)); } static forcedinline __m128i JUCE_VECTOR_CALLTYPE multiplyAdd (__m128i a, __m128i b, __m128i c) noexcept { return add (a, mul (b, c)); } static forcedinline __m128i JUCE_VECTOR_CALLTYPE notEqual (__m128i a, __m128i b) noexcept { return bit_not (equal (a, b)); } + static forcedinline bool JUCE_VECTOR_CALLTYPE allEqual (__m128i a, __m128i b) noexcept { return (_mm_movemask_epi8 (equal (a, b)) == 0xffff); } //============================================================================== static forcedinline __m128i JUCE_VECTOR_CALLTYPE load (const int16_t* a) noexcept @@ -431,6 +436,7 @@ struct SIMDNativeOps static forcedinline __m128i JUCE_VECTOR_CALLTYPE greaterThanOrEqual (__m128i a, 
__m128i b) noexcept { return bit_or (greaterThan (a, b), equal (a,b)); } static forcedinline __m128i JUCE_VECTOR_CALLTYPE multiplyAdd (__m128i a, __m128i b, __m128i c) noexcept { return add (a, mul (b, c)); } static forcedinline __m128i JUCE_VECTOR_CALLTYPE notEqual (__m128i a, __m128i b) noexcept { return bit_not (equal (a, b)); } + static forcedinline bool JUCE_VECTOR_CALLTYPE allEqual (__m128i a, __m128i b) noexcept { return (_mm_movemask_epi8 (equal (a, b)) == 0xffff); } //============================================================================== static forcedinline __m128i JUCE_VECTOR_CALLTYPE load (const uint16_t* a) noexcept @@ -490,6 +496,7 @@ struct SIMDNativeOps static forcedinline __m128i JUCE_VECTOR_CALLTYPE greaterThanOrEqual (__m128i a, __m128i b) noexcept { return bit_or (greaterThan (a, b), equal (a,b)); } static forcedinline __m128i JUCE_VECTOR_CALLTYPE multiplyAdd (__m128i a, __m128i b, __m128i c) noexcept { return add (a, mul (b, c)); } static forcedinline __m128i JUCE_VECTOR_CALLTYPE notEqual (__m128i a, __m128i b) noexcept { return bit_not (equal (a, b)); } + static forcedinline bool JUCE_VECTOR_CALLTYPE allEqual (__m128i a, __m128i b) noexcept { return (_mm_movemask_epi8 (equal (a, b)) == 0xffff); } //============================================================================== static forcedinline void JUCE_VECTOR_CALLTYPE store (__m128i value, int32_t* dest) noexcept @@ -575,6 +582,7 @@ struct SIMDNativeOps static forcedinline __m128i JUCE_VECTOR_CALLTYPE greaterThanOrEqual (__m128i a, __m128i b) noexcept { return bit_or (greaterThan (a, b), equal (a,b)); } static forcedinline __m128i JUCE_VECTOR_CALLTYPE multiplyAdd (__m128i a, __m128i b, __m128i c) noexcept { return add (a, mul (b, c)); } static forcedinline __m128i JUCE_VECTOR_CALLTYPE notEqual (__m128i a, __m128i b) noexcept { return bit_not (equal (a, b)); } + static forcedinline bool JUCE_VECTOR_CALLTYPE allEqual (__m128i a, __m128i b) noexcept { return (_mm_movemask_epi8 (equal (a, b)) == 0xffff); } //============================================================================== static forcedinline __m128i JUCE_VECTOR_CALLTYPE load (const uint32_t* a) noexcept @@ -671,6 +679,7 @@ struct SIMDNativeOps static forcedinline __m128i greaterThanOrEqual (__m128i a, __m128i b) noexcept { return bit_or (greaterThan (a, b), equal (a,b)); } static forcedinline __m128i JUCE_VECTOR_CALLTYPE multiplyAdd (__m128i a, __m128i b, __m128i c) noexcept { return add (a, mul (b, c)); } static forcedinline __m128i JUCE_VECTOR_CALLTYPE notEqual (__m128i a, __m128i b) noexcept { return bit_not (equal (a, b)); } + static forcedinline bool JUCE_VECTOR_CALLTYPE allEqual (__m128i a, __m128i b) noexcept { return (_mm_movemask_epi8 (equal (a, b)) == 0xffff); } //============================================================================== static forcedinline void JUCE_VECTOR_CALLTYPE store (__m128i value, int64_t* dest) noexcept @@ -762,6 +771,7 @@ struct SIMDNativeOps static forcedinline __m128i JUCE_VECTOR_CALLTYPE greaterThanOrEqual (__m128i a, __m128i b) noexcept { return bit_or (greaterThan (a, b), equal (a,b)); } static forcedinline __m128i JUCE_VECTOR_CALLTYPE multiplyAdd (__m128i a, __m128i b, __m128i c) noexcept { return add (a, mul (b, c)); } static forcedinline __m128i JUCE_VECTOR_CALLTYPE notEqual (__m128i a, __m128i b) noexcept { return bit_not (equal (a, b)); } + static forcedinline bool JUCE_VECTOR_CALLTYPE allEqual (__m128i a, __m128i b) noexcept { return (_mm_movemask_epi8 (equal (a, b)) == 0xffff); } 
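Note: every SSE allEqual added in this file follows the same pattern: compare the registers lane by lane, collapse the comparison mask into an integer with a movemask intrinsic, and check that all lane bits are set (0xf for four floats, 0x3 for two doubles, 0xffff for the sixteen bytes of an __m128i; the integer compares produce all-ones elements, so the byte-level movemask works for every element width). A standalone sketch of the float case, outside the JUCE wrappers (assumes an SSE-capable compiler; as in the patch itself, a NaN lane compares unequal to everything, including itself):

    // Standalone illustration of the movemask pattern used by the allEqual
    // additions above; not JUCE code.
    #include <xmmintrin.h>   // SSE: _mm_cmpeq_ps, _mm_movemask_ps

    // True only when all four float lanes of a and b compare equal.
    static bool allLanesEqual (__m128 a, __m128 b)
    {
        const __m128 mask = _mm_cmpeq_ps (a, b);   // equal lanes become all-ones, others all-zeros
        return _mm_movemask_ps (mask) == 0xf;      // one sign bit per lane; 0xf means all four matched
    }
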
//============================================================================== static forcedinline __m128i JUCE_VECTOR_CALLTYPE load (const uint64_t* a) noexcept diff --git a/modules/juce_dsp/processors/juce_FIRFilter.h b/modules/juce_dsp/processors/juce_FIRFilter.h index da5b19cf51..09d2aaa94c 100644 --- a/modules/juce_dsp/processors/juce_FIRFilter.h +++ b/modules/juce_dsp/processors/juce_FIRFilter.h @@ -172,7 +172,7 @@ namespace FIR static SampleType JUCE_VECTOR_CALLTYPE processSingleSample (SampleType sample, SampleType* buf, const NumericType* fir, size_t m, size_t& p) noexcept { - SampleType out = {}; + SampleType out (0); buf[p] = sample; diff --git a/modules/juce_dsp/processors/juce_FIRFilter_test.cpp b/modules/juce_dsp/processors/juce_FIRFilter_test.cpp index 0a4701e48b..308ed20970 100644 --- a/modules/juce_dsp/processors/juce_FIRFilter_test.cpp +++ b/modules/juce_dsp/processors/juce_FIRFilter_test.cpp @@ -106,7 +106,7 @@ class FIRFilterTest : public UnitTest buffer[0] = input[i]; - SampleType sum{}; + SampleType sum (0); for (size_t j = 0; j < numCoefficients; ++j) sum += buffer[j] * firCoefficients[j];
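
Note: the FIRFilter changes above fit with the new SIMDRegister default constructor. Once a type has a user-provided default constructor whose body leaves its member uninitialised, "SampleType out = {};" no longer guarantees a zeroed accumulator when SampleType is a SIMDRegister, whereas "SampleType out (0);" goes through the new scalar-broadcast constructor (and plain initialisation for float/double). A toy sketch of that distinction, using made-up types rather than the JUCE ones:

    // Toy types (not the JUCE ones) showing why "= {}" stops zero-initialising
    // once a user-provided default constructor exists.
    struct Aggregate    { float value; };                             // like SIMDRegister before this patch
    struct WithUserCtor { WithUserCtor() noexcept {} float value; };  // like SIMDRegister after it

    int main()
    {
        Aggregate    a = {};   // value-initialisation: a.value is zero
        WithUserCtor b = {};   // calls the empty user-provided constructor: b.value is indeterminate
        (void) b;              // not read, to avoid undefined behaviour

        return a.value == 0.0f ? 0 : 1;   // always returns 0
    }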