mirror of
https://github.com/juce-framework/JUCE.git
synced 2026-01-10 23:44:24 +00:00
Various additions to SIMDRegister
This commit is contained in:
parent
6894e04356
commit
8f02179bbf
8 changed files with 250 additions and 135 deletions
|
|
@ -102,6 +102,18 @@ struct SIMDRegister
|
|||
|
||||
vSIMDType value;
|
||||
|
||||
/** Default constructor. */
|
||||
// Note: there is no member initialiser and the body is empty, so `value` is not set by this constructor.
inline JUCE_VECTOR_CALLTYPE SIMDRegister() noexcept {}
|
||||
|
||||
/** Constructs an object from the native SIMD type. */
|
||||
// Copies the native vector straight into `value`; not marked explicit, so a vSIMDType converts implicitly.
inline JUCE_VECTOR_CALLTYPE SIMDRegister (vSIMDType a) noexcept : value (a) {}
|
||||
|
||||
/** Constructs an object from a scalar type by broadcasting it to all elements. */
|
||||
// Delegates to an assignment operator accepting a scalar - presumably operator= (Type), declared outside this view; confirm.
inline JUCE_VECTOR_CALLTYPE SIMDRegister (Type s) noexcept { *this = s; }
|
||||
|
||||
/** Destructor. */
|
||||
// User-provided (not defaulted) destructor with an empty body: a union that has a SIMDRegister member
// therefore has to declare its own destructor explicitly.
inline JUCE_VECTOR_CALLTYPE ~SIMDRegister() noexcept {}
|
||||
|
||||
//==============================================================================
|
||||
/** Returns the number of elements in this vector. */
|
||||
// constexpr, so the result can be used in constant expressions; it simply forwards SIMDNumElements.
static constexpr size_t size() noexcept { return SIMDNumElements; }
|
||||
|
|
@ -232,6 +244,19 @@ struct SIMDRegister
|
|||
/** Returns a vector where each element is the bit-xor'd value of the corresponding element in the receiver and the scalar s.*/
|
||||
inline SIMDRegister JUCE_VECTOR_CALLTYPE operator^ (MaskType s) const noexcept
{
    // toVecType turns the scalar mask into a vector operand for the native XOR.
    const auto maskAsVector = toVecType (s);
    return { NativeOps::bit_xor (value, maskAsVector) };
}
|
||||
|
||||
//==============================================================================
|
||||
/** Returns true if all element-wise comparisons return true. */
|
||||
inline bool JUCE_VECTOR_CALLTYPE operator== (SIMDRegister other) const noexcept
{
    // The native backend reduces the per-lane comparison to a single bool.
    const bool everyLaneMatches = NativeOps::allEqual (value, other.value);
    return everyLaneMatches;
}
|
||||
|
||||
/** Returns true if any element-wise comparison returns false. */
|
||||
inline bool JUCE_VECTOR_CALLTYPE operator!= (SIMDRegister other) const noexcept
{
    // Defined purely as the negation of the vector operator==.
    const bool everyLaneMatches = (*this == other);
    return ! everyLaneMatches;
}
|
||||
|
||||
/** Returns true if all elements are equal to the scalar. */
|
||||
inline bool JUCE_VECTOR_CALLTYPE operator== (Type s) const noexcept
{
    // Expand the scalar into a full vector, then reuse the vector comparison.
    const auto expandedScalar = SIMDRegister::expand (s);
    return *this == expandedScalar;
}
|
||||
|
||||
/** Returns true if any elements are not equal to the scalar. */
|
||||
inline bool JUCE_VECTOR_CALLTYPE operator!= (Type s) const noexcept
{
    // Defined purely as the negation of the scalar operator==.
    const bool everyLaneMatches = (*this == s);
    return ! everyLaneMatches;
}
|
||||
|
||||
//==============================================================================
|
||||
/** Returns a SIMDRegister of the corresponding integral type where each element has each bit set
|
||||
if the corresponding element of a is equal to the corresponding element of b, or zero otherwise.
|
||||
|
|
|
|||
|
|
@ -370,8 +370,10 @@ public:
|
|||
{
|
||||
type array_a [SIMDRegister<type>::SIMDNumElements];
|
||||
|
||||
union
|
||||
union ConversionUnion
|
||||
{
|
||||
inline ConversionUnion() {}
|
||||
inline ~ConversionUnion() {}
|
||||
SIMDRegister<type> floatVersion;
|
||||
vMaskType intVersion;
|
||||
} a, b;
|
||||
|
|
@ -512,6 +514,39 @@ public:
|
|||
u.expect (vecEqualToArray (le, array_le ));
|
||||
u.expect (vecEqualToArray (gt, array_gt ));
|
||||
u.expect (vecEqualToArray (ge, array_ge ));
|
||||
|
||||
do
|
||||
{
|
||||
SIMDRegister_test_internal::fillRandom (array_a, SIMDRegister<type>::SIMDNumElements, random);
|
||||
SIMDRegister_test_internal::fillRandom (array_b, SIMDRegister<type>::SIMDNumElements, random);
|
||||
} while (std::equal (array_a, array_a + SIMDRegister<type>::SIMDNumElements, array_b));
|
||||
|
||||
copy (a, array_a);
|
||||
copy (b, array_b);
|
||||
u.expect (a != b);
|
||||
u.expect (b != a);
|
||||
u.expect (! (a == b));
|
||||
u.expect (! (b == a));
|
||||
|
||||
SIMDRegister_test_internal::fillRandom (array_a, SIMDRegister<type>::SIMDNumElements, random);
|
||||
copy (a, array_a);
|
||||
copy (b, array_a);
|
||||
|
||||
u.expect (a == b);
|
||||
u.expect (b == a);
|
||||
u.expect (! (a != b));
|
||||
u.expect (! (b != a));
|
||||
|
||||
auto scalar = a[0];
|
||||
a = SIMDRegister<type>::expand (scalar);
|
||||
|
||||
u.expect (a == scalar);
|
||||
u.expect (! (a != scalar));
|
||||
|
||||
scalar--;
|
||||
|
||||
u.expect (a != scalar);
|
||||
u.expect (! (a == scalar));
|
||||
}
|
||||
}
|
||||
};
|
||||
|
|
|
|||
|
|
@ -82,6 +82,7 @@ struct SIMDNativeOps<float>
|
|||
static forcedinline __m256 JUCE_VECTOR_CALLTYPE notEqual (__m256 a, __m256 b) noexcept { return _mm256_cmp_ps (a, b, _CMP_NEQ_OQ); }
|
||||
static forcedinline __m256 JUCE_VECTOR_CALLTYPE greaterThan (__m256 a, __m256 b) noexcept { return _mm256_cmp_ps (a, b, _CMP_GT_OQ); }
|
||||
static forcedinline __m256 JUCE_VECTOR_CALLTYPE greaterThanOrEqual (__m256 a, __m256 b) noexcept { return _mm256_cmp_ps (a, b, _CMP_GE_OQ); }
|
||||
static forcedinline bool JUCE_VECTOR_CALLTYPE allEqual (__m256 a, __m256 b) noexcept
{
    // movemask_ps gathers the sign bit of each of the eight float lanes; 0xff means every lane was flagged by equal().
    const int laneMask = _mm256_movemask_ps (equal (a, b));
    return laneMask == 0xff;
}
|
||||
static forcedinline __m256 JUCE_VECTOR_CALLTYPE multiplyAdd (__m256 a, __m256 b, __m256 c) noexcept { return _mm256_fmadd_ps (b, c, a); }
|
||||
static forcedinline __m256 JUCE_VECTOR_CALLTYPE dupeven (__m256 a) noexcept { return _mm256_shuffle_ps (a, a, _MM_SHUFFLE (2, 2, 0, 0)); }
|
||||
static forcedinline __m256 JUCE_VECTOR_CALLTYPE dupodd (__m256 a) noexcept { return _mm256_shuffle_ps (a, a, _MM_SHUFFLE (3, 3, 1, 1)); }
|
||||
|
|
@ -141,6 +142,7 @@ struct SIMDNativeOps<double>
|
|||
static forcedinline __m256d JUCE_VECTOR_CALLTYPE notEqual (__m256d a, __m256d b) noexcept { return _mm256_cmp_pd (a, b, _CMP_NEQ_OQ); }
|
||||
static forcedinline __m256d JUCE_VECTOR_CALLTYPE greaterThan (__m256d a, __m256d b) noexcept { return _mm256_cmp_pd (a, b, _CMP_GT_OQ); }
|
||||
static forcedinline __m256d JUCE_VECTOR_CALLTYPE greaterThanOrEqual (__m256d a, __m256d b) noexcept { return _mm256_cmp_pd (a, b, _CMP_GE_OQ); }
|
||||
static forcedinline bool JUCE_VECTOR_CALLTYPE allEqual (__m256d a, __m256d b) noexcept
{
    // movemask_pd gathers the sign bit of each of the four double lanes; 0xf means every lane was flagged by equal().
    const int laneMask = _mm256_movemask_pd (equal (a, b));
    return laneMask == 0xf;
}
|
||||
static forcedinline __m256d JUCE_VECTOR_CALLTYPE multiplyAdd (__m256d a, __m256d b, __m256d c) noexcept { return _mm256_add_pd (a, _mm256_mul_pd (b, c)); }
|
||||
static forcedinline __m256d JUCE_VECTOR_CALLTYPE dupeven (__m256d a) noexcept { return _mm256_shuffle_pd (a, a, 0); }
|
||||
static forcedinline __m256d JUCE_VECTOR_CALLTYPE dupodd (__m256d a) noexcept { return _mm256_shuffle_pd (a, a, (1 << 0) | (1 << 1) | (1 << 2) | (1 << 3)); }
|
||||
|
|
@ -261,6 +263,7 @@ struct SIMDNativeOps<uint8_t>
|
|||
static forcedinline __m256i JUCE_VECTOR_CALLTYPE equal (__m256i a, __m256i b) noexcept { return _mm256_cmpeq_epi8 (a, b); }
|
||||
static forcedinline __m256i JUCE_VECTOR_CALLTYPE greaterThan (__m256i a, __m256i b) noexcept { return _mm256_cmpgt_epi8 (ssign (a), ssign (b)); }
|
||||
static forcedinline __m256i JUCE_VECTOR_CALLTYPE greaterThanOrEqual (__m256i a, __m256i b) noexcept { return bit_or (greaterThan (a, b), equal (a,b)); }
|
||||
static forcedinline bool JUCE_VECTOR_CALLTYPE allEqual (__m256i a, __m256i b) noexcept
{
    // movemask_epi8 packs the top bit of all 32 bytes into an int; every bit set reads back as -1.
    const int byteMask = _mm256_movemask_epi8 (equal (a, b));
    return byteMask == -1;
}
|
||||
static forcedinline __m256i JUCE_VECTOR_CALLTYPE multiplyAdd (__m256i a, __m256i b, __m256i c) noexcept { return add (a, mul (b, c)); }
|
||||
static forcedinline __m256i JUCE_VECTOR_CALLTYPE notEqual (__m256i a, __m256i b) noexcept { return bit_not (equal (a, b)); }
|
||||
|
||||
|
|
@ -336,6 +339,7 @@ struct SIMDNativeOps<int16_t>
|
|||
static forcedinline __m256i JUCE_VECTOR_CALLTYPE greaterThanOrEqual (__m256i a, __m256i b) noexcept { return bit_or (greaterThan (a, b), equal (a,b)); }
|
||||
static forcedinline __m256i JUCE_VECTOR_CALLTYPE multiplyAdd (__m256i a, __m256i b, __m256i c) noexcept { return add (a, mul (b, c)); }
|
||||
static forcedinline __m256i JUCE_VECTOR_CALLTYPE notEqual (__m256i a, __m256i b) noexcept { return bit_not (equal (a, b)); }
|
||||
static forcedinline bool JUCE_VECTOR_CALLTYPE allEqual (__m256i a, __m256i b) noexcept
{
    // movemask_epi8 packs the top bit of all 32 bytes into an int; every bit set reads back as -1.
    // Assumes equal() fills every byte of a matching lane - its definition is outside this view.
    const int byteMask = _mm256_movemask_epi8 (equal (a, b));
    return byteMask == -1;
}
|
||||
|
||||
//==============================================================================
|
||||
static forcedinline __m256i JUCE_VECTOR_CALLTYPE load (const int16_t* a) noexcept
|
||||
|
|
@ -390,6 +394,7 @@ struct SIMDNativeOps<uint16_t>
|
|||
static forcedinline __m256i JUCE_VECTOR_CALLTYPE greaterThanOrEqual (__m256i a, __m256i b) noexcept { return bit_or (greaterThan (a, b), equal (a,b)); }
|
||||
static forcedinline __m256i JUCE_VECTOR_CALLTYPE multiplyAdd (__m256i a, __m256i b, __m256i c) noexcept { return add (a, mul (b, c)); }
|
||||
static forcedinline __m256i JUCE_VECTOR_CALLTYPE notEqual (__m256i a, __m256i b) noexcept { return bit_not (equal (a, b)); }
|
||||
static forcedinline bool JUCE_VECTOR_CALLTYPE allEqual (__m256i a, __m256i b) noexcept
{
    // movemask_epi8 packs the top bit of all 32 bytes into an int; every bit set reads back as -1.
    // Assumes equal() fills every byte of a matching lane - its definition is outside this view.
    const int byteMask = _mm256_movemask_epi8 (equal (a, b));
    return byteMask == -1;
}
|
||||
|
||||
//==============================================================================
|
||||
static forcedinline __m256i JUCE_VECTOR_CALLTYPE load (const uint16_t* a) noexcept
|
||||
|
|
@ -443,6 +448,7 @@ struct SIMDNativeOps<int32_t>
|
|||
static forcedinline __m256i JUCE_VECTOR_CALLTYPE greaterThanOrEqual (__m256i a, __m256i b) noexcept { return bit_or (greaterThan (a, b), equal (a,b)); }
|
||||
static forcedinline __m256i JUCE_VECTOR_CALLTYPE multiplyAdd (__m256i a, __m256i b, __m256i c) noexcept { return add (a, mul (b, c)); }
|
||||
static forcedinline __m256i JUCE_VECTOR_CALLTYPE notEqual (__m256i a, __m256i b) noexcept { return bit_not (equal (a, b)); }
|
||||
static forcedinline bool JUCE_VECTOR_CALLTYPE allEqual (__m256i a, __m256i b) noexcept
{
    // movemask_epi8 packs the top bit of all 32 bytes into an int; every bit set reads back as -1.
    // Assumes equal() fills every byte of a matching lane - its definition is outside this view.
    const int byteMask = _mm256_movemask_epi8 (equal (a, b));
    return byteMask == -1;
}
|
||||
|
||||
//==============================================================================
|
||||
static forcedinline __m256i JUCE_VECTOR_CALLTYPE load (const int32_t* a) noexcept
|
||||
|
|
@ -495,6 +501,7 @@ struct SIMDNativeOps<uint32_t>
|
|||
static forcedinline __m256i JUCE_VECTOR_CALLTYPE greaterThanOrEqual (__m256i a, __m256i b) noexcept { return bit_or (greaterThan (a, b), equal (a,b)); }
|
||||
static forcedinline __m256i JUCE_VECTOR_CALLTYPE multiplyAdd (__m256i a, __m256i b, __m256i c) noexcept { return add (a, mul (b, c)); }
|
||||
static forcedinline __m256i JUCE_VECTOR_CALLTYPE notEqual (__m256i a, __m256i b) noexcept { return bit_not (equal (a, b)); }
|
||||
static forcedinline bool JUCE_VECTOR_CALLTYPE allEqual (__m256i a, __m256i b) noexcept
{
    // movemask_epi8 packs the top bit of all 32 bytes into an int; every bit set reads back as -1.
    // Assumes equal() fills every byte of a matching lane - its definition is outside this view.
    const int byteMask = _mm256_movemask_epi8 (equal (a, b));
    return byteMask == -1;
}
|
||||
|
||||
//==============================================================================
|
||||
static forcedinline __m256i JUCE_VECTOR_CALLTYPE load (const uint32_t* a) noexcept
|
||||
|
|
@ -543,6 +550,7 @@ struct SIMDNativeOps<int64_t>
|
|||
static forcedinline __m256i JUCE_VECTOR_CALLTYPE greaterThanOrEqual (__m256i a, __m256i b) noexcept { return bit_or (greaterThan (a, b), equal (a,b)); }
|
||||
static forcedinline __m256i JUCE_VECTOR_CALLTYPE multiplyAdd (__m256i a, __m256i b, __m256i c) noexcept { return add (a, mul (b, c)); }
|
||||
static forcedinline __m256i JUCE_VECTOR_CALLTYPE notEqual (__m256i a, __m256i b) noexcept { return bit_not (equal (a, b)); }
|
||||
static forcedinline bool JUCE_VECTOR_CALLTYPE allEqual (__m256i a, __m256i b) noexcept
{
    // movemask_epi8 packs the top bit of all 32 bytes into an int; every bit set reads back as -1.
    // Assumes equal() fills every byte of a matching lane - its definition is outside this view.
    const int byteMask = _mm256_movemask_epi8 (equal (a, b));
    return byteMask == -1;
}
|
||||
|
||||
//==============================================================================
|
||||
static forcedinline __m256i JUCE_VECTOR_CALLTYPE load (const int64_t* a) noexcept
|
||||
|
|
@ -614,6 +622,7 @@ struct SIMDNativeOps<uint64_t>
|
|||
static forcedinline __m256i JUCE_VECTOR_CALLTYPE greaterThanOrEqual (__m256i a, __m256i b) noexcept { return bit_or (greaterThan (a, b), equal (a,b)); }
|
||||
static forcedinline __m256i JUCE_VECTOR_CALLTYPE multiplyAdd (__m256i a, __m256i b, __m256i c) noexcept { return add (a, mul (b, c)); }
|
||||
static forcedinline __m256i JUCE_VECTOR_CALLTYPE notEqual (__m256i a, __m256i b) noexcept { return bit_not (equal (a, b)); }
|
||||
static forcedinline bool JUCE_VECTOR_CALLTYPE allEqual (__m256i a, __m256i b) noexcept
{
    // movemask_epi8 packs the top bit of all 32 bytes into an int; every bit set reads back as -1.
    // Assumes equal() fills every byte of a matching lane - its definition is outside this view.
    const int byteMask = _mm256_movemask_epi8 (equal (a, b));
    return byteMask == -1;
}
|
||||
|
||||
//==============================================================================
|
||||
static forcedinline __m256i JUCE_VECTOR_CALLTYPE load (const uint64_t* a) noexcept
|
||||
|
|
|
|||
|
|
@ -117,6 +117,19 @@ struct SIMDFallbackOps
|
|||
return retval;
|
||||
}
|
||||
|
||||
//==============================================================================
|
||||
static forcedinline bool allEqual (vSIMDType a, vSIMDType b) noexcept
{
    // View both vectors as plain arrays of n scalars.
    const ScalarType* const lhs = reinterpret_cast<const ScalarType*> (&a);
    const ScalarType* const rhs = reinterpret_cast<const ScalarType*> (&b);

    size_t lane = 0;

    // Step over every matching lane; stopping before n means a mismatch was found.
    while (lane < n && ! (lhs[lane] != rhs[lane]))
        ++lane;

    return ! (lane < n);
}
|
||||
|
||||
//==============================================================================
|
||||
static forcedinline vSIMDType cmplxmul (vSIMDType a, vSIMDType b) noexcept
|
||||
{
|
||||
|
|
|
|||
|
|
@ -51,86 +51,82 @@ template <typename type>
|
|||
struct SIMDNativeOps;
|
||||
|
||||
//==============================================================================
|
||||
/** Single-precision floating point NEON intrinsics. */
|
||||
/** Unsigned 32-bit integer NEON intrinsics. */
|
||||
template <>
|
||||
struct SIMDNativeOps<float>
|
||||
struct SIMDNativeOps<uint32_t>
|
||||
{
|
||||
//==============================================================================
|
||||
typedef float32x4_t vSIMDType;
|
||||
typedef uint32x4_t vMaskType;
|
||||
typedef SIMDFallbackOps<float, vSIMDType> fb;
|
||||
typedef uint32x4_t vSIMDType;
|
||||
typedef SIMDFallbackOps<uint32_t, vSIMDType> fb;
|
||||
|
||||
//==============================================================================
|
||||
DECLARE_NEON_SIMD_CONST (int32_t, kAllBitsSet);
|
||||
DECLARE_NEON_SIMD_CONST (int32_t, kEvenHighBit);
|
||||
DECLARE_NEON_SIMD_CONST (float, kOne);
|
||||
DECLARE_NEON_SIMD_CONST (uint32_t, kAllBitsSet);
|
||||
|
||||
//==============================================================================
|
||||
static forcedinline vSIMDType expand (float s) noexcept { return vdupq_n_f32 (s); }
|
||||
static forcedinline vSIMDType load (const float* a) noexcept { return vld1q_f32 (a); }
|
||||
static forcedinline void store (vSIMDType value, float* a) noexcept { vst1q_f32 (a, value); }
|
||||
static forcedinline vSIMDType add (vSIMDType a, vSIMDType b) noexcept { return vaddq_f32 (a, b); }
|
||||
static forcedinline vSIMDType sub (vSIMDType a, vSIMDType b) noexcept { return vsubq_f32 (a, b); }
|
||||
static forcedinline vSIMDType mul (vSIMDType a, vSIMDType b) noexcept { return vmulq_f32 (a, b); }
|
||||
static forcedinline vSIMDType bit_and (vSIMDType a, vSIMDType b) noexcept { return (vSIMDType) vandq_u32 ((vMaskType) a, (vMaskType) b); }
|
||||
static forcedinline vSIMDType bit_or (vSIMDType a, vSIMDType b) noexcept { return (vSIMDType) vorrq_u32 ((vMaskType) a, (vMaskType) b); }
|
||||
static forcedinline vSIMDType bit_xor (vSIMDType a, vSIMDType b) noexcept { return (vSIMDType) veorq_u32 ((vMaskType) a, (vMaskType) b); }
|
||||
static forcedinline vSIMDType bit_notand (vSIMDType a, vSIMDType b) noexcept { return (vSIMDType) vbicq_u32 ((vMaskType) b, (vMaskType) a); }
|
||||
static forcedinline vSIMDType bit_not (vSIMDType a) noexcept { return bit_notand (a, vld1q_f32 ((float*) kAllBitsSet)); }
|
||||
static forcedinline vSIMDType min (vSIMDType a, vSIMDType b) noexcept { return vminq_f32 (a, b); }
|
||||
static forcedinline vSIMDType max (vSIMDType a, vSIMDType b) noexcept { return vmaxq_f32 (a, b); }
|
||||
static forcedinline vSIMDType equal (vSIMDType a, vSIMDType b) noexcept { return (vSIMDType) vceqq_f32 (a, b); }
|
||||
static forcedinline vSIMDType expand (uint32_t s) noexcept { return vdupq_n_u32 (s); }
|
||||
static forcedinline vSIMDType load (const uint32_t* a) noexcept { return vld1q_u32 (a); }
|
||||
static forcedinline void store (vSIMDType value, uint32_t* a) noexcept { vst1q_u32 (a, value); }
|
||||
static forcedinline vSIMDType add (vSIMDType a, vSIMDType b) noexcept { return vaddq_u32 (a, b); }
|
||||
static forcedinline vSIMDType sub (vSIMDType a, vSIMDType b) noexcept { return vsubq_u32 (a, b); }
|
||||
static forcedinline vSIMDType mul (vSIMDType a, vSIMDType b) noexcept { return vmulq_u32 (a, b); }
|
||||
static forcedinline vSIMDType bit_and (vSIMDType a, vSIMDType b) noexcept { return vandq_u32 (a, b); }
|
||||
static forcedinline vSIMDType bit_or (vSIMDType a, vSIMDType b) noexcept { return vorrq_u32 (a, b); }
|
||||
static forcedinline vSIMDType bit_xor (vSIMDType a, vSIMDType b) noexcept { return veorq_u32 (a, b); }
|
||||
static forcedinline vSIMDType bit_notand (vSIMDType a, vSIMDType b) noexcept { return vbicq_u32 (b, a); }
|
||||
static forcedinline vSIMDType bit_not (vSIMDType a) noexcept { return bit_notand (a, vld1q_u32 ((uint32_t*) kAllBitsSet)); }
|
||||
static forcedinline vSIMDType min (vSIMDType a, vSIMDType b) noexcept { return vminq_u32 (a, b); }
|
||||
static forcedinline vSIMDType max (vSIMDType a, vSIMDType b) noexcept { return vmaxq_u32 (a, b); }
|
||||
static forcedinline vSIMDType equal (vSIMDType a, vSIMDType b) noexcept { return (vSIMDType) vceqq_u32 (a, b); }
|
||||
static forcedinline bool allEqual (vSIMDType a, vSIMDType b) noexcept
{
    // notEqual() sets all bits in each differing lane; adding up to four such lanes
    // can never wrap round to zero, so a zero total means no lane differed.
    const uint32_t mismatchTotal = sum (notEqual (a, b));
    return mismatchTotal == 0;
}
|
||||
static forcedinline vSIMDType notEqual (vSIMDType a, vSIMDType b) noexcept { return bit_not (equal (a, b)); }
|
||||
static forcedinline vSIMDType greaterThan (vSIMDType a, vSIMDType b) noexcept { return (vSIMDType) vcgtq_f32 (a, b); }
|
||||
static forcedinline vSIMDType greaterThanOrEqual (vSIMDType a, vSIMDType b) noexcept { return (vSIMDType) vcgeq_f32 (a, b); }
|
||||
static forcedinline vSIMDType multiplyAdd (vSIMDType a, vSIMDType b, vSIMDType c) noexcept { return vmlaq_f32 (a, b, c); }
|
||||
static forcedinline vSIMDType dupeven (vSIMDType a) noexcept { return fb::shuffle<(0 << 0) | (0 << 2) | (2 << 4) | (2 << 6)> (a); }
|
||||
static forcedinline vSIMDType dupodd (vSIMDType a) noexcept { return fb::shuffle<(1 << 0) | (1 << 2) | (3 << 4) | (3 << 6)> (a); }
|
||||
static forcedinline vSIMDType swapevenodd (vSIMDType a) noexcept { return fb::shuffle<(1 << 0) | (0 << 2) | (3 << 4) | (2 << 6)> (a); }
|
||||
static forcedinline float sum (vSIMDType a) noexcept { return fb::sum (a); }
|
||||
static forcedinline vSIMDType oddevensum (vSIMDType a) noexcept { return add (fb::shuffle<(2 << 0) | (3 << 2) | (0 << 4) | (1 << 6)> (a), a); }
|
||||
|
||||
//==============================================================================
|
||||
static forcedinline vSIMDType cmplxmul (vSIMDType a, vSIMDType b) noexcept
|
||||
static forcedinline vSIMDType greaterThan (vSIMDType a, vSIMDType b) noexcept { return (vSIMDType) vcgtq_u32 (a, b); }
|
||||
static forcedinline vSIMDType greaterThanOrEqual (vSIMDType a, vSIMDType b) noexcept { return (vSIMDType) vcgeq_u32 (a, b); }
|
||||
static forcedinline vSIMDType multiplyAdd (vSIMDType a, vSIMDType b, vSIMDType c) noexcept { return vmlaq_u32 (a, b, c); }
|
||||
static forcedinline uint32_t sum (vSIMDType a) noexcept
|
||||
{
|
||||
vSIMDType rr_ir = mul (a, dupeven (b));
|
||||
vSIMDType ii_ri = mul (swapevenodd (a), dupodd (b));
|
||||
return add (rr_ir, bit_xor (ii_ri, vld1q_f32 ((float*) kEvenHighBit)));
|
||||
auto rr = vadd_u32 (vget_high_u32 (a), vget_low_u32 (a));
|
||||
return vget_lane_u32 (vpadd_u32 (rr, rr), 0);
|
||||
}
|
||||
};
|
||||
|
||||
//==============================================================================
|
||||
/** Double-precision floating point NEON intrinsics do not exist in NEON
|
||||
so we need to emulate this.
|
||||
*/
|
||||
/** Signed 32-bit integer NEON intrinsics. */
|
||||
template <>
|
||||
struct SIMDNativeOps<double>
|
||||
struct SIMDNativeOps<int32_t>
|
||||
{
|
||||
//==============================================================================
|
||||
typedef struct { double values [2]; } vSIMDType;
|
||||
typedef SIMDFallbackOps<double, vSIMDType> fb;
|
||||
typedef int32x4_t vSIMDType;
|
||||
typedef SIMDFallbackOps<int32_t, vSIMDType> fb;
|
||||
|
||||
static forcedinline vSIMDType expand (double s) noexcept { return fb::expand (s); }
|
||||
static forcedinline vSIMDType load (const double* a) noexcept { return fb::load (a); }
|
||||
static forcedinline void store (vSIMDType value, double* a) noexcept { fb::store (value, a); }
|
||||
static forcedinline vSIMDType add (vSIMDType a, vSIMDType b) noexcept { return fb::add (a, b); }
|
||||
static forcedinline vSIMDType sub (vSIMDType a, vSIMDType b) noexcept { return fb::sub (a, b); }
|
||||
static forcedinline vSIMDType mul (vSIMDType a, vSIMDType b) noexcept { return fb::mul (a, b); }
|
||||
static forcedinline vSIMDType bit_and (vSIMDType a, vSIMDType b) noexcept { return fb::bit_and (a, b); }
|
||||
static forcedinline vSIMDType bit_or (vSIMDType a, vSIMDType b) noexcept { return fb::bit_or (a, b); }
|
||||
static forcedinline vSIMDType bit_xor (vSIMDType a, vSIMDType b) noexcept { return fb::bit_xor (a, b); }
|
||||
static forcedinline vSIMDType bit_notand (vSIMDType a, vSIMDType b) noexcept { return fb::bit_notand (a, b); }
|
||||
static forcedinline vSIMDType bit_not (vSIMDType a) noexcept { return fb::bit_not (a); }
|
||||
static forcedinline vSIMDType min (vSIMDType a, vSIMDType b) noexcept { return fb::min (a, b); }
|
||||
static forcedinline vSIMDType max (vSIMDType a, vSIMDType b) noexcept { return fb::max (a, b); }
|
||||
static forcedinline vSIMDType equal (vSIMDType a, vSIMDType b) noexcept { return fb::equal (a, b); }
|
||||
static forcedinline vSIMDType notEqual (vSIMDType a, vSIMDType b) noexcept { return fb::notEqual (a, b); }
|
||||
static forcedinline vSIMDType greaterThan (vSIMDType a, vSIMDType b) noexcept { return fb::greaterThan (a, b); }
|
||||
static forcedinline vSIMDType greaterThanOrEqual (vSIMDType a, vSIMDType b) noexcept { return fb::greaterThanOrEqual (a, b); }
|
||||
static forcedinline vSIMDType multiplyAdd (vSIMDType a, vSIMDType b, vSIMDType c) noexcept { return fb::multiplyAdd (a, b, c); }
|
||||
static forcedinline vSIMDType cmplxmul (vSIMDType a, vSIMDType b) noexcept { return fb::cmplxmul (a, b); }
|
||||
static forcedinline double sum (vSIMDType a) noexcept { return fb::sum (a); }
|
||||
static forcedinline vSIMDType oddevensum (vSIMDType a) noexcept { return a; }
|
||||
//==============================================================================
|
||||
DECLARE_NEON_SIMD_CONST (int32_t, kAllBitsSet);
|
||||
|
||||
//==============================================================================
|
||||
static forcedinline vSIMDType expand (int32_t s) noexcept { return vdupq_n_s32 (s); }
|
||||
static forcedinline vSIMDType load (const int32_t* a) noexcept { return vld1q_s32 (a); }
|
||||
static forcedinline void store (vSIMDType value, int32_t* a) noexcept { vst1q_s32 (a, value); }
|
||||
static forcedinline vSIMDType add (vSIMDType a, vSIMDType b) noexcept { return vaddq_s32 (a, b); }
|
||||
static forcedinline vSIMDType sub (vSIMDType a, vSIMDType b) noexcept { return vsubq_s32 (a, b); }
|
||||
static forcedinline vSIMDType mul (vSIMDType a, vSIMDType b) noexcept { return vmulq_s32 (a, b); }
|
||||
static forcedinline vSIMDType bit_and (vSIMDType a, vSIMDType b) noexcept { return vandq_s32 (a, b); }
|
||||
static forcedinline vSIMDType bit_or (vSIMDType a, vSIMDType b) noexcept { return vorrq_s32 (a, b); }
|
||||
static forcedinline vSIMDType bit_xor (vSIMDType a, vSIMDType b) noexcept { return veorq_s32 (a, b); }
|
||||
static forcedinline vSIMDType bit_notand (vSIMDType a, vSIMDType b) noexcept { return vbicq_s32 (b, a); }
|
||||
static forcedinline vSIMDType bit_not (vSIMDType a) noexcept { return bit_notand (a, vld1q_s32 ((int32_t*) kAllBitsSet)); }
|
||||
static forcedinline vSIMDType min (vSIMDType a, vSIMDType b) noexcept { return vminq_s32 (a, b); }
|
||||
static forcedinline vSIMDType max (vSIMDType a, vSIMDType b) noexcept { return vmaxq_s32 (a, b); }
|
||||
static forcedinline vSIMDType equal (vSIMDType a, vSIMDType b) noexcept { return (vSIMDType) vceqq_s32 (a, b); }
|
||||
static forcedinline bool allEqual (vSIMDType a, vSIMDType b) noexcept
{
    // notEqual() sets all bits (-1) in each differing lane; adding up to four such lanes
    // can never come back to zero, so a zero total means no lane differed.
    const int32_t mismatchTotal = sum (notEqual (a, b));
    return mismatchTotal == 0;
}
|
||||
static forcedinline vSIMDType notEqual (vSIMDType a, vSIMDType b) noexcept { return bit_not (equal (a, b)); }
|
||||
static forcedinline vSIMDType greaterThan (vSIMDType a, vSIMDType b) noexcept { return (vSIMDType) vcgtq_s32 (a, b); }
|
||||
static forcedinline vSIMDType greaterThanOrEqual (vSIMDType a, vSIMDType b) noexcept { return (vSIMDType) vcgeq_s32 (a, b); }
|
||||
static forcedinline vSIMDType multiplyAdd (vSIMDType a, vSIMDType b, vSIMDType c) noexcept { return vmlaq_s32 (a, b, c); }
|
||||
static forcedinline int32_t sum (vSIMDType a) noexcept
{
    // Fold the upper two lanes onto the lower two: { a2 + a0, a3 + a1 }.
    const auto foldedHalves = vadd_s32 (vget_high_s32 (a), vget_low_s32 (a));

    // A pairwise add of the folded pair leaves the total of all four lanes in lane 0.
    return vget_lane_s32 (vpadd_s32 (foldedHalves, foldedHalves), 0);
}
|
||||
};
|
||||
|
||||
//==============================================================================
|
||||
|
|
@ -163,6 +159,7 @@ struct SIMDNativeOps<int8_t>
|
|||
static forcedinline vSIMDType notEqual (vSIMDType a, vSIMDType b) noexcept { return bit_not (equal (a, b)); }
|
||||
static forcedinline vSIMDType greaterThan (vSIMDType a, vSIMDType b) noexcept { return (vSIMDType) vcgtq_s8 (a, b); }
|
||||
static forcedinline vSIMDType greaterThanOrEqual (vSIMDType a, vSIMDType b) noexcept { return (vSIMDType) vcgeq_s8 (a, b); }
|
||||
static forcedinline bool allEqual (vSIMDType a, vSIMDType b) noexcept
{
    // notEqual() is bit_not (equal (...)), so each differing byte lane is expected to be all-ones.
    // The sixteen bytes are reinterpreted explicitly as four 32-bit lanes: passing an int8x16_t
    // where an int32x4_t is expected only compiles when lax vector conversions are enabled.
    // The 32-bit horizontal sum of such a mask is zero only when no byte was set.
    return SIMDNativeOps<int32_t>::sum (vreinterpretq_s32_s8 (notEqual (a, b))) == 0;
}
|
||||
static forcedinline vSIMDType multiplyAdd (vSIMDType a, vSIMDType b, vSIMDType c) noexcept { return vmlaq_s8 (a, b, c); }
|
||||
static forcedinline int8_t sum (vSIMDType a) noexcept { return fb::sum (a); }
|
||||
};
|
||||
|
|
@ -197,6 +194,7 @@ struct SIMDNativeOps<uint8_t>
|
|||
static forcedinline vSIMDType notEqual (vSIMDType a, vSIMDType b) noexcept { return bit_not (equal (a, b)); }
|
||||
static forcedinline vSIMDType greaterThan (vSIMDType a, vSIMDType b) noexcept { return (vSIMDType) vcgtq_u8 (a, b); }
|
||||
static forcedinline vSIMDType greaterThanOrEqual (vSIMDType a, vSIMDType b) noexcept { return (vSIMDType) vcgeq_u8 (a, b); }
|
||||
static forcedinline bool allEqual (vSIMDType a, vSIMDType b) noexcept
{
    // notEqual() is bit_not (equal (...)), so each differing byte lane is expected to be all-ones.
    // The sixteen bytes are reinterpreted explicitly as four 32-bit lanes: passing a uint8x16_t
    // where a uint32x4_t is expected only compiles when lax vector conversions are enabled.
    // The 32-bit horizontal sum of such a mask is zero only when no byte was set.
    return SIMDNativeOps<uint32_t>::sum (vreinterpretq_u32_u8 (notEqual (a, b))) == 0;
}
|
||||
static forcedinline vSIMDType multiplyAdd (vSIMDType a, vSIMDType b, vSIMDType c) noexcept { return vmlaq_u8 (a, b, c); }
|
||||
static forcedinline uint8_t sum (vSIMDType a) noexcept { return fb::sum (a); }
|
||||
};
|
||||
|
|
@ -231,6 +229,7 @@ struct SIMDNativeOps<int16_t>
|
|||
static forcedinline vSIMDType notEqual (vSIMDType a, vSIMDType b) noexcept { return bit_not (equal (a, b)); }
|
||||
static forcedinline vSIMDType greaterThan (vSIMDType a, vSIMDType b) noexcept { return (vSIMDType) vcgtq_s16 (a, b); }
|
||||
static forcedinline vSIMDType greaterThanOrEqual (vSIMDType a, vSIMDType b) noexcept { return (vSIMDType) vcgeq_s16 (a, b); }
|
||||
static forcedinline bool allEqual (vSIMDType a, vSIMDType b) noexcept
{
    // notEqual() is bit_not (equal (...)), so each differing 16-bit lane is expected to be all-ones.
    // The eight halfwords are reinterpreted explicitly as four 32-bit lanes: passing an int16x8_t
    // where an int32x4_t is expected only compiles when lax vector conversions are enabled.
    // The 32-bit horizontal sum of such a mask is zero only when no lane was set.
    return SIMDNativeOps<int32_t>::sum (vreinterpretq_s32_s16 (notEqual (a, b))) == 0;
}
|
||||
static forcedinline vSIMDType multiplyAdd (vSIMDType a, vSIMDType b, vSIMDType c) noexcept { return vmlaq_s16 (a, b, c); }
|
||||
static forcedinline int16_t sum (vSIMDType a) noexcept { return fb::sum (a); }
|
||||
};
|
||||
|
|
@ -266,79 +265,11 @@ struct SIMDNativeOps<uint16_t>
|
|||
static forcedinline vSIMDType notEqual (vSIMDType a, vSIMDType b) noexcept { return bit_not (equal (a, b)); }
|
||||
static forcedinline vSIMDType greaterThan (vSIMDType a, vSIMDType b) noexcept { return (vSIMDType) vcgtq_u16 (a, b); }
|
||||
static forcedinline vSIMDType greaterThanOrEqual (vSIMDType a, vSIMDType b) noexcept { return (vSIMDType) vcgeq_u16 (a, b); }
|
||||
static forcedinline bool allEqual (vSIMDType a, vSIMDType b) noexcept
{
    // notEqual() is bit_not (equal (...)), so each differing 16-bit lane is expected to be all-ones.
    // The eight halfwords are reinterpreted explicitly as four 32-bit lanes: passing a uint16x8_t
    // where a uint32x4_t is expected only compiles when lax vector conversions are enabled.
    // The 32-bit horizontal sum of such a mask is zero only when no lane was set.
    return SIMDNativeOps<uint32_t>::sum (vreinterpretq_u32_u16 (notEqual (a, b))) == 0;
}
|
||||
static forcedinline vSIMDType multiplyAdd (vSIMDType a, vSIMDType b, vSIMDType c) noexcept { return vmlaq_u16 (a, b, c); }
|
||||
static forcedinline uint16_t sum (vSIMDType a) noexcept { return fb::sum (a); }
|
||||
};
|
||||
|
||||
//==============================================================================
|
||||
/** Signed 32-bit integer NEON intrinsics. */
|
||||
template <>
|
||||
struct SIMDNativeOps<int32_t>
|
||||
{
|
||||
//==============================================================================
|
||||
typedef int32x4_t vSIMDType;
|
||||
typedef SIMDFallbackOps<int32_t, vSIMDType> fb;
|
||||
|
||||
//==============================================================================
|
||||
DECLARE_NEON_SIMD_CONST (int32_t, kAllBitsSet);
|
||||
|
||||
//==============================================================================
|
||||
static forcedinline vSIMDType expand (int32_t s) noexcept { return vdupq_n_s32 (s); }
|
||||
static forcedinline vSIMDType load (const int32_t* a) noexcept { return vld1q_s32 (a); }
|
||||
static forcedinline void store (vSIMDType value, int32_t* a) noexcept { vst1q_s32 (a, value); }
|
||||
static forcedinline vSIMDType add (vSIMDType a, vSIMDType b) noexcept { return vaddq_s32 (a, b); }
|
||||
static forcedinline vSIMDType sub (vSIMDType a, vSIMDType b) noexcept { return vsubq_s32 (a, b); }
|
||||
static forcedinline vSIMDType mul (vSIMDType a, vSIMDType b) noexcept { return vmulq_s32 (a, b); }
|
||||
static forcedinline vSIMDType bit_and (vSIMDType a, vSIMDType b) noexcept { return vandq_s32 (a, b); }
|
||||
static forcedinline vSIMDType bit_or (vSIMDType a, vSIMDType b) noexcept { return vorrq_s32 (a, b); }
|
||||
static forcedinline vSIMDType bit_xor (vSIMDType a, vSIMDType b) noexcept { return veorq_s32 (a, b); }
|
||||
static forcedinline vSIMDType bit_notand (vSIMDType a, vSIMDType b) noexcept { return vbicq_s32 (b, a); }
|
||||
static forcedinline vSIMDType bit_not (vSIMDType a) noexcept { return bit_notand (a, vld1q_s32 ((int32_t*) kAllBitsSet)); }
|
||||
static forcedinline vSIMDType min (vSIMDType a, vSIMDType b) noexcept { return vminq_s32 (a, b); }
|
||||
static forcedinline vSIMDType max (vSIMDType a, vSIMDType b) noexcept { return vmaxq_s32 (a, b); }
|
||||
static forcedinline vSIMDType equal (vSIMDType a, vSIMDType b) noexcept { return (vSIMDType) vceqq_s32 (a, b); }
|
||||
static forcedinline vSIMDType notEqual (vSIMDType a, vSIMDType b) noexcept { return bit_not (equal (a, b)); }
|
||||
static forcedinline vSIMDType greaterThan (vSIMDType a, vSIMDType b) noexcept { return (vSIMDType) vcgtq_s32 (a, b); }
|
||||
static forcedinline vSIMDType greaterThanOrEqual (vSIMDType a, vSIMDType b) noexcept { return (vSIMDType) vcgeq_s32 (a, b); }
|
||||
static forcedinline vSIMDType multiplyAdd (vSIMDType a, vSIMDType b, vSIMDType c) noexcept { return vmlaq_s32 (a, b, c); }
|
||||
static forcedinline int32_t sum (vSIMDType a) noexcept { return fb::sum (a); }
|
||||
};
|
||||
|
||||
|
||||
//==============================================================================
|
||||
/** Unsigned 32-bit integer NEON intrinsics. */
|
||||
template <>
|
||||
struct SIMDNativeOps<uint32_t>
|
||||
{
|
||||
//==============================================================================
|
||||
typedef uint32x4_t vSIMDType;
|
||||
typedef SIMDFallbackOps<uint32_t, vSIMDType> fb;
|
||||
|
||||
//==============================================================================
|
||||
DECLARE_NEON_SIMD_CONST (uint32_t, kAllBitsSet);
|
||||
|
||||
//==============================================================================
|
||||
static forcedinline vSIMDType expand (uint32_t s) noexcept { return vdupq_n_u32 (s); }
|
||||
static forcedinline vSIMDType load (const uint32_t* a) noexcept { return vld1q_u32 (a); }
|
||||
static forcedinline void store (vSIMDType value, uint32_t* a) noexcept { vst1q_u32 (a, value); }
|
||||
static forcedinline vSIMDType add (vSIMDType a, vSIMDType b) noexcept { return vaddq_u32 (a, b); }
|
||||
static forcedinline vSIMDType sub (vSIMDType a, vSIMDType b) noexcept { return vsubq_u32 (a, b); }
|
||||
static forcedinline vSIMDType mul (vSIMDType a, vSIMDType b) noexcept { return vmulq_u32 (a, b); }
|
||||
static forcedinline vSIMDType bit_and (vSIMDType a, vSIMDType b) noexcept { return vandq_u32 (a, b); }
|
||||
static forcedinline vSIMDType bit_or (vSIMDType a, vSIMDType b) noexcept { return vorrq_u32 (a, b); }
|
||||
static forcedinline vSIMDType bit_xor (vSIMDType a, vSIMDType b) noexcept { return veorq_u32 (a, b); }
|
||||
static forcedinline vSIMDType bit_notand (vSIMDType a, vSIMDType b) noexcept { return vbicq_u32 (b, a); }
|
||||
static forcedinline vSIMDType bit_not (vSIMDType a) noexcept { return bit_notand (a, vld1q_u32 ((uint32_t*) kAllBitsSet)); }
|
||||
static forcedinline vSIMDType min (vSIMDType a, vSIMDType b) noexcept { return vminq_u32 (a, b); }
|
||||
static forcedinline vSIMDType max (vSIMDType a, vSIMDType b) noexcept { return vmaxq_u32 (a, b); }
|
||||
static forcedinline vSIMDType equal (vSIMDType a, vSIMDType b) noexcept { return (vSIMDType) vceqq_u32 (a, b); }
|
||||
static forcedinline vSIMDType notEqual (vSIMDType a, vSIMDType b) noexcept { return bit_not (equal (a, b)); }
|
||||
static forcedinline vSIMDType greaterThan (vSIMDType a, vSIMDType b) noexcept { return (vSIMDType) vcgtq_u32 (a, b); }
|
||||
static forcedinline vSIMDType greaterThanOrEqual (vSIMDType a, vSIMDType b) noexcept { return (vSIMDType) vcgeq_u32 (a, b); }
|
||||
static forcedinline vSIMDType multiplyAdd (vSIMDType a, vSIMDType b, vSIMDType c) noexcept { return vmlaq_u32 (a, b, c); }
|
||||
static forcedinline uint32_t sum (vSIMDType a) noexcept { return fb::sum (a); }
|
||||
};
|
||||
|
||||
//==============================================================================
|
||||
/** Signed 64-bit integer NEON intrinsics. */
|
||||
template <>
|
||||
|
|
@ -369,6 +300,7 @@ struct SIMDNativeOps<int64_t>
|
|||
static forcedinline vSIMDType notEqual (vSIMDType a, vSIMDType b) noexcept { return fb::notEqual (a, b); }
|
||||
static forcedinline vSIMDType greaterThan (vSIMDType a, vSIMDType b) noexcept { return fb::greaterThan (a, b); }
|
||||
static forcedinline vSIMDType greaterThanOrEqual (vSIMDType a, vSIMDType b) noexcept { return fb::greaterThanOrEqual (a, b); }
|
||||
// Returns true if no lane of a differs from b. The 64-bit inequality mask is
// explicitly reinterpreted as 32-bit lanes before being summed, because compilers
// that disallow lax vector conversions reject the implicit conversion.
// (Assumes fb::notEqual sets every bit of a differing lane, like the native comparisons.)
static forcedinline bool allEqual (vSIMDType a, vSIMDType b) noexcept { return (SIMDNativeOps<int32_t>::sum ((SIMDNativeOps<int32_t>::vSIMDType) notEqual (a, b)) == 0); }
|
||||
static forcedinline vSIMDType multiplyAdd (vSIMDType a, vSIMDType b, vSIMDType c) noexcept { return fb::multiplyAdd (a, b, c); }
|
||||
static forcedinline int64_t sum (vSIMDType a) noexcept { return fb::sum (a); }
|
||||
};
|
||||
|
|
@ -404,10 +336,101 @@ struct SIMDNativeOps<uint64_t>
|
|||
static forcedinline vSIMDType notEqual (vSIMDType a, vSIMDType b) noexcept { return fb::notEqual (a, b); }
|
||||
static forcedinline vSIMDType greaterThan (vSIMDType a, vSIMDType b) noexcept { return fb::greaterThan (a, b); }
|
||||
static forcedinline vSIMDType greaterThanOrEqual (vSIMDType a, vSIMDType b) noexcept { return fb::greaterThanOrEqual (a, b); }
|
||||
// Returns true if no lane of a differs from b. The 64-bit inequality mask is
// explicitly reinterpreted as 32-bit lanes before being summed, because compilers
// that disallow lax vector conversions reject the implicit conversion.
// (Assumes fb::notEqual sets every bit of a differing lane, like the native comparisons.)
static forcedinline bool allEqual (vSIMDType a, vSIMDType b) noexcept { return (SIMDNativeOps<uint32_t>::sum ((SIMDNativeOps<uint32_t>::vSIMDType) notEqual (a, b)) == 0); }
|
||||
static forcedinline vSIMDType multiplyAdd (vSIMDType a, vSIMDType b, vSIMDType c) noexcept { return fb::multiplyAdd (a, b, c); }
|
||||
static forcedinline uint64_t sum (vSIMDType a) noexcept { return fb::sum (a); }
|
||||
};
|
||||
|
||||
//==============================================================================
|
||||
/** Single-precision floating point NEON intrinsics. */
|
||||
template <>
|
||||
struct SIMDNativeOps<float>
|
||||
{
|
||||
//==============================================================================
|
||||
typedef float32x4_t vSIMDType;
|
||||
typedef uint32x4_t vMaskType;
|
||||
typedef SIMDFallbackOps<float, vSIMDType> fb;
|
||||
|
||||
//==============================================================================
|
||||
DECLARE_NEON_SIMD_CONST (int32_t, kAllBitsSet);
|
||||
DECLARE_NEON_SIMD_CONST (int32_t, kEvenHighBit);
|
||||
DECLARE_NEON_SIMD_CONST (float, kOne);
|
||||
|
||||
//==============================================================================
|
||||
static forcedinline vSIMDType expand (float s) noexcept { return vdupq_n_f32 (s); }
|
||||
static forcedinline vSIMDType load (const float* a) noexcept { return vld1q_f32 (a); }
|
||||
static forcedinline void store (vSIMDType value, float* a) noexcept { vst1q_f32 (a, value); }
|
||||
static forcedinline vSIMDType add (vSIMDType a, vSIMDType b) noexcept { return vaddq_f32 (a, b); }
|
||||
static forcedinline vSIMDType sub (vSIMDType a, vSIMDType b) noexcept { return vsubq_f32 (a, b); }
|
||||
static forcedinline vSIMDType mul (vSIMDType a, vSIMDType b) noexcept { return vmulq_f32 (a, b); }
|
||||
static forcedinline vSIMDType bit_and (vSIMDType a, vSIMDType b) noexcept { return (vSIMDType) vandq_u32 ((vMaskType) a, (vMaskType) b); }
|
||||
static forcedinline vSIMDType bit_or (vSIMDType a, vSIMDType b) noexcept { return (vSIMDType) vorrq_u32 ((vMaskType) a, (vMaskType) b); }
|
||||
static forcedinline vSIMDType bit_xor (vSIMDType a, vSIMDType b) noexcept { return (vSIMDType) veorq_u32 ((vMaskType) a, (vMaskType) b); }
|
||||
static forcedinline vSIMDType bit_notand (vSIMDType a, vSIMDType b) noexcept { return (vSIMDType) vbicq_u32 ((vMaskType) b, (vMaskType) a); }
|
||||
static forcedinline vSIMDType bit_not (vSIMDType a) noexcept { return bit_notand (a, vld1q_f32 ((float*) kAllBitsSet)); }
|
||||
static forcedinline vSIMDType min (vSIMDType a, vSIMDType b) noexcept { return vminq_f32 (a, b); }
|
||||
static forcedinline vSIMDType max (vSIMDType a, vSIMDType b) noexcept { return vmaxq_f32 (a, b); }
|
||||
static forcedinline vSIMDType equal (vSIMDType a, vSIMDType b) noexcept { return (vSIMDType) vceqq_f32 (a, b); }
|
||||
static forcedinline vSIMDType notEqual (vSIMDType a, vSIMDType b) noexcept { return bit_not (equal (a, b)); }
|
||||
static forcedinline vSIMDType greaterThan (vSIMDType a, vSIMDType b) noexcept { return (vSIMDType) vcgtq_f32 (a, b); }
|
||||
static forcedinline vSIMDType greaterThanOrEqual (vSIMDType a, vSIMDType b) noexcept { return (vSIMDType) vcgeq_f32 (a, b); }
|
||||
static forcedinline bool allEqual (vSIMDType a, vSIMDType b) noexcept { return (SIMDNativeOps<uint32_t>::sum (notEqual (a, b)) == 0); }
|
||||
static forcedinline vSIMDType multiplyAdd (vSIMDType a, vSIMDType b, vSIMDType c) noexcept { return vmlaq_f32 (a, b, c); }
|
||||
static forcedinline vSIMDType dupeven (vSIMDType a) noexcept { return fb::shuffle<(0 << 0) | (0 << 2) | (2 << 4) | (2 << 6)> (a); }
|
||||
static forcedinline vSIMDType dupodd (vSIMDType a) noexcept { return fb::shuffle<(1 << 0) | (1 << 2) | (3 << 4) | (3 << 6)> (a); }
|
||||
static forcedinline vSIMDType swapevenodd (vSIMDType a) noexcept { return fb::shuffle<(1 << 0) | (0 << 2) | (3 << 4) | (2 << 6)> (a); }
|
||||
static forcedinline vSIMDType oddevensum (vSIMDType a) noexcept { return add (fb::shuffle<(2 << 0) | (3 << 2) | (0 << 4) | (1 << 6)> (a), a); }
|
||||
|
||||
//==============================================================================
|
||||
static forcedinline vSIMDType cmplxmul (vSIMDType a, vSIMDType b) noexcept
|
||||
{
|
||||
vSIMDType rr_ir = mul (a, dupeven (b));
|
||||
vSIMDType ii_ri = mul (swapevenodd (a), dupodd (b));
|
||||
return add (rr_ir, bit_xor (ii_ri, vld1q_f32 ((float*) kEvenHighBit)));
|
||||
}
|
||||
|
||||
static forcedinline float sum (vSIMDType a) noexcept
|
||||
{
|
||||
auto rr = vadd_f32 (vget_high_f32 (a), vget_low_f32 (a));
|
||||
return vget_lane_f32 (vpadd_f32 (rr, rr), 0);
|
||||
}
|
||||
};
|
||||
|
||||
//==============================================================================
|
||||
/** Double-precision floating point NEON intrinsics does not exist in NEON
|
||||
so we need to emulate this.
|
||||
*/
|
||||
template <>
|
||||
struct SIMDNativeOps<double>
|
||||
{
|
||||
//==============================================================================
|
||||
typedef struct { double values [2]; } vSIMDType;
|
||||
typedef SIMDFallbackOps<double, vSIMDType> fb;
|
||||
|
||||
static forcedinline vSIMDType expand (double s) noexcept { return fb::expand (s); }
|
||||
static forcedinline vSIMDType load (const double* a) noexcept { return fb::load (a); }
|
||||
static forcedinline void store (vSIMDType value, double* a) noexcept { fb::store (value, a); }
|
||||
static forcedinline vSIMDType add (vSIMDType a, vSIMDType b) noexcept { return fb::add (a, b); }
|
||||
static forcedinline vSIMDType sub (vSIMDType a, vSIMDType b) noexcept { return fb::sub (a, b); }
|
||||
static forcedinline vSIMDType mul (vSIMDType a, vSIMDType b) noexcept { return fb::mul (a, b); }
|
||||
static forcedinline vSIMDType bit_and (vSIMDType a, vSIMDType b) noexcept { return fb::bit_and (a, b); }
|
||||
static forcedinline vSIMDType bit_or (vSIMDType a, vSIMDType b) noexcept { return fb::bit_or (a, b); }
|
||||
static forcedinline vSIMDType bit_xor (vSIMDType a, vSIMDType b) noexcept { return fb::bit_xor (a, b); }
|
||||
static forcedinline vSIMDType bit_notand (vSIMDType a, vSIMDType b) noexcept { return fb::bit_notand (a, b); }
|
||||
static forcedinline vSIMDType bit_not (vSIMDType a) noexcept { return fb::bit_not (a); }
|
||||
static forcedinline vSIMDType min (vSIMDType a, vSIMDType b) noexcept { return fb::min (a, b); }
|
||||
static forcedinline vSIMDType max (vSIMDType a, vSIMDType b) noexcept { return fb::max (a, b); }
|
||||
static forcedinline vSIMDType equal (vSIMDType a, vSIMDType b) noexcept { return fb::equal (a, b); }
|
||||
static forcedinline vSIMDType notEqual (vSIMDType a, vSIMDType b) noexcept { return fb::notEqual (a, b); }
|
||||
static forcedinline vSIMDType greaterThan (vSIMDType a, vSIMDType b) noexcept { return fb::greaterThan (a, b); }
|
||||
static forcedinline vSIMDType greaterThanOrEqual (vSIMDType a, vSIMDType b) noexcept { return fb::greaterThanOrEqual (a, b); }
|
||||
static forcedinline bool allEqual (vSIMDType a, vSIMDType b) noexcept { return fb::allEqual (a, b); }
|
||||
static forcedinline vSIMDType multiplyAdd (vSIMDType a, vSIMDType b, vSIMDType c) noexcept { return fb::multiplyAdd (a, b, c); }
|
||||
static forcedinline vSIMDType cmplxmul (vSIMDType a, vSIMDType b) noexcept { return fb::cmplxmul (a, b); }
|
||||
static forcedinline double sum (vSIMDType a) noexcept { return fb::sum (a); }
|
||||
static forcedinline vSIMDType oddevensum (vSIMDType a) noexcept { return a; }
|
||||
};
|
||||
|
||||
#endif
|
||||
|
||||
} // namespace dsp
|
||||
|
|
|
|||
|
|
@ -81,6 +81,7 @@ struct SIMDNativeOps<float>
|
|||
static forcedinline __m128 JUCE_VECTOR_CALLTYPE notEqual (__m128 a, __m128 b) noexcept { return _mm_cmpneq_ps (a, b); }
|
||||
static forcedinline __m128 JUCE_VECTOR_CALLTYPE greaterThan (__m128 a, __m128 b) noexcept { return _mm_cmpgt_ps (a, b); }
|
||||
static forcedinline __m128 JUCE_VECTOR_CALLTYPE greaterThanOrEqual (__m128 a, __m128 b) noexcept { return _mm_cmpge_ps (a, b); }
|
||||
static forcedinline bool JUCE_VECTOR_CALLTYPE allEqual (__m128 a, __m128 b ) noexcept { return (_mm_movemask_ps (equal (a, b)) == 0xf); }
|
||||
static forcedinline __m128 JUCE_VECTOR_CALLTYPE multiplyAdd (__m128 a, __m128 b, __m128 c) noexcept { return _mm_add_ps (a, _mm_mul_ps (b, c)); }
|
||||
static forcedinline __m128 JUCE_VECTOR_CALLTYPE dupeven (__m128 a) noexcept { return _mm_shuffle_ps (a, a, _MM_SHUFFLE (2, 2, 0, 0)); }
|
||||
static forcedinline __m128 JUCE_VECTOR_CALLTYPE dupodd (__m128 a) noexcept { return _mm_shuffle_ps (a, a, _MM_SHUFFLE (3, 3, 1, 1)); }
|
||||
|
|
@ -142,6 +143,7 @@ struct SIMDNativeOps<double>
|
|||
static forcedinline __m128d JUCE_VECTOR_CALLTYPE notEqual (__m128d a, __m128d b) noexcept { return _mm_cmpneq_pd (a, b); }
|
||||
static forcedinline __m128d JUCE_VECTOR_CALLTYPE greaterThan (__m128d a, __m128d b) noexcept { return _mm_cmpgt_pd (a, b); }
|
||||
static forcedinline __m128d JUCE_VECTOR_CALLTYPE greaterThanOrEqual (__m128d a, __m128d b) noexcept { return _mm_cmpge_pd (a, b); }
|
||||
static forcedinline bool JUCE_VECTOR_CALLTYPE allEqual (__m128d a, __m128d b ) noexcept { return (_mm_movemask_pd (equal (a, b)) == 0x3); }
|
||||
static forcedinline __m128d JUCE_VECTOR_CALLTYPE multiplyAdd (__m128d a, __m128d b, __m128d c) noexcept { return _mm_add_pd (a, _mm_mul_pd (b, c)); }
|
||||
static forcedinline __m128d JUCE_VECTOR_CALLTYPE dupeven (__m128d a) noexcept { return _mm_shuffle_pd (a, a, _MM_SHUFFLE2 (0, 0)); }
|
||||
static forcedinline __m128d JUCE_VECTOR_CALLTYPE dupodd (__m128d a) noexcept { return _mm_shuffle_pd (a, a, _MM_SHUFFLE2 (1, 1)); }
|
||||
|
|
@ -201,6 +203,7 @@ struct SIMDNativeOps<int8_t>
|
|||
static forcedinline __m128i JUCE_VECTOR_CALLTYPE greaterThanOrEqual (__m128i a, __m128i b) noexcept { return bit_or (greaterThan (a, b), equal (a,b)); }
|
||||
static forcedinline __m128i JUCE_VECTOR_CALLTYPE multiplyAdd (__m128i a, __m128i b, __m128i c) noexcept { return add (a, mul (b, c)); }
|
||||
static forcedinline __m128i JUCE_VECTOR_CALLTYPE notEqual (__m128i a, __m128i b) noexcept { return bit_not (equal (a, b)); }
|
||||
static forcedinline bool JUCE_VECTOR_CALLTYPE allEqual (__m128i a, __m128i b) noexcept { return (_mm_movemask_epi8 (equal (a, b)) == 0xffff); }
|
||||
|
||||
//==============================================================================
|
||||
static forcedinline __m128i JUCE_VECTOR_CALLTYPE load (const int8_t* a) noexcept
|
||||
|
|
@ -282,6 +285,7 @@ struct SIMDNativeOps<uint8_t>
|
|||
static forcedinline __m128i JUCE_VECTOR_CALLTYPE greaterThanOrEqual (__m128i a, __m128i b) noexcept { return bit_or (greaterThan (a, b), equal (a,b)); }
|
||||
static forcedinline __m128i JUCE_VECTOR_CALLTYPE multiplyAdd (__m128i a, __m128i b, __m128i c) noexcept { return add (a, mul (b, c)); }
|
||||
static forcedinline __m128i JUCE_VECTOR_CALLTYPE notEqual (__m128i a, __m128i b) noexcept { return bit_not (equal (a, b)); }
|
||||
static forcedinline bool JUCE_VECTOR_CALLTYPE allEqual (__m128i a, __m128i b) noexcept { return (_mm_movemask_epi8 (equal (a, b)) == 0xffff); }
|
||||
|
||||
//==============================================================================
|
||||
static forcedinline __m128i JUCE_VECTOR_CALLTYPE load (const uint8_t* a) noexcept
|
||||
|
|
@ -363,6 +367,7 @@ struct SIMDNativeOps<int16_t>
|
|||
static forcedinline __m128i JUCE_VECTOR_CALLTYPE greaterThanOrEqual (__m128i a, __m128i b) noexcept { return bit_or (greaterThan (a, b), equal (a,b)); }
|
||||
static forcedinline __m128i JUCE_VECTOR_CALLTYPE multiplyAdd (__m128i a, __m128i b, __m128i c) noexcept { return add (a, mul (b, c)); }
|
||||
static forcedinline __m128i JUCE_VECTOR_CALLTYPE notEqual (__m128i a, __m128i b) noexcept { return bit_not (equal (a, b)); }
|
||||
static forcedinline bool JUCE_VECTOR_CALLTYPE allEqual (__m128i a, __m128i b) noexcept { return (_mm_movemask_epi8 (equal (a, b)) == 0xffff); }
|
||||
|
||||
//==============================================================================
|
||||
static forcedinline __m128i JUCE_VECTOR_CALLTYPE load (const int16_t* a) noexcept
|
||||
|
|
@ -431,6 +436,7 @@ struct SIMDNativeOps<uint16_t>
|
|||
static forcedinline __m128i JUCE_VECTOR_CALLTYPE greaterThanOrEqual (__m128i a, __m128i b) noexcept { return bit_or (greaterThan (a, b), equal (a,b)); }
|
||||
static forcedinline __m128i JUCE_VECTOR_CALLTYPE multiplyAdd (__m128i a, __m128i b, __m128i c) noexcept { return add (a, mul (b, c)); }
|
||||
static forcedinline __m128i JUCE_VECTOR_CALLTYPE notEqual (__m128i a, __m128i b) noexcept { return bit_not (equal (a, b)); }
|
||||
static forcedinline bool JUCE_VECTOR_CALLTYPE allEqual (__m128i a, __m128i b) noexcept { return (_mm_movemask_epi8 (equal (a, b)) == 0xffff); }
|
||||
|
||||
//==============================================================================
|
||||
static forcedinline __m128i JUCE_VECTOR_CALLTYPE load (const uint16_t* a) noexcept
|
||||
|
|
@ -490,6 +496,7 @@ struct SIMDNativeOps<int32_t>
|
|||
static forcedinline __m128i JUCE_VECTOR_CALLTYPE greaterThanOrEqual (__m128i a, __m128i b) noexcept { return bit_or (greaterThan (a, b), equal (a,b)); }
|
||||
static forcedinline __m128i JUCE_VECTOR_CALLTYPE multiplyAdd (__m128i a, __m128i b, __m128i c) noexcept { return add (a, mul (b, c)); }
|
||||
static forcedinline __m128i JUCE_VECTOR_CALLTYPE notEqual (__m128i a, __m128i b) noexcept { return bit_not (equal (a, b)); }
|
||||
static forcedinline bool JUCE_VECTOR_CALLTYPE allEqual (__m128i a, __m128i b) noexcept { return (_mm_movemask_epi8 (equal (a, b)) == 0xffff); }
|
||||
|
||||
//==============================================================================
|
||||
static forcedinline void JUCE_VECTOR_CALLTYPE store (__m128i value, int32_t* dest) noexcept
|
||||
|
|
@ -575,6 +582,7 @@ struct SIMDNativeOps<uint32_t>
|
|||
static forcedinline __m128i JUCE_VECTOR_CALLTYPE greaterThanOrEqual (__m128i a, __m128i b) noexcept { return bit_or (greaterThan (a, b), equal (a,b)); }
|
||||
static forcedinline __m128i JUCE_VECTOR_CALLTYPE multiplyAdd (__m128i a, __m128i b, __m128i c) noexcept { return add (a, mul (b, c)); }
|
||||
static forcedinline __m128i JUCE_VECTOR_CALLTYPE notEqual (__m128i a, __m128i b) noexcept { return bit_not (equal (a, b)); }
|
||||
static forcedinline bool JUCE_VECTOR_CALLTYPE allEqual (__m128i a, __m128i b) noexcept { return (_mm_movemask_epi8 (equal (a, b)) == 0xffff); }
|
||||
|
||||
//==============================================================================
|
||||
static forcedinline __m128i JUCE_VECTOR_CALLTYPE load (const uint32_t* a) noexcept
|
||||
|
|
@ -671,6 +679,7 @@ struct SIMDNativeOps<int64_t>
|
|||
// Lane-wise a >= b, built from the greater-than and equality masks. Declared with
// JUCE_VECTOR_CALLTYPE like every other operation in these specialisations.
static forcedinline __m128i JUCE_VECTOR_CALLTYPE greaterThanOrEqual (__m128i a, __m128i b) noexcept { return bit_or (greaterThan (a, b), equal (a,b)); }
|
||||
static forcedinline __m128i JUCE_VECTOR_CALLTYPE multiplyAdd (__m128i a, __m128i b, __m128i c) noexcept { return add (a, mul (b, c)); }
|
||||
static forcedinline __m128i JUCE_VECTOR_CALLTYPE notEqual (__m128i a, __m128i b) noexcept { return bit_not (equal (a, b)); }
|
||||
static forcedinline bool JUCE_VECTOR_CALLTYPE allEqual (__m128i a, __m128i b) noexcept { return (_mm_movemask_epi8 (equal (a, b)) == 0xffff); }
|
||||
|
||||
//==============================================================================
|
||||
static forcedinline void JUCE_VECTOR_CALLTYPE store (__m128i value, int64_t* dest) noexcept
|
||||
|
|
@ -762,6 +771,7 @@ struct SIMDNativeOps<uint64_t>
|
|||
static forcedinline __m128i JUCE_VECTOR_CALLTYPE greaterThanOrEqual (__m128i a, __m128i b) noexcept { return bit_or (greaterThan (a, b), equal (a,b)); }
|
||||
static forcedinline __m128i JUCE_VECTOR_CALLTYPE multiplyAdd (__m128i a, __m128i b, __m128i c) noexcept { return add (a, mul (b, c)); }
|
||||
static forcedinline __m128i JUCE_VECTOR_CALLTYPE notEqual (__m128i a, __m128i b) noexcept { return bit_not (equal (a, b)); }
|
||||
static forcedinline bool JUCE_VECTOR_CALLTYPE allEqual (__m128i a, __m128i b) noexcept { return (_mm_movemask_epi8 (equal (a, b)) == 0xffff); }
|
||||
|
||||
//==============================================================================
|
||||
static forcedinline __m128i JUCE_VECTOR_CALLTYPE load (const uint64_t* a) noexcept
|
||||
|
|
|
|||
|
|
@ -172,7 +172,7 @@ namespace FIR
|
|||
static SampleType JUCE_VECTOR_CALLTYPE processSingleSample (SampleType sample, SampleType* buf,
|
||||
const NumericType* fir, size_t m, size_t& p) noexcept
|
||||
{
|
||||
SampleType out = {};
|
||||
SampleType out (0);
|
||||
|
||||
buf[p] = sample;
|
||||
|
||||
|
|
|
|||
|
|
@ -106,7 +106,7 @@ class FIRFilterTest : public UnitTest
|
|||
|
||||
buffer[0] = input[i];
|
||||
|
||||
SampleType sum{};
|
||||
SampleType sum (0);
|
||||
|
||||
for (size_t j = 0; j < numCoefficients; ++j)
|
||||
sum += buffer[j] * firCoefficients[j];
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue