1
0
Fork 0
mirror of https://github.com/juce-framework/JUCE.git synced 2026-01-10 23:44:24 +00:00

SSE SIMDNativeOps: Reimplement sum for SSE3 to work around an AppleClang bug

With clang 13.0.0 and Apple clang version 13.1.6 (clang-1316.0.21.2),
the following code fails to compile with `-std=c++20 -O3 -msse3`:

    #include <immintrin.h>

    auto test (__m128 a)
    {
        return _mm_hadd_ps (_mm_hadd_ps (a, a), a);
    }
This commit is contained in:
reuk 2022-03-22 16:03:28 +00:00
parent c6f703aa57
commit 970483b1cd
No known key found for this signature in database
GPG key ID: FCB43929F012EE5C

View file

@@ -106,11 +106,13 @@ struct SIMDNativeOps<float>
static forcedinline float JUCE_VECTOR_CALLTYPE sum (__m128 a) noexcept
{
#if defined(__SSE4__)
__m128 retval = _mm_dp_ps (a, _mm_loadu_ps (kOne), 0xff);
const auto retval = _mm_dp_ps (a, _mm_loadu_ps (kOne), 0xff);
#elif defined(__SSE3__)
__m128 retval = _mm_hadd_ps (_mm_hadd_ps (a, a), a);
const auto shuffled = _mm_movehdup_ps (a);
const auto sums = _mm_add_ps (a, shuffled);
const auto retval = _mm_add_ss (sums, _mm_movehl_ps (shuffled, sums));
#else
__m128 retval = _mm_add_ps (_mm_shuffle_ps (a, a, 0x4e), a);
auto retval = _mm_add_ps (_mm_shuffle_ps (a, a, 0x4e), a);
retval = _mm_add_ps (retval, _mm_shuffle_ps (retval, retval, 0xb1));
#endif
return _mm_cvtss_f32 (retval);