mirror of
https://github.com/juce-framework/JUCE.git
synced 2026-01-10 23:44:24 +00:00
SSE SIMDNativeOps: Reimplement sum for SSE3 to work around an AppleClang bug
With clang 13.0.0, and Apple clang version 13.1.6 (clang-1316.0.21.2),
the following code fails to compile with `-std=c++20 -O3 -msse3`:
#include <immintrin.h>
auto test (__m128 a)
{
return _mm_hadd_ps (_mm_hadd_ps (a, a), a);
}
This commit is contained in:
parent
c6f703aa57
commit
970483b1cd
1 changed file with 5 additions and 3 deletions
|
|
@@ -106,11 +106,13 @@ struct SIMDNativeOps<float>
|
|||
static forcedinline float JUCE_VECTOR_CALLTYPE sum (__m128 a) noexcept
|
||||
{
|
||||
#if defined(__SSE4__)
|
||||
__m128 retval = _mm_dp_ps (a, _mm_loadu_ps (kOne), 0xff);
|
||||
const auto retval = _mm_dp_ps (a, _mm_loadu_ps (kOne), 0xff);
|
||||
#elif defined(__SSE3__)
|
||||
__m128 retval = _mm_hadd_ps (_mm_hadd_ps (a, a), a);
|
||||
const auto shuffled = _mm_movehdup_ps (a);
|
||||
const auto sums = _mm_add_ps (a, shuffled);
|
||||
const auto retval = _mm_add_ss (sums, _mm_movehl_ps (shuffled, sums));
|
||||
#else
|
||||
__m128 retval = _mm_add_ps (_mm_shuffle_ps (a, a, 0x4e), a);
|
||||
auto retval = _mm_add_ps (_mm_shuffle_ps (a, a, 0x4e), a);
|
||||
retval = _mm_add_ps (retval, _mm_shuffle_ps (retval, retval, 0xb1));
|
||||
#endif
|
||||
return _mm_cvtss_f32 (retval);
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue