From 60335815113edd8a41b78d12daf5f529816f69fc Mon Sep 17 00:00:00 2001
From: jules <jules@rawmaterialsoftware.com>
Date: Mon, 25 Feb 2013 16:23:04 +0000
Subject: [PATCH] Additions and 64-bit fixes for FloatVectorOperations.

---
 .../buffers/juce_AudioSampleBuffer.cpp        |  2 +-
 .../buffers/juce_FloatVectorOperations.cpp    | 39 +++++++++++++------
 .../buffers/juce_FloatVectorOperations.h      |  5 +++
 3 files changed, 34 insertions(+), 12 deletions(-)

diff --git a/modules/juce_audio_basics/buffers/juce_AudioSampleBuffer.cpp b/modules/juce_audio_basics/buffers/juce_AudioSampleBuffer.cpp
index ef1284de92..3a411fc0d3 100644
--- a/modules/juce_audio_basics/buffers/juce_AudioSampleBuffer.cpp
+++ b/modules/juce_audio_basics/buffers/juce_AudioSampleBuffer.cpp
@@ -174,7 +174,7 @@ void AudioSampleBuffer::setSize (const int newNumChannels,
 
             const int numChansToCopy = jmin (numChannels, newNumChannels);
             for (int i = 0; i < numChansToCopy; ++i)
-                FloatVectorOperations::copy (newChannels[i], channels[i], numSamplesToCopy);
+                FloatVectorOperations::copy (newChannels[i], channels[i], (int) numSamplesToCopy);
 
             allocatedData.swapWith (newData);
             allocatedBytes = newTotalBytes;
diff --git a/modules/juce_audio_basics/buffers/juce_FloatVectorOperations.cpp b/modules/juce_audio_basics/buffers/juce_FloatVectorOperations.cpp
index 925330cbb0..92dfb6cfff 100644
--- a/modules/juce_audio_basics/buffers/juce_FloatVectorOperations.cpp
+++ b/modules/juce_audio_basics/buffers/juce_FloatVectorOperations.cpp
@@ -29,7 +29,7 @@ namespace FloatVectorHelpers
 {
     static bool sse2Present = false;
 
-    static bool isSSE2Available()
+    static bool isSSE2Available() noexcept
     {
         if (sse2Present)
             return true;
@@ -38,10 +38,17 @@ namespace FloatVectorHelpers
         return sse2Present;
     }
 
-    inline static bool isAligned (const void* p)
+    inline static bool isAligned (const void* p) noexcept  // True when the low four bits of p's address are zero, i.e. p is 16-byte aligned.
     {
         return (((pointer_sized_int) p) & 15) == 0;
     }
+
+    inline static void mmEmpty() noexcept  // Wrapper around _mm_empty() that compiles to a no-op when JUCE_64BIT is non-zero.
+    {
+       #if ! JUCE_64BIT
+        _mm_empty();  // NOTE(review): presumably skipped on 64-bit because the MMX-state intrinsic is unavailable or unneeded there - confirm.
+       #endif
+    }
 }
 
 #define JUCE_BEGIN_SSE_OP \
@@ -50,7 +57,7 @@ namespace FloatVectorHelpers
         const int numLongOps = num / 4;
 
 #define JUCE_FINISH_SSE_OP(normalOp) \
-        _mm_empty(); \
+        FloatVectorHelpers::mmEmpty(); \
         num &= 3; \
         if (num == 0) return; \
     } \
@@ -72,10 +79,10 @@ namespace FloatVectorHelpers
 #define JUCE_LOAD_SRC(srcLoad, dstLoad)      const __m128 s = srcLoad (src);
 #define JUCE_LOAD_SRC_DEST(srcLoad, dstLoad) const __m128 d = dstLoad (dest); const __m128 s = srcLoad (src);
 
-#define JUCE_PERFORM_SSE_OP_DEST(normalOp, sseOp) \
+#define JUCE_PERFORM_SSE_OP_DEST(normalOp, sseOp, locals) /* 'locals' is the load macro forwarded to JUCE_SSE_LOOP, e.g. JUCE_LOAD_DEST or JUCE_LOAD_NONE. */ \
     JUCE_BEGIN_SSE_OP \
-    if (FloatVectorHelpers::isAligned (dest))   JUCE_SSE_LOOP (sseOp, dummy, _mm_load_ps,  _mm_store_ps,  JUCE_LOAD_DEST, JUCE_INCREMENT_DEST) \
-    else                                        JUCE_SSE_LOOP (sseOp, dummy, _mm_loadu_ps, _mm_storeu_ps, JUCE_LOAD_DEST, JUCE_INCREMENT_DEST) \
+    if (FloatVectorHelpers::isAligned (dest))   JUCE_SSE_LOOP (sseOp, dummy, _mm_load_ps,  _mm_store_ps,  locals, JUCE_INCREMENT_DEST) \
+    else                                        JUCE_SSE_LOOP (sseOp, dummy, _mm_loadu_ps, _mm_storeu_ps, locals, JUCE_INCREMENT_DEST) \
     JUCE_FINISH_SSE_OP (normalOp)
 
 #define JUCE_PERFORM_SSE_OP_SRC_DEST(normalOp, sseOp, locals, increment) \
@@ -103,6 +110,15 @@ void FloatVectorOperations::clear (float* dest, const int num) noexcept
     zeromem (dest, num * sizeof (float));
 }
 
+void FloatVectorOperations::fill (float* dest, float valueToFill, int num) noexcept  // Copies valueToFill repeatedly into the float vector at dest.
+{
+   #if JUCE_USE_SSE_INTRINSICS
+    const __m128 val = _mm_load1_ps (&valueToFill);  // Broadcast valueToFill into all four lanes of an SSE register.
+   #endif
+
+    JUCE_PERFORM_SSE_OP_DEST (dest[i] = valueToFill, val, JUCE_LOAD_NONE)  // Args map to the macro's (normalOp, sseOp, locals) parameters.
+}
+
 void FloatVectorOperations::copy (float* dest, const float* src, const int num) noexcept
 {
     memcpy (dest, src, num * sizeof (float));
@@ -133,7 +149,8 @@ void FloatVectorOperations::add (float* dest, float amount, int num) noexcept
    #endif
 
     JUCE_PERFORM_SSE_OP_DEST (dest[i] += amount,
-                              _mm_add_ps (d, amountToAdd))
+                              _mm_add_ps (d, amountToAdd),
+                              JUCE_LOAD_DEST)
 }
 
 void FloatVectorOperations::addWithMultiply (float* dest, const float* src, float multiplier, int num) noexcept
@@ -161,7 +178,8 @@ void FloatVectorOperations::multiply (float* dest, float multiplier, int num) no
    #endif
 
     JUCE_PERFORM_SSE_OP_DEST (dest[i] *= multiplier,
-                              _mm_mul_ps (d, mult))
+                              _mm_mul_ps (d, mult),
+                              JUCE_LOAD_DEST)
 }
 
 void FloatVectorOperations::convertFixedToFloat (float* dest, const int* src, float multiplier, int num) noexcept
@@ -171,8 +189,7 @@ void FloatVectorOperations::convertFixedToFloat (float* dest, const int* src, fl
    #endif
 
     JUCE_PERFORM_SSE_OP_SRC_DEST (dest[i] = src[i] * multiplier,
-                                  _mm_mul_ps (mult, _mm_movelh_ps (_mm_cvt_pi2ps (_mm_setzero_ps(), ((const __m64*) src)[0]),
-                                                                   _mm_cvt_pi2ps (_mm_setzero_ps(), ((const __m64*) src)[1]))),
+                                  _mm_mul_ps (mult, _mm_cvtepi32_ps (_mm_loadu_si128 ((const __m128i*) src))),
                                   JUCE_LOAD_NONE, JUCE_INCREMENT_SRC_DEST)
 }
 
@@ -206,7 +223,7 @@ void FloatVectorOperations::findMinAndMax (const float* src, int num, float& min
             float mns[4], mxs[4];
             _mm_storeu_ps (mns, mn);
             _mm_storeu_ps (mxs, mx);
-            _mm_empty();
+            FloatVectorHelpers::mmEmpty();
 
             localMin = jmin (mns[0], mns[1], mns[2], mns[3]);
             localMax = jmax (mxs[0], mxs[1], mxs[2], mxs[3]);
diff --git a/modules/juce_audio_basics/buffers/juce_FloatVectorOperations.h b/modules/juce_audio_basics/buffers/juce_FloatVectorOperations.h
index 28ba2a1248..533baed665 100644
--- a/modules/juce_audio_basics/buffers/juce_FloatVectorOperations.h
+++ b/modules/juce_audio_basics/buffers/juce_FloatVectorOperations.h
@@ -29,6 +29,8 @@
 
 //==============================================================================
 /**
+    A collection of simple vector operations on arrays of floats, accelerated with
+    SIMD instructions where possible.
 */
 class JUCE_API  FloatVectorOperations
 {
@@ -37,6 +39,9 @@ public:
     /** Clears a vector of floats. */
     static void clear (float* dest, int numValues) noexcept;
 
+    /** Copies a repeated value into a vector of floats. */
+    static void fill (float* dest, float valueToFill, int numValues) noexcept;
+
     /** Copies a vector of floats. */
     static void copy (float* dest, const float* src, int numValues) noexcept;