endianmacros: Replace borked PA_FLOAT32_SWAP() with PA_READ_FLOAT32RE() / PA_WRITE_FLOAT32RE()

building PA with -O0 leads to test failure in mix-test on i386 issue reported by Felipe, see http://lists.freedesktop.org/archives/pulseaudio-discuss/2014-August/021406.html the problem is the value 0xbeffbd7f: when byte-swapped it becomes 0x7fbdffbe and according to IEEE-754 represents a signalling NaN (starting with s111 1111 10, see http://en.wikipedia.org/wiki/NaN) when this value is assigned to a floating point register, it becomes 0x7ffdffbe, representing a quiet NaN (starting with s111 1111 11) -- a signalling NaN is turned into a quiet NaN! so PA_FLOAT32_SWAP(PA_FLOAT32_SWAP(x)) != x for certain values, uhuh! the following test code can be used; due to volatile, it will always demonstrate the issue; without volatile, it depends on the optimization level (i386, 32-bit, gcc 4.9): // snip static inline float PA_FLOAT32_SWAP(float x) { union { float f; uint32_t u; } t; t.f = x; t.u = bswap_32(t.u); return t.f; } int main() { unsigned x = 0xbeffbd7f; volatile float f = PA_FLOAT32_SWAP(*(float *)&x); printf("%08x %08x %08x %f\n", 0xbeffbd7f, *(unsigned *)&f, bswap_32(*(unsigned *)&f), f); } // snip the problem goes away with optimization when no temporary floating point registers are used the proposed solution is to avoid passing swapped floating point data in a float; this is done with new functions PA_READ_FLOAT32RE() and PA_WRITE_FLOAT32RE() which use uint32_t to dereference a pointer and byte-swap the data, hence no temporary float variable is used also delete PA_FLOAT32_TO_LE()/_BE(), not used Signed-off-by: Peter Meerwald <pmeerw@pmeerw.net> Reported-by: Felipe Sateler <fsateler@debian.org>
author: Peter Meerwald <pmeerw@pmeerw.net> 2014-09-02 23:53:09 +0200
committer: Peter Meerwald <pmeerw@pmeerw.net> 2014-09-04 09:58:20 +0200
commit: 293a1739e25707c25321d5bc6cd3e2e8aa96a910 (patch)
tree: 804e45dc71c8db4c18fca3143bdc08f180293e08
parent: 5d7b5e509ce3a9a5c17f3ee3cec2eee2979e190f (diff)
7 files changed, 32 insertions, 40 deletions
diff --git a/src/pulsecore/endianmacros.h b/src/pulsecore/endianmacros.h
index 4822fc4e5..2e73136c8 100644
--- a/src/pulsecore/endianmacros.h
+++ b/src/pulsecore/endianmacros.h
@@ -71,25 +71,32 @@ static inline void PA_WRITE24LE(uint8_t *p, uint32_t u) {
     p[0] = (uint8_t) u;
 }
 
-static inline float PA_FLOAT32_SWAP(float x) {
+static inline float PA_READ_FLOAT32RE(const void *p) {
     union {
         float f;
         uint32_t u;
     } t;
 
-    t.f = x;
-    t.u = PA_UINT32_SWAP(t.u);
+    t.u = PA_UINT32_SWAP(*(uint32_t *) p);
     return t.f;
 }
 
+static inline void PA_WRITE_FLOAT32RE(void *p, float x) {
+    union {
+        float f;
+        uint32_t u;
+    } t;
+
+    t.f = x;
+    *(uint32_t *) p = PA_UINT32_SWAP(t.u);
+}
+
 #define PA_MAYBE_INT16_SWAP(c,x) ((c) ? PA_INT16_SWAP(x) : (x))
 #define PA_MAYBE_UINT16_SWAP(c,x) ((c) ? PA_UINT16_SWAP(x) : (x))
 
 #define PA_MAYBE_INT32_SWAP(c,x) ((c) ? PA_INT32_SWAP(x) : (x))
 #define PA_MAYBE_UINT32_SWAP(c,x) ((c) ? PA_UINT32_SWAP(x) : (x))
 
-#define PA_MAYBE_FLOAT32_SWAP(c,x) ((c) ? PA_FLOAT32_SWAP(x) : (x))
-
 #ifdef WORDS_BIGENDIAN
  #define PA_INT16_FROM_LE(x) PA_INT16_SWAP(x)
  #define PA_INT16_FROM_BE(x) ((int16_t)(x))
@@ -115,9 +122,6 @@ static inline float PA_FLOAT32_SWAP(float x) {
  #define PA_UINT32_TO_LE(x) PA_UINT32_SWAP(x)
  #define PA_UINT32_TO_BE(x) ((uint32_t)(x))
 
- #define PA_FLOAT32_TO_LE(x) PA_FLOAT32_SWAP(x)
- #define PA_FLOAT32_TO_BE(x) ((float) (x))
-
  #define PA_READ24NE(x) PA_READ24BE(x)
  #define PA_WRITE24NE(x,y) PA_WRITE24BE((x),(y))
 
@@ -148,9 +152,6 @@ static inline float PA_FLOAT32_SWAP(float x) {
  #define PA_UINT32_TO_LE(x) ((uint32_t)(x))
  #define PA_UINT32_TO_BE(x) PA_UINT32_SWAP(x)
 
- #define PA_FLOAT32_TO_LE(x) ((float) (x))
- #define PA_FLOAT32_TO_BE(x) PA_FLOAT32_SWAP(x)
-
  #define PA_READ24NE(x) PA_READ24LE(x)
  #define PA_WRITE24NE(x,y) PA_WRITE24LE((x),(y))
 
diff --git a/src/pulsecore/mix.c b/src/pulsecore/mix.c
index 10567d6f7..03c71f0a1 100644
--- a/src/pulsecore/mix.c
+++ b/src/pulsecore/mix.c
@@ -576,17 +576,14 @@ static void pa_mix_float32re_c(pa_mix_info streams[], unsigned nstreams, unsigne
 
         for (i = 0; i < nstreams; i++) {
             pa_mix_info *m = streams + i;
-            float v, cv = m->linear[channel].f;
+            float cv = m->linear[channel].f;
 
-            if (PA_LIKELY(cv > 0)) {
-                v = PA_FLOAT32_SWAP(*(float*) m->ptr);
-                v *= cv;
-                sum += v;
-            }
+            if (PA_LIKELY(cv > 0))
+                sum += PA_READ_FLOAT32RE(m->ptr) * cv;
             m->ptr = (uint8_t*) m->ptr + sizeof(float);
         }
 
-        *data = PA_FLOAT32_SWAP(sum);
+        PA_WRITE_FLOAT32RE(data, sum);
 
         if (PA_UNLIKELY(++channel >= channels))
             channel = 0;
diff --git a/src/pulsecore/sample-util.c b/src/pulsecore/sample-util.c
index dc5c1fd23..539f3b800 100644
--- a/src/pulsecore/sample-util.c
+++ b/src/pulsecore/sample-util.c
@@ -296,9 +296,9 @@ void pa_sample_clamp(pa_sample_format_t format, void *dst, size_t dstr, const vo
         for (; n > 0; n--) {
             float f;
 
-            f = PA_FLOAT32_SWAP(*s);
+            f = PA_READ_FLOAT32RE(s);
             f = PA_CLAMP_UNLIKELY(f, -1.0f, 1.0f);
-            *d = PA_FLOAT32_SWAP(f);
+            PA_WRITE_FLOAT32RE(d, f);
 
             s = (const float*) ((const uint8_t*) s + sstr);
             d = (float*) ((uint8_t*) d + dstr);
diff --git a/src/pulsecore/sconv-s16le.c b/src/pulsecore/sconv-s16le.c
index 45cf97293..0c1d16c1e 100644
--- a/src/pulsecore/sconv-s16le.c
+++ b/src/pulsecore/sconv-s16le.c
@@ -157,8 +157,7 @@ void pa_sconv_s16le_to_float32re(unsigned n, const int16_t *a, float *b) {
     for (; n > 0; n--) {
         int16_t s = *(a++);
         float k = INT16_FROM(s) * (1.0f / (1 << 15));
-        k = PA_FLOAT32_SWAP(k);
-        *(b++) = k;
+        PA_WRITE_FLOAT32RE(b++, k);
     }
 }
 
@@ -169,8 +168,7 @@ void pa_sconv_s32le_to_float32re(unsigned n, const int32_t *a, float *b) {
     for (; n > 0; n--) {
         int32_t s = *(a++);
         float k = INT32_FROM(s) * (1.0f / (1U << 31));
-        k = PA_FLOAT32_SWAP(k);
-        *(b++) = k;
+        PA_WRITE_FLOAT32RE(b++, k);
     }
 }
 
@@ -180,8 +178,7 @@ void pa_sconv_s16le_from_float32re(unsigned n, const float *a, int16_t *b) {
 
     for (; n > 0; n--) {
         int16_t s;
-        float v = *(a++);
-        v = PA_FLOAT32_SWAP(v) * (1 << 15);
+        float v = PA_READ_FLOAT32RE(a++) * (1 << 15);
         s = (int16_t) PA_CLAMP_UNLIKELY(lrintf(v), -0x8000, 0x7FFF);
         *(b++) = INT16_TO(s);
     }
@@ -193,8 +190,7 @@ void pa_sconv_s32le_from_float32re(unsigned n, const float *a, int32_t *b) {
 
     for (; n > 0; n--) {
         int32_t s;
-        float v = *(a++);
-        v = PA_FLOAT32_SWAP(v) * (1U << 31);
+        float v = PA_READ_FLOAT32RE(a++) * (1U << 31);
         s = (int32_t) PA_CLAMP_UNLIKELY(llrintf(v), -0x80000000LL, 0x7FFFFFFFLL);
         *(b++) = INT32_TO(s);
     }
@@ -325,7 +321,7 @@ void pa_sconv_s24le_to_float32re(unsigned n, const uint8_t *a, float *b) {
     for (; n > 0; n--) {
         int32_t s = READ24(a) << 8;
         float k = s * (1.0f / (1U << 31));
-        *b = PA_FLOAT32_SWAP(k);
+        PA_WRITE_FLOAT32RE(b, k);
         a += 3;
         b++;
     }
@@ -337,8 +333,7 @@ void pa_sconv_s24le_from_float32re(unsigned n, const float *a, uint8_t *b) {
 
     for (; n > 0; n--) {
         int32_t s;
-        float v = *a;
-        v = PA_FLOAT32_SWAP(v) * (1U << 31);
+        float v = PA_READ_FLOAT32RE(a) * (1U << 31);
         s = (int32_t) PA_CLAMP_UNLIKELY(llrint(v), -0x80000000LL, 0x7FFFFFFFLL);
         WRITE24(b, ((uint32_t) s) >> 8);
         a++;
@@ -411,7 +406,7 @@ void pa_sconv_s24_32le_to_float32re(unsigned n, const uint32_t *a, float *b) {
     for (; n > 0; n--) {
         int32_t s = (int32_t) (UINT32_FROM(*a) << 8);
         float k = s * (1.0f / (1U << 31));
-        *b = PA_FLOAT32_SWAP(k);
+        PA_WRITE_FLOAT32RE(b, k);
         a++;
         b++;
     }
@@ -437,8 +432,7 @@ void pa_sconv_s24_32le_from_float32re(unsigned n, const float *a, uint32_t *b) {
 
     for (; n > 0; n--) {
         int32_t s;
-        float v = *a;
-        v = PA_FLOAT32_SWAP(v) * (1U << 31);
+        float v = PA_READ_FLOAT32RE(a) * (1U << 31);
         s = (int32_t) PA_CLAMP_UNLIKELY(llrint(v), -0x80000000LL, 0x7FFFFFFFLL);
         *b = UINT32_TO(((uint32_t) s) >> 8);
         a++;
diff --git a/src/pulsecore/svolume_c.c b/src/pulsecore/svolume_c.c
index ad2df18e4..2e2db1232 100644
--- a/src/pulsecore/svolume_c.c
+++ b/src/pulsecore/svolume_c.c
@@ -125,9 +125,9 @@ static void pa_volume_float32re_c(float *samples, const float *volumes, unsigned
     for (channel = 0; length; length--) {
         float t;
 
-        t = PA_FLOAT32_SWAP(*samples);
+        t = PA_READ_FLOAT32RE(samples);
         t *= volumes[channel];
-        *samples++ = PA_FLOAT32_SWAP(t);
+        PA_WRITE_FLOAT32RE(samples++, t);
 
         if (PA_UNLIKELY(++channel >= channels))
             channel = 0;
diff --git a/src/tests/mix-test.c b/src/tests/mix-test.c
index d5bae13ed..e95607dee 100644
--- a/src/tests/mix-test.c
+++ b/src/tests/mix-test.c
@@ -150,7 +150,7 @@ static void compare_block(const pa_sample_spec *ss, const pa_memchunk *chunk, in
             float *u = d;
 
             for (i = 0; i < chunk->length / pa_frame_size(ss); i++) {
-                float uu = PA_MAYBE_FLOAT32_SWAP(ss->format != PA_SAMPLE_FLOAT32NE, *u);
+                float uu = ss->format == PA_SAMPLE_FLOAT32NE ? *u : PA_READ_FLOAT32RE(u);
                 fail_unless(fabsf(uu - *v) <= 1e-6f, NULL);
                 ++u;
                 ++v;
@@ -264,7 +264,7 @@ static pa_memblock* generate_block(pa_mempool *pool, const pa_sample_spec *ss) {
             if (ss->format == PA_SAMPLE_FLOAT32RE) {
                 float *u = d;
                 for (i = 0; i < 10; i++)
-                    u[i] = PA_FLOAT32_SWAP(float32ne_result[0][i]);
+                    PA_WRITE_FLOAT32RE(&u[i], float32ne_result[0][i]);
             } else
                 memcpy(d, float32ne_result[0], sizeof(float32ne_result[0]));
 
diff --git a/src/tests/resampler-test.c b/src/tests/resampler-test.c
index 60345afb3..f547770b8 100644
--- a/src/tests/resampler-test.c
+++ b/src/tests/resampler-test.c
@@ -98,7 +98,7 @@ static void dump_block(const char *label, const pa_sample_spec *ss, const pa_mem
             float *u = d;
 
             for (i = 0; i < chunk->length / pa_frame_size(ss); i++) {
-                printf("%4.3g ", ss->format == PA_SAMPLE_FLOAT32NE ? *u : PA_FLOAT32_SWAP(*u));
+                printf("%4.3g ", ss->format == PA_SAMPLE_FLOAT32NE ? *u : PA_READ_FLOAT32RE(u));
                 u++;
             }
 
@@ -222,7 +222,7 @@ static pa_memblock* generate_block(pa_mempool *pool, const pa_sample_spec *ss) {
 
             if (ss->format == PA_SAMPLE_FLOAT32RE)
                 for (i = 0; i < 10; i++)
-                    u[i] = PA_FLOAT32_SWAP(u[i]);
+                    PA_WRITE_FLOAT32RE(&u[i], u[i]);
 
             break;
         }
author	Peter Meerwald <pmeerw@pmeerw.net>	2014-09-02 23:53:09 +0200
committer	Peter Meerwald <pmeerw@pmeerw.net>	2014-09-04 09:58:20 +0200
commit	293a1739e25707c25321d5bc6cd3e2e8aa96a910 (patch)
tree	804e45dc71c8db4c18fca3143bdc08f180293e08
parent	5d7b5e509ce3a9a5c17f3ee3cec2eee2979e190f (diff)