opus: Force stack alignment on SSE-optimized functions

This fixes a crash on Win32. Our old GCC 4.7 isn't smart enough to realign the stack by itself: on Win32, the stack alignment is 4 bytes, while GCC assumes 16 bytes when using __m128 vectors. Fixes https://gitlab.freedesktop.org/gstreamer/gst-plugins-base/issues/582. Related to https://bugzilla.gnome.org/show_bug.cgi?id=797092.
author: Nicolas Dufresne <nicolas.dufresne@collabora.com> 2018-09-22 17:27:17 -0400
committer: Nicolas Dufresne <nicolas.dufresne@collabora.com> 2019-04-07 16:31:13 -0400
commit: 27c3b2f53a4461a589173bd24627f95b79a11758 (patch)
tree: 80409bf60298c5a62983425247603dff02bd8c27
parent: 70098b0d2f727f41f48545b1bbe78da0142c97a2 (diff)
2 files changed, 47 insertions, 0 deletions
diff --git a/recipes/opus.recipe b/recipes/opus.recipe
index 7cf592de..e3c78877 100644
--- a/recipes/opus.recipe
+++ b/recipes/opus.recipe
@@ -9,6 +9,8 @@ class Recipe(recipe.Recipe):
     url = 'xiph://.tar.gz'
     tarball_checksum = '4f3d69aefdf2dbaf9825408e452a8a414ffc60494c70633560700398820dc550'
 
+    patches = ['opus/0001-celt-Force-stack-alignment-for-functions-using-__m12.patch']
+
     files_libs = ['libopus']
     files_devel = ['include/opus', 'lib/pkgconfig/opus.pc']
     # Starting with v1.1, Opus has special ARM assembly optimizations when
diff --git a/recipes/opus/0001-celt-Force-stack-alignment-for-functions-using-__m12.patch b/recipes/opus/0001-celt-Force-stack-alignment-for-functions-using-__m12.patch
new file mode 100644
index 00000000..dd3eed1b
--- /dev/null
+++ b/recipes/opus/0001-celt-Force-stack-alignment-for-functions-using-__m12.patch
@@ -0,0 +1,45 @@
+From 19cdfbefcf600c878fd55c144c4dea7186fbc395 Mon Sep 17 00:00:00 2001
+From: Nicolas Dufresne <nicolas.dufresne@collabora.com>
+Date: Sat, 22 Sep 2018 17:21:30 -0400
+Subject: [PATCH] celt: Force stack alignment for functions using __m128
+
+We are using an old GCC for Windows 32-bit builds, and it assumes the
+stack is 16-byte aligned when dealing with __m128 values, which must be
+16-byte aligned. When the stack is not so aligned, movaps crashes. This
+should be fixed in newer versions of GCC.
+---
+ celt/x86/pitch_sse.c | 11 ++++++++++-
+ 1 file changed, 10 insertions(+), 1 deletion(-)
+
+diff --git a/celt/x86/pitch_sse.c b/celt/x86/pitch_sse.c
+index 20e7312..3eee8da 100644
+--- a/celt/x86/pitch_sse.c
++++ b/celt/x86/pitch_sse.c
+@@ -40,6 +40,16 @@
+ #include <xmmintrin.h>
+ #include "arch.h"
+ 
++#ifdef _WIN32
++void xcorr_kernel_sse(const opus_val16 *x, const opus_val16 *y, opus_val32 sum[4], int len) __attribute__((force_align_arg_pointer));
++void dual_inner_prod_sse(const opus_val16 *x, const opus_val16 *y01, const opus_val16 *y02,
++      int N, opus_val32 *xy1, opus_val32 *xy2) __attribute__((force_align_arg_pointer));
++opus_val32 celt_inner_prod_sse(const opus_val16 *x, const opus_val16 *y,
++      int N) __attribute__((force_align_arg_pointer));
++void comb_filter_const_sse(opus_val32 *y, opus_val32 *x, int T, int N,
++      opus_val16 g10, opus_val16 g11, opus_val16 g12) __attribute__((force_align_arg_pointer));
++#endif
++
+ void xcorr_kernel_sse(const opus_val16 *x, const opus_val16 *y, opus_val32 sum[4], int len)
+ {
+    int j;
+@@ -75,7 +85,6 @@ void xcorr_kernel_sse(const opus_val16 *x, const opus_val16 *y, opus_val32 sum[4
+    _mm_storeu_ps(sum,_mm_add_ps(xsum1,xsum2));
+ }
+ 
+-
+ void dual_inner_prod_sse(const opus_val16 *x, const opus_val16 *y01, const opus_val16 *y02,
+       int N, opus_val32 *xy1, opus_val32 *xy2)
+ {
+-- 
+2.17.1
+
author	Nicolas Dufresne <nicolas.dufresne@collabora.com>	2018-09-22 17:27:17 -0400
committer	Nicolas Dufresne <nicolas.dufresne@collabora.com>	2019-04-07 16:31:13 -0400
commit	27c3b2f53a4461a589173bd24627f95b79a11758 (patch)
tree	80409bf60298c5a62983425247603dff02bd8c27
parent	70098b0d2f727f41f48545b1bbe78da0142c97a2 (diff)