From bd99c4980f38b8245a9ff58469c80b56d6f1b8dc Mon Sep 17 00:00:00 2001
From: Nicolas Dufresne <nicolas.dufresne@collabora.com>
Date: Sat, 22 Sep 2018 17:27:17 -0400
Subject: opus: Force stack alignment on sse optimized functions

This fixes a crash on Win32. Our old GCC 4.7 isn't smart enough to realign
the stack by itself. On Win32, the stack is only 4-byte aligned, while GCC
assumes 16-byte alignment when using __m128 vectors.

https://bugzilla.gnome.org/show_bug.cgi?id=797092
---
 recipes/opus.recipe                                |  2 +
 ...stack-alignment-for-functions-using-__m12.patch | 45 ++++++++++++++++++++++
 2 files changed, 47 insertions(+)
 create mode 100644 recipes/opus/0001-celt-Force-stack-alignment-for-functions-using-__m12.patch

diff --git a/recipes/opus.recipe b/recipes/opus.recipe
index 36b3d97a..c969e15e 100644
--- a/recipes/opus.recipe
+++ b/recipes/opus.recipe
@@ -8,6 +8,8 @@ class Recipe(recipe.Recipe):
     stype = SourceType.TARBALL
     url = 'http://downloads.xiph.org/releases/{0}/{0}-{1}.tar.gz'.format(name, version)
 
+    patches = ['opus/0001-celt-Force-stack-alignment-for-functions-using-__m12.patch']
+
     files_libs = ['libopus']
     files_devel = ['include/opus', 'lib/pkgconfig/opus.pc']
     # Starting with v1.1, Opus has special ARM assembly optimizations when
diff --git a/recipes/opus/0001-celt-Force-stack-alignment-for-functions-using-__m12.patch b/recipes/opus/0001-celt-Force-stack-alignment-for-functions-using-__m12.patch
new file mode 100644
index 00000000..dd3eed1b
--- /dev/null
+++ b/recipes/opus/0001-celt-Force-stack-alignment-for-functions-using-__m12.patch
@@ -0,0 +1,45 @@
+From 19cdfbefcf600c878fd55c144c4dea7186fbc395 Mon Sep 17 00:00:00 2001
+From: Nicolas Dufresne <nicolas.dufresne@collabora.com>
+Date: Sat, 22 Sep 2018 17:21:30 -0400
+Subject: [PATCH] celt: Force stack alignment for functions using __m128
+
+We are using an old GCC for Windows 32-bit builds, and it assumes the
+stack will be 16-byte aligned when dealing with __m128, which should be
+aligned. As a side effect, when it's not, movaps will crash. This should
+be fixed in newer versions of GCC.
+---
+ celt/x86/pitch_sse.c | 11 ++++++++++-
+ 1 file changed, 10 insertions(+), 1 deletion(-)
+
+diff --git a/celt/x86/pitch_sse.c b/celt/x86/pitch_sse.c
+index 20e7312..3eee8da 100644
+--- a/celt/x86/pitch_sse.c
++++ b/celt/x86/pitch_sse.c
+@@ -40,6 +40,16 @@
+ #include <xmmintrin.h>
+ #include "arch.h"
+ 
++#ifdef _WIN32
++void xcorr_kernel_sse(const opus_val16 *x, const opus_val16 *y, opus_val32 sum[4], int len) __attribute__((force_align_arg_pointer));
++void dual_inner_prod_sse(const opus_val16 *x, const opus_val16 *y01, const opus_val16 *y02,
++      int N, opus_val32 *xy1, opus_val32 *xy2) __attribute__((force_align_arg_pointer));
++opus_val32 celt_inner_prod_sse(const opus_val16 *x, const opus_val16 *y,
++      int N) __attribute__((force_align_arg_pointer));
++void comb_filter_const_sse(opus_val32 *y, opus_val32 *x, int T, int N,
++      opus_val16 g10, opus_val16 g11, opus_val16 g12) __attribute__((force_align_arg_pointer));
++#endif
++
+ void xcorr_kernel_sse(const opus_val16 *x, const opus_val16 *y, opus_val32 sum[4], int len)
+ {
+    int j;
+@@ -75,7 +85,6 @@ void xcorr_kernel_sse(const opus_val16 *x, const opus_val16 *y, opus_val32 sum[4
+    _mm_storeu_ps(sum,_mm_add_ps(xsum1,xsum2));
+ }
+ 
+-
+ void dual_inner_prod_sse(const opus_val16 *x, const opus_val16 *y01, const opus_val16 *y02,
+       int N, opus_val32 *xy1, opus_val32 *xy2)
+ {
+-- 
+2.17.1
+
-- 
cgit v1.2.3