From bd99c4980f38b8245a9ff58469c80b56d6f1b8dc Mon Sep 17 00:00:00 2001 From: Nicolas Dufresne Date: Sat, 22 Sep 2018 17:27:17 -0400 Subject: opus: Force stack alignment on sse optimized functions This fixes a crash on Win32. Our old GCC 4.7 isn't smart enough to do it by itself. On Win32, the stack alignment is 4 bytes, while GCC assumes 16 bytes when using __m128 vectors. https://bugzilla.gnome.org/show_bug.cgi?id=797092 --- recipes/opus.recipe | 2 + ...stack-alignment-for-functions-using-__m12.patch | 45 ++++++++++++++++++++++ 2 files changed, 47 insertions(+) create mode 100644 recipes/opus/0001-celt-Force-stack-alignment-for-functions-using-__m12.patch diff --git a/recipes/opus.recipe b/recipes/opus.recipe index 36b3d97a..c969e15e 100644 --- a/recipes/opus.recipe +++ b/recipes/opus.recipe @@ -8,6 +8,8 @@ class Recipe(recipe.Recipe): stype = SourceType.TARBALL url = 'http://downloads.xiph.org/releases/{0}/{0}-{1}.tar.gz'.format(name, version) + patches = ['opus/0001-celt-Force-stack-alignment-for-functions-using-__m12.patch'] + files_libs = ['libopus'] files_devel = ['include/opus', 'lib/pkgconfig/opus.pc'] # Starting with v1.1, Opus has special ARM assembly optimizations when diff --git a/recipes/opus/0001-celt-Force-stack-alignment-for-functions-using-__m12.patch b/recipes/opus/0001-celt-Force-stack-alignment-for-functions-using-__m12.patch new file mode 100644 index 00000000..dd3eed1b --- /dev/null +++ b/recipes/opus/0001-celt-Force-stack-alignment-for-functions-using-__m12.patch @@ -0,0 +1,45 @@ +From 19cdfbefcf600c878fd55c144c4dea7186fbc395 Mon Sep 17 00:00:00 2001 +From: Nicolas Dufresne +Date: Sat, 22 Sep 2018 17:21:30 -0400 +Subject: [PATCH] celt: Force stack alignment for functions using __m128 + +We are using an old GCC for 32-bit Windows builds, and it assumes the +stack will be 16-byte aligned when dealing with __m128, which should be +aligned. As a side effect, when it's not, movaps will crash. This should +be fixed in newer versions of GCC.
+--- + celt/x86/pitch_sse.c | 11 ++++++++++- + 1 file changed, 10 insertions(+), 1 deletion(-) + +diff --git a/celt/x86/pitch_sse.c b/celt/x86/pitch_sse.c +index 20e7312..3eee8da 100644 +--- a/celt/x86/pitch_sse.c ++++ b/celt/x86/pitch_sse.c +@@ -40,6 +40,16 @@ + #include <xmmintrin.h> + #include "arch.h" + ++#ifdef _WIN32 ++void xcorr_kernel_sse(const opus_val16 *x, const opus_val16 *y, opus_val32 sum[4], int len) __attribute__((force_align_arg_pointer)); ++void dual_inner_prod_sse(const opus_val16 *x, const opus_val16 *y01, const opus_val16 *y02, ++ int N, opus_val32 *xy1, opus_val32 *xy2) __attribute__((force_align_arg_pointer)); ++opus_val32 celt_inner_prod_sse(const opus_val16 *x, const opus_val16 *y, ++ int N) __attribute__((force_align_arg_pointer)); ++void comb_filter_const_sse(opus_val32 *y, opus_val32 *x, int T, int N, ++ opus_val16 g10, opus_val16 g11, opus_val16 g12) __attribute__((force_align_arg_pointer)); ++#endif ++ + void xcorr_kernel_sse(const opus_val16 *x, const opus_val16 *y, opus_val32 sum[4], int len) + { + int j; +@@ -75,7 +85,6 @@ void xcorr_kernel_sse(const opus_val16 *x, const opus_val16 *y, opus_val32 sum[4 + _mm_storeu_ps(sum,_mm_add_ps(xsum1,xsum2)); + } + +- + void dual_inner_prod_sse(const opus_val16 *x, const opus_val16 *y01, const opus_val16 *y02, + int N, opus_val32 *xy1, opus_val32 *xy2) + { +-- +2.17.1 + -- cgit v1.2.3