diff options
author | Wim Taymans <wtaymans@redhat.com> | 2016-03-16 12:55:56 +0100 |
---|---|---|
committer | Wim Taymans <wtaymans@redhat.com> | 2016-03-28 11:40:29 +0200 |
commit | 84ed848998279817cb66a3bd4f8172eb5b19b116 (patch) | |
tree | 83a2878de6193b1478b2d982cf5da985ce1fab14 | |
parent | 303da3ce79853a44421a99267b75a234bc56f5fa (diff) |
audio-resampler: small optimizations (resampler)
-rw-r--r-- | gst-libs/gst/audio/audio-resampler-x86.h | 8 | ||||
-rw-r--r-- | gst-libs/gst/audio/audio-resampler.c | 43 |
2 files changed, 25 insertions, 26 deletions
diff --git a/gst-libs/gst/audio/audio-resampler-x86.h b/gst-libs/gst/audio/audio-resampler-x86.h index cb1b854ab..20067fc2d 100644 --- a/gst-libs/gst/audio/audio-resampler-x86.h +++ b/gst-libs/gst/audio/audio-resampler-x86.h @@ -46,7 +46,6 @@ inner_product_gfloat_linear_1_sse (gfloat * o, const gfloat * a, { gint i = 0; __m128 sum[2], t; - __m128 f = _mm_loadu_ps(icoeff); const gfloat *c[2] = {(gfloat*)((gint8*)b + 0*bstride), (gfloat*)((gint8*)b + 1*bstride)}; @@ -60,8 +59,7 @@ inner_product_gfloat_linear_1_sse (gfloat * o, const gfloat * a, sum[0] = _mm_add_ps (sum[0], _mm_mul_ps (t, _mm_load_ps (c[0] + i + 4))); sum[1] = _mm_add_ps (sum[1], _mm_mul_ps (t, _mm_load_ps (c[1] + i + 4))); } - sum[0] = _mm_mul_ps (sum[0], _mm_shuffle_ps (f, f, 0x00)); - sum[1] = _mm_mul_ps (sum[1], _mm_shuffle_ps (f, f, 0x55)); + sum[0] = _mm_mul_ps (_mm_sub_ps (sum[0], sum[1]), _mm_load1_ps (icoeff)); sum[0] = _mm_add_ps (sum[0], sum[1]); sum[0] = _mm_add_ps (sum[0], _mm_movehl_ps (sum[0], sum[0])); sum[0] = _mm_add_ss (sum[0], _mm_shuffle_ps (sum[0], sum[0], 0x55)); @@ -299,7 +297,6 @@ inner_product_gdouble_linear_1_sse2 (gdouble * o, const gdouble * a, { gint i = 0; __m128d sum[2], t; - __m128d f = _mm_loadu_pd (icoeff); const gdouble *c[2] = {(gdouble*)((gint8*)b + 0*bstride), (gdouble*)((gint8*)b + 1*bstride)}; @@ -313,8 +310,7 @@ inner_product_gdouble_linear_1_sse2 (gdouble * o, const gdouble * a, sum[0] = _mm_add_pd (sum[0], _mm_mul_pd (t, _mm_load_pd (c[0] + i + 2))); sum[1] = _mm_add_pd (sum[1], _mm_mul_pd (t, _mm_load_pd (c[1] + i + 2))); } - sum[0] = _mm_mul_pd (sum[0], _mm_shuffle_pd (f, f, _MM_SHUFFLE2 (0, 0))); - sum[1] = _mm_mul_pd (sum[1], _mm_shuffle_pd (f, f, _MM_SHUFFLE2 (1, 1))); + sum[0] = _mm_mul_pd (_mm_sub_pd (sum[0], sum[1]), _mm_load1_pd (icoeff)); sum[0] = _mm_add_pd (sum[0], sum[1]); sum[0] = _mm_add_sd (sum[0], _mm_unpackhi_pd (sum[0], sum[0])); _mm_store_sd (o, sum[0]); diff --git a/gst-libs/gst/audio/audio-resampler.c 
b/gst-libs/gst/audio/audio-resampler.c index 815cb08ce..695cb8eb3 100644 --- a/gst-libs/gst/audio/audio-resampler.c +++ b/gst-libs/gst/audio/audio-resampler.c @@ -471,15 +471,14 @@ interpolate_##type##_linear_c (gpointer op, const gpointer ap, \ { \ gint i; \ type *o = op, *a = ap, *ic = icp; \ - type2 tmp; \ + type2 tmp, c0 = ic[0]; \ const type *c[2] = {(type*)((gint8*)a + 0*astride), \ (type*)((gint8*)a + 1*astride)}; \ \ for (i = 0; i < len; i++) { \ - tmp = (type2)c[0][i] * (type2)ic[0] + \ - (type2)c[1][i] * (type2)ic[1]; \ - tmp = (tmp + ((type2)1 << ((prec) - 1))) >> (prec); \ - o[i] = CLAMP (tmp, -(limit), (limit) - 1); \ + tmp = ((type2)c[0][i] - (type2)c[1][i]) * c0 + \ + (((type2)c[1][i]) << (prec)); \ + o[i] = (tmp + ((type2)1 << ((prec) - 1))) >> (prec); \ } \ } #define INTERPOLATE_FLOAT_LINEAR_FUNC(type) \ @@ -489,11 +488,12 @@ interpolate_##type##_linear_c (gpointer op, const gpointer ap, \ { \ gint i; \ type *o = op, *a = ap, *ic = icp; \ + type c0 = ic[0]; \ const type *c[2] = {(type*)((gint8*)a + 0*astride), \ (type*)((gint8*)a + 1*astride)}; \ \ for (i = 0; i < len; i++) { \ - o[i] = (c[0][i] - c[1][i]) * ic[0] + c[1][i]; \ + o[i] = (c[0][i] - c[1][i]) * c0 + c[1][i]; \ } \ } @@ -509,17 +509,15 @@ interpolate_##type##_cubic_c (gpointer op, const gpointer ap, \ { \ gint i; \ type *o = op, *a = ap, *ic = icp; \ - type2 tmp; \ + type2 tmp, c0 = ic[0], c1 = ic[1], c2 = ic[2], c3 = ic[3]; \ const type *c[4] = {(type*)((gint8*)a + 0*astride), \ (type*)((gint8*)a + 1*astride), \ (type*)((gint8*)a + 2*astride), \ (type*)((gint8*)a + 3*astride)}; \ \ for (i = 0; i < len; i++) { \ - tmp = (type2)c[0][i] * (type2)ic[0] + \ - (type2)c[1][i] * (type2)ic[1] + \ - (type2)c[2][i] * (type2)ic[2] + \ - (type2)c[3][i] * (type2)ic[3]; \ + tmp = (type2)c[0][i] * c0 + (type2)c[1][i] * c1 + \ + (type2)c[2][i] * c2 + (type2)c[3][i] * c3; \ tmp = (tmp + ((type2)1 << ((prec) - 1))) >> (prec); \ o[i] = CLAMP (tmp, -(limit), (limit) - 1); \ } \ @@ -531,14 +529,15 @@ 
interpolate_##type##_cubic_c (gpointer op, const gpointer ap, \ { \ gint i; \ type *o = op, *a = ap, *ic = icp; \ + type c0 = ic[0], c1 = ic[1], c2 = ic[2], c3 = ic[3]; \ const type *c[4] = {(type*)((gint8*)a + 0*astride), \ (type*)((gint8*)a + 1*astride), \ (type*)((gint8*)a + 2*astride), \ (type*)((gint8*)a + 3*astride)}; \ \ for (i = 0; i < len; i++) { \ - o[i] = c[0][i] * ic[0] + c[1][i] * ic[1] + \ - c[2][i] * ic[2] + c[3][i] * ic[3]; \ + o[i] = c[0][i] * c0 + c[1][i] * c1 + \ + c[2][i] * c2 + c[3][i] * c3; \ } \ } @@ -736,7 +735,7 @@ inner_product_##type##_linear_1_c (type * o, const type * a, \ const type * b, gint len, const type *ic, gint bstride) \ { \ gint i; \ - type2 res[4] = { 0, 0, 0, 0 }; \ + type2 res[4] = { 0, 0, 0, 0 }, c0 = ic[0]; \ const type *c[2] = {(type*)((gint8*)b + 0*bstride), \ (type*)((gint8*)b + 1*bstride)}; \ \ @@ -748,8 +747,8 @@ inner_product_##type##_linear_1_c (type * o, const type * a, \ } \ res[0] = (res[0] + res[2]) >> (prec); \ res[1] = (res[1] + res[3]) >> (prec); \ - res[0] = (type2)(type)res[0] * (type2) ic[0] + \ - (type2)(type)res[1] * (type2) ic[1]; \ + res[0] = ((type2)(type)res[0] - (type2)(type)res[1]) * c0 + \ + ((type2)(type)res[1] << (prec)); \ res[0] = (res[0] + ((type2)1 << ((prec) - 1))) >> (prec); \ *o = CLAMP (res[0], -(limit), (limit) - 1); \ } @@ -822,8 +821,9 @@ inner_product_##type##_linear_1_c (type * o, const type * a, \ res[2] += a[i + 1] * c[0][i + 1]; \ res[3] += a[i + 1] * c[1][i + 1]; \ } \ - *o = (res[0] + res[2]) * ic[0] + \ - (res[1] + res[3]) * ic[1]; \ + res[0] += res[2]; \ + res[1] += res[3]; \ + *o = (res[0] - res[1]) * ic[0] + res[1]; \ } INNER_PRODUCT_FLOAT_LINEAR_FUNC (gfloat); INNER_PRODUCT_FLOAT_LINEAR_FUNC (gdouble); @@ -1146,14 +1146,15 @@ setup_functions (GstAudioResampler * resampler) fidx = 0; break; case GST_AUDIO_RESAMPLER_FILTER_INTERPOLATION_LINEAR: - GST_DEBUG ("using linear interpolation filter function"); + GST_DEBUG ("using linear interpolation for filter coefficients"); 
fidx = 0; break; case GST_AUDIO_RESAMPLER_FILTER_INTERPOLATION_CUBIC: - GST_DEBUG ("using cubic interpolation filter function"); + GST_DEBUG ("using cubic interpolation for filter coefficients"); fidx = 4; break; } + GST_DEBUG ("using filter interpolate function %d", index + fidx); resampler->interpolate = interpolate_funcs[index + fidx]; switch (resampler->method) { @@ -1169,10 +1170,12 @@ setup_functions (GstAudioResampler * resampler) break; case GST_AUDIO_RESAMPLER_FILTER_MODE_INTERPOLATED: index += 4 + fidx; + GST_DEBUG ("using interpolated filter function"); break; } break; } + GST_DEBUG ("using resample function %d", index); resampler->resample = resample_funcs[index]; } } |