summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--ChangeLog8
-rw-r--r--src/Makefile.am5
-rw-r--r--src/arm_vfp_synt.S102
-rw-r--r--src/mp3-c-synth.c13
4 files changed, 124 insertions, 4 deletions
diff --git a/ChangeLog b/ChangeLog
index 1b49292..89e6a6f 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,11 @@
+2011-11-20 Josep Torra <josep@fluendo.com>
+
+ * src/Makefile.am:
+ * src/arm_vfp_synt.S:
+ * src/mp3-c-synth.c: (mp3_dewindow_output), (mp3_SubBandSynthesis):
+ Initial attempt to rewrite part of the subband synthesis in ARM/VFP
+ assembly.
+
2011-11-18 Josep Torra <josep@fluendo.com>
* src/mp3-c-synth.c: (mp3_dewindow_output), (mp3_SubBandSynthesis):
diff --git a/src/Makefile.am b/src/Makefile.am
index a51dc49..3637fe9 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -5,6 +5,10 @@ if USE_IPP
SOURCE_FILES += mp3-ipp.c
endif
+if USE_ARM_VFP
+SOURCE_FILES += arm_vfp_synt.S
+endif
+
libgstflump3dec_la_SOURCES = \
flump3dec.c \
bitstream.c \
@@ -35,3 +39,4 @@ libgstflump3dec_la_LDFLAGS = $(GST_PLUGIN_LDFLAGS) $(CPU_TUNE_LDFLAGS)
libgstflump3dec_la_CFLAGS = \
$(GST_ERROR) $(GST_CFLAGS) $(CPU_TUNE_CFLAGS) \
$(IPP_INCLUDES) $(LIBOIL_CFLAGS)
+libgstflump3dec_la_CCASFLAGS = $(CPU_TUNE_CCASFLAGS)
diff --git a/src/arm_vfp_synt.S b/src/arm_vfp_synt.S
new file mode 100644
index 0000000..ada8329
--- /dev/null
+++ b/src/arm_vfp_synt.S
@@ -0,0 +1,102 @@
+/*
+ * FLUENDO S.A.
+ * Copyright (C) <2005 - 2011> <support@fluendo.com>
+ */
+
+#if defined(__VFP_FP__) && !defined(__SOFTFP__)
+
+/* mp3_dewindow_output (gfloat *uvec, short *samples, const gfloat* window)
+ *
+ * uvec: $r0 | samples: $r1 | window: $r2
+ *
+ * acc[0..31] = uvec[0..31] * window[0..31] + uvec[32..63] * window[32..63] +
+ * ...
+ * + uvec[448..479] * window[448..479] + uvec[480..511] * window[480..511]
+ * samples[0..31] = convert_round_to_short(acc[0..31] * SCALE)
+ *
+ * lr: original fpscr
+ */
+ .SCALE:
+ .word 1191182336 /* 0x47000000: IEEE-754 single-precision 32768.0 */
+ .C_0dot5:
+ .word 1056964608 /* 0x3F000000: IEEE-754 single-precision 0.5 */
+
+/*
+ * CONVERT_TO_INTEGER(v, d): round the single-precision value in register v
+ * to the nearest integer (halves away from zero) and leave the 32-bit
+ * integer result in register d.
+ *
+ * v is compared against zero; 0.5 is added when v > 0 and subtracted
+ * otherwise, then the sum is converted with round-towards-zero.
+ *
+ * Reads s2, which must already hold 0.5, and overwrites the ARM condition
+ * flags (fmstat). Every use in this file passes s4-s7 as d; a destination in
+ * s0-s7 keeps the operation scalar while the FPSCR vector length is non-zero.
+ */
+#define CONVERT_TO_INTEGER(v,d) \
+ fcmpezs v ; \
+ fmstat; \
+ faddsgt d, v, s2; /* v > 0 ? d = v + 0.5 */ \
+ fsubsle d, v, s2; /* v <= 0 ? d = v - 0.5 */ \
+ ftosizs d, d
+
+ /*
+  * mp3_dewindow_output: r0 = uvec (512 floats), r1 = samples (32 shorts),
+  * r2 = window (512 floats).
+  *
+  * For each of the 32 output columns, sums uvec[c + 32*k] * window[c + 32*k]
+  * over k = 0..15, multiplies the sum by SCALE, rounds it to an integer,
+  * saturates to signed 16 bits and stores it to samples[c]. uvec and window
+  * are only read.
+  *
+  * Runs with the VFP vector length set to 8, so one vector instruction
+  * handles 8 columns; the outer loop makes 4 passes. The caller's fpscr is
+  * kept in lr and restored before returning. s16-s31 are used as vector
+  * banks and are callee-saved under the AAPCS, so d8-d15 are pushed on entry
+  * and popped on exit.
+  */
+ .global mp3_dewindow_output ;
+ mp3_dewindow_output:
+ stmdb sp!, {r4-r8, fp, lr}; /* save core registers to stack */
+ fstmdbd sp!, {d8-d15}; /* save callee-saved s16-s31, clobbered below */
+ fmrx lr, fpscr; /* keep the caller's fpscr in lr */
+ mov fp, #7;
+ orr fp, lr, fp, lsl #16; /* LEN field = 7: vector length 8 */
+ fmxr fpscr, fp;
+ mov fp, #4; /* outer counter: 4 passes of 8 samples */
+ flds s1, .SCALE /* load SCALE constant */
+ flds s2, .C_0dot5 /* load 0.5 constant, used by CONVERT_TO_INTEGER */
+
+ mp3_dewindow_output_loop:
+ mov r8, r2; /* r8 = current column group in win */
+ mov r3, r0; /* r3 = current column group in uvec */
+ fldmias r2!, {s8-s15}; /* 8 win values of row 0; r2 advances 8 floats */
+ fldmias r0!, {s16-s23}; /* 8 uvec values of row 0; r0 advances 8 floats */
+ fmuls s24, s8, s16; /* s24..s31 = win * uvec for row 0 */
+
+ mov ip, #15; /* 15 remaining rows of 32 floats */
+ mp3_dewindow_output_mac_loop:
+ add r8, r8, #128 /* r8 += 32 floats: same columns, next win row */
+ add r3, r3, #128 /* r3 += 32 floats: same columns, next uvec row */
+ fldmias r8, {s8-s15}; /* 8 win values of this row */
+ fldmias r3, {s16-s23}; /* 8 uvec values of this row */
+ fmacs s24, s8, s16; /* s24..s31 += win * uvec for this row */
+ subs ip, ip, #1;
+ bne mp3_dewindow_output_mac_loop;
+
+ /* Scale result */
+ fmuls s8, s24, s1; /* s8..s15 = s24..s31 * SCALE (s1 used as scalar) */
+
+ /* Round, saturate to 16 bits and write the first 4 samples */
+ CONVERT_TO_INTEGER(s8,s4);
+ CONVERT_TO_INTEGER(s9,s5);
+ CONVERT_TO_INTEGER(s10,s6);
+ CONVERT_TO_INTEGER(s11,s7);
+ fmrrs r4, r5, {s4, s5}
+ fmrrs r6, r7, {s6, s7}
+ ssat r4, #16, r4
+ ssat r5, #16, r5
+ ssat r6, #16, r6
+ ssat r7, #16, r7
+ strh r4, [r1, #0];
+ strh r5, [r1, #2];
+ strh r6, [r1, #4];
+ strh r7, [r1, #6];
+ add r1, r1, #8; /* samples += 4 shorts */
+
+ /* Round, saturate to 16 bits and write the next 4 samples */
+ CONVERT_TO_INTEGER(s12,s4);
+ CONVERT_TO_INTEGER(s13,s5);
+ CONVERT_TO_INTEGER(s14,s6);
+ CONVERT_TO_INTEGER(s15,s7);
+ fmrrs r4, r5, {s4, s5}
+ fmrrs r6, r7, {s6, s7}
+ ssat r4, #16, r4
+ ssat r5, #16, r5
+ ssat r6, #16, r6
+ ssat r7, #16, r7
+ strh r4, [r1, #0];
+ strh r5, [r1, #2];
+ strh r6, [r1, #4];
+ strh r7, [r1, #6];
+ add r1, r1, #8; /* samples += 4 shorts */
+
+ subs fp, fp, #1;
+ bne mp3_dewindow_output_loop;
+
+ fmxr fpscr, lr; /* restore original fpscr */
+ fldmiad sp!, {d8-d15}; /* restore callee-saved s16-s31 */
+ ldmia sp!, {r4-r8, fp, pc}; /* restore core registers and return */
+
+#endif /* defined(__VFP_FP__) && !defined(__SOFTFP__) */
diff --git a/src/mp3-c-synth.c b/src/mp3-c-synth.c
index e9ab181..0f37b33 100644
--- a/src/mp3-c-synth.c
+++ b/src/mp3-c-synth.c
@@ -48,15 +48,19 @@ static void MPG_DCT_32 (gfloat in[32], gfloat out[32]);
static void MPG_DCT_16 (gfloat in[16], gfloat out[16]);
static void MPG_DCT_8 (gfloat in[8], gfloat out[8]);
+#if defined(__VFP_FP__) && !defined(__SOFTFP__)
+/* Implemented in arm_vfp_synt.S: writes 32 output values to samples; the
+ * window table is only read, hence const. */
+extern void mp3_dewindow_output (gfloat *u_vec, short *samples,
+ const gfloat *window);
+#else
static inline void
-mp3_dewindow_output (gfloat *u_vec, short *samples)
+mp3_dewindow_output (gfloat *u_vec, short *samples, gfloat* window)
{
gint i;
gfloat *u_vec0;
/* dewindowing */
#ifdef USE_LIBOIL
- oil_multiply_f32 (u_vec, u_vec, dewindow, HAN_SIZE);
+ oil_multiply_f32 (u_vec, u_vec, window, HAN_SIZE);
#else
for (i = 0; i < HAN_SIZE; i++)
u_vec[i] *= dewindow[i];
@@ -110,6 +114,7 @@ mp3_dewindow_output (gfloat *u_vec, short *samples)
}
}
}
+#endif
void
mp3_SubBandSynthesis (mp3tl * tl ATTR_UNUSED, frame_params * fr_ps,
@@ -169,9 +174,9 @@ mp3_SubBandSynthesis (mp3tl * tl ATTR_UNUSED, frame_params * fr_ps,
}
#endif
- mp3_dewindow_output (u_vec, samples);
-
fr_ps->bufOffset[channel] = buf_offset;
+
+ mp3_dewindow_output (u_vec, samples, (gfloat*) dewindow);
}
/* Synthesis matrixing variant which uses a 32 point DCT to compute