1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
|
/*
* FLUENDO S.A.
* Copyright (C) <2005 - 2011> <support@fluendo.com>
*/
#if defined(__VFP_FP__) && !defined(__SOFTFP__)
/* mp3_dewindow_output (gfloat *uvec, short *samples, const gfloat* window)
*
* uvec: $r0 | samples: $r1 | window: $r2
*
* uvec[0..31] = uvec [0..31] * window [0..31] + uvec[32..63] * window[32..63] +
* ...
* + uvec[448..479] * window[448..479] + uvec[480..511] * window[480..511]
* samples[0..31] = convert_round_to_short(uvec[0..31])
*
* lr: original fpscr
*/
/*
 * Single-precision literals, stored as raw IEEE-754 bit patterns so they
 * can be fetched with a PC-relative flds.
 */
.SCALE:
        .word   0x47000000              /* 32768.0f */
.C_0dot5:
        .word   0x3F000000              /* 0.5f */
/*
 * CONVERT_TO_INTEGER(src, dst)
 *
 * Round the single-precision value in VFP register `src` to the nearest
 * integer, halves away from zero, and leave the signed integer bit pattern
 * in VFP register `dst`.
 *
 * Expects s2 to hold the rounding offset (the caller in this file loads 0.5
 * into it). Overwrites the ARM condition flags through fmstat.
 */
#define CONVERT_TO_INTEGER(src,dst) \
fcmpezs src ; /* compare src against zero */ \
fmstat ; /* copy the VFP comparison flags into the ARM flags */ \
faddsgt dst, src, s2 ; /* src > 0: dst = src + s2 */ \
fsubsle dst, src, s2 ; /* otherwise: dst = src - s2 */ \
ftosizs dst, dst /* float -> signed int, rounding toward zero */
.global mp3_dewindow_output
/*
 * mp3_dewindow_output (gfloat *uvec, short *samples, const gfloat *window)
 *
 * In:    r0 = uvec, r1 = samples, r2 = window
 * Out:   32 saturated 16-bit samples stored at samples[0..31]
 *
 * For each output index i in 0..31 the routine accumulates
 *     sum(k = 0..15) uvec[i + 32*k] * window[i + 32*k]
 * in VFP registers, multiplies the sum by .SCALE, rounds it to the nearest
 * integer (halves away from zero), saturates to 16 bits and stores it.
 * Nothing is written back to uvec or window.
 *
 * The arithmetic uses VFP short vectors of length 8, so the outer loop runs
 * 4 times and produces 8 samples per pass.
 *
 * Register roles:
 *   lr      original FPSCR, restored before returning
 *   fp      outer loop counter (4 passes)
 *   ip      inner multiply-accumulate counter (15 passes)
 *   r3, r8  walking uvec / window pointers for the inner loop
 *   r4-r7   integer samples on their way to memory
 *   s1, s2  scalars: .SCALE and 0.5
 *   s4-s7   scalar scratch for the float -> int conversion
 *   s8-s15  window row, later the scaled results
 *   s16-s23 uvec row
 *   s24-s31 accumulators
 *
 * Stack: 28 bytes of core registers + 64 bytes for d8-d15.
 * s16-s31 (d8-d15) are callee-saved under the ARM procedure call standard,
 * so they are saved on entry and restored on exit; s0-s15 are caller-saved
 * and are clobbered freely.
 */
mp3_dewindow_output:
        stmdb   sp!, {r4-r8, fp, lr}    /* save core callee-saved registers */
        fstmdbd sp!, {d8-d15}           /* save callee-saved VFP regs s16-s31 */

        fmrx    lr, fpscr               /* lr = caller's FPSCR */
        mov     fp, #7
        orr     fp, lr, fp, lsl #16     /* LEN field = 7 -> vector length 8 */
        fmxr    fpscr, fp

        mov     fp, #4                  /* 4 passes x 8 samples = 32 samples */
        flds    s1, .SCALE              /* s1 = scale factor (scalar bank) */
        flds    s2, .C_0dot5            /* s2 = 0.5, used by CONVERT_TO_INTEGER */

mp3_dewindow_output_loop:
        mov     r8, r2                  /* r8 = &window[i] for this pass */
        mov     r3, r0                  /* r3 = &uvec[i] for this pass */
        fldmias r2!, {s8-s15}           /* window[i..i+7]; r2 advances to next pass */
        fldmias r0!, {s16-s23}          /* uvec[i..i+7];   r0 advances to next pass */
        fmuls   s24, s8, s16            /* s24..s31 = window[i..i+7] * uvec[i..i+7] */
        mov     ip, #15                 /* 15 further rows, 32 floats apart */

mp3_dewindow_output_mac_loop:
        add     r8, r8, #128            /* next window row (+32 floats) */
        add     r3, r3, #128            /* next uvec row   (+32 floats) */
        fldmias r8, {s8-s15}
        fldmias r3, {s16-s23}
        fmacs   s24, s8, s16            /* s24..s31 += window row * uvec row */
        subs    ip, ip, #1
        bne     mp3_dewindow_output_mac_loop

        /* Scale the 8 sums; s1 sits in the scalar bank, so this is vector * scalar */
        fmuls   s8, s24, s1             /* s8..s15 = s24..s31 * SCALE */

        /* Convert, saturate and store samples 0..3 of this pass */
        CONVERT_TO_INTEGER(s8,s4);
        CONVERT_TO_INTEGER(s9,s5);
        CONVERT_TO_INTEGER(s10,s6);
        CONVERT_TO_INTEGER(s11,s7);
        fmrrs   r4, r5, {s4, s5}        /* move the integers to core registers */
        fmrrs   r6, r7, {s6, s7}
        ssat    r4, #16, r4             /* clamp to the signed 16-bit range */
        ssat    r5, #16, r5
        ssat    r6, #16, r6
        ssat    r7, #16, r7
        strh    r4, [r1, #0]
        strh    r5, [r1, #2]
        strh    r6, [r1, #4]
        strh    r7, [r1, #6]
        add     r1, r1, #8              /* samples += 4 */

        /* Convert, saturate and store samples 4..7 of this pass */
        CONVERT_TO_INTEGER(s12,s4);
        CONVERT_TO_INTEGER(s13,s5);
        CONVERT_TO_INTEGER(s14,s6);
        CONVERT_TO_INTEGER(s15,s7);
        fmrrs   r4, r5, {s4, s5}
        fmrrs   r6, r7, {s6, s7}
        ssat    r4, #16, r4
        ssat    r5, #16, r5
        ssat    r6, #16, r6
        ssat    r7, #16, r7
        strh    r4, [r1, #0]
        strh    r5, [r1, #2]
        strh    r6, [r1, #4]
        strh    r7, [r1, #6]
        add     r1, r1, #8              /* samples += 4 */

        subs    fp, fp, #1
        bne     mp3_dewindow_output_loop

        fmxr    fpscr, lr               /* restore the caller's FPSCR (vector length included) */
        fldmiad sp!, {d8-d15}           /* restore callee-saved VFP regs s16-s31 */
        ldmia   sp!, {r4-r8, fp, pc}    /* restore core registers and return */
#endif /* defined(__VFP_FP__) && !defined(__SOFTFP__) */
|