summaryrefslogtreecommitdiff
path: root/src/arm_vfp_synt.S
blob: ada8329203c9a883b0369472c6c44b9cc591ce83 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
/*
 * FLUENDO S.A.
 * Copyright (C) <2005 - 2011>  <support@fluendo.com>
 */

#if defined(__VFP_FP__) && !defined(__SOFTFP__)

/* mp3_dewindow_output (gfloat *uvec, short *samples, const gfloat* window)
 *
 * uvec: $r0    | samples: $r1 | window: $r2
 *
 * acc[0..31] = uvec[0..31] * window[0..31] + uvec[32..63] * window[32..63] +
 * ...
 * + uvec[448..479] * window[448..479] + uvec[480..511] * window[480..511]
 * samples[0..31] = convert_round_to_short(acc[0..31] * SCALE)
 *
 * The sums are kept in VFP registers; uvec itself is only read.
 *
 * lr: original fpscr
 */
  /* Single-precision literals, stored as raw IEEE-754 bit patterns and
   * fetched PC-relative (flds) by mp3_dewindow_output. */
  .SCALE:
  .word 0x47000000                      /* 32768.0f: float -> 16-bit PCM gain */
  .C_0dot5:
  .word 0x3F000000                      /* 0.5f: rounding bias */

/* CONVERT_TO_INTEGER(src, dst)
 *
 * dst = (signed 32-bit integer) src, rounded half away from zero:
 * a bias of 0.5 is added (src > 0) or subtracted (otherwise) and the result
 * is then converted with round-toward-zero.
 *
 * Preconditions that the macro itself does not establish:
 *   - s2 must already hold 0.5f.
 *   - The call sites in this file pass dst in s4-s7 (VFP bank 0), so the
 *     add/sub stays a scalar operation while FPSCR.LEN selects short vectors.
 * Side effect: the ARM condition flags are overwritten (fmstat).
 */
#define CONVERT_TO_INTEGER(src,dst)                                           \
    fcmpezs       src ;               /* compare src with 0.0 */              \
    fmstat ;                          /* VFP flags -> ARM flags */            \
    fsubsle       dst, src, s2 ;      /* src <= 0: dst = src - 0.5 */         \
    faddsgt       dst, src, s2 ;      /* src >  0: dst = src + 0.5 */         \
    ftosizs       dst, dst            /* truncate toward zero to int32 */

  /*
   * mp3_dewindow_output (gfloat *uvec, short *samples, const gfloat *window)
   *
   * In:    r0 = uvec    (read only, 512 floats are accessed)
   *        r1 = samples (32 signed 16-bit values are written)
   *        r2 = window  (read only, 512 floats are accessed)
   * Out:   nothing is placed in r0 as a result.
   *
   * For each of the 32 output positions i:
   *   acc        = sum over k = 0..15 of uvec[i + 32*k] * window[i + 32*k]
   *   samples[i] = saturate16 (round_half_away_from_zero (acc * 32768.0))
   *
   * The work is done in 4 passes of 8 positions using VFP short vectors
   * (FPSCR.LEN = 7).  Register roles:
   *   s1       = SCALE (32768.0)        s2       = 0.5
   *   s4-s7    = scalar scratch for the float -> int conversion
   *   s8-s15   = window slice, later the scaled results
   *   s16-s23  = uvec slice
   *   s24-s31  = accumulators
   *   lr       = caller's FPSCR          fp = outer pass counter
   *   r3 / r8  = uvec / window cursors   ip = inner MAC counter
   *
   * ABI notes: s16-s31 (d8-d15) are callee-saved under AAPCS, so they are
   * spilled on entry and reloaded on exit.  FPSCR is restored before
   * returning, which puts LEN/STRIDE back to the caller's values.
   * Clobbers: r0-r2, ip, s1, s2, s4-s15, ARM and VFP condition flags.
   */
  .arm                                  /* conditional VFP ops below need ARM state */
  .p2align 2
  .global mp3_dewindow_output ;
  .type mp3_dewindow_output, %function  /* lets the linker interwork Thumb callers */
  mp3_dewindow_output:
    stmdb         sp!, {r3-r8, fp, lr}  /* r3 is padding only: 8 words keep sp 8-byte aligned */
    fstmdbd       sp!, {d8-d15}         /* preserve callee-saved s16-s31 */
    fmrx          lr, fpscr             /* lr = caller's FPSCR, kept until exit */
    bic           fp, lr, #0x00370000   /* clear LEN [18:16] and STRIDE [21:20] */
    orr           fp, fp, #0x00070000   /* LEN = 7: vector length 8, stride 1 */
    fmxr          fpscr, fp
    mov           fp, #4                /* 4 passes x 8 samples = 32 samples */
    flds          s1, .SCALE            /* s1 = SCALE */
    flds          s2, .C_0dot5          /* s2 = 0.5, used by CONVERT_TO_INTEGER */

  mp3_dewindow_output_loop:
    mov           r8, r2                /* r8 = window cursor for this pass */
    mov           r3, r0                /* r3 = uvec cursor for this pass */
    fldmias       r2!, {s8-s15}         /* first window slice; r2 += 8 floats */
    fldmias       r0!, {s16-s23}        /* first uvec slice;   r0 += 8 floats */
    fmuls         s24, s8, s16          /* s24..s31 = s8..s15 * s16..s23 */

    mov           ip, #15               /* 15 remaining 32-float rows */
  mp3_dewindow_output_mac_loop:
    add           r8, r8, #128          /* next row: window cursor += 32 floats */
    add           r3, r3, #128          /* next row: uvec cursor   += 32 floats */
    fldmias       r8, {s8-s15}          /* window slice of this row */
    fldmias       r3, {s16-s23}         /* uvec slice of this row */
    fmacs         s24, s8, s16          /* s24..s31 += s8..s15 * s16..s23 */
    subs          ip, ip, #1
    bne           mp3_dewindow_output_mac_loop

    /* Scale: s8..s15 = s24..s31 * SCALE (s1 lives in bank 0, so it is
     * applied as a scalar operand to every vector element). */
    fmuls         s8, s24, s1

    /* Convert, saturate to 16 bits and store samples 0..3 of this pass */
    CONVERT_TO_INTEGER(s8,s4);
    CONVERT_TO_INTEGER(s9,s5);
    CONVERT_TO_INTEGER(s10,s6);
    CONVERT_TO_INTEGER(s11,s7);
    fmrrs         r4, r5, {s4, s5}
    fmrrs         r6, r7, {s6, s7}
    ssat          r4, #16, r4
    ssat          r5, #16, r5
    ssat          r6, #16, r6
    ssat          r7, #16, r7
    strh          r4, [r1, #0]
    strh          r5, [r1, #2]
    strh          r6, [r1, #4]
    strh          r7, [r1, #6]
    add           r1, r1, #8

    /* Convert, saturate to 16 bits and store samples 4..7 of this pass */
    CONVERT_TO_INTEGER(s12,s4);
    CONVERT_TO_INTEGER(s13,s5);
    CONVERT_TO_INTEGER(s14,s6);
    CONVERT_TO_INTEGER(s15,s7);
    fmrrs         r4, r5, {s4, s5}
    fmrrs         r6, r7, {s6, s7}
    ssat          r4, #16, r4
    ssat          r5, #16, r5
    ssat          r6, #16, r6
    ssat          r7, #16, r7
    strh          r4, [r1, #0]
    strh          r5, [r1, #2]
    strh          r6, [r1, #4]
    strh          r7, [r1, #6]
    add           r1, r1, #8

    subs          fp, fp, #1
    bne           mp3_dewindow_output_loop

    fmxr          fpscr, lr             /* restore caller's FPSCR */
    fldmiad       sp!, {d8-d15}         /* restore callee-saved s16-s31 */
    ldmia         sp!, {r3-r8, fp, pc}  /* restore core registers and return */
  .size mp3_dewindow_output, . - mp3_dewindow_output

#endif /* defined(__VFP_FP__) && !defined(__SOFTFP__) */