1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
|
.file "nr-compose.c"
# Ensure Inkscape is execshield protected
.section .note.GNU-stack
.previous
.text
.align 2
.globl nr_mmx_R8G8B8A8_P_R8G8B8A8_P_A8_RGBAP
.type nr_mmx_R8G8B8A8_P_R8G8B8A8_P_A8_RGBAP,@function
/*
 * This code is in public domain
 *
 * Composite a single premultiplied colour over a 4-byte-per-pixel
 * destination through a 1-byte-per-pixel coverage mask, using MMX.
 *
 * Syntax: GNU as, AT&T operand order.  Target: 32-bit x86 with MMX.
 * Arguments are read from the stack relative to %ebp (cdecl-style).
 * Presumed C prototype (not visible in this file - confirm against
 * the header):
 *   void f (unsigned char *px, int w, int h, int rs,
 *           const unsigned char *spx, int srs, const unsigned char *c);
 *
 * Stack arguments:
 *   c    32(%ebp)  pointer to the 4-byte colour; byte 3 is its alpha
 *   srs  28(%ebp)  byte stride between mask rows
 *   spx  24(%ebp)  first mask byte (advanced by srs in place per row)
 *   rs   20(%ebp)  byte stride between destination rows
 *   h    16(%ebp)  number of rows
 *   w    12(%ebp)  number of pixels per row
 *   px    8(%ebp)  first destination pixel (advanced by rs in place)
 *
 * Nothing is done (apart from emms) when w <= 0 or h <= 0.
 *
 * Per pixel, with mask byte m and div255(x) = (x+128 + ((x+128)>>8)) >> 8:
 *   m == 0                     destination left untouched
 *   m == 255 and alpha == 255  destination = colour
 *   otherwise                  Fg  = div255 (colour * m)
 *                              dst = Fg + div255 ((255 - FgA) * dst)
 *                              (saturated to 0..255 per channel)
 *
 * Integer registers:
 *   %esi  s, current mask byte
 *   %edi  d, current destination pixel
 *   %ebx  x, pixels left in the row
 *   %ecx  y, rows left
 *   %eax  scratch
 * 36 bytes of locals are reserved below the saved %ebx but this routine
 * never reads or writes them.
 *
 * MMX registers (four 16-bit lanes each):
 *   mm0  Fg colour
 *   mm1  MMMM, then [255 - FgA] x4
 *   mm2  FgM (colour scaled by mask)
 *   mm3  scratch / result
 *   mm4  scratch
 *   mm5  255 x4
 *   mm6  128 x4
 *   mm7  0
 *
 * Clobbers %eax, %ecx, mm0-mm7, flags; %ebx/%esi/%edi/%ebp are restored.
 * emms is executed before returning.
 */
nr_mmx_R8G8B8A8_P_R8G8B8A8_P_A8_RGBAP:
        pushl   %ebp
        movl    %esp, %ebp
        pushl   %ebx
        subl    $36, %esp
        pushl   %edi
        pushl   %esi

/* Nothing to draw for an empty rectangle.  The dec/jnz loops below     */
/* assume a count of at least 1; a zero count would wrap around.        */
        cmpl    $0, 16(%ebp)
        jle     .exit
        cmpl    $0, 12(%ebp)
        jle     .exit

/* Load %mm7 with [0 0 0 0] */
        pxor    %mm7, %mm7
/* Load %mm6 with [128 128 128 128] */
        movl    $0x80808080, %eax
        movd    %eax, %mm6
        punpcklbw %mm7, %mm6
/* Load %mm5 with [255 255 255 255] */
        movl    $0xffffffff, %eax
        movd    %eax, %mm5
        punpcklbw %mm7, %mm5
/* FgC -> %mm0 (bytes widened to words) */
        movl    32(%ebp), %eax
        movd    (%eax), %mm0
        punpcklbw %mm7, %mm0
/* Check full opacity: test the colour's alpha byte c[3].               */
/* %eax still holds the pointer c here, so %al would be an address byte */
        cmpb    $0xff, 3(%eax)
        jz      .opaque

/* ---- General path: colour alpha is not 255 ------------------------- */
/* for (y = ...) */
        movl    16(%ebp), %ecx
.fory:
/* d = px */
/* s = spx */
        movl    8(%ebp), %edi
        movl    24(%ebp), %esi
/* for (x = ...) */
        movl    12(%ebp), %ebx
.forx:
/* [m m m m] -> %mm1; a zero mask leaves the pixel alone */
        movzbl  (%esi), %eax
        testb   $0xff, %al
        jz      .clip
        movd    %eax, %mm1
        punpcklwd %mm1, %mm1
        punpckldq %mm1, %mm1
/* Fg -> mm2: div255 (colour * m) */
        movq    %mm0, %mm2
        pmullw  %mm1, %mm2
        paddw   %mm6, %mm2
        movq    %mm2, %mm3
        psrlw   $8, %mm3
        paddw   %mm3, %mm2
        psrlw   $8, %mm2
/* [255 - FgA] -> mm1: broadcast the top (alpha) word, then xor with 255 */
        movq    %mm2, %mm1
        punpckhwd %mm1, %mm1
        punpckhdq %mm1, %mm1
        pxor    %mm5, %mm1
/* Bg -> mm3 */
        movd    (%edi), %mm3
        punpcklbw %mm7, %mm3
/* Fg + ((255 - FgA) * Bg) / 255 */
        pmullw  %mm1, %mm3
        paddw   %mm6, %mm3
        movq    %mm3, %mm4
        psrlw   $8, %mm4
        paddw   %mm4, %mm3
        psrlw   $8, %mm3
        paddw   %mm2, %mm3
/* Store pixel (words packed back to bytes with unsigned saturation) */
        packuswb %mm3, %mm3
        movd    %mm3, (%edi)
.clip:
        addl    $4, %edi
        incl    %esi
        decl    %ebx
        jnz     .forx
/* Next row: px += rs, spx += srs (updated in the argument slots) */
        movl    20(%ebp), %eax
        addl    %eax, 8(%ebp)
        movl    28(%ebp), %eax
        addl    %eax, 24(%ebp)
        decl    %ecx
        jnz     .fory

.exit:
        emms
        popl    %esi
        popl    %edi
        addl    $36, %esp
        popl    %ebx
        popl    %ebp
        ret

/* ---- Opaque path: colour alpha is 255 ------------------------------ */
/* Same as above, plus a shortcut that copies the colour when m == 255. */
.opaque:
/* for (y = ...) */
        movl    16(%ebp), %ecx
.o_fory:
/* d = px */
/* s = spx */
        movl    8(%ebp), %edi
        movl    24(%ebp), %esi
/* for (x = ...) */
        movl    12(%ebp), %ebx
.o_forx:
/* [m m m m] -> %mm1; m == 0 skips the pixel, m == 255 copies the colour */
        movzbl  (%esi), %eax
        testb   $0xff, %al
        jz      .o_clip
        cmpb    $0xff, %al
        jz      .o_full
        movd    %eax, %mm1
        punpcklwd %mm1, %mm1
        punpckldq %mm1, %mm1
/* Fg -> mm2: div255 (colour * m) */
        movq    %mm0, %mm2
        pmullw  %mm1, %mm2
        paddw   %mm6, %mm2
        movq    %mm2, %mm3
        psrlw   $8, %mm3
        paddw   %mm3, %mm2
        psrlw   $8, %mm2
/* [255 - FgA] -> mm1 */
        movq    %mm2, %mm1
        punpckhwd %mm1, %mm1
        punpckhdq %mm1, %mm1
        pxor    %mm5, %mm1
/* Bg -> mm3 */
        movd    (%edi), %mm3
        punpcklbw %mm7, %mm3
/* Fg + ((255 - FgA) * Bg) / 255 */
        pmullw  %mm1, %mm3
        paddw   %mm6, %mm3
        movq    %mm3, %mm4
        psrlw   $8, %mm4
        paddw   %mm4, %mm3
        psrlw   $8, %mm3
        paddw   %mm2, %mm3
        jmp     .o_store
.o_full:
/* Full coverage of an opaque colour: the result is the colour itself */
        movq    %mm0, %mm3
.o_store:
/* Store pixel */
        packuswb %mm3, %mm3
        movd    %mm3, (%edi)
.o_clip:
        addl    $4, %edi
        incl    %esi
        decl    %ebx
        jnz     .o_forx
/* Next row: px += rs, spx += srs */
        movl    20(%ebp), %eax
        addl    %eax, 8(%ebp)
        movl    28(%ebp), %eax
        addl    %eax, 24(%ebp)
        decl    %ecx
        jnz     .o_fory
        jmp     .exit
.Lfe1:
.size nr_mmx_R8G8B8A8_P_R8G8B8A8_P_A8_RGBAP,.Lfe1-nr_mmx_R8G8B8A8_P_R8G8B8A8_P_A8_RGBAP
.ident "GCC: (GNU) 3.2"
|