summaryrefslogtreecommitdiff
path: root/patches/nr_mmx_R8G8B8A8_P_R8G8B8A8_P_A8_RGBAP.S
blob: fe1d9be57f8930413748e14f2bf0b9f44196eb68 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
/* Source-file name recorded in the object file.  The name (and the GCC
   .ident at the end of the file) suggests this routine started out as
   compiler output for nr-compose.c -- TODO confirm. */
	.file	"nr-compose.c"

# Ensure Inkscape is execshield protected
/* The section is entered and left again straight away, so it exists in the
   object but stays empty.  Presumably the toolchain reads its presence as
   "this object does not need an executable stack" -- confirm against the
   linker documentation. */
	.section .note.GNU-stack
	.previous
	
/* Everything from here on is code. */
	.text
/* NOTE(review): whether the operand of .align is a byte count or a power of
   two depends on the assembler target -- check before changing it. */
	.align 2
/* Export the routine and mark the symbol as a function. */
.globl nr_mmx_R8G8B8A8_P_R8G8B8A8_P_A8_RGBAP
	.type	nr_mmx_R8G8B8A8_P_R8G8B8A8_P_A8_RGBAP,@function

/*
 * This code is in the public domain.
 *
 * nr_mmx_R8G8B8A8_P_R8G8B8A8_P_A8_RGBAP (px, w, h, rs, spx, srs, c)
 *
 * Composites one constant colour over a rectangle of 4-byte pixels, using a
 * 1-byte-per-pixel coverage mask.  For every pixel whose mask byte m is
 * non-zero:
 *
 *	FgM = Fg * m / 255			(per channel)
 *	px  = FgM + (255 - FgM.alpha) * px / 255
 *
 * Alpha is the 4th byte of a pixel (the code replicates the top word of the
 * unpacked pixel to obtain it).  The colour is added without being scaled by
 * its own alpha, i.e. the formula treats it as premultiplied.  Pixels whose
 * mask byte is 0 are left untouched.
 *
 * Presumed C prototype (not visible in this file -- confirm against the
 * declaration used by the callers):
 *	void f (unsigned char *px, int w, int h, int rs,
 *		const unsigned char *spx, int srs, const unsigned char *c);
 *
 * Arguments (all read from the stack):
 * c	 32(%ebp)	pointer to the 4-byte foreground colour
 * srs	 28(%ebp)	byte offset from one mask row to the next
 * spx	 24(%ebp)	first mask byte       (advanced by srs per row)
 * rs	 20(%ebp)	byte offset from one pixel row to the next
 * h	 16(%ebp)	number of rows
 * w	 12(%ebp)	number of pixels per row
 * px	 8(%ebp)	first destination pixel (advanced by rs per row)
 *
 * The px and spx stack slots are used as the running row pointers, so they
 * are overwritten.  Nothing is drawn when w <= 0 or h <= 0.
 *
 * Registers:
 * %esi	s	current mask byte
 * %edi	d	current destination pixel
 * %ebx	x	pixels left in the current row
 * %ecx	y	rows left
 * %eax		scratch
 *
 * mm0 Fg	(4 x 16 bit)
 * mm1 MMMM, later [255 - FgM.alpha] x 4
 * mm2 FgM
 * mm3 Bg / result
 * mm4 scratch
 * mm5 255
 * mm6 128
 * mm7 0
 *
 * %ebx, %esi, %edi and %ebp are saved and restored; %eax, %ecx, the flags
 * and mm0-mm7 are clobbered.  emms is executed before returning.
 *
 * "x / 255" is computed as t = x + 128; (t + (t >> 8)) >> 8, the usual
 * shift-and-add form of a rounded divide by 255 for products of two bytes.
 */

nr_mmx_R8G8B8A8_P_R8G8B8A8_P_A8_RGBAP:
	pushl	%ebp
	movl	%esp, %ebp
	pushl	%ebx
	pushl	%edi
	pushl	%esi

/* Empty rectangle: leave now.  The dec/jnz loops below only terminate */
/* when their counter reaches exactly zero, so a count of 0 would wrap. */
	cmpl	$0, 16(%ebp)
	jle	.exit
	cmpl	$0, 12(%ebp)
	jle	.exit

/* Load %mm7 with [0 0 0 0] */
	pxor	%mm7, %mm7

/* Load %mm6 with [128 128 128 128] (rounding term of the /255) */
	movl	$0x80808080, %eax
	movd	%eax, %mm6
	punpcklbw %mm7, %mm6

/* Load %mm5 with [255 255 255 255] */
	movl	$0xffffffff, %eax
	movd	%eax, %mm5
	punpcklbw %mm7, %mm5

/* FgC -> %mm0, widened to 4 x 16 bit */
	movl	32(%ebp), %eax
	movd	(%eax), %mm0
	punpcklbw %mm7, %mm0

/* Check full opacity: test the colour's alpha byte.  %eax still holds the */
/* pointer c, so the byte must be read from memory; %al would only be the */
/* low byte of the address. */
	cmpb	$0xff, 3(%eax)
	jz	.opaque

/* ---- General path: colour alpha is not 255 ---- */

/* for (y = ...) */
	movl	16(%ebp), %ecx
.fory:

/* d = px */
/* s = spx */
	movl	8(%ebp), %edi
	movl	24(%ebp), %esi

/* for (x = ...) */
	movl	12(%ebp), %ebx
.forx:

/* [m m m m] -> %mm1; a zero mask byte leaves the pixel untouched */
	movzbl	(%esi), %eax
	testl	%eax, %eax
	jz	.clip
	movd	%eax, %mm1
	punpcklwd %mm1, %mm1
	punpckldq %mm1, %mm1

/* FgM = Fg * m / 255 -> mm2 */
	movq	%mm0, %mm2
	pmullw	%mm1, %mm2
	paddw	%mm6, %mm2
	movq	%mm2, %mm3
	psrlw	$8, %mm3
	paddw	%mm3, %mm2
	psrlw	$8, %mm2

/* [255 - FgA] -> mm1: replicate the alpha word, then xor with 255 */
	movq	%mm2, %mm1
	punpckhwd %mm1, %mm1
	punpckhdq %mm1, %mm1
	pxor	%mm5, %mm1

/* Bg -> mm3 */
	movd	(%edi), %mm3
	punpcklbw %mm7, %mm3

/* Fg + ((255 - FgA) * Bg) / 255 */
	pmullw	%mm1, %mm3
	paddw	%mm6, %mm3
	movq	%mm3, %mm4
	psrlw	$8, %mm4
	paddw	%mm4, %mm3
	psrlw	$8, %mm3
	paddw	%mm2, %mm3

/* Store pixel (packuswb saturates each channel to 0..255) */
	packuswb %mm3, %mm3
	movd	%mm3, (%edi)

.clip:
/* Next pixel: 4 bytes of destination, 1 byte of mask */
	addl	$4, %edi
	incl	%esi

	decl	%ebx
	jnz	.forx

/* Next row: px += rs, spx += srs (kept in the argument slots) */
	movl	20(%ebp), %eax
	addl	%eax, 8(%ebp)
	movl	28(%ebp), %eax
	addl	%eax, 24(%ebp)

	decl	%ecx
	jnz	.fory

.exit:
/* Common epilogue, also the target of the empty-rectangle checks */
	emms
	popl	%esi
	popl	%edi
	popl	%ebx
	popl	%ebp
	ret

/* ---- Opaque path: colour alpha is 255 ---- */
/* Same as above, except that a mask byte of 255 stores the colour as is. */

.opaque:
/* for (y = ...) */
	movl	16(%ebp), %ecx
.o_fory:

/* d = px */
/* s = spx */
	movl	8(%ebp), %edi
	movl	24(%ebp), %esi

/* for (x = ...) */
	movl	12(%ebp), %ebx
.o_forx:

/* [m m m m] -> %mm1; m == 0 skips the pixel, m == 255 takes the shortcut */
	movzbl	(%esi), %eax
	testl	%eax, %eax
	jz	.o_clip
	cmpb	$0xff, %al
	jz	.o_full
	movd	%eax, %mm1
	punpcklwd %mm1, %mm1
	punpckldq %mm1, %mm1

/* FgM = Fg * m / 255 -> mm2 */
	movq	%mm0, %mm2
	pmullw	%mm1, %mm2
	paddw	%mm6, %mm2
	movq	%mm2, %mm3
	psrlw	$8, %mm3
	paddw	%mm3, %mm2
	psrlw	$8, %mm2

/* [255 - FgA] -> mm1: replicate the alpha word, then xor with 255 */
	movq	%mm2, %mm1
	punpckhwd %mm1, %mm1
	punpckhdq %mm1, %mm1
	pxor	%mm5, %mm1

/* Bg -> mm3 */
	movd	(%edi), %mm3
	punpcklbw %mm7, %mm3

/* Fg + ((255 - FgA) * Bg) / 255 */
	pmullw	%mm1, %mm3
	paddw	%mm6, %mm3
	movq	%mm3, %mm4
	psrlw	$8, %mm4
	paddw	%mm4, %mm3
	psrlw	$8, %mm3
	paddw	%mm2, %mm3

	jmp	.o_store

.o_full:
/* Fully covered pixel of an opaque colour: the result is the colour */
	movq	%mm0, %mm3

.o_store:
/* Store pixel (packuswb saturates each channel to 0..255) */
	packuswb %mm3, %mm3
	movd	%mm3, (%edi)

.o_clip:
/* Next pixel: 4 bytes of destination, 1 byte of mask */
	addl	$4, %edi
	incl	%esi

	decl	%ebx
	jnz	.o_forx

/* Next row: px += rs, spx += srs (kept in the argument slots) */
	movl	20(%ebp), %eax
	addl	%eax, 8(%ebp)
	movl	28(%ebp), %eax
	addl	%eax, 24(%ebp)

	decl	%ecx
	jnz	.o_fory
	jmp	.exit

.Lfe1:
	.size	nr_mmx_R8G8B8A8_P_R8G8B8A8_P_A8_RGBAP,.Lfe1-nr_mmx_R8G8B8A8_P_R8G8B8A8_P_A8_RGBAP
	.ident	"GCC: (GNU) 3.2"