summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: M Joonas Pihlaja <jpihlaja@cc.helsinki.fi> 2009-01-13 11:33:21 +0200
committer: M Joonas Pihlaja <jpihlaja@cc.helsinki.fi> 2009-01-13 12:27:23 +0200
commit: 321f658793fc427d66b877f81036c40518419179 (patch)
tree: 7aceec9ab28e93ffcfad0dd4782814231c83a89c
parent: 32ca093f19006b9beb0259237a1a4e1869f15807 (diff)
Fix some typos and misc. cleanup.
-rw-r--r-- unpremultiply-sse2-test.S 21
1 files changed, 11 insertions, 10 deletions
diff --git a/unpremultiply-sse2-test.S b/unpremultiply-sse2-test.S
index 1fce594..e8bef21 100644
--- a/unpremultiply-sse2-test.S
+++ b/unpremultiply-sse2-test.S
@@ -1,14 +1,17 @@
- section .text
;;;
;;; Unpremultiply routine for SSE2/AMD64.
;;;
+;;; This file exports a function unpremultiply_with_sse2_test() that
+;;; can be used to unpremultiply a contiguous buffer of 32 bit pixels.
+;;;
+ section .text
; We're only using rax-rbp in this file so that
; conversion to 32 bit SSE2 would be easier by
; updating the register names and the
; argument extraction to the calling convention.
-; Location of alpha in a 32 bit pixel.
+; Location of alpha in a 32 bit pixel. Alpha measures opaqueness.
%define ASHIFT 24
;%define ASHIFT 0
@@ -45,8 +48,7 @@ reciprocal_table_D:
%endrep
unpremultiply_single_pixels:
-;; Slower version for the odd pixels at the beginning and
-;; and.
+;; Slower version for the odd pixels at the ends.
;;
;; In:
;; uint32_t *dst/rdi: Destination pixels.
@@ -76,7 +78,7 @@ unpremultiply_single_pixels:
mov ebx, eax
mov ebp, eax ; Initialise result pixel register.
and ebp, 0xFF000000 ; Mask off non-alpha from result pix.
- jz .prolog
+ jz .next
shr ebx, 24 ; Load alpha.
mov ebx, DWORD [reciprocal_table_D + 4*ebx] ; Load reciprocal.
@@ -109,7 +111,7 @@ unpremultiply_single_pixels:
shr eax, 8 ; Shift out alpha.
and ebp, 255 ; Mask off non-alpha.
mov ebx, ebp ; Initialise result pixel.
- jz .prolog
+ jz .next
mov ebx, DWORD [reciprocal_table_D + 4*ebx] ; Load reciprocal.
; Do the component from bits 8..15.
@@ -135,7 +137,7 @@ unpremultiply_single_pixels:
shl eax, 8
or ebp, eax
%endif
-.prolog:
+.next:
; Write the result pixel.
mov [rdi + rcx*4], ebp
@@ -165,7 +167,6 @@ unpremultiply_single_pixels:
;; rsi: rsi + 16*floor(num_pixels/4); advanced past src.
;;
;; Scratched: xmm1-xmm4, rax-rdx, rbx
-
; Advance the src and dst pointers to the end. The bias
; of +-15 is used to have the loop condition trigger an exit
; just before we access the last incomplete block.
@@ -239,7 +240,7 @@ global unpremultiply_with_sse2_test
unpremultiply_with_sse2_test:
;;
-;; void unpremultiply_with_sse2(
+;; void unpremultiply_with_sse2_test(
;; uint32_t *dst/rdi,
;; uint32_t const *src/rsi,
;; ulong n/rdx);
@@ -251,7 +252,7 @@ unpremultiply_with_sse2_test:
push rbp
push rbx
- ; Save start dst for alignment tests later.
+ ; Save start of dst for alignment tests later.
mov rcx, rdi
; If we don't have enough pixels for at least a few iterations