diff options
author | M Joonas Pihlaja <jpihlaja@cc.helsinki.fi> | 2009-01-13 11:33:21 +0200 |
---|---|---|
committer | M Joonas Pihlaja <jpihlaja@cc.helsinki.fi> | 2009-01-13 12:27:23 +0200 |
commit | 321f658793fc427d66b877f81036c40518419179 (patch) | |
tree | 7aceec9ab28e93ffcfad0dd4782814231c83a89c | |
parent | 32ca093f19006b9beb0259237a1a4e1869f15807 (diff) |
Fix some typos and misc. cleanup.
-rw-r--r-- | unpremultiply-sse2-test.S | 21 |
1 files changed, 11 insertions, 10 deletions
diff --git a/unpremultiply-sse2-test.S b/unpremultiply-sse2-test.S
index 1fce594..e8bef21 100644
--- a/unpremultiply-sse2-test.S
+++ b/unpremultiply-sse2-test.S
@@ -1,14 +1,17 @@
-        section .text
 ;;;
 ;;; Unpremultiply routine for SSE2/AMD64.
 ;;;
+;;; This file exports a function unpremultiply_with_sse2_test() that
+;;; can be used to unpremultiply a contiguous buffer of 32 bit pixels.
+;;;
+        section .text
 
 ; We're only using rax-rbp in this file so that
 ; conversion to 32 bit SSE2 would be easier by
 ; updating the register names and the
 ; argument extraction to the calling convention.
 
-; Location of alpha in a 32 bit pixel.
+; Location of alpha in a 32 bit pixel. Alpha measures opaqueness.
 %define ASHIFT 24
 ;%define ASHIFT 0
 
@@ -45,8 +48,7 @@ reciprocal_table_D:
 %endrep
 
 unpremultiply_single_pixels:
-;; Slower version for the odd pixels at the beginning and
-;; and.
+;; Slower version for the odd pixels at the ends.
 ;;
 ;; In:
 ;;      uint32_t *dst/rdi: Destination pixels.
@@ -76,7 +78,7 @@ unpremultiply_single_pixels:
         mov ebx, eax
         mov ebp, eax ; Initialise result pixel register.
         and ebp, 0xFF000000 ; Mask off non-alpha from result pix.
-        jz .prolog
+        jz .next
         shr ebx, 24 ; Load alpha.
         mov ebx, DWORD [reciprocal_table_D + 4*ebx] ; Load reciprocal.
 
@@ -109,7 +111,7 @@ unpremultiply_single_pixels:
         shr eax, 8 ; Shift out alpha.
         and ebp, 255 ; Mask off non-alpha.
         mov ebx, ebp ; Initialise result pixel.
-        jz .prolog
+        jz .next
         mov ebx, DWORD [reciprocal_table_D + 4*ebx] ; Load reciprocal.
 
         ; Do the component from bits 8..15.
@@ -135,7 +137,7 @@ unpremultiply_single_pixels:
         shl eax, 8
         or ebp, eax
 %endif
-.prolog:
+.next:
         ; Write the result pixel.
         mov [rdi + rcx*4], ebp
 
@@ -165,7 +167,6 @@ unpremultiply_single_pixels:
 ;;      rsi: rsi + 16*floor(num_pixels/4); advanced past src.
 ;;
 ;; Scratched: xmm1-xmm4, rax-rdx, rbx
-
         ; Advance the src and dst pointers to the end. The bias
         ; of +-15 is used to have the loop condition trigger an exit
         ; just before we access the last incomplete block.
@@ -239,7 +240,7 @@ global unpremultiply_with_sse2_test
 
 unpremultiply_with_sse2_test:
 ;;
-;; void unpremultiply_with_sse2(
+;; void unpremultiply_with_sse2_test(
 ;;      uint32_t *dst/rdi,
 ;;      uint32_t const *src/rsi,
 ;;      ulong n/rdx);
@@ -251,7 +252,7 @@ unpremultiply_with_sse2_test:
         push rbp
         push rbx
 
-        ; Save start dst for alignment tests later.
+        ; Save start of dst for alignment tests later.
         mov rcx, rdi
 
         ; If we don't have enough pixels for at least a few iterations