diff options
| field | value | date |
|---|---|---|
| author | M Joonas Pihlaja <jpihlaja@cc.helsinki.fi> | 2009-01-13 11:10:23 +0200 |
| committer | M Joonas Pihlaja <jpihlaja@cc.helsinki.fi> | 2009-01-13 11:10:23 +0200 |
| commit | 724e9b675b314602a3072ce708b2d1679ce05a84 (patch) | |
| tree | d4aa5ce4ca5549eee51ad085590551ec607b8553 | |
| parent | 342cac39e84d02c11d0ea894cc29086d99c0faed (diff) | |
[tester] sync it.
-rw-r--r-- | unpremultiply.c | 77 |
1 files changed, 63 insertions, 14 deletions
```diff
diff --git a/unpremultiply.c b/unpremultiply.c
index 027477b..2f2b422 100644
--- a/unpremultiply.c
+++ b/unpremultiply.c
@@ -2,7 +2,7 @@
 nasm -g -f elf64 unpremultiply-sse2.S
 nasm -g -f elf64 unpremultiply-sse2-test.S
 nasm -g -f elf64 unpremultiply-sse2-float.S
-gcc -W -Wall -std=c99 -fomit-frame-pointer -funroll-all-loops -O3 -g -o `basename $0 .c` unpremultiply-sse2*.o $0
+gcc -W -Wall -Wextra -std=c99 -fomit-frame-pointer -funroll-all-loops -O3 -g -o `basename $0 .c` unpremultiply-sse2*.o $0
 exit $?
 */
 #include <assert.h>
@@ -11,6 +11,9 @@ exit $?
 #include <stdlib.h>
 #include <string.h>
 
+#include <sys/types.h>
+#include <sys/time.h>
+
 #if 1
 # define ASHIFT 24
 # define RSHIFT 16
@@ -352,11 +355,29 @@ make_reciprocal_table_B()
 }
 
 static void
-saturate(uint32_t *buf, size_t n)
+saturate(void *buf, size_t n)
 {
     size_t i;
+    uint8_t *p = buf;
+
     for (i=0; i<n; i++) {
-        uint32_t rgba = buf[i];
+        /* Recycle the pixel via a union to confuse gcc's optimiser
+         * about the fact that it's accessing uint32_t values. The
+         * confusion is currently enough to make it doubt the
+         * alignment of the buf pointer and not let the vectoriser at
+         * this loop. The new vectoriser aggressively uses aligned
+         * accesses, and sometimes this loop is called with unaligned
+         * addresses. */
+        union {
+            uint32_t u32;
+            uint8_t u8[4];
+        } pix;
+        pix.u8[0] = p[i*4+0];
+        pix.u8[1] = p[i*4+1];
+        pix.u8[2] = p[i*4+2];
+        pix.u8[3] = p[i*4+3];
+
+        uint32_t rgba = pix.u32;
         uint32_t a = (rgba >> ASHIFT) & 0xFF;
         uint32_t r = (rgba >> RSHIFT) & 0xFF;
         uint32_t g = (rgba >> GSHIFT) & 0xFF;
@@ -364,10 +385,15 @@ saturate(uint32_t *buf, size_t n)
         r = r < a ? r : a;
         g = g < a ? g : a;
         b = b < a ? b : a;
-        buf[i] = (a << ASHIFT) |
+        pix.u32 = (a << ASHIFT) |
             (r << RSHIFT) |
             (g << GSHIFT) |
             (b << BSHIFT);
+
+        p[i*4 + 0] = pix.u8[0];
+        p[i*4 + 1] = pix.u8[1];
+        p[i*4 + 2] = pix.u8[2];
+        p[i*4 + 3] = pix.u8[3];
     }
 }
 
@@ -401,7 +427,7 @@ fill_solid(uint32_t *buf, size_t n)
 }
 
 static void
-fill_empty(uint32_t *buf, size_t n)
+fill_empty(void *buf, size_t n)
 {
     memset(buf, 0, 4*n);
 }
@@ -413,20 +439,35 @@ getenvlong(char const *name, long default_value)
     return val ? atol(val) : default_value;
 }
 
+static double
+now_ms()
+{
+    struct timeval tv;
+    gettimeofday(&tv, NULL);
+    return tv.tv_sec*1000.0 + tv.tv_usec/1000.0;
+}
+
 int
 main(int argc, char **argv)
 {
     long nloops = getenvlong("loops", 50);
     size_t n = getenvlong("pixels", 2*1024*1024);
-    long ofs = getenvlong("offset", 0);
-    uint32_t *dst = calloc(n, 4);
-    uint32_t *src = calloc(n, 4);
+    long offset = getenvlong("offset", 0);
+    /* non-zero is liable to segfault due to gcc alignment breakage */
+    union {
+        uint32_t *u32;
+        char *u8;
+    } udst, usrc;
     char const *method = "lut";
+    uint32_t *ref = NULL;
     int verify = 0;
+    double elapsed_ms;
     int i;
 
-    dst = (uint32_t*)((uintptr_t)dst + ofs);
-    src = (uint32_t*)((uintptr_t)src + ofs);
+    udst.u8 = calloc(n*4+offset, 1) + offset;
+    usrc.u8 = calloc(n*4+offset, 1) + offset;
+#define dst udst.u32
+#define src usrc.u32
 
     make_division_table();
     make_reciprocal_table_A();
@@ -463,7 +504,8 @@ main(int argc, char **argv)
             0 == strcmp(argv[i], "sse2-float") ||
             0 == strcmp(argv[i], "copy") ||
             0 == strcmp(argv[i], "read") ||
-            0 == strcmp(argv[i], "write"))
+            0 == strcmp(argv[i], "write") ||
+            0 == strcmp(argv[i], "noop"))
         {
             method = argv[i];
         }
@@ -474,7 +516,12 @@ main(int argc, char **argv)
     }
 
     saturate(src, n);
+    if (verify) {
+        ref = malloc(n*4);
+        unpremultiply_with_div(ref, src, n);
+    }
 
+    elapsed_ms = now_ms();
     if (0 == strcmp(method, "div")) {
         while (nloops-- > 0) {
             unpremultiply_with_div(dst, src, n);
@@ -540,16 +587,18 @@ main(int argc, char **argv)
             unpremultiply_with_sse2_float(dst, src, n);
         }
     }
-    else {
+    else if (0 == strcmp(method, "noop")) {
+        /* do nothing. */
+    } else {
         fprintf(stderr, "unknown method %s\n", method);
         return 1;
     }
 
+    elapsed_ms = now_ms() - elapsed_ms;
+    printf("%f\n", elapsed_ms);
     if (verify) {
-        uint32_t *ref = malloc(n*4);
         size_t i;
         int maxdiff = 0;
-        unpremultiply_with_div(ref, src, n);
         for (i=0; i<n; i++) {
             uint32_t x = dst[i];
             uint32_t y = ref[i];
```