summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: M Joonas Pihlaja <jpihlaja@cc.helsinki.fi> 2009-01-13 11:10:23 +0200
committer: M Joonas Pihlaja <jpihlaja@cc.helsinki.fi> 2009-01-13 11:10:23 +0200
commit: 724e9b675b314602a3072ce708b2d1679ce05a84 (patch)
tree: d4aa5ce4ca5549eee51ad085590551ec607b8553
parent: 342cac39e84d02c11d0ea894cc29086d99c0faed (diff)
[tester] sync it.
-rw-r--r-- unpremultiply.c 77
1 files changed, 63 insertions, 14 deletions
diff --git a/unpremultiply.c b/unpremultiply.c
index 027477b..2f2b422 100644
--- a/unpremultiply.c
+++ b/unpremultiply.c
@@ -2,7 +2,7 @@
nasm -g -f elf64 unpremultiply-sse2.S
nasm -g -f elf64 unpremultiply-sse2-test.S
nasm -g -f elf64 unpremultiply-sse2-float.S
-gcc -W -Wall -std=c99 -fomit-frame-pointer -funroll-all-loops -O3 -g -o `basename $0 .c` unpremultiply-sse2*.o $0
+gcc -W -Wall -Wextra -std=c99 -fomit-frame-pointer -funroll-all-loops -O3 -g -o `basename $0 .c` unpremultiply-sse2*.o $0
exit $?
*/
#include <assert.h>
@@ -11,6 +11,9 @@ exit $?
#include <stdlib.h>
#include <string.h>
+#include <sys/types.h>
+#include <sys/time.h>
+
#if 1
# define ASHIFT 24
# define RSHIFT 16
@@ -352,11 +355,29 @@ make_reciprocal_table_B()
}
static void
-saturate(uint32_t *buf, size_t n)
+saturate(void *buf, size_t n)
{
size_t i;
+ uint8_t *p = buf;
+
for (i=0; i<n; i++) {
- uint32_t rgba = buf[i];
+ /* Recycle the pixel via a union to confuse gcc's optimiser
+ * about the fact that it's accessing uint32_t values. The
+ * confusion is currently enough to make it doubt the
+ * alignment of the buf pointer and not let the vectoriser at
+ * this loop. The new vectoriser aggressively uses aligned
+ * accesses, and sometimes this loop is called with unaligned
+ * addresses. */
+ union {
+ uint32_t u32;
+ uint8_t u8[4];
+ } pix;
+ pix.u8[0] = p[i*4+0];
+ pix.u8[1] = p[i*4+1];
+ pix.u8[2] = p[i*4+2];
+ pix.u8[3] = p[i*4+3];
+
+ uint32_t rgba = pix.u32;
uint32_t a = (rgba >> ASHIFT) & 0xFF;
uint32_t r = (rgba >> RSHIFT) & 0xFF;
uint32_t g = (rgba >> GSHIFT) & 0xFF;
@@ -364,10 +385,15 @@ saturate(uint32_t *buf, size_t n)
r = r < a ? r : a;
g = g < a ? g : a;
b = b < a ? b : a;
- buf[i] = (a << ASHIFT) |
+ pix.u32 = (a << ASHIFT) |
(r << RSHIFT) |
(g << GSHIFT) |
(b << BSHIFT);
+
+ p[i*4 + 0] = pix.u8[0];
+ p[i*4 + 1] = pix.u8[1];
+ p[i*4 + 2] = pix.u8[2];
+ p[i*4 + 3] = pix.u8[3];
}
}
@@ -401,7 +427,7 @@ fill_solid(uint32_t *buf, size_t n)
}
static void
-fill_empty(uint32_t *buf, size_t n)
+fill_empty(void *buf, size_t n)
{
memset(buf, 0, 4*n);
}
@@ -413,20 +439,35 @@ getenvlong(char const *name, long default_value)
return val ? atol(val) : default_value;
}
+static double
+now_ms()
+{
+ struct timeval tv;
+ gettimeofday(&tv, NULL);
+ return tv.tv_sec*1000.0 + tv.tv_usec/1000.0;
+}
+
int
main(int argc, char **argv)
{
long nloops = getenvlong("loops", 50);
size_t n = getenvlong("pixels", 2*1024*1024);
- long ofs = getenvlong("offset", 0);
- uint32_t *dst = calloc(n, 4);
- uint32_t *src = calloc(n, 4);
+ long offset = getenvlong("offset", 0);
+ /* non-zero is liable to segfault due to gcc alignment breakage */
+ union {
+ uint32_t *u32;
+ char *u8;
+ } udst, usrc;
char const *method = "lut";
+ uint32_t *ref = NULL;
int verify = 0;
+ double elapsed_ms;
int i;
- dst = (uint32_t*)((uintptr_t)dst + ofs);
- src = (uint32_t*)((uintptr_t)src + ofs);
+ udst.u8 = calloc(n*4+offset, 1) + offset;
+ usrc.u8 = calloc(n*4+offset, 1) + offset;
+#define dst udst.u32
+#define src usrc.u32
make_division_table();
make_reciprocal_table_A();
@@ -463,7 +504,8 @@ main(int argc, char **argv)
0 == strcmp(argv[i], "sse2-float") ||
0 == strcmp(argv[i], "copy") ||
0 == strcmp(argv[i], "read") ||
- 0 == strcmp(argv[i], "write"))
+ 0 == strcmp(argv[i], "write") ||
+ 0 == strcmp(argv[i], "noop"))
{
method = argv[i];
}
@@ -474,7 +516,12 @@ main(int argc, char **argv)
}
saturate(src, n);
+ if (verify) {
+ ref = malloc(n*4);
+ unpremultiply_with_div(ref, src, n);
+ }
+ elapsed_ms = now_ms();
if (0 == strcmp(method, "div")) {
while (nloops-- > 0) {
unpremultiply_with_div(dst, src, n);
@@ -540,16 +587,18 @@ main(int argc, char **argv)
unpremultiply_with_sse2_float(dst, src, n);
}
}
- else {
+ else if (0 == strcmp(method, "noop")) {
+ /* do nothing. */
+ } else {
fprintf(stderr, "unknown method %s\n", method);
return 1;
}
+ elapsed_ms = now_ms() - elapsed_ms;
+ printf("%f\n", elapsed_ms);
if (verify) {
- uint32_t *ref = malloc(n*4);
size_t i;
int maxdiff = 0;
- unpremultiply_with_div(ref, src, n);
for (i=0; i<n; i++) {
uint32_t x = dst[i];
uint32_t y = ref[i];