diff options
author | Oded Gabbay <oded.gabbay@gmail.com> | 2015-06-28 09:42:19 +0300 |
---|---|---|
committer | Oded Gabbay <oded.gabbay@gmail.com> | 2015-07-16 16:13:35 +0300 |
commit | 0cc8a2e9714efcb7cdd7e2a94c9cba49c3e29e00 (patch) | |
tree | 82197e7adb8fdecfa9bdded888993cad105649f8 | |
parent | c12ee95089e7d281a29a24bf56b81f5c16dec6ee (diff) |
vmx: implement fast path vmx_fill
Based on the sse2 implementation.
It was benchmarked against commit id e2d211a from pixman/master.
Tested with the cairo trimmed benchmarks on POWER8, 8 cores, 3.4GHz,
RHEL 7.1 ppc64le:
speedups
========
t-swfdec-giant-steps 1383.09 -> 718.63 : 1.92x speedup
t-gnome-system-monitor 1403.53 -> 918.77 : 1.53x speedup
t-evolution 552.34 -> 415.24 : 1.33x speedup
t-xfce4-terminal-a1 1573.97 -> 1351.46 : 1.16x speedup
t-firefox-paintball 847.87 -> 734.50 : 1.15x speedup
t-firefox-asteroids 565.99 -> 492.77 : 1.15x speedup
t-firefox-canvas-swscroll 1656.87 -> 1447.48 : 1.14x speedup
t-midori-zoomed 724.73 -> 642.16 : 1.13x speedup
t-firefox-planet-gnome 975.78 -> 911.92 : 1.07x speedup
t-chromium-tabs 292.12 -> 274.74 : 1.06x speedup
t-firefox-chalkboard 690.78 -> 653.93 : 1.06x speedup
t-firefox-talos-gfx 1375.30 -> 1303.74 : 1.05x speedup
t-firefox-canvas-alpha 1016.79 -> 967.24 : 1.05x speedup
Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
Acked-by: Siarhei Siamashka <siarhei.siamashka@gmail.com>
-rw-r--r-- | pixman/pixman-vmx.c | 153 |
1 files changed, 153 insertions, 0 deletions
diff --git a/pixman/pixman-vmx.c b/pixman/pixman-vmx.c index 39d1a06..61fdb80 100644 --- a/pixman/pixman-vmx.c +++ b/pixman/pixman-vmx.c @@ -2537,6 +2537,157 @@ vmx_combine_add_ca (pixman_implementation_t *imp, } } +static pixman_bool_t +vmx_fill (pixman_implementation_t *imp, + uint32_t * bits, + int stride, + int bpp, + int x, + int y, + int width, + int height, + uint32_t filler) +{ + uint32_t byte_width; + uint8_t *byte_line; + + vector unsigned int vfiller; + + if (bpp == 8) + { + uint8_t b; + uint16_t w; + + stride = stride * (int) sizeof (uint32_t) / 1; + byte_line = (uint8_t *)(((uint8_t *)bits) + stride * y + x); + byte_width = width; + stride *= 1; + + b = filler & 0xff; + w = (b << 8) | b; + filler = (w << 16) | w; + } + else if (bpp == 16) + { + stride = stride * (int) sizeof (uint32_t) / 2; + byte_line = (uint8_t *)(((uint16_t *)bits) + stride * y + x); + byte_width = 2 * width; + stride *= 2; + + filler = (filler & 0xffff) * 0x00010001; + } + else if (bpp == 32) + { + stride = stride * (int) sizeof (uint32_t) / 4; + byte_line = (uint8_t *)(((uint32_t *)bits) + stride * y + x); + byte_width = 4 * width; + stride *= 4; + } + else + { + return FALSE; + } + + vfiller = create_mask_1x32_128(&filler); + + while (height--) + { + int w; + uint8_t *d = byte_line; + byte_line += stride; + w = byte_width; + + if (w >= 1 && ((uintptr_t)d & 1)) + { + *(uint8_t *)d = filler; + w -= 1; + d += 1; + } + + while (w >= 2 && ((uintptr_t)d & 3)) + { + *(uint16_t *)d = filler; + w -= 2; + d += 2; + } + + while (w >= 4 && ((uintptr_t)d & 15)) + { + *(uint32_t *)d = filler; + + w -= 4; + d += 4; + } + + while (w >= 128) + { + vec_st(vfiller, 0, (uint32_t *) d); + vec_st(vfiller, 0, (uint32_t *) d + 4); + vec_st(vfiller, 0, (uint32_t *) d + 8); + vec_st(vfiller, 0, (uint32_t *) d + 12); + vec_st(vfiller, 0, (uint32_t *) d + 16); + vec_st(vfiller, 0, (uint32_t *) d + 20); + vec_st(vfiller, 0, (uint32_t *) d + 24); + vec_st(vfiller, 0, (uint32_t *) d + 28); + + d += 128; + w -= 
128; + } + + if (w >= 64) + { + vec_st(vfiller, 0, (uint32_t *) d); + vec_st(vfiller, 0, (uint32_t *) d + 4); + vec_st(vfiller, 0, (uint32_t *) d + 8); + vec_st(vfiller, 0, (uint32_t *) d + 12); + + d += 64; + w -= 64; + } + + if (w >= 32) + { + vec_st(vfiller, 0, (uint32_t *) d); + vec_st(vfiller, 0, (uint32_t *) d + 4); + + d += 32; + w -= 32; + } + + if (w >= 16) + { + vec_st(vfiller, 0, (uint32_t *) d); + + d += 16; + w -= 16; + } + + while (w >= 4) + { + *(uint32_t *)d = filler; + + w -= 4; + d += 4; + } + + if (w >= 2) + { + *(uint16_t *)d = filler; + w -= 2; + d += 2; + } + + if (w >= 1) + { + *(uint8_t *)d = filler; + w -= 1; + d += 1; + } + } + + return TRUE; +} + static const pixman_fast_path_t vmx_fast_paths[] = { { PIXMAN_OP_NONE }, @@ -2582,5 +2733,7 @@ _pixman_implementation_create_vmx (pixman_implementation_t *fallback) imp->combine_32_ca[PIXMAN_OP_XOR] = vmx_combine_xor_ca; imp->combine_32_ca[PIXMAN_OP_ADD] = vmx_combine_add_ca; + imp->fill = vmx_fill; + return imp; } |