diff options
author | Oded Gabbay <oded.gabbay@gmail.com> | 2015-06-29 15:31:02 +0300 |
---|---|---|
committer | Oded Gabbay <oded.gabbay@gmail.com> | 2015-07-16 16:13:35 +0300 |
commit | 47f74ca94637d79ee66c37a81eea0200e453fcc1 (patch) | |
tree | 6094dca4a6d640d526a66acc7d0f3a256c0bfc01 | |
parent | fcbb97d4458d717b9c15858aedcbee2d33c8ac5a (diff) |
vmx: implement fast path iterator vmx_fetch_x8r8g8b8
It was benchmarked against commit id 2be523b from pixman/master
POWER8, 8 cores, 3.4GHz, RHEL 7.1 ppc64le.
cairo trimmed benchmarks :
Speedups
========
t-firefox-asteroids 533.92 -> 489.94 : 1.09x
Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
Acked-by: Siarhei Siamashka <siarhei.siamashka@gmail.com>
-rw-r--r-- | pixman/pixman-vmx.c | 48 |
1 files changed, 48 insertions, 0 deletions
diff --git a/pixman/pixman-vmx.c b/pixman/pixman-vmx.c index 0950850..773ad76 100644 --- a/pixman/pixman-vmx.c +++ b/pixman/pixman-vmx.c @@ -3105,6 +3105,52 @@ static const pixman_fast_path_t vmx_fast_paths[] = { PIXMAN_OP_NONE }, }; +static uint32_t * +vmx_fetch_x8r8g8b8 (pixman_iter_t *iter, const uint32_t *mask) +{ + int w = iter->width; + vector unsigned int ff000000 = mask_ff000000; + uint32_t *dst = iter->buffer; + uint32_t *src = (uint32_t *)iter->bits; + + iter->bits += iter->stride; + + while (w && ((uintptr_t)dst) & 0x0f) + { + *dst++ = (*src++) | 0xff000000; + w--; + } + + while (w >= 4) + { + save_128_aligned(dst, vec_or(load_128_unaligned(src), ff000000)); + + dst += 4; + src += 4; + w -= 4; + } + + while (w) + { + *dst++ = (*src++) | 0xff000000; + w--; + } + + return iter->buffer; +} + +#define IMAGE_FLAGS \ + (FAST_PATH_STANDARD_FLAGS | FAST_PATH_ID_TRANSFORM | \ + FAST_PATH_BITS_IMAGE | FAST_PATH_SAMPLES_COVER_CLIP_NEAREST) + +static const pixman_iter_info_t vmx_iters[] = +{ + { PIXMAN_x8r8g8b8, IMAGE_FLAGS, ITER_NARROW, + _pixman_iter_init_bits_stride, vmx_fetch_x8r8g8b8, NULL + }, + { PIXMAN_null }, +}; + pixman_implementation_t * _pixman_implementation_create_vmx (pixman_implementation_t *fallback) { @@ -3147,5 +3193,7 @@ _pixman_implementation_create_vmx (pixman_implementation_t *fallback) imp->fill = vmx_fill; + imp->iter_info = vmx_iters; + return imp; } |