diff options
author | Oded Gabbay <oded.gabbay@gmail.com> | 2015-06-18 14:56:47 +0300 |
---|---|---|
committer | Oded Gabbay <oded.gabbay@gmail.com> | 2015-07-16 16:13:35 +0300 |
commit | d5b5343c7df99082597e0c37aec937dcf5b6602d (patch) | |
tree | a25193a4ab7ae039fb3d6cfa35d10562831831a9 | |
parent | 339eeaf095f949694d7f79a45171ac03a3b06f90 (diff) |
vmx: implement fast path composite_add_8_8
Copied impl. from sse2 file and edited to use vmx functions
It was benchmarked against commit id 2be523b from pixman/master
POWER8, 16 cores, 3.4GHz, ppc64le :
reference memcpy speed = 27036.4MB/s (6759.1MP/s for 32bpp fills)
Before After Change
---------------------------------------------
L1 687.63 9140.84 +1229.33%
L2 715 7495.78 +948.36%
M 717.39 8460.14 +1079.29%
HT 569.56 1020.12 +79.11%
VT 520.3 1215.56 +133.63%
R 514.81 874.35 +69.84%
RT 341.28 305.42 -10.51%
Kops/s 1621 1579 -2.59%
Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
Acked-by: Siarhei Siamashka <siarhei.siamashka@gmail.com>
-rw-r--r-- | pixman/pixman-vmx.c | 55 |
1 files changed, 55 insertions, 0 deletions
diff --git a/pixman/pixman-vmx.c b/pixman/pixman-vmx.c index 9eae31c..e49e8aa 100644 --- a/pixman/pixman-vmx.c +++ b/pixman/pixman-vmx.c @@ -2714,12 +2714,67 @@ vmx_composite_over_8888_8888 (pixman_implementation_t *imp, } } +static void +vmx_composite_add_8_8 (pixman_implementation_t *imp, + pixman_composite_info_t *info) +{ + PIXMAN_COMPOSITE_ARGS (info); + uint8_t *dst_line, *dst; + uint8_t *src_line, *src; + int dst_stride, src_stride; + int32_t w; + uint16_t t; + + PIXMAN_IMAGE_GET_LINE ( + src_image, src_x, src_y, uint8_t, src_stride, src_line, 1); + PIXMAN_IMAGE_GET_LINE ( + dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1); + + while (height--) + { + dst = dst_line; + src = src_line; + + dst_line += dst_stride; + src_line += src_stride; + w = width; + + /* Small head */ + while (w && (uintptr_t)dst & 3) + { + t = (*dst) + (*src++); + *dst++ = t | (0 - (t >> 8)); + w--; + } + + vmx_combine_add_u (imp, op, + (uint32_t*)dst, (uint32_t*)src, NULL, w >> 2); + + /* Small tail */ + dst += w & 0xfffc; + src += w & 0xfffc; + + w &= 3; + + while (w) + { + t = (*dst) + (*src++); + *dst++ = t | (0 - (t >> 8)); + w--; + } + } +} + static const pixman_fast_path_t vmx_fast_paths[] = { PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, a8r8g8b8, vmx_composite_over_8888_8888), PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, x8r8g8b8, vmx_composite_over_8888_8888), PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, a8b8g8r8, vmx_composite_over_8888_8888), PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, x8b8g8r8, vmx_composite_over_8888_8888), + + /* PIXMAN_OP_ADD */ + PIXMAN_STD_FAST_PATH (ADD, a8, null, a8, vmx_composite_add_8_8), + { PIXMAN_OP_NONE }, }; |