summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorOded Gabbay <oded.gabbay@gmail.com>2015-06-18 14:56:47 +0300
committerOded Gabbay <oded.gabbay@gmail.com>2015-07-16 16:13:35 +0300
commitd5b5343c7df99082597e0c37aec937dcf5b6602d (patch)
treea25193a4ab7ae039fb3d6cfa35d10562831831a9
parent339eeaf095f949694d7f79a45171ac03a3b06f90 (diff)
vmx: implement fast path composite_add_8_8
Copied impl. from sse2 file and edited to use vmx functions It was benchmarked against commid id 2be523b from pixman/master POWER8, 16 cores, 3.4GHz, ppc64le : reference memcpy speed = 27036.4MB/s (6759.1MP/s for 32bpp fills) Before After Change --------------------------------------------- L1 687.63 9140.84 +1229.33% L2 715 7495.78 +948.36% M 717.39 8460.14 +1079.29% HT 569.56 1020.12 +79.11% VT 520.3 1215.56 +133.63% R 514.81 874.35 +69.84% RT 341.28 305.42 -10.51% Kops/s 1621 1579 -2.59% Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com> Acked-by: Siarhei Siamashka <siarhei.siamashka@gmail.com>
-rw-r--r--pixman/pixman-vmx.c55
1 files changed, 55 insertions, 0 deletions
diff --git a/pixman/pixman-vmx.c b/pixman/pixman-vmx.c
index 9eae31c8..e49e8aa7 100644
--- a/pixman/pixman-vmx.c
+++ b/pixman/pixman-vmx.c
@@ -2714,12 +2714,67 @@ vmx_composite_over_8888_8888 (pixman_implementation_t *imp,
}
}
+static void
+vmx_composite_add_8_8 (pixman_implementation_t *imp,
+ pixman_composite_info_t *info)
+{
+ PIXMAN_COMPOSITE_ARGS (info);
+ uint8_t *dst_line, *dst;
+ uint8_t *src_line, *src;
+ int dst_stride, src_stride;
+ int32_t w;
+ uint16_t t;
+
+ PIXMAN_IMAGE_GET_LINE (
+ src_image, src_x, src_y, uint8_t, src_stride, src_line, 1);
+ PIXMAN_IMAGE_GET_LINE (
+ dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);
+
+ while (height--)
+ {
+ dst = dst_line;
+ src = src_line;
+
+ dst_line += dst_stride;
+ src_line += src_stride;
+ w = width;
+
+ /* Small head */
+ while (w && (uintptr_t)dst & 3)
+ {
+ t = (*dst) + (*src++);
+ *dst++ = t | (0 - (t >> 8));
+ w--;
+ }
+
+ vmx_combine_add_u (imp, op,
+ (uint32_t*)dst, (uint32_t*)src, NULL, w >> 2);
+
+ /* Small tail */
+ dst += w & 0xfffc;
+ src += w & 0xfffc;
+
+ w &= 3;
+
+ while (w)
+ {
+ t = (*dst) + (*src++);
+ *dst++ = t | (0 - (t >> 8));
+ w--;
+ }
+ }
+}
+
static const pixman_fast_path_t vmx_fast_paths[] =
{
PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, a8r8g8b8, vmx_composite_over_8888_8888),
PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, x8r8g8b8, vmx_composite_over_8888_8888),
PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, a8b8g8r8, vmx_composite_over_8888_8888),
PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, x8b8g8r8, vmx_composite_over_8888_8888),
+
+ /* PIXMAN_OP_ADD */
+ PIXMAN_STD_FAST_PATH (ADD, a8, null, a8, vmx_composite_add_8_8),
+
{ PIXMAN_OP_NONE },
};