author    Oded Gabbay <oded.gabbay@gmail.com>  2015-06-29 13:36:00 +0300
committer Oded Gabbay <oded.gabbay@gmail.com>  2015-08-25 18:35:56 +0300
commit    6577035966cb0c5a8a25f61d6bc7cb11c6ce60e7 (patch)
tree      1a3643ce7fd7bc5d2d209adab80bda9117a4d022
parent    30f7415ab2ebadec2368e3b8907292b55feb54b0 (diff)
vmx: implement fast path vmx_blt and vmx_composite_copy_area
No changes were observed when running cairo trimmed benchmarks.

Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
-rw-r--r--  pixman/pixman-vmx.c | 148
1 file changed, 148 insertions(+), 0 deletions(-)
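For context before the diff itself: the copy loop added below relies on two helpers defined earlier in pixman-vmx.c, load_128_unaligned and save_128_aligned. Their real bodies live elsewhere in the file; the following is only a sketch of what such wrappers can look like, assuming a VSX-capable compiler (a pure-VMX build would instead have to combine vec_ld with vec_lvsl/vec_perm for the unaligned load):

    #include <altivec.h>
    #include <stdint.h>

    /* Sketch only: load 16 bytes from an address with no alignment
     * guarantee; vec_vsx_ld tolerates misaligned effective addresses. */
    static vector unsigned int
    load_128_unaligned (const uint32_t *src)
    {
        return (vector unsigned int) vec_vsx_ld (0, (const unsigned int *) src);
    }

    /* Sketch only: store 16 bytes to a 16-byte-aligned address.
     * vec_st silently truncates the effective address to a 16-byte
     * boundary, so the caller must guarantee alignment. */
    static void
    save_128_aligned (uint32_t *dst, vector unsigned int data)
    {
        vec_st (data, 0, (unsigned int *) dst);
    }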
diff --git a/pixman/pixman-vmx.c b/pixman/pixman-vmx.c
index 86a90af..2da204f 100644
--- a/pixman/pixman-vmx.c
+++ b/pixman/pixman-vmx.c
@@ -2836,6 +2836,142 @@ vmx_fill (pixman_implementation_t *imp,
return TRUE;
}
+static pixman_bool_t
+vmx_blt (pixman_implementation_t *imp,
+ uint32_t * src_bits,
+ uint32_t * dst_bits,
+ int src_stride,
+ int dst_stride,
+ int src_bpp,
+ int dst_bpp,
+ int src_x,
+ int src_y,
+ int dest_x,
+ int dest_y,
+ int width,
+ int height)
+{
+ uint8_t * src_bytes;
+ uint8_t * dst_bytes;
+ int byte_width;
+
+ if (src_bpp != dst_bpp)
+ return FALSE;
+
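+ /* Strides arrive in uint32_t units.  For example, a 100-pixel-wide
+  * r5g6b5 image with stride 50 becomes 50 * 4 / 2 = 100 uint16_t units
+  * per row here, i.e. 200 bytes once the *= 2 below is applied. */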
+ if (src_bpp == 16)
+ {
+ src_stride = src_stride * (int) sizeof (uint32_t) / 2;
+ dst_stride = dst_stride * (int) sizeof (uint32_t) / 2;
+ src_bytes = (uint8_t *)(((uint16_t *)src_bits) + src_stride * (src_y) + (src_x));
+ dst_bytes = (uint8_t *)(((uint16_t *)dst_bits) + dst_stride * (dest_y) + (dest_x));
+ byte_width = 2 * width;
+ src_stride *= 2;
+ dst_stride *= 2;
+ }
+ else if (src_bpp == 32)
+ {
+ src_stride = src_stride * (int) sizeof (uint32_t) / 4;
+ dst_stride = dst_stride * (int) sizeof (uint32_t) / 4;
+ src_bytes = (uint8_t *)(((uint32_t *)src_bits) + src_stride * (src_y) + (src_x));
+ dst_bytes = (uint8_t *)(((uint32_t *)dst_bits) + dst_stride * (dest_y) + (dest_x));
+ byte_width = 4 * width;
+ src_stride *= 4;
+ dst_stride *= 4;
+ }
+ else
+ {
+ return FALSE;
+ }
+
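+ /* Copy one scanline per iteration: peel 2-byte and 4-byte chunks
+  * until the destination is 16-byte aligned, stream 64 bytes at a
+  * time through vector registers, then drain the 16/4/2-byte tails. */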
+ while (height--)
+ {
+ int w;
+ uint8_t *s = src_bytes;
+ uint8_t *d = dst_bytes;
+ src_bytes += src_stride;
+ dst_bytes += dst_stride;
+ w = byte_width;
+
+ while (w >= 2 && ((uintptr_t)d & 3))
+ {
+ *(uint16_t *)d = *(uint16_t *)s;
+ w -= 2;
+ s += 2;
+ d += 2;
+ }
+
+ while (w >= 4 && ((uintptr_t)d & 15))
+ {
+ *(uint32_t *)d = *(uint32_t *)s;
+
+ w -= 4;
+ s += 4;
+ d += 4;
+ }
+
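+ /* Main loop: 64 bytes per iteration through four 128-bit registers.
+  * Source loads may be unaligned; destination stores are aligned. */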
+ while (w >= 64)
+ {
+ vector unsigned int vmx0, vmx1, vmx2, vmx3;
+
+ vmx0 = load_128_unaligned ((uint32_t*) s);
+ vmx1 = load_128_unaligned ((uint32_t*)(s + 16));
+ vmx2 = load_128_unaligned ((uint32_t*)(s + 32));
+ vmx3 = load_128_unaligned ((uint32_t*)(s + 48));
+
+ save_128_aligned ((uint32_t*)(d), vmx0);
+ save_128_aligned ((uint32_t*)(d + 16), vmx1);
+ save_128_aligned ((uint32_t*)(d + 32), vmx2);
+ save_128_aligned ((uint32_t*)(d + 48), vmx3);
+
+ s += 64;
+ d += 64;
+ w -= 64;
+ }
+
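+ /* Drain any remaining whole 16-byte blocks. */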
+ while (w >= 16)
+ {
+ save_128_aligned ((uint32_t*) d, load_128_unaligned ((uint32_t*) s));
+
+ w -= 16;
+ d += 16;
+ s += 16;
+ }
+
+ while (w >= 4)
+ {
+ *(uint32_t *)d = *(uint32_t *)s;
+
+ w -= 4;
+ s += 4;
+ d += 4;
+ }
+
+ if (w >= 2)
+ {
+ *(uint16_t *)d = *(uint16_t *)s;
+ w -= 2;
+ s += 2;
+ d += 2;
+ }
+ }
+
+ return TRUE;
+}
+
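+/* A SRC composite between formats with the same layout (or an OVER
+ * whose source format has no alpha channel, making it opaque) is just
+ * a memory copy, so it can be routed straight through vmx_blt. */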
+static void
+vmx_composite_copy_area (pixman_implementation_t *imp,
+ pixman_composite_info_t *info)
+{
+ PIXMAN_COMPOSITE_ARGS (info);
+ vmx_blt (imp, src_image->bits.bits,
+ dest_image->bits.bits,
+ src_image->bits.rowstride,
+ dest_image->bits.rowstride,
+ PIXMAN_FORMAT_BPP (src_image->bits.format),
+ PIXMAN_FORMAT_BPP (dest_image->bits.format),
+ src_x, src_y, dest_x, dest_y, width, height);
+}
+
static void
vmx_composite_src_x888_8888 (pixman_implementation_t *imp,
pixman_composite_info_t *info)
@@ -3227,6 +3363,7 @@ FAST_NEAREST_MAINLOOP (vmx_8888_8888_normal_OVER,
static const pixman_fast_path_t vmx_fast_paths[] =
{
+ /* PIXMAN_OP_OVER */
PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, a8r8g8b8, vmx_composite_over_8888_8888),
PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, x8r8g8b8, vmx_composite_over_8888_8888),
PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, a8b8g8r8, vmx_composite_over_8888_8888),
@@ -3235,6 +3372,8 @@ static const pixman_fast_path_t vmx_fast_paths[] =
PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, x8r8g8b8, vmx_composite_over_n_8888_8888_ca),
PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, a8b8g8r8, vmx_composite_over_n_8888_8888_ca),
PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, x8b8g8r8, vmx_composite_over_n_8888_8888_ca),
+ PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, null, x8r8g8b8, vmx_composite_copy_area),
+ PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, null, x8b8g8r8, vmx_composite_copy_area),
/* PIXMAN_OP_ADD */
PIXMAN_STD_FAST_PATH (ADD, a8, null, a8, vmx_composite_add_8_8),
@@ -3244,6 +3383,14 @@ static const pixman_fast_path_t vmx_fast_paths[] =
/* PIXMAN_OP_SRC */
PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, a8r8g8b8, vmx_composite_src_x888_8888),
PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, a8b8g8r8, vmx_composite_src_x888_8888),
+ PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, a8r8g8b8, vmx_composite_copy_area),
+ PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, a8b8g8r8, vmx_composite_copy_area),
+ PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, x8r8g8b8, vmx_composite_copy_area),
+ PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, x8b8g8r8, vmx_composite_copy_area),
+ PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, x8r8g8b8, vmx_composite_copy_area),
+ PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, x8b8g8r8, vmx_composite_copy_area),
+ PIXMAN_STD_FAST_PATH (SRC, r5g6b5, null, r5g6b5, vmx_composite_copy_area),
+ PIXMAN_STD_FAST_PATH (SRC, b5g6r5, null, b5g6r5, vmx_composite_copy_area),
SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, vmx_8888_8888),
SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8, vmx_8888_8888),
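A call that exercises one of the new SRC entries looks like the sketch below. This is an illustrative, self-contained usage example against pixman's public API, not part of the patch; on a VMX-capable PowerPC build, a maskless a8r8g8b8-to-a8r8g8b8 SRC composite should be dispatched to vmx_composite_copy_area:

    #include <pixman.h>
    #include <stdint.h>
    #include <string.h>

    int
    main (void)
    {
        static uint32_t src_bits[64 * 64];
        static uint32_t dst_bits[64 * 64];
        pixman_image_t *src, *dst;

        memset (src_bits, 0xff, sizeof src_bits);

        /* pixman takes the stride in bytes: 64 pixels * 4 bytes. */
        src = pixman_image_create_bits (PIXMAN_a8r8g8b8, 64, 64,
                                        src_bits, 64 * 4);
        dst = pixman_image_create_bits (PIXMAN_a8r8g8b8, 64, 64,
                                        dst_bits, 64 * 4);

        /* No mask and matching formats: hits the SRC copy-area path. */
        pixman_image_composite32 (PIXMAN_OP_SRC, src, NULL, dst,
                                  0, 0, 0, 0, 0, 0, 64, 64);

        pixman_image_unref (src);
        pixman_image_unref (dst);
        return 0;
    }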
@@ -3385,6 +3532,7 @@ _pixman_implementation_create_vmx (pixman_implementation_t *fallback)
imp->combine_32_ca[PIXMAN_OP_XOR] = vmx_combine_xor_ca;
imp->combine_32_ca[PIXMAN_OP_ADD] = vmx_combine_add_ca;
+ imp->blt = vmx_blt;
imp->fill = vmx_fill;
imp->iter_info = vmx_iters;
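
Hooking imp->blt gives the VMX implementation first shot at rectangle copies; when vmx_blt returns FALSE (mismatched or unsupported bpp), pixman falls back down the implementation chain to the next blt, ultimately the general C path, so correctness never depends on the fast path accepting the operation.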