diff options
author | Oded Gabbay <oded.gabbay@gmail.com> | 2015-06-29 13:36:00 +0300 |
---|---|---|
committer | Oded Gabbay <oded.gabbay@gmail.com> | 2015-08-25 18:35:56 +0300 |
commit | 6577035966cb0c5a8a25f61d6bc7cb11c6ce60e7 (patch) | |
tree | 1a3643ce7fd7bc5d2d209adab80bda9117a4d022 | |
parent | 30f7415ab2ebadec2368e3b8907292b55feb54b0 (diff) |
vmx: implement fast path vmx_blt and vmx_composite_copy_area
No changes were observed when running cairo trimmed benchmarks.
Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
-rw-r--r-- | pixman/pixman-vmx.c | 148 |
1 files changed, 148 insertions, 0 deletions
diff --git a/pixman/pixman-vmx.c b/pixman/pixman-vmx.c index 86a90af..2da204f 100644 --- a/pixman/pixman-vmx.c +++ b/pixman/pixman-vmx.c @@ -2836,6 +2836,142 @@ vmx_fill (pixman_implementation_t *imp, return TRUE; } +static pixman_bool_t +vmx_blt (pixman_implementation_t *imp, + uint32_t * src_bits, + uint32_t * dst_bits, + int src_stride, + int dst_stride, + int src_bpp, + int dst_bpp, + int src_x, + int src_y, + int dest_x, + int dest_y, + int width, + int height) +{ + uint8_t * src_bytes; + uint8_t * dst_bytes; + int byte_width; + + if (src_bpp != dst_bpp) + return FALSE; + + if (src_bpp == 16) + { + src_stride = src_stride * (int) sizeof (uint32_t) / 2; + dst_stride = dst_stride * (int) sizeof (uint32_t) / 2; + src_bytes =(uint8_t *)(((uint16_t *)src_bits) + src_stride * (src_y) + (src_x)); + dst_bytes = (uint8_t *)(((uint16_t *)dst_bits) + dst_stride * (dest_y) + (dest_x)); + byte_width = 2 * width; + src_stride *= 2; + dst_stride *= 2; + } + else if (src_bpp == 32) + { + src_stride = src_stride * (int) sizeof (uint32_t) / 4; + dst_stride = dst_stride * (int) sizeof (uint32_t) / 4; + src_bytes = (uint8_t *)(((uint32_t *)src_bits) + src_stride * (src_y) + (src_x)); + dst_bytes = (uint8_t *)(((uint32_t *)dst_bits) + dst_stride * (dest_y) + (dest_x)); + byte_width = 4 * width; + src_stride *= 4; + dst_stride *= 4; + } + else + { + return FALSE; + } + + while (height--) + { + int w; + uint8_t *s = src_bytes; + uint8_t *d = dst_bytes; + src_bytes += src_stride; + dst_bytes += dst_stride; + w = byte_width; + + while (w >= 2 && ((uintptr_t)d & 3)) + { + *(uint16_t *)d = *(uint16_t *)s; + w -= 2; + s += 2; + d += 2; + } + + while (w >= 4 && ((uintptr_t)d & 15)) + { + *(uint32_t *)d = *(uint32_t *)s; + + w -= 4; + s += 4; + d += 4; + } + + while (w >= 64) + { + vector unsigned int vmx0, vmx1, vmx2, vmx3; + + vmx0 = load_128_unaligned ((uint32_t*) s); + vmx1 = load_128_unaligned ((uint32_t*)(s + 16)); + vmx2 = load_128_unaligned ((uint32_t*)(s + 32)); + vmx3 = load_128_unaligned ((uint32_t*)(s + 48)); + + save_128_aligned ((uint32_t*)(d), vmx0); + save_128_aligned ((uint32_t*)(d + 16), vmx1); + save_128_aligned ((uint32_t*)(d + 32), vmx2); + save_128_aligned ((uint32_t*)(d + 48), vmx3); + + s += 64; + d += 64; + w -= 64; + } + + while (w >= 16) + { + save_128_aligned ((uint32_t*) d, load_128_unaligned ((uint32_t*) s)); + + w -= 16; + d += 16; + s += 16; + } + + while (w >= 4) + { + *(uint32_t *)d = *(uint32_t *)s; + + w -= 4; + s += 4; + d += 4; + } + + if (w >= 2) + { + *(uint16_t *)d = *(uint16_t *)s; + w -= 2; + s += 2; + d += 2; + } + } + + return TRUE; +} + +static void +vmx_composite_copy_area (pixman_implementation_t *imp, + pixman_composite_info_t *info) +{ + PIXMAN_COMPOSITE_ARGS (info); + vmx_blt (imp, src_image->bits.bits, + dest_image->bits.bits, + src_image->bits.rowstride, + dest_image->bits.rowstride, + PIXMAN_FORMAT_BPP (src_image->bits.format), + PIXMAN_FORMAT_BPP (dest_image->bits.format), + src_x, src_y, dest_x, dest_y, width, height); +} + static void vmx_composite_src_x888_8888 (pixman_implementation_t *imp, pixman_composite_info_t *info) @@ -3227,6 +3363,7 @@ FAST_NEAREST_MAINLOOP (vmx_8888_8888_normal_OVER, static const pixman_fast_path_t vmx_fast_paths[] = { + /* PIXMAN_OP_OVER */ PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, a8r8g8b8, vmx_composite_over_8888_8888), PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, x8r8g8b8, vmx_composite_over_8888_8888), PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, a8b8g8r8, vmx_composite_over_8888_8888), @@ -3235,6 +3372,8 @@ static const pixman_fast_path_t vmx_fast_paths[] = PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, x8r8g8b8, vmx_composite_over_n_8888_8888_ca), PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, a8b8g8r8, vmx_composite_over_n_8888_8888_ca), PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, x8b8g8r8, vmx_composite_over_n_8888_8888_ca), + PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, null, x8r8g8b8, vmx_composite_copy_area), + PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, null, x8b8g8r8, vmx_composite_copy_area), /* PIXMAN_OP_ADD */ PIXMAN_STD_FAST_PATH (ADD, a8, null, a8, vmx_composite_add_8_8), @@ -3244,6 +3383,14 @@ static const pixman_fast_path_t vmx_fast_paths[] = /* PIXMAN_OP_SRC */ PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, a8r8g8b8, vmx_composite_src_x888_8888), PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, a8b8g8r8, vmx_composite_src_x888_8888), + PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, a8r8g8b8, vmx_composite_copy_area), + PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, a8b8g8r8, vmx_composite_copy_area), + PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, x8r8g8b8, vmx_composite_copy_area), + PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, x8b8g8r8, vmx_composite_copy_area), + PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, x8r8g8b8, vmx_composite_copy_area), + PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, x8b8g8r8, vmx_composite_copy_area), + PIXMAN_STD_FAST_PATH (SRC, r5g6b5, null, r5g6b5, vmx_composite_copy_area), + PIXMAN_STD_FAST_PATH (SRC, b5g6r5, null, b5g6r5, vmx_composite_copy_area), SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, vmx_8888_8888), SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8, vmx_8888_8888), @@ -3385,6 +3532,7 @@ _pixman_implementation_create_vmx (pixman_implementation_t *fallback) imp->combine_32_ca[PIXMAN_OP_XOR] = vmx_combine_xor_ca; imp->combine_32_ca[PIXMAN_OP_ADD] = vmx_combine_add_ca; + imp->blt = vmx_blt; imp->fill = vmx_fill; imp->iter_info = vmx_iters; |