summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorNemanja Lukic <nemanja.lukic@rt-rk.com>2012-02-29 12:04:33 +0100
committerSøren Sandmann Pedersen <ssp@redhat.com>2012-03-04 01:09:56 -0500
commit304f57644ac6a991c6e538675de935356252c0a5 (patch)
tree040dccf50e3ba110f8a2bfbf4d2491af3b16d361
parent999e72b80bd5e3fab5f45b6ad19511389b58d9ab (diff)
MIPS: DSPr2: Added mips_dspr2_blt and mips_dspr2_fill routines.
Performance numbers before/after on MIPS-74kc @ 1GHz Referent (before): lowlevel-blt-bench: src_n_0565 = L1: 238.14 L2: 233.15 M: 57.88 ( 77.23%) HT: 53.22 VT: 49.99 R: 47.73 RT: 24.79 ( 91Kops/s) src_n_8888 = L1: 190.19 L2: 187.57 M: 28.94 ( 77.23%) HT: 27.91 VT: 27.33 R: 26.64 RT: 14.68 ( 77Kops/s) cairo-perf-trace: [ # ] backend test min(s) median(s) stddev. count [ # ] image: pixman 0.25.1 [ 0] image gnome-system-monitor 268.460 269.712 0.22% 6/6 Optimized: lowlevel-blt-bench: src_n_0565 = L1:1081.39 L2: 258.22 M:189.59 (252.91%) HT: 60.23 VT: 55.01 R: 53.44 RT: 23.68 ( 89Kops/s) src_n_8888 = L1: 653.46 L2: 113.55 M:135.26 (360.86%) HT: 38.99 VT: 37.38 R: 34.95 RT: 18.67 ( 84Kops/s) cairo-perf-trace: [ # ] backend test min(s) median(s) stddev. count [ # ] image: pixman 0.25.1 [ 0] image gnome-system-monitor 246.565 246.706 0.04% 6/6
-rw-r--r--pixman/pixman-mips-dspr2-asm.S105
-rw-r--r--pixman/pixman-mips-dspr2.c163
-rw-r--r--pixman/pixman-mips-dspr2.h4
3 files changed, 272 insertions, 0 deletions
diff --git a/pixman/pixman-mips-dspr2-asm.S b/pixman/pixman-mips-dspr2-asm.S
index 0a4c87e..f1087a7 100644
--- a/pixman/pixman-mips-dspr2-asm.S
+++ b/pixman/pixman-mips-dspr2-asm.S
@@ -31,6 +31,111 @@
#include "pixman-mips-dspr2-asm.h"
+LEAF_MIPS_DSPR2(pixman_fill_buff16_mips)
+/*
+ * a0 - *dest
+ * a1 - count (bytes)
+ * a2 - value to fill buffer with
+ */
+
+ beqz a1, 3f
+ andi t1, a0, 0x0002
+ beqz t1, 0f /* check if address is 4-byte aligned */
+ nop
+ sh a2, 0(a0)
+ addiu a0, a0, 2
+ addiu a1, a1, -2
+0:
+ srl t1, a1, 5 /* t1 how many multiples of 32 bytes */
+ replv.ph a2, a2 /* replicate fill value (16bit) in a2 */
+ beqz t1, 2f
+ nop
+1:
+ addiu t1, t1, -1
+ beqz t1, 11f
+ addiu a1, a1, -32
+ pref 30, 32(a0)
+ sw a2, 0(a0)
+ sw a2, 4(a0)
+ sw a2, 8(a0)
+ sw a2, 12(a0)
+ sw a2, 16(a0)
+ sw a2, 20(a0)
+ sw a2, 24(a0)
+ sw a2, 28(a0)
+ b 1b
+ addiu a0, a0, 32
+11:
+ sw a2, 0(a0)
+ sw a2, 4(a0)
+ sw a2, 8(a0)
+ sw a2, 12(a0)
+ sw a2, 16(a0)
+ sw a2, 20(a0)
+ sw a2, 24(a0)
+ sw a2, 28(a0)
+ addiu a0, a0, 32
+2:
+ blez a1, 3f
+ addiu a1, a1, -2
+ sh a2, 0(a0)
+ b 2b
+ addiu a0, a0, 2
+3:
+ jr ra
+ nop
+
+END(pixman_fill_buff16_mips)
+
+LEAF_MIPS32R2(pixman_fill_buff32_mips)
+/*
+ * a0 - *dest
+ * a1 - count (bytes)
+ * a2 - value to fill buffer with
+ */
+
+ beqz a1, 3f
+ nop
+ srl t1, a1, 5 /* t1 how many multiples of 32 bytes */
+ beqz t1, 2f
+ nop
+1:
+ addiu t1, t1, -1
+ beqz t1, 11f
+ addiu a1, a1, -32
+ pref 30, 32(a0)
+ sw a2, 0(a0)
+ sw a2, 4(a0)
+ sw a2, 8(a0)
+ sw a2, 12(a0)
+ sw a2, 16(a0)
+ sw a2, 20(a0)
+ sw a2, 24(a0)
+ sw a2, 28(a0)
+ b 1b
+ addiu a0, a0, 32
+11:
+ sw a2, 0(a0)
+ sw a2, 4(a0)
+ sw a2, 8(a0)
+ sw a2, 12(a0)
+ sw a2, 16(a0)
+ sw a2, 20(a0)
+ sw a2, 24(a0)
+ sw a2, 28(a0)
+ addiu a0, a0, 32
+2:
+ blez a1, 3f
+ addiu a1, a1, -4
+ sw a2, 0(a0)
+ b 2b
+ addiu a0, a0, 4
+3:
+ jr ra
+ nop
+
+END(pixman_fill_buff32_mips)
+
LEAF_MIPS_DSPR2(pixman_composite_src_8888_0565_asm_mips)
/*
* a0 - dst (r5g6b5)
diff --git a/pixman/pixman-mips-dspr2.c b/pixman/pixman-mips-dspr2.c
index e331853..2beada3 100644
--- a/pixman/pixman-mips-dspr2.c
+++ b/pixman/pixman-mips-dspr2.c
@@ -49,6 +49,119 @@ PIXMAN_MIPS_BIND_FAST_PATH_SRC_DST (DO_FAST_MEMCPY, src_8888_8888,
PIXMAN_MIPS_BIND_FAST_PATH_SRC_DST (DO_FAST_MEMCPY, src_0888_0888,
uint8_t, 3, uint8_t, 3)
+static pixman_bool_t
+pixman_fill_mips (uint32_t *bits,
+ int stride,
+ int bpp,
+ int x,
+ int y,
+ int width,
+ int height,
+ uint32_t _xor)
+{
+ uint8_t *byte_line;
+ uint32_t byte_width;
+ switch (bpp)
+ {
+ case 16:
+ stride = stride * (int) sizeof (uint32_t) / 2;
+ byte_line = (uint8_t *)(((uint16_t *)bits) + stride * y + x);
+ byte_width = width * 2;
+ stride *= 2;
+
+ while (height--)
+ {
+ uint8_t *dst = byte_line;
+ byte_line += stride;
+ pixman_fill_buff16_mips (dst, byte_width, _xor & 0xffff);
+ }
+ return TRUE;
+ case 32:
+ stride = stride * (int) sizeof (uint32_t) / 4;
+ byte_line = (uint8_t *)(((uint32_t *)bits) + stride * y + x);
+ byte_width = width * 4;
+ stride *= 4;
+
+ while (height--)
+ {
+ uint8_t *dst = byte_line;
+ byte_line += stride;
+ pixman_fill_buff32_mips (dst, byte_width, _xor);
+ }
+ return TRUE;
+ default:
+ return FALSE;
+ }
+}
+
+static pixman_bool_t
+pixman_blt_mips (uint32_t *src_bits,
+ uint32_t *dst_bits,
+ int src_stride,
+ int dst_stride,
+ int src_bpp,
+ int dst_bpp,
+ int src_x,
+ int src_y,
+ int dest_x,
+ int dest_y,
+ int width,
+ int height)
+{
+ if (src_bpp != dst_bpp)
+ return FALSE;
+
+ uint8_t *src_bytes;
+ uint8_t *dst_bytes;
+ uint32_t byte_width;
+
+ switch (src_bpp)
+ {
+ case 16:
+ src_stride = src_stride * (int) sizeof (uint32_t) / 2;
+ dst_stride = dst_stride * (int) sizeof (uint32_t) / 2;
+ src_bytes =(uint8_t *)(((uint16_t *)src_bits)
+ + src_stride * (src_y) + (src_x));
+ dst_bytes = (uint8_t *)(((uint16_t *)dst_bits)
+ + dst_stride * (dest_y) + (dest_x));
+ byte_width = width * 2;
+ src_stride *= 2;
+ dst_stride *= 2;
+
+ while (height--)
+ {
+ uint8_t *src = src_bytes;
+ uint8_t *dst = dst_bytes;
+ src_bytes += src_stride;
+ dst_bytes += dst_stride;
+ pixman_mips_fast_memcpy (dst, src, byte_width);
+ }
+ return TRUE;
+ case 32:
+ src_stride = src_stride * (int) sizeof (uint32_t) / 4;
+ dst_stride = dst_stride * (int) sizeof (uint32_t) / 4;
+ src_bytes = (uint8_t *)(((uint32_t *)src_bits)
+ + src_stride * (src_y) + (src_x));
+ dst_bytes = (uint8_t *)(((uint32_t *)dst_bits)
+ + dst_stride * (dest_y) + (dest_x));
+ byte_width = width * 4;
+ src_stride *= 4;
+ dst_stride *= 4;
+
+ while (height--)
+ {
+ uint8_t *src = src_bytes;
+ uint8_t *dst = dst_bytes;
+ src_bytes += src_stride;
+ dst_bytes += dst_stride;
+ pixman_mips_fast_memcpy (dst, src, byte_width);
+ }
+ return TRUE;
+ default:
+ return FALSE;
+ }
+}
+
static const pixman_fast_path_t mips_dspr2_fast_paths[] =
{
PIXMAN_STD_FAST_PATH (SRC, r5g6b5, null, r5g6b5, mips_composite_src_0565_0565),
@@ -74,11 +187,61 @@ static const pixman_fast_path_t mips_dspr2_fast_paths[] =
{ PIXMAN_OP_NONE },
};
+static pixman_bool_t
+mips_dspr2_blt (pixman_implementation_t *imp,
+ uint32_t * src_bits,
+ uint32_t * dst_bits,
+ int src_stride,
+ int dst_stride,
+ int src_bpp,
+ int dst_bpp,
+ int src_x,
+ int src_y,
+ int dest_x,
+ int dest_y,
+ int width,
+ int height)
+{
+ if (!pixman_blt_mips (
+ src_bits, dst_bits, src_stride, dst_stride, src_bpp, dst_bpp,
+ src_x, src_y, dest_x, dest_y, width, height))
+
+ {
+ return _pixman_implementation_blt (
+ imp->delegate,
+ src_bits, dst_bits, src_stride, dst_stride, src_bpp, dst_bpp,
+ src_x, src_y, dest_x, dest_y, width, height);
+ }
+
+ return TRUE;
+}
+
+static pixman_bool_t
+mips_dspr2_fill (pixman_implementation_t *imp,
+ uint32_t * bits,
+ int stride,
+ int bpp,
+ int x,
+ int y,
+ int width,
+ int height,
+ uint32_t xor)
+{
+ if (pixman_fill_mips (bits, stride, bpp, x, y, width, height, xor))
+ return TRUE;
+
+ return _pixman_implementation_fill (
+ imp->delegate, bits, stride, bpp, x, y, width, height, xor);
+}
+
pixman_implementation_t *
_pixman_implementation_create_mips_dspr2 (pixman_implementation_t *fallback)
{
pixman_implementation_t *imp =
_pixman_implementation_create (fallback, mips_dspr2_fast_paths);
+ imp->blt = mips_dspr2_blt;
+ imp->fill = mips_dspr2_fill;
+
return imp;
}
diff --git a/pixman/pixman-mips-dspr2.h b/pixman/pixman-mips-dspr2.h
index 449c42a..a40e7c8 100644
--- a/pixman/pixman-mips-dspr2.h
+++ b/pixman/pixman-mips-dspr2.h
@@ -41,6 +41,10 @@
void
pixman_mips_fast_memcpy (void *dst, void *src, uint32_t n_bytes);
+void
+pixman_fill_buff16_mips (void *dst, uint32_t n_bytes, uint16_t value);
+void
+pixman_fill_buff32_mips (void *dst, uint32_t n_bytes, uint32_t value);
/****************************************************************/