summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSøren Sandmann Pedersen <ssp@redhat.com>2010-03-23 11:00:04 -0400
committerSøren Sandmann Pedersen <ssp@redhat.com>2010-03-23 11:00:04 -0400
commit27a9f0468bdfa257e70270bf9addd5ad064f918b (patch)
treec2b1c350d81f9700192767f249ae79412f9754e2
parent69f1ec9a7827aeb522fcae99846237ef0f896e7b (diff)
parent3ef203331f124bf137c6e0c8d5516b1209c92dd9 (diff)
Merge remote branch 'ssvb/arm-fixes'
-rw-r--r--configure.ac54
-rw-r--r--pixman/Makefile.am9
-rw-r--r--pixman/pixman-arm-common.h273
-rw-r--r--pixman/pixman-arm-neon-asm.S16
-rw-r--r--pixman/pixman-arm-neon.c304
-rw-r--r--pixman/pixman-arm-simd-asm.S330
-rw-r--r--pixman/pixman-arm-simd.c171
7 files changed, 769 insertions, 388 deletions
diff --git a/configure.ac b/configure.ac
index fc3ee24..ed7d16a 100644
--- a/configure.ac
+++ b/configure.ac
@@ -361,30 +361,24 @@ AC_SUBST(VMX_CFLAGS)
AM_CONDITIONAL(USE_VMX, test $have_vmx_intrinsics = yes)
-dnl ===========================================================================
-dnl Check for ARM SIMD instructions
-ARM_SIMD_CFLAGS=""
-
+dnl ==========================================================================
+dnl Check if assembler is gas compatible and supports ARM SIMD instructions
have_arm_simd=no
AC_MSG_CHECKING(whether to use ARM SIMD assembler)
-# check with default CFLAGS in case the toolchain turns on a sufficiently recent -mcpu=
-AC_COMPILE_IFELSE([
-int main () {
- asm("uqadd8 r1, r1, r2");
- return 0;
-}], have_arm_simd=yes,
- # check again with an explicit -mcpu= in case the toolchain defaults to an
- # older one; note that uqadd8 isn't available in Thumb mode on arm1136j-s
- # so we force ARM mode
- ARM_SIMD_CFLAGS="-mcpu=arm1136j-s -marm"
- xserver_save_CFLAGS=$CFLAGS
- CFLAGS="$ARM_SIMD_CFLAGS $CFLAGS"
- AC_COMPILE_IFELSE([
- int main () {
- asm("uqadd8 r1, r1, r2");
- return 0;
- }], have_arm_simd=yes)
- CFLAGS=$xserver_save_CFLAGS)
+xserver_save_CFLAGS=$CFLAGS
+CFLAGS="-x assembler-with-cpp $CFLAGS"
+AC_COMPILE_IFELSE([[
+.text
+.arch armv6
+.object_arch armv4
+.arm
+.altmacro
+#ifndef __ARM_EABI__
+#error EABI is required (to be sure that calling conventions are compatible)
+#endif
+pld [r0]
+uqadd8 r0, r0, r0]], have_arm_simd=yes)
+CFLAGS=$xserver_save_CFLAGS
AC_ARG_ENABLE(arm-simd,
[AC_HELP_STRING([--disable-arm-simd],
@@ -396,29 +390,29 @@ if test $enable_arm_simd = no ; then
fi
if test $have_arm_simd = yes ; then
- AC_DEFINE(USE_ARM_SIMD, 1, [use ARM SIMD compiler intrinsics])
-else
- ARM_SIMD_CFLAGS=
+ AC_DEFINE(USE_ARM_SIMD, 1, [use ARM SIMD assembly optimizations])
fi
+AM_CONDITIONAL(USE_ARM_SIMD, test $have_arm_simd = yes)
+
AC_MSG_RESULT($have_arm_simd)
if test $enable_arm_simd = yes && test $have_arm_simd = no ; then
AC_MSG_ERROR([ARM SIMD intrinsics not detected])
fi
-AC_SUBST(ARM_SIMD_CFLAGS)
-
-AM_CONDITIONAL(USE_ARM_SIMD, test $have_arm_simd = yes)
-
dnl ==========================================================================
dnl Check if assembler is gas compatible and supports NEON instructions
have_arm_neon=no
AC_MSG_CHECKING(whether to use ARM NEON assembler)
xserver_save_CFLAGS=$CFLAGS
-CFLAGS="-x assembler-with-cpp"
+CFLAGS="-x assembler-with-cpp $CFLAGS"
AC_COMPILE_IFELSE([[
.text
.fpu neon
+.arch armv7a
+.object_arch armv4
+.eabi_attribute 10, 0
+.arm
.altmacro
#ifndef __ARM_EABI__
#error EABI is required (to be sure that calling conventions are compatible)
diff --git a/pixman/Makefile.am b/pixman/Makefile.am
index 5a0e7a9..66ad7f0 100644
--- a/pixman/Makefile.am
+++ b/pixman/Makefile.am
@@ -97,12 +97,14 @@ endif
if USE_ARM_SIMD
noinst_LTLIBRARIES += libpixman-arm-simd.la
libpixman_arm_simd_la_SOURCES = \
- pixman-arm-simd.c
-libpixman_arm_simd_la_CFLAGS = $(DEP_CFLAGS) $(ARM_SIMD_CFLAGS)
+ pixman-arm-simd.c \
+ pixman-arm-common.h \
+ pixman-arm-simd-asm.S
+libpixman_arm_simd_la_CFLAGS = $(DEP_CFLAGS)
libpixman_arm_simd_la_LIBADD = $(DEP_LIBS)
libpixman_1_la_LIBADD += libpixman-arm-simd.la
-ASM_CFLAGS_arm_simd=$(ARM_SIMD_CFLAGS)
+ASM_CFLAGS_arm_simd=
endif
# arm neon code
@@ -110,6 +112,7 @@ if USE_ARM_NEON
noinst_LTLIBRARIES += libpixman-arm-neon.la
libpixman_arm_neon_la_SOURCES = \
pixman-arm-neon.c \
+ pixman-arm-common.h \
pixman-arm-neon-asm.S \
pixman-arm-neon-asm.h
libpixman_arm_neon_la_CFLAGS = $(DEP_CFLAGS)
diff --git a/pixman/pixman-arm-common.h b/pixman/pixman-arm-common.h
new file mode 100644
index 0000000..8d432b1
--- /dev/null
+++ b/pixman/pixman-arm-common.h
@@ -0,0 +1,273 @@
+/*
+ * Copyright © 2010 Nokia Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Author: Siarhei Siamashka (siarhei.siamashka@nokia.com)
+ */
+
+#ifndef PIXMAN_ARM_COMMON_H
+#define PIXMAN_ARM_COMMON_H
+
+/* Define some macros which can expand into proxy functions between
+ * ARM assembly optimized functions and the rest of pixman fast path API.
+ *
+ * All the low level ARM assembly functions have to use ARM EABI
+ * calling convention and take up to 8 arguments:
+ * width, height, dst, dst_stride, src, src_stride, mask, mask_stride
+ *
+ * The arguments are ordered with the most important coming first (the
+ * first 4 arguments are passed to function in registers, the rest are
+ * on stack). The last arguments are optional, for example if the
+ * function is not using mask, then 'mask' and 'mask_stride' can be
+ * omitted when doing a function call.
+ *
+ * Arguments 'src' and 'mask' contain either a pointer to the top left
+ * pixel of the composited rectangle or a pixel color value depending
+ * on the function type. In the case of just a color value (solid source
+ * or mask), the corresponding stride argument is unused.
+ */
+
+#define PIXMAN_ARM_BIND_FAST_PATH_SRC_DST(cputype, name, \
+ src_type, src_cnt, \
+ dst_type, dst_cnt) \
+void \
+pixman_composite_##name##_asm_##cputype (int32_t w, \
+ int32_t h, \
+ dst_type *dst, \
+ int32_t dst_stride, \
+ src_type *src, \
+ int32_t src_stride); \
+ \
+static void \
+cputype##_composite_##name (pixman_implementation_t *imp, \
+ pixman_op_t op, \
+ pixman_image_t * src_image, \
+ pixman_image_t * mask_image, \
+ pixman_image_t * dst_image, \
+ int32_t src_x, \
+ int32_t src_y, \
+ int32_t mask_x, \
+ int32_t mask_y, \
+ int32_t dest_x, \
+ int32_t dest_y, \
+ int32_t width, \
+ int32_t height) \
+{ \
+ dst_type *dst_line; \
+ src_type *src_line; \
+ int32_t dst_stride, src_stride; \
+ \
+ PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, src_type, \
+ src_stride, src_line, src_cnt); \
+ PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, dst_type, \
+ dst_stride, dst_line, dst_cnt); \
+ \
+ pixman_composite_##name##_asm_##cputype (width, height, \
+ dst_line, dst_stride, \
+ src_line, src_stride); \
+}
+
+#define PIXMAN_ARM_BIND_FAST_PATH_N_DST(cputype, name, \
+ dst_type, dst_cnt) \
+void \
+pixman_composite_##name##_asm_##cputype (int32_t w, \
+ int32_t h, \
+ dst_type *dst, \
+ int32_t dst_stride, \
+ uint32_t src); \
+ \
+static void \
+cputype##_composite_##name (pixman_implementation_t *imp, \
+ pixman_op_t op, \
+ pixman_image_t * src_image, \
+ pixman_image_t * mask_image, \
+ pixman_image_t * dst_image, \
+ int32_t src_x, \
+ int32_t src_y, \
+ int32_t mask_x, \
+ int32_t mask_y, \
+ int32_t dest_x, \
+ int32_t dest_y, \
+ int32_t width, \
+ int32_t height) \
+{ \
+ dst_type *dst_line; \
+ int32_t dst_stride; \
+ uint32_t src; \
+ \
+ src = _pixman_image_get_solid (src_image, dst_image->bits.format); \
+ \
+ if (src == 0) \
+ return; \
+ \
+ PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, dst_type, \
+ dst_stride, dst_line, dst_cnt); \
+ \
+ pixman_composite_##name##_asm_##cputype (width, height, \
+ dst_line, dst_stride, \
+ src); \
+}
+
+#define PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST(cputype, name, \
+ mask_type, mask_cnt, \
+ dst_type, dst_cnt) \
+void \
+pixman_composite_##name##_asm_##cputype (int32_t w, \
+ int32_t h, \
+ dst_type *dst, \
+ int32_t dst_stride, \
+ uint32_t src, \
+ int32_t unused, \
+ mask_type *mask, \
+ int32_t mask_stride); \
+ \
+static void \
+cputype##_composite_##name (pixman_implementation_t *imp, \
+ pixman_op_t op, \
+ pixman_image_t * src_image, \
+ pixman_image_t * mask_image, \
+ pixman_image_t * dst_image, \
+ int32_t src_x, \
+ int32_t src_y, \
+ int32_t mask_x, \
+ int32_t mask_y, \
+ int32_t dest_x, \
+ int32_t dest_y, \
+ int32_t width, \
+ int32_t height) \
+{ \
+ dst_type *dst_line; \
+ mask_type *mask_line; \
+ int32_t dst_stride, mask_stride; \
+ uint32_t src; \
+ \
+ src = _pixman_image_get_solid (src_image, dst_image->bits.format); \
+ \
+ if (src == 0) \
+ return; \
+ \
+ PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, dst_type, \
+ dst_stride, dst_line, dst_cnt); \
+ PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, mask_type, \
+ mask_stride, mask_line, mask_cnt); \
+ \
+ pixman_composite_##name##_asm_##cputype (width, height, \
+ dst_line, dst_stride, \
+ src, 0, \
+ mask_line, mask_stride); \
+}
+
+#define PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST(cputype, name, \
+ src_type, src_cnt, \
+ dst_type, dst_cnt) \
+void \
+pixman_composite_##name##_asm_##cputype (int32_t w, \
+ int32_t h, \
+ dst_type *dst, \
+ int32_t dst_stride, \
+ src_type *src, \
+ int32_t src_stride, \
+ uint32_t mask); \
+ \
+static void \
+cputype##_composite_##name (pixman_implementation_t *imp, \
+ pixman_op_t op, \
+ pixman_image_t * src_image, \
+ pixman_image_t * mask_image, \
+ pixman_image_t * dst_image, \
+ int32_t src_x, \
+ int32_t src_y, \
+ int32_t mask_x, \
+ int32_t mask_y, \
+ int32_t dest_x, \
+ int32_t dest_y, \
+ int32_t width, \
+ int32_t height) \
+{ \
+ dst_type *dst_line; \
+ src_type *src_line; \
+ int32_t dst_stride, src_stride; \
+ uint32_t mask; \
+ \
+ mask = _pixman_image_get_solid (mask_image, dst_image->bits.format);\
+ \
+ if (mask == 0) \
+ return; \
+ \
+ PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, dst_type, \
+ dst_stride, dst_line, dst_cnt); \
+ PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, src_type, \
+ src_stride, src_line, src_cnt); \
+ \
+ pixman_composite_##name##_asm_##cputype (width, height, \
+ dst_line, dst_stride, \
+ src_line, src_stride, \
+ mask); \
+}
+
+#define PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST(cputype, name, \
+ src_type, src_cnt, \
+ mask_type, mask_cnt, \
+ dst_type, dst_cnt) \
+void \
+pixman_composite_##name##_asm_##cputype (int32_t w, \
+ int32_t h, \
+ dst_type *dst, \
+ int32_t dst_stride, \
+ src_type *src, \
+ int32_t src_stride, \
+ mask_type *mask, \
+ int32_t mask_stride); \
+ \
+static void \
+cputype##_composite_##name (pixman_implementation_t *imp, \
+ pixman_op_t op, \
+ pixman_image_t * src_image, \
+ pixman_image_t * mask_image, \
+ pixman_image_t * dst_image, \
+ int32_t src_x, \
+ int32_t src_y, \
+ int32_t mask_x, \
+ int32_t mask_y, \
+ int32_t dest_x, \
+ int32_t dest_y, \
+ int32_t width, \
+ int32_t height) \
+{ \
+ dst_type *dst_line; \
+ src_type *src_line; \
+ mask_type *mask_line; \
+ int32_t dst_stride, src_stride, mask_stride; \
+ \
+ PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, dst_type, \
+ dst_stride, dst_line, dst_cnt); \
+ PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, src_type, \
+ src_stride, src_line, src_cnt); \
+ PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, mask_type, \
+ mask_stride, mask_line, mask_cnt); \
+ \
+ pixman_composite_##name##_asm_##cputype (width, height, \
+ dst_line, dst_stride, \
+ src_line, src_stride, \
+ mask_line, mask_stride); \
+}
+
+#endif
diff --git a/pixman/pixman-arm-neon-asm.S b/pixman/pixman-arm-neon-asm.S
index eb8cc4c..51bc347 100644
--- a/pixman/pixman-arm-neon-asm.S
+++ b/pixman/pixman-arm-neon-asm.S
@@ -42,6 +42,10 @@
.text
.fpu neon
.arch armv7a
+ .object_arch armv4
+ .eabi_attribute 10, 0 /* suppress Tag_FP_arch */
+ .eabi_attribute 12, 0 /* suppress Tag_Advanced_SIMD_arch */
+ .arm
.altmacro
#include "pixman-arm-neon-asm.h"
@@ -872,8 +876,8 @@ generate_composite_function \
vsli.u64 d0, d0, #8
vsli.u64 d0, d0, #16
vsli.u64 d0, d0, #32
- vmov d1, d0
- vmov q1, q0
+ vorr d1, d0, d0
+ vorr q1, q0, q0
.endm
.macro pixman_composite_src_n_8_cleanup
@@ -911,8 +915,8 @@ generate_composite_function \
vld1.32 {d0[0]}, [DUMMY]
vsli.u64 d0, d0, #16
vsli.u64 d0, d0, #32
- vmov d1, d0
- vmov q1, q0
+ vorr d1, d0, d0
+ vorr q1, q0, q0
.endm
.macro pixman_composite_src_n_0565_cleanup
@@ -949,8 +953,8 @@ generate_composite_function \
add DUMMY, sp, #ARGS_STACK_OFFSET
vld1.32 {d0[0]}, [DUMMY]
vsli.u64 d0, d0, #32
- vmov d1, d0
- vmov q1, q0
+ vorr d1, d0, d0
+ vorr q1, q0, q0
.endm
.macro pixman_composite_src_n_8888_cleanup
diff --git a/pixman/pixman-arm-neon.c b/pixman/pixman-arm-neon.c
index 24ceeeb..6808b36 100644
--- a/pixman/pixman-arm-neon.c
+++ b/pixman/pixman-arm-neon.c
@@ -32,254 +32,62 @@
#include <string.h>
#include "pixman-private.h"
-
-#define BIND_SRC_NULL_DST(name, src_type, src_cnt, dst_type, dst_cnt) \
-void \
-pixman_composite_##name##_asm_neon (int32_t w, \
- int32_t h, \
- dst_type *dst, \
- int32_t dst_stride, \
- src_type *src, \
- int32_t src_stride); \
- \
-static void \
-neon_composite_##name (pixman_implementation_t *imp, \
- pixman_op_t op, \
- pixman_image_t * src_image, \
- pixman_image_t * mask_image, \
- pixman_image_t * dst_image, \
- int32_t src_x, \
- int32_t src_y, \
- int32_t mask_x, \
- int32_t mask_y, \
- int32_t dest_x, \
- int32_t dest_y, \
- int32_t width, \
- int32_t height) \
-{ \
- dst_type *dst_line; \
- src_type *src_line; \
- int32_t dst_stride, src_stride; \
- \
- PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, src_type, \
- src_stride, src_line, src_cnt); \
- PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, dst_type, \
- dst_stride, dst_line, dst_cnt); \
- \
- pixman_composite_##name##_asm_neon (width, height, \
- dst_line, dst_stride, \
- src_line, src_stride); \
-}
-
-#define BIND_N_NULL_DST(name, dst_type, dst_cnt) \
-void \
-pixman_composite_##name##_asm_neon (int32_t w, \
- int32_t h, \
- dst_type *dst, \
- int32_t dst_stride, \
- uint32_t src); \
- \
-static void \
-neon_composite_##name (pixman_implementation_t *imp, \
- pixman_op_t op, \
- pixman_image_t * src_image, \
- pixman_image_t * mask_image, \
- pixman_image_t * dst_image, \
- int32_t src_x, \
- int32_t src_y, \
- int32_t mask_x, \
- int32_t mask_y, \
- int32_t dest_x, \
- int32_t dest_y, \
- int32_t width, \
- int32_t height) \
-{ \
- dst_type *dst_line; \
- int32_t dst_stride; \
- uint32_t src; \
- \
- src = _pixman_image_get_solid (src_image, dst_image->bits.format); \
- \
- if (src == 0) \
- return; \
- \
- PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, dst_type, \
- dst_stride, dst_line, dst_cnt); \
- \
- pixman_composite_##name##_asm_neon (width, height, \
- dst_line, dst_stride, \
- src); \
-}
-
-#define BIND_N_MASK_DST(name, mask_type, mask_cnt, dst_type, dst_cnt) \
-void \
-pixman_composite_##name##_asm_neon (int32_t w, \
- int32_t h, \
- dst_type *dst, \
- int32_t dst_stride, \
- uint32_t src, \
- int32_t unused, \
- mask_type *mask, \
- int32_t mask_stride); \
- \
-static void \
-neon_composite_##name (pixman_implementation_t *imp, \
- pixman_op_t op, \
- pixman_image_t * src_image, \
- pixman_image_t * mask_image, \
- pixman_image_t * dst_image, \
- int32_t src_x, \
- int32_t src_y, \
- int32_t mask_x, \
- int32_t mask_y, \
- int32_t dest_x, \
- int32_t dest_y, \
- int32_t width, \
- int32_t height) \
-{ \
- dst_type *dst_line; \
- mask_type *mask_line; \
- int32_t dst_stride, mask_stride; \
- uint32_t src; \
- \
- src = _pixman_image_get_solid (src_image, dst_image->bits.format); \
- \
- if (src == 0) \
- return; \
- \
- PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, dst_type, \
- dst_stride, dst_line, dst_cnt); \
- PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, mask_type, \
- mask_stride, mask_line, mask_cnt); \
- \
- pixman_composite_##name##_asm_neon (width, height, \
- dst_line, dst_stride, \
- src, 0, \
- mask_line, mask_stride); \
-}
-
-#define BIND_SRC_N_DST(name, src_type, src_cnt, dst_type, dst_cnt) \
-void \
-pixman_composite_##name##_asm_neon (int32_t w, \
- int32_t h, \
- dst_type *dst, \
- int32_t dst_stride, \
- src_type *src, \
- int32_t src_stride, \
- uint32_t mask); \
- \
-static void \
-neon_composite_##name (pixman_implementation_t *imp, \
- pixman_op_t op, \
- pixman_image_t * src_image, \
- pixman_image_t * mask_image, \
- pixman_image_t * dst_image, \
- int32_t src_x, \
- int32_t src_y, \
- int32_t mask_x, \
- int32_t mask_y, \
- int32_t dest_x, \
- int32_t dest_y, \
- int32_t width, \
- int32_t height) \
-{ \
- dst_type *dst_line; \
- src_type *src_line; \
- int32_t dst_stride, src_stride; \
- uint32_t mask; \
- \
- mask = _pixman_image_get_solid (mask_image, dst_image->bits.format);\
- \
- if (mask == 0) \
- return; \
- \
- PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, dst_type, \
- dst_stride, dst_line, dst_cnt); \
- PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, src_type, \
- src_stride, src_line, src_cnt); \
- \
- pixman_composite_##name##_asm_neon (width, height, \
- dst_line, dst_stride, \
- src_line, src_stride, \
- mask); \
-}
-
-#define BIND_SRC_MASK_DST(name, src_type, src_cnt, mask_type, mask_cnt, \
- dst_type, dst_cnt) \
-void \
-pixman_composite_##name##_asm_neon (int32_t w, \
- int32_t h, \
- dst_type *dst, \
- int32_t dst_stride, \
- src_type *src, \
- int32_t src_stride, \
- mask_type *mask, \
- int32_t mask_stride); \
- \
-static void \
-neon_composite_##name (pixman_implementation_t *imp, \
- pixman_op_t op, \
- pixman_image_t * src_image, \
- pixman_image_t * mask_image, \
- pixman_image_t * dst_image, \
- int32_t src_x, \
- int32_t src_y, \
- int32_t mask_x, \
- int32_t mask_y, \
- int32_t dest_x, \
- int32_t dest_y, \
- int32_t width, \
- int32_t height) \
-{ \
- dst_type *dst_line; \
- src_type *src_line; \
- mask_type *mask_line; \
- int32_t dst_stride, src_stride, mask_stride; \
- \
- PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, dst_type, \
- dst_stride, dst_line, dst_cnt); \
- PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, src_type, \
- src_stride, src_line, src_cnt); \
- PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, mask_type, \
- mask_stride, mask_line, mask_cnt); \
- \
- pixman_composite_##name##_asm_neon (width, height, \
- dst_line, dst_stride, \
- src_line, src_stride, \
- mask_line, mask_stride); \
-}
-
-
-BIND_SRC_NULL_DST(src_8888_8888, uint32_t, 1, uint32_t, 1)
-BIND_SRC_NULL_DST(src_x888_8888, uint32_t, 1, uint32_t, 1)
-BIND_SRC_NULL_DST(src_0565_0565, uint16_t, 1, uint16_t, 1)
-BIND_SRC_NULL_DST(src_0888_0888, uint8_t, 3, uint8_t, 3)
-BIND_SRC_NULL_DST(src_8888_0565, uint32_t, 1, uint16_t, 1)
-BIND_SRC_NULL_DST(src_0565_8888, uint16_t, 1, uint32_t, 1)
-BIND_SRC_NULL_DST(src_0888_8888_rev, uint8_t, 3, uint32_t, 1)
-BIND_SRC_NULL_DST(src_0888_0565_rev, uint8_t, 3, uint16_t, 1)
-BIND_SRC_NULL_DST(src_pixbuf_8888, uint32_t, 1, uint32_t, 1)
-BIND_SRC_NULL_DST(add_8000_8000, uint8_t, 1, uint8_t, 1)
-BIND_SRC_NULL_DST(add_8888_8888, uint32_t, 1, uint32_t, 1)
-
-BIND_N_NULL_DST(over_n_0565, uint16_t, 1)
-BIND_N_NULL_DST(over_n_8888, uint32_t, 1)
-BIND_N_NULL_DST(over_reverse_n_8888, uint32_t, 1)
-
-BIND_SRC_NULL_DST(over_8888_0565, uint32_t, 1, uint16_t, 1)
-BIND_SRC_NULL_DST(over_8888_8888, uint32_t, 1, uint32_t, 1)
-
-BIND_N_MASK_DST(over_n_8_0565, uint8_t, 1, uint16_t, 1)
-BIND_N_MASK_DST(over_n_8_8888, uint8_t, 1, uint32_t, 1)
-BIND_N_MASK_DST(over_n_8888_8888_ca, uint32_t, 1, uint32_t, 1)
-BIND_N_MASK_DST(add_n_8_8, uint8_t, 1, uint8_t, 1)
-
-BIND_SRC_N_DST(over_8888_n_8888, uint32_t, 1, uint32_t, 1)
-
-BIND_SRC_MASK_DST(add_8_8_8, uint8_t, 1, uint8_t, 1, uint8_t, 1)
-BIND_SRC_MASK_DST(add_8888_8888_8888, uint32_t, 1, uint32_t, 1, uint32_t, 1)
-BIND_SRC_MASK_DST(over_8888_8_8888, uint32_t, 1, uint8_t, 1, uint32_t, 1)
-BIND_SRC_MASK_DST(over_8888_8888_8888, uint32_t, 1, uint32_t, 1, uint32_t, 1)
+#include "pixman-arm-common.h"
+
+PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_8888_8888,
+ uint32_t, 1, uint32_t, 1)
+PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_x888_8888,
+ uint32_t, 1, uint32_t, 1)
+PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_0565_0565,
+ uint16_t, 1, uint16_t, 1)
+PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_0888_0888,
+ uint8_t, 3, uint8_t, 3)
+PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_8888_0565,
+ uint32_t, 1, uint16_t, 1)
+PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_0565_8888,
+ uint16_t, 1, uint32_t, 1)
+PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_0888_8888_rev,
+ uint8_t, 3, uint32_t, 1)
+PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_0888_0565_rev,
+ uint8_t, 3, uint16_t, 1)
+PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_pixbuf_8888,
+ uint32_t, 1, uint32_t, 1)
+PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, add_8000_8000,
+ uint8_t, 1, uint8_t, 1)
+PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, add_8888_8888,
+ uint32_t, 1, uint32_t, 1)
+PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, over_8888_0565,
+ uint32_t, 1, uint16_t, 1)
+PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, over_8888_8888,
+ uint32_t, 1, uint32_t, 1)
+
+PIXMAN_ARM_BIND_FAST_PATH_N_DST (neon, over_n_0565,
+ uint16_t, 1)
+PIXMAN_ARM_BIND_FAST_PATH_N_DST (neon, over_n_8888,
+ uint32_t, 1)
+PIXMAN_ARM_BIND_FAST_PATH_N_DST (neon, over_reverse_n_8888,
+ uint32_t, 1)
+
+PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (neon, over_n_8_0565,
+ uint8_t, 1, uint16_t, 1)
+PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (neon, over_n_8_8888,
+ uint8_t, 1, uint32_t, 1)
+PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (neon, over_n_8888_8888_ca,
+ uint32_t, 1, uint32_t, 1)
+PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (neon, add_n_8_8,
+ uint8_t, 1, uint8_t, 1)
+
+PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST (neon, over_8888_n_8888,
+ uint32_t, 1, uint32_t, 1)
+
+PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, add_8_8_8,
+ uint8_t, 1, uint8_t, 1, uint8_t, 1)
+PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, add_8888_8888_8888,
+ uint32_t, 1, uint32_t, 1, uint32_t, 1)
+PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, over_8888_8_8888,
+ uint32_t, 1, uint8_t, 1, uint32_t, 1)
+PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, over_8888_8888_8888,
+ uint32_t, 1, uint32_t, 1, uint32_t, 1)
void
pixman_composite_src_n_8_asm_neon (int32_t w,
diff --git a/pixman/pixman-arm-simd-asm.S b/pixman/pixman-arm-simd-asm.S
new file mode 100644
index 0000000..1a1a0d6
--- /dev/null
+++ b/pixman/pixman-arm-simd-asm.S
@@ -0,0 +1,330 @@
+/*
+ * Copyright © 2008 Mozilla Corporation
+ *
+ * Permission to use, copy, modify, distribute, and sell this software and its
+ * documentation for any purpose is hereby granted without fee, provided that
+ * the above copyright notice appear in all copies and that both that
+ * copyright notice and this permission notice appear in supporting
+ * documentation, and that the name of Mozilla Corporation not be used in
+ * advertising or publicity pertaining to distribution of the software without
+ * specific, written prior permission. Mozilla Corporation makes no
+ * representations about the suitability of this software for any purpose. It
+ * is provided "as is" without express or implied warranty.
+ *
+ * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
+ * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
+ * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
+ * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
+ * SOFTWARE.
+ *
+ * Author: Jeff Muizelaar (jeff@infidigm.net)
+ *
+ */
+
+/* Prevent the stack from becoming executable */
+#if defined(__linux__) && defined(__ELF__)
+.section .note.GNU-stack,"",%progbits
+#endif
+
+ .text
+ .arch armv6
+ .object_arch armv4
+ .arm
+ .altmacro
+
+/* Supplementary macro for setting function attributes */
+.macro pixman_asm_function fname
+ .func fname
+ .global fname
+#ifdef __ELF__
+ .hidden fname
+ .type fname, %function
+#endif
+fname:
+.endm
+
+/*
+ * The code below was generated by gcc 4.3.4 from the commented out
+ * functions in 'pixman-arm-simd.c' file with the following optimization
+ * options: "-O3 -mcpu=arm1136jf-s -fomit-frame-pointer"
+ *
+ * TODO: replace gcc generated code with hand tuned versions because
+ * the code quality is not very good, introduce symbolic register
+ * aliases for better readability and maintainability.
+ */
+
+pixman_asm_function pixman_composite_add_8000_8000_asm_armv6
+ push {r4, r5, r6, r7, r8, r9, r10, r11}
+ mov r10, r1
+ sub sp, sp, #4
+ subs r10, r10, #1
+ mov r11, r0
+ mov r8, r2
+ str r3, [sp]
+ ldr r7, [sp, #36]
+ bcc 0f
+6: cmp r11, #0
+ beq 1f
+ orr r3, r8, r7
+ tst r3, #3
+ beq 2f
+ mov r1, r8
+ mov r0, r7
+ mov r12, r11
+ b 3f
+5: tst r3, #3
+ beq 4f
+3: ldrb r2, [r0], #1
+ subs r12, r12, #1
+ ldrb r3, [r1]
+ uqadd8 r3, r2, r3
+ strb r3, [r1], #1
+ orr r3, r1, r0
+ bne 5b
+1: ldr r3, [sp]
+ add r8, r8, r3
+ ldr r3, [sp, #40]
+ add r7, r7, r3
+10: subs r10, r10, #1
+ bcs 6b
+0: add sp, sp, #4
+ pop {r4, r5, r6, r7, r8, r9, r10, r11}
+ bx lr
+2: mov r12, r11
+ mov r1, r8
+ mov r0, r7
+4: cmp r12, #3
+ subgt r6, r12, #4
+ movgt r9, r12
+ lsrgt r5, r6, #2
+ addgt r3, r5, #1
+ movgt r12, #0
+ lslgt r4, r3, #2
+ ble 7f
+8: ldr r3, [r0, r12]
+ ldr r2, [r1, r12]
+ uqadd8 r3, r3, r2
+ str r3, [r1, r12]
+ add r12, r12, #4
+ cmp r12, r4
+ bne 8b
+ sub r3, r9, #4
+ bic r3, r3, #3
+ add r3, r3, #4
+ subs r12, r6, r5, lsl #2
+ add r1, r1, r3
+ add r0, r0, r3
+ beq 1b
+7: mov r4, #0
+9: ldrb r3, [r1, r4]
+ ldrb r2, [r0, r4]
+ uqadd8 r3, r2, r3
+ strb r3, [r1, r4]
+ add r4, r4, #1
+ cmp r4, r12
+ bne 9b
+ ldr r3, [sp]
+ add r8, r8, r3
+ ldr r3, [sp, #40]
+ add r7, r7, r3
+ b 10b
+.endfunc
+
+pixman_asm_function pixman_composite_over_8888_8888_asm_armv6
+ push {r4, r5, r6, r7, r8, r9, r10, r11}
+ sub sp, sp, #20
+ cmp r1, #0
+ mov r12, r2
+ str r1, [sp, #12]
+ str r0, [sp, #16]
+ ldr r2, [sp, #52]
+ beq 0f
+ lsl r3, r3, #2
+ str r3, [sp]
+ ldr r3, [sp, #56]
+ mov r10, #0
+ lsl r3, r3, #2
+ str r3, [sp, #8]
+ mov r11, r3
+ b 1f
+6: ldr r11, [sp, #8]
+1: ldr r9, [sp]
+ mov r0, r12
+ add r12, r12, r9
+ mov r1, r2
+ str r12, [sp, #4]
+ add r2, r2, r11
+ ldr r12, [sp, #16]
+ ldr r3, =0x00800080
+ ldr r9, =0xff00ff00
+ mov r11, #255
+ cmp r12, #0
+ beq 4f
+5: ldr r5, [r1], #4
+ ldr r4, [r0]
+ sub r8, r11, r5, lsr #24
+ uxtb16 r6, r4
+ uxtb16 r7, r4, ror #8
+ mla r6, r6, r8, r3
+ mla r7, r7, r8, r3
+ uxtab16 r6, r6, r6, ror #8
+ uxtab16 r7, r7, r7, ror #8
+ and r7, r7, r9
+ uxtab16 r6, r7, r6, ror #8
+ uqadd8 r5, r6, r5
+ str r5, [r0], #4
+ subs r12, r12, #1
+ bne 5b
+4: ldr r3, [sp, #12]
+ add r10, r10, #1
+ cmp r10, r3
+ ldr r12, [sp, #4]
+ bne 6b
+0: add sp, sp, #20
+ pop {r4, r5, r6, r7, r8, r9, r10, r11}
+ bx lr
+.endfunc
+
+pixman_asm_function pixman_composite_over_8888_n_8888_asm_armv6
+ push {r4, r5, r6, r7, r8, r9, r10, r11}
+ sub sp, sp, #28
+ cmp r1, #0
+ str r1, [sp, #12]
+ ldrb r1, [sp, #71]
+ mov r12, r2
+ str r0, [sp, #16]
+ ldr r2, [sp, #60]
+ str r1, [sp, #24]
+ beq 0f
+ lsl r3, r3, #2
+ str r3, [sp, #20]
+ ldr r3, [sp, #64]
+ mov r10, #0
+ lsl r3, r3, #2
+ str r3, [sp, #8]
+ mov r11, r3
+ b 1f
+5: ldr r11, [sp, #8]
+1: ldr r4, [sp, #20]
+ mov r0, r12
+ mov r1, r2
+ add r12, r12, r4
+ add r2, r2, r11
+ str r12, [sp]
+ str r2, [sp, #4]
+ ldr r12, [sp, #16]
+ ldr r2, =0x00800080
+ ldr r3, [sp, #24]
+ mov r11, #255
+ cmp r12, #0
+ beq 3f
+4: ldr r5, [r1], #4
+ ldr r4, [r0]
+ uxtb16 r6, r5
+ uxtb16 r7, r5, ror #8
+ mla r6, r6, r3, r2
+ mla r7, r7, r3, r2
+ uxtab16 r6, r6, r6, ror #8
+ uxtab16 r7, r7, r7, ror #8
+ uxtb16 r6, r6, ror #8
+ uxtb16 r7, r7, ror #8
+ orr r5, r6, r7, lsl #8
+ uxtb16 r6, r4
+ uxtb16 r7, r4, ror #8
+ sub r8, r11, r5, lsr #24
+ mla r6, r6, r8, r2
+ mla r7, r7, r8, r2
+ uxtab16 r6, r6, r6, ror #8
+ uxtab16 r7, r7, r7, ror #8
+ uxtb16 r6, r6, ror #8
+ uxtb16 r7, r7, ror #8
+ orr r6, r6, r7, lsl #8
+ uqadd8 r5, r6, r5
+ str r5, [r0], #4
+ subs r12, r12, #1
+ bne 4b
+3: ldr r1, [sp, #12]
+ add r10, r10, #1
+ cmp r10, r1
+ ldr r12, [sp]
+ ldr r2, [sp, #4]
+ bne 5b
+0: add sp, sp, #28
+ pop {r4, r5, r6, r7, r8, r9, r10, r11}
+ bx lr
+.endfunc
+
+pixman_asm_function pixman_composite_over_n_8_8888_asm_armv6
+ push {r4, r5, r6, r7, r8, r9, r10, r11}
+ sub sp, sp, #28
+ cmp r1, #0
+ ldr r9, [sp, #60]
+ str r1, [sp, #12]
+ bic r1, r9, #-16777216
+ str r1, [sp, #20]
+ mov r12, r2
+ lsr r1, r9, #8
+ ldr r2, [sp, #20]
+ bic r1, r1, #-16777216
+ bic r2, r2, #65280
+ bic r1, r1, #65280
+ str r2, [sp, #20]
+ str r0, [sp, #16]
+ str r1, [sp, #4]
+ ldr r2, [sp, #68]
+ beq 0f
+ lsl r3, r3, #2
+ str r3, [sp, #24]
+ mov r0, #0
+ b 1f
+5: ldr r3, [sp, #24]
+1: ldr r4, [sp, #72]
+ mov r10, r12
+ mov r1, r2
+ add r12, r12, r3
+ add r2, r2, r4
+ str r12, [sp, #8]
+ str r2, [sp]
+ ldr r12, [sp, #16]
+ ldr r11, =0x00800080
+ ldr r2, [sp, #4]
+ ldr r3, [sp, #20]
+ cmp r12, #0
+ beq 3f
+4: ldrb r5, [r1], #1
+ ldr r4, [r10]
+ mla r6, r3, r5, r11
+ mla r7, r2, r5, r11
+ uxtab16 r6, r6, r6, ror #8
+ uxtab16 r7, r7, r7, ror #8
+ uxtb16 r6, r6, ror #8
+ uxtb16 r7, r7, ror #8
+ orr r5, r6, r7, lsl #8
+ uxtb16 r6, r4
+ uxtb16 r7, r4, ror #8
+ mvn r8, r5
+ lsr r8, r8, #24
+ mla r6, r6, r8, r11
+ mla r7, r7, r8, r11
+ uxtab16 r6, r6, r6, ror #8
+ uxtab16 r7, r7, r7, ror #8
+ uxtb16 r6, r6, ror #8
+ uxtb16 r7, r7, ror #8
+ orr r6, r6, r7, lsl #8
+ uqadd8 r5, r6, r5
+ str r5, [r10], #4
+ subs r12, r12, #1
+ bne 4b
+3: ldr r4, [sp, #12]
+ add r0, r0, #1
+ cmp r0, r4
+ ldr r12, [sp, #8]
+ ldr r2, [sp]
+ bne 5b
+0: add sp, sp, #28
+ pop {r4, r5, r6, r7, r8, r9, r10, r11}
+ bx lr
+.endfunc
diff --git a/pixman/pixman-arm-simd.c b/pixman/pixman-arm-simd.c
index 09a2888..389c9e0 100644
--- a/pixman/pixman-arm-simd.c
+++ b/pixman/pixman-arm-simd.c
@@ -28,31 +28,22 @@
#endif
#include "pixman-private.h"
+#include "pixman-arm-common.h"
-static void
-arm_composite_add_8000_8000 (pixman_implementation_t * impl,
- pixman_op_t op,
- pixman_image_t * src_image,
- pixman_image_t * mask_image,
- pixman_image_t * dst_image,
- int32_t src_x,
- int32_t src_y,
- int32_t mask_x,
- int32_t mask_y,
- int32_t dest_x,
- int32_t dest_y,
- int32_t width,
- int32_t height)
+#if 0 /* This code was moved to 'pixman-arm-simd-asm.S' */
+
+void
+pixman_composite_add_8000_8000_asm_armv6 (int32_t width,
+ int32_t height,
+ uint8_t *dst_line,
+ int32_t dst_stride,
+ uint8_t *src_line,
+ int32_t src_stride)
{
- uint8_t *dst_line, *dst;
- uint8_t *src_line, *src;
- int dst_stride, src_stride;
+ uint8_t *dst, *src;
int32_t w;
uint8_t s, d;
- PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint8_t, src_stride, src_line, 1);
- PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);
-
while (height--)
{
dst = dst_line;
@@ -101,32 +92,21 @@ arm_composite_add_8000_8000 (pixman_implementation_t * impl,
}
-static void
-arm_composite_over_8888_8888 (pixman_implementation_t * impl,
- pixman_op_t op,
- pixman_image_t * src_image,
- pixman_image_t * mask_image,
- pixman_image_t * dst_image,
- int32_t src_x,
- int32_t src_y,
- int32_t mask_x,
- int32_t mask_y,
- int32_t dest_x,
- int32_t dest_y,
- int32_t width,
- int32_t height)
+void
+pixman_composite_over_8888_8888_asm_armv6 (int32_t width,
+ int32_t height,
+ uint32_t *dst_line,
+ int32_t dst_stride,
+ uint32_t *src_line,
+ int32_t src_stride)
{
- uint32_t *dst_line, *dst;
- uint32_t *src_line, *src;
- int dst_stride, src_stride;
+ uint32_t *dst;
+ uint32_t *src;
int32_t w;
uint32_t component_half = 0x800080;
uint32_t upper_component_mask = 0xff00ff00;
uint32_t alpha_mask = 0xff;
- PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
- PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
-
while (height--)
{
dst = dst_line;
@@ -194,33 +174,21 @@ arm_composite_over_8888_8888 (pixman_implementation_t * impl,
}
}
-static void
-arm_composite_over_8888_n_8888 (pixman_implementation_t * impl,
- pixman_op_t op,
- pixman_image_t * src_image,
- pixman_image_t * mask_image,
- pixman_image_t * dst_image,
- int32_t src_x,
- int32_t src_y,
- int32_t mask_x,
- int32_t mask_y,
- int32_t dest_x,
- int32_t dest_y,
- int32_t width,
- int32_t height)
+void
+pixman_composite_over_8888_n_8888_asm_armv6 (int32_t width,
+ int32_t height,
+ uint32_t *dst_line,
+ int32_t dst_stride,
+ uint32_t *src_line,
+ int32_t src_stride,
+ uint32_t mask)
{
- uint32_t *dst_line, *dst;
- uint32_t *src_line, *src;
- uint32_t mask;
- int dst_stride, src_stride;
+ uint32_t *dst;
+ uint32_t *src;
int32_t w;
uint32_t component_half = 0x800080;
uint32_t alpha_mask = 0xff;
- PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
- PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
-
- mask = _pixman_image_get_solid (mask_image, PIXMAN_a8r8g8b8);
mask = (mask) >> 24;
while (height--)
@@ -303,33 +271,22 @@ arm_composite_over_8888_n_8888 (pixman_implementation_t * impl,
}
}
-static void
-arm_composite_over_n_8_8888 (pixman_implementation_t * impl,
- pixman_op_t op,
- pixman_image_t * src_image,
- pixman_image_t * mask_image,
- pixman_image_t * dst_image,
- int32_t src_x,
- int32_t src_y,
- int32_t mask_x,
- int32_t mask_y,
- int32_t dest_x,
- int32_t dest_y,
- int32_t width,
- int32_t height)
+void
+pixman_composite_over_n_8_8888_asm_armv6 (int32_t width,
+ int32_t height,
+ uint32_t *dst_line,
+ int32_t dst_stride,
+ uint32_t src,
+ int32_t unused,
+ uint8_t *mask_line,
+ int32_t mask_stride)
{
- uint32_t src, srca;
- uint32_t *dst_line, *dst;
- uint8_t *mask_line, *mask;
- int dst_stride, mask_stride;
+ uint32_t srca;
+ uint32_t *dst;
+ uint8_t *mask;
int32_t w;
- src = _pixman_image_get_solid (src_image, dst_image->bits.format);
-
- /* bail out if fully transparent */
srca = src >> 24;
- if (src == 0)
- return;
uint32_t component_mask = 0xff00ff;
uint32_t component_half = 0x800080;
@@ -337,9 +294,6 @@ arm_composite_over_n_8_8888 (pixman_implementation_t * impl,
uint32_t src_hi = (src >> 8) & component_mask;
uint32_t src_lo = src & component_mask;
- PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
- PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
-
while (height--)
{
dst = dst_line;
@@ -419,21 +373,36 @@ arm_composite_over_n_8_8888 (pixman_implementation_t * impl,
}
}
+#endif
+
+PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (armv6, add_8000_8000,
+ uint8_t, 1, uint8_t, 1)
+PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (armv6, over_8888_8888,
+ uint32_t, 1, uint32_t, 1)
+
+PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST (armv6, over_8888_n_8888,
+ uint32_t, 1, uint32_t, 1)
+
+PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (armv6, over_n_8_8888,
+ uint8_t, 1, uint32_t, 1)
+
static const pixman_fast_path_t arm_simd_fast_paths[] =
{
- PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, a8r8g8b8, arm_composite_over_8888_8888),
- PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, x8r8g8b8, arm_composite_over_8888_8888),
- PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, a8b8g8r8, arm_composite_over_8888_8888),
- PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, x8b8g8r8, arm_composite_over_8888_8888),
- PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, solid, a8r8g8b8, arm_composite_over_8888_n_8888),
- PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, solid, x8r8g8b8, arm_composite_over_8888_n_8888),
-
- PIXMAN_STD_FAST_PATH (ADD, a8, null, a8, arm_composite_add_8000_8000),
-
- PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8r8g8b8, arm_composite_over_n_8_8888),
- PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8r8g8b8, arm_composite_over_n_8_8888),
- PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8b8g8r8, arm_composite_over_n_8_8888),
- PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8b8g8r8, arm_composite_over_n_8_8888),
+ PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, a8r8g8b8, armv6_composite_over_8888_8888),
+ PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, x8r8g8b8, armv6_composite_over_8888_8888),
+ PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, a8b8g8r8, armv6_composite_over_8888_8888),
+ PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, x8b8g8r8, armv6_composite_over_8888_8888),
+ PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, solid, a8r8g8b8, armv6_composite_over_8888_n_8888),
+ PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, solid, x8r8g8b8, armv6_composite_over_8888_n_8888),
+ PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, solid, a8b8g8r8, armv6_composite_over_8888_n_8888),
+ PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, solid, x8b8g8r8, armv6_composite_over_8888_n_8888),
+
+ PIXMAN_STD_FAST_PATH (ADD, a8, null, a8, armv6_composite_add_8000_8000),
+
+ PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8r8g8b8, armv6_composite_over_n_8_8888),
+ PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8r8g8b8, armv6_composite_over_n_8_8888),
+ PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8b8g8r8, armv6_composite_over_n_8_8888),
+ PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8b8g8r8, armv6_composite_over_n_8_8888),
{ PIXMAN_OP_NONE },
};