diff options
-rw-r--r-- | configure.ac | 4 | ||||
-rw-r--r-- | orc.mak | 73 | ||||
-rw-r--r-- | pixman/Makefile.am | 25 | ||||
-rw-r--r-- | pixman/pixman-cpu.c | 2 | ||||
-rw-r--r-- | pixman/pixman-orc.c | 76 | ||||
-rw-r--r-- | pixman/pixman-orccode.orc | 107 | ||||
-rw-r--r-- | pixman/pixman-private.h | 2 | ||||
-rw-r--r-- | test/Makefile.am | 2 | ||||
-rw-r--r-- | test/ds.c | 124 |
9 files changed, 386 insertions, 29 deletions
diff --git a/configure.ac b/configure.ac index 48c83e3..c8a7d44 100644 --- a/configure.ac +++ b/configure.ac @@ -621,9 +621,9 @@ if test $enable_orc = yes ; then ORCC=orcc fi AC_SUBST(ORCC) - AC_DEFINE(USE_ORC, 1, [use Orc]) + AC_DEFINE(HAVE_ORC, 1, [use Orc]) fi -AM_CONDITIONAL(USE_ORC, [test "x$enable_orc" = xyes]) +AM_CONDITIONAL(HAVE_ORC, [test "x$enable_orc" = xyes]) DEP_CFLAGS="$DEP_CFLAGS $ORC_CFLAGS" DEP_LIBS="$DEP_LIBS $ORC_LIBS" @@ -0,0 +1,73 @@ +# +# This is a makefile.am fragment to build Orc code. +# +# Define ORC_SOURCE and then include this file, such as: +# +# ORC_SOURCE=gstadderorc +# include $(top_srcdir)/common/orc.mak +# +# This fragment will create tmp-orc.c and gstadderorc.h from +# gstadderorc.orc. +# +# When 'make dist' is run at the top level, or 'make orc-update' +# in a directory including this fragment, the generated source +# files will be copied to $(ORC_SOURCE)-dist.[ch]. These files +# should be checked in to git, since they are used if Orc is +# disabled. +# +# Note that this file defines BUILT_SOURCES, so any later usage +# of BUILT_SOURCES in the Makefile.am that includes this file +# must use '+='. +# + + +EXTRA_DIST = $(ORC_SOURCE).orc + +ORC_NODIST_SOURCES = tmp-orc.c $(ORC_SOURCE).h +BUILT_SOURCES = tmp-orc.c $(ORC_SOURCE).h + + +orc-update: tmp-orc.c $(ORC_SOURCE).h + $(top_srcdir)/common/gst-indent tmp-orc.c + cp tmp-orc.c $(srcdir)/$(ORC_SOURCE)-dist.c + cp $(ORC_SOURCE).h $(srcdir)/$(ORC_SOURCE)-dist.h + +orcc_v_gen = $(orcc_v_gen_$(V)) +orcc_v_gen_ = $(orcc_v_gen_$(AM_DEFAULT_VERBOSITY)) +orcc_v_gen_0 = @echo " ORCC $@"; + +cp_v_gen = $(cp_v_gen_$(V)) +cp_v_gen_ = $(cp_v_gen_$(AM_DEFAULT_VERBOSITY)) +cp_v_gen_0 = @echo " CP $@"; + +if HAVE_ORC +tmp-orc.c: $(srcdir)/$(ORC_SOURCE).orc + $(orcc_v_gen)$(ORCC) --implementation -o tmp-orc.c $(srcdir)/$(ORC_SOURCE).orc + +$(ORC_SOURCE).h: $(srcdir)/$(ORC_SOURCE).orc + $(orcc_v_gen)$(ORCC) --header -o $(ORC_SOURCE).h $(srcdir)/$(ORC_SOURCE).orc +else +tmp-orc.c: $(srcdir)/$(ORC_SOURCE).orc + $(cp_v_gen)cp $(srcdir)/$(ORC_SOURCE)-dist.c tmp-orc.c + +$(ORC_SOURCE).h: $(srcdir)/$(ORC_SOURCE).orc + $(cp_v_gen)cp $(srcdir)/$(ORC_SOURCE)-dist.h $(ORC_SOURCE).h +endif + +clean-local: clean-orc +.PHONY: clean-orc +clean-orc: + rm -f tmp-orc.c $(ORC_SOURCE).h + +dist-hook: dist-hook-orc +.PHONY: dist-hook-orc +dist-hook-orc: tmp-orc.c $(ORC_SOURCE).h + $(top_srcdir)/common/gst-indent tmp-orc.c + rm -f tmp-orc.c~ + cmp -s tmp-orc.c $(srcdir)/$(ORC_SOURCE)-dist.c || \ + cp tmp-orc.c $(srcdir)/$(ORC_SOURCE)-dist.c + cmp -s $(ORC_SOURCE).h $(srcdir)/$(ORC_SOURCE)-dist.h || \ + cp $(ORC_SOURCE).h $(srcdir)/$(ORC_SOURCE)-dist.h + cp -p $(srcdir)/$(ORC_SOURCE)-dist.c $(distdir)/ + cp -p $(srcdir)/$(ORC_SOURCE)-dist.h $(distdir)/ + diff --git a/pixman/Makefile.am b/pixman/Makefile.am index 3443003..03e5958 100644 --- a/pixman/Makefile.am +++ b/pixman/Makefile.am @@ -39,7 +39,10 @@ libpixmanincludedir = $(includedir)/pixman-1 libpixmaninclude_HEADERS = pixman.h pixman-version.h noinst_LTLIBRARIES = -BUILT_SOURCES = pixman-combine32.h pixman-combine32.c pixman-combine64.h pixman-combine64.c +ORC_SOURCE=pixman-orccode +include $(top_srcdir)/orc.mak + +BUILT_SOURCES += pixman-combine32.h pixman-combine32.c pixman-combine64.h pixman-combine64.c pixman-combine32.c : pixman-combine.c.template pixman-combine32.h make-combine.pl $(PERL) $(srcdir)/make-combine.pl 8 < $(srcdir)/pixman-combine.c.template > $@ || ($(RM) $@; exit 1) @@ -51,7 +54,7 @@ pixman-combine64.c : pixman-combine.c.template pixman-combine64.h make-combine.p pixman-combine64.h : pixman-combine.h.template make-combine.pl $(PERL) $(srcdir)/make-combine.pl 16 < $(srcdir)/pixman-combine.h.template > $@ || ($(RM) $@; exit 1) -EXTRA_DIST = Makefile.win32 pixman-combine.c.template make-combine.pl pixman-region.c \ +EXTRA_DIST += Makefile.win32 pixman-combine.c.template make-combine.pl pixman-region.c \ pixman-combine.h.template solaris-hwcap.mapfile CLEANFILES = pixman-combine32.c pixman-combine64.c pixman-combine32.h pixman-combine64.h @@ -125,29 +128,17 @@ ASM_CFLAGS_arm_neon= endif # orc code -if USE_ORC +if HAVE_ORC + noinst_LTLIBRARIES += libpixman-orc.la libpixman_orc_la_SOURCES = \ pixman-orc.c +nodist_libpixman_orc_la_SOURCES = $(ORC_NODIST_SOURCES) libpixman_orc_la_CFLAGS = $(DEP_CFLAGS) $(ORC_CFLAGS) libpixman_orc_la_LIBADD = $(DEP_LIBS) libpixman_1_la_LDFLAGS += $(ORC_LDFLAGS) libpixman_1_la_LIBADD += libpixman-orc.la -$(libpixman_orc_la_SOURCES:.c=.s) : ASM_CFLAGS= - -nodist_libpixman_orc_la_SOURCES = pixman-orccode.c pixman-orccode.h -BUILT_SOURCES += pixman-orccode.c pixman-orccode.h -CLEANFILES += pixman-orccode.c pixman-orccode.h - -EXTRA_DIST += pixman-orccode.orc - -pixman-orccode.c: $(srcdir)/pixman-orccode.orc - $(ORCC) --implementation -o pixman-orccode.c $(srcdir)/pixman-orccode.orc - -pixman-orccode.h: $(srcdir)/pixman-orccode.orc - $(ORCC) --header -o pixman-orccode.h $(srcdir)/pixman-orccode.orc - endif .c.s : $(libpixmaninclude_HEADERS) $(BUILT_SOURCES) diff --git a/pixman/pixman-cpu.c b/pixman/pixman-cpu.c index 17060b1..9112278 100644 --- a/pixman/pixman-cpu.c +++ b/pixman/pixman-cpu.c @@ -581,7 +581,7 @@ _pixman_choose_implementation (void) imp = _pixman_implementation_create_general(); imp = _pixman_implementation_create_fast_path (imp); -#ifdef USE_ORC +#ifdef HAVE_ORC return _pixman_implementation_create_orc (); #endif #ifdef USE_MMX diff --git a/pixman/pixman-orc.c b/pixman/pixman-orc.c index 56c2042..58fa058 100644 --- a/pixman/pixman-orc.c +++ b/pixman/pixman-orc.c @@ -127,6 +127,36 @@ orc_combine_xor_u (pixman_implementation_t *imp, } } +static void +orc_combine_add_ca (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * dst, + const uint32_t * src, + const uint32_t * mask, + int width) +{ + if (mask) { + orc_code_combine_add_ca (dst, src, mask, width); + } else { + orc_code_combine_add_ca_n (dst, src, width); + } +} + +static void +orc_combine_over_ca (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * dst, + const uint32_t * src, + const uint32_t * mask, + int width) +{ + if (mask) { + orc_code_combine_over_ca (dst, src, mask, width); + } else { + orc_code_combine_over_ca_n (dst, src, width); + } +} + pixman_bool_t pixman_fill_orc (uint32_t *bits, @@ -329,6 +359,46 @@ orc_composite_add_n_8_8 (pixman_implementation_t *imp, } } +static void +orc_composite_over_8888_8_8888 (pixman_implementation_t *imp, + pixman_op_t op, + pixman_image_t * src_image, + pixman_image_t * mask_image, + pixman_image_t * dst_image, + int32_t src_x, + int32_t src_y, + int32_t mask_x, + int32_t mask_y, + int32_t dest_x, + int32_t dest_y, + int32_t width, + int32_t height) +{ + uint32_t *src, *src_line; + uint32_t *dst, *dst_line; + uint8_t *mask, *mask_line; + int src_stride, dst_stride, mask_stride; + + PIXMAN_IMAGE_GET_LINE ( + dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE ( + src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); + PIXMAN_IMAGE_GET_LINE ( + mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1); + + while (height--) { + src = src_line; + src_line += src_stride; + dst = dst_line; + dst_line += dst_stride; + mask = mask_line; + mask_line += mask_stride; + + orc_composite_over_8888_8_8888_line (dst, src, mask, width); + } +} + + static const pixman_fast_path_t orc_fast_paths[] = { @@ -349,7 +419,9 @@ static const pixman_fast_path_t orc_fast_paths[] = { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8, PIXMAN_a8b8g8r8, orc_composite_over_n_8_8888 }, { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8, PIXMAN_x8b8g8r8, orc_composite_over_n_8_8888 }, { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_a8, PIXMAN_x8r8g8b8, orc_composite_over_8888_8_8888 }, +#endif { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_a8, PIXMAN_a8r8g8b8, orc_composite_over_8888_8_8888 }, +#if 0 { PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, PIXMAN_a8, PIXMAN_x8b8g8r8, orc_composite_over_8888_8_8888 }, { PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, PIXMAN_a8, PIXMAN_a8b8g8r8, orc_composite_over_8888_8_8888 }, { PIXMAN_OP_OVER, PIXMAN_x8r8g8b8, PIXMAN_a8, PIXMAN_x8r8g8b8, orc_composite_over_x888_8_8888 }, @@ -520,7 +592,7 @@ _pixman_implementation_create_orc (void) //imp->combine_32[PIXMAN_OP_SATURATE] = orc_combine_saturate_u; //imp->combine_32_ca[PIXMAN_OP_SRC] = orc_combine_src_ca; - //imp->combine_32_ca[PIXMAN_OP_OVER] = orc_combine_over_ca; + imp->combine_32_ca[PIXMAN_OP_OVER] = orc_combine_over_ca; //imp->combine_32_ca[PIXMAN_OP_OVER_REVERSE] = orc_combine_over_reverse_ca; //imp->combine_32_ca[PIXMAN_OP_IN] = orc_combine_in_ca; //imp->combine_32_ca[PIXMAN_OP_IN_REVERSE] = orc_combine_in_reverse_ca; @@ -529,7 +601,7 @@ _pixman_implementation_create_orc (void) //imp->combine_32_ca[PIXMAN_OP_ATOP] = orc_combine_atop_ca; //imp->combine_32_ca[PIXMAN_OP_ATOP_REVERSE] = orc_combine_atop_reverse_ca; //imp->combine_32_ca[PIXMAN_OP_XOR] = orc_combine_xor_ca; - //imp->combine_32_ca[PIXMAN_OP_ADD] = orc_combine_add_ca; + imp->combine_32_ca[PIXMAN_OP_ADD] = orc_combine_add_ca; imp->composite = orc_composite; imp->blt = orc_blt; diff --git a/pixman/pixman-orccode.orc b/pixman/pixman-orccode.orc index 0d4d850..d39c9a8 100644 --- a/pixman/pixman-orccode.orc +++ b/pixman/pixman-orccode.orc @@ -370,3 +370,110 @@ x4 addusb d1, t4, t5 +.function orc_code_combine_add_ca +.dest 4 d1 +.source 4 s1 +.source 4 s2 +.temp 8 t1 +.temp 8 t2 +.temp 4 t3 + +x4 convubw t1, s1 +x4 convubw t2, s2 +#splatw0q t2, t2 +x4 mullw t1, t1, t2 +x4 div255w t1, t1 +x4 convwb t3, t1 +x4 addusb d1, d1, t3 + + +.function orc_code_combine_add_ca_n +.dest 4 d1 +.source 4 s1 + +x4 addusb d1, d1, s1 + + +.function orc_code_combine_over_ca +.dest 4 d1 +.source 4 s1 +.source 4 s2 +.temp 8 t1 +.temp 8 t2 +.temp 4 d +.temp 8 d_wide +.temp 8 m_wide +.temp 8 s_wide +.temp 8 xa +.temp 4 s + +x4 convubw s_wide, s1 +x4 convubw m_wide, s2 +splatw0q xa, s_wide +x4 mullw s_wide, s_wide, m_wide +x4 div255w s_wide, s_wide +x4 convwb s, s_wide +x4 mullw m_wide, m_wide, xa +x4 div255w m_wide, m_wide +loadl d, d1 +x4 convubw d_wide, d +x4 xorw m_wide, m_wide, 0x00ff +x4 mullw t1, d_wide, m_wide +x4 div255w t1, t1 +x4 convwb d, t1 +x4 addusb d1, d, s + + + +.function orc_code_combine_over_ca_n +.dest 4 d1 +.source 4 s1 +.temp 8 t1 +.temp 8 t2 +.temp 4 d +.temp 8 d_wide +.temp 8 m_wide +.temp 8 s_wide +.temp 8 xa +.temp 4 s + +x4 convubw s_wide, s1 +splatw0q xa, s_wide +x4 convwb s, s_wide +x4 copyw m_wide, xa +loadl d, d1 +x4 convubw d_wide, d +x4 xorw m_wide, m_wide, 0x00ff +x4 mullw t1, d_wide, m_wide +x4 div255w t1, t1 +x4 convwb d, t1 +x4 addusb d1, d, s + + + +.function orc_composite_over_8888_8_8888_line +.dest 4 d1 +.source 4 s1 +.source 1 s2 +.temp 8 t1 +.temp 8 t2 +.temp 4 t3 +.temp 4 d +.temp 4 mask +.temp 8 d_wide + +x4 convubw t1, s1 +splatbl mask, s2 +x4 convubw t2, mask +x4 mullw t1, t1, t2 +x4 div255w t1, t1 +x4 convwb t3, t1 +loadl d, d1 +x4 convubw d_wide, d +x4 xorw t1, t1, 0x00ff +splatw0q t2, t1 +x4 mullw t1, d_wide, t2 +x4 div255w t1, t1 +x4 convwb d, t1 +x4 addusb d1, d, t3 + diff --git a/pixman/pixman-private.h b/pixman/pixman-private.h index ed24d20..9887646 100644 --- a/pixman/pixman-private.h +++ b/pixman/pixman-private.h @@ -548,7 +548,7 @@ pixman_implementation_t * _pixman_implementation_create_vmx (pixman_implementation_t *fallback); #endif -#ifdef USE_ORC +#ifdef HAVE_ORC pixman_implementation_t * _pixman_implementation_create_orc (void); #endif diff --git a/test/Makefile.am b/test/Makefile.am index 6134920..80f78d7 100644 --- a/test/Makefile.am +++ b/test/Makefile.am @@ -30,7 +30,7 @@ blitters_test_SOURCES = blitters-test.c utils.c utils.h composite_traps_test_SOURCES = composite-traps-test.c utils.c utils.h ds_LDADD = $(TEST_LDADD) -ds_SOURCES = ds.c ../pixman/pixman-orccode.c +ds_SOURCES = ds.c ../pixman/tmp-orc.c scaling_test_SOURCES = scaling-test.c utils.c utils.h affine_test_SOURCES = affine-test.c utils.c utils.h @@ -164,6 +164,113 @@ combine_xor_u (pixman_implementation_t *imp, } } +static void +combine_mask_ca (uint32_t *src, uint32_t *mask) +{ + uint32_t a = *mask; + + uint32_t x; + uint16_t xa; + + if (!a) + { + *(src) = 0; + return; + } + + x = *(src); + if (a == ~0) + { + x = x >> A_SHIFT; + x |= x << G_SHIFT; + x |= x << R_SHIFT; + *(mask) = x; + return; + } + + xa = x >> A_SHIFT; + UN8x4_MUL_UN8x4 (x, a); + *(src) = x; + + UN8x4_MUL_UN8 (a, xa); + *(mask) = a; +} + +static void +combine_mask_value_ca (uint32_t *src, const uint32_t *mask) +{ + uint32_t a = *mask; + uint32_t x; + + if (!a) + { + *(src) = 0; + return; + } + + if (a == ~0) + return; + + x = *(src); + UN8x4_MUL_UN8x4 (x, a); + *(src) = x; +} + +static void +combine_add_ca (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * dest, + const uint32_t * src, + const uint32_t * mask, + int width) +{ + int i; + + for (i = 0; i < width; ++i) + { + uint32_t s = *(src + i); + uint32_t m = *(mask + i); + uint32_t d = *(dest + i); + + combine_mask_value_ca (&s, &m); + + UN8x4_ADD_UN8x4 (d, s); + + *(dest + i) = d; + } +} + +static void +combine_over_ca (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * dest, + const uint32_t * src, + const uint32_t * mask, + int width) +{ + int i; + + for (i = 0; i < width; ++i) + { + uint32_t s = *(src + i); + uint32_t m = *(mask + i); + uint32_t a; + + combine_mask_ca (&s, &m); + + a = ~m; + if (a) + { + uint32_t d = *(dest + i); + UN8x4_MUL_UN8x4_ADD_UN8x4 (d, a, s); + s = d; + } + + *(dest + i) = s; + } +} + + uint32_t dest_orig[256]; uint32_t dest[256]; @@ -202,11 +309,15 @@ int main (int argc, char *argv[]) for(i=0;i<256;i++){ //src[i]=ORC_ARGB(i,i,0,0); - src[i]=rand_argb(); + //src[i]=rand_argb(); + src[i]=ORC_ARGB(i,i,i,i); } for(i=0;i<256;i++){ - mask[i]=ORC_ARGB(i,0,0,0); + //mask[i]=ORC_ARGB(i,0,0,0); + //mask[i]=ORC_ARGB(i,i,i,i); + mask[i]=rand_argb(); + //mask[i]=ORC_ARGB(255,255,255,255); } //combine_add_u (NULL, 0, dest_ref, src, mask, 256); @@ -215,8 +326,8 @@ int main (int argc, char *argv[]) //combine_over_u (NULL, 0, dest_ref, src, mask, 256); //orc_code_combine_over_u (dest, src, mask, 256); - combine_xor_u (NULL, 0, dest_ref, src, mask, 256); - orc_code_combine_xor_u (dest, src, mask, 256); + combine_over_ca (NULL, 0, dest_ref, src, mask, 256); + orc_code_combine_over_ca (dest, src, mask, 256); #if 0 for(i=0;i<256;i++){ @@ -244,7 +355,7 @@ int main (int argc, char *argv[]) #endif for(i=0;i<256;i++){ - printf("%02x %02x %02x %02x %02x %02x %02x %02x %02x -> " + printf("%02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x -> " "%02x %02x %02x %02x %02x %02x %02x %02x %s\n", ORC_ARGB_A(dest_orig[i]), ORC_ARGB_R(dest_orig[i]), @@ -255,6 +366,9 @@ int main (int argc, char *argv[]) ORC_ARGB_G(src[i]), ORC_ARGB_B(src[i]), ORC_ARGB_A(mask[i]), + ORC_ARGB_R(mask[i]), + ORC_ARGB_G(mask[i]), + ORC_ARGB_B(mask[i]), ORC_ARGB_A(dest_ref[i]), ORC_ARGB_R(dest_ref[i]), ORC_ARGB_G(dest_ref[i]), |