summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: David Schleef <ds@schleef.org>, 2010-08-07 17:57:22 -0700
committer: David Schleef <ds@schleef.org>, 2011-05-17 10:23:01 -0700
commit: 2d5fbcdf55bcd49f03b48e70b42ecf664a3492f8 (patch)
tree: 1ae53881fa16ebfadce5d605e0d9dfab6d3939c7
parent: 63bbdef0ca2c7b6f67a673b07c35de5a82a92248 (diff)
update for current orc
-rw-r--r-- pixman/pixman-orccode.orc 272
-rw-r--r-- test/ds.c 108
2 files changed, 302 insertions, 78 deletions
diff --git a/pixman/pixman-orccode.orc b/pixman/pixman-orccode.orc
index 969a334..0d4d850 100644
--- a/pixman/pixman-orccode.orc
+++ b/pixman/pixman-orccode.orc
@@ -50,7 +50,7 @@ copyl d1, s1
.dest 4 d1
.source 4 s1
-addusb4 d1, d1, s1
+x4 addusb d1, d1, s1
.function orc_composite_add_8_8_line
@@ -64,133 +64,309 @@ addusb d1, d1, s1
.dest 1 d1
.source 1 s1
.param 1 p1
-.temp 1 t1
+.temp 2 t1
+.temp 1 t2
-compina t1, p1, s1
-addusb d1, d1, t1
+#compina t1, p1, s1
+convubw t1, s1
+mullw t1, t1, p1
+div255w t1, t1
+convwb t2, t1
+addusb d1, d1, t2
.function orc_code_combine_add_u
.dest 4 d1
.source 4 s1
.source 4 s2
-.temp 4 t1
+.temp 8 t1
+.temp 8 t2
+.temp 4 t3
-compin t1, s1, s2
-addusb4 d1, d1, t1
+x4 convubw t1, s1
+x4 convubw t2, s2
+splatw0q t2, t2
+x4 mullw t1, t1, t2
+x4 div255w t1, t1
+x4 convwb t3, t1
+x4 addusb d1, d1, t3
.function orc_code_combine_add_u_n
.dest 4 d1
.source 4 s1
-addusb4 d1, d1, s1
+x4 addusb d1, d1, s1
.function orc_code_combine_over_u
.dest 4 d1
.source 4 s1
.source 4 s2
-.temp 4 t1
+.temp 8 t1
+.temp 8 t2
+.temp 4 t3
+.temp 4 d
+.temp 8 d_wide
-compin t1, s1, s2
-compover d1, d1, t1
+#compin t1, s1, s2
+#compover d1, d1, t1
+x4 convubw t1, s1
+x4 convubw t2, s2
+splatw0q t2, t2
+x4 mullw t1, t1, t2
+x4 div255w t1, t1
+x4 convwb t3, t1
+# ((d) + (s) - ORC_MULDIV_255((d),(m)))
+loadl d, d1
+x4 convubw d_wide, d
+x4 xorw t1, t1, 0x00ff
+splatw0q t2, t1
+x4 mullw t1, d_wide, t2
+x4 div255w t1, t1
+x4 convwb d, t1
+x4 addusb d1, d, t3
.function orc_code_combine_over_u_n
.dest 4 d1
.source 4 s1
-
-compover d1, d1, s1
+.temp 8 t1
+.temp 8 t2
+.temp 4 d
+.temp 4 s
+.temp 8 d_wide
+
+loadl s, s1
+x4 convubw t1, s
+loadl d, d1
+x4 convubw d_wide, d
+x4 xorw t1, t1, 0x00ff
+splatw0q t2, t1
+x4 mullw t1, d_wide, t2
+x4 div255w t1, t1
+x4 convwb d, t1
+x4 addusb d1, d, s
.function orc_code_combine_in_u
.dest 4 d1
.source 4 s1
.source 4 s2
-#.temp 4 t1
+.temp 8 d_wide
+.temp 8 s_wide
+.temp 8 m_wide
+.temp 8 t1
.temp 8 t2
-.temp 8 t3
-#compin t1, s1, s2
-convubw4 t2, s1
-convubw4 t3, s2
-splat0w4 t3, t3
-mullw4 t2, t2, t3
-div255w4 t2, t2
-#convwb4 t1, t2
-#compin d1, t1, d1
-#convubw4 t2, t1
-convubw4 t3, d1
-splat0w4 t3, t3
-mullw4 t2, t2, t3
-div255w4 t2, t2
-convwb4 d1, t2
+x4 convubw t1, s1
+x4 convubw t2, s2
+splatw0q t2, t2
+x4 mullw t1, t1, t2
+x4 div255w t1, t1
+# ORC_MULDIV_255((s),(m)), m is from dest
+x4 convubw d_wide, d1
+splatw0q t2, d_wide
+x4 mullw t1, t1, t2
+x4 div255w t1, t1
+x4 convwb d1, t1
+
.function orc_code_combine_in_u_n
.dest 4 d1
.source 4 s1
+.temp 8 d_wide
+.temp 8 s_wide
+.temp 8 m_wide
.temp 8 t1
.temp 8 t2
-convubw4 t1, s1
-convubw4 t2, d1
-splat0w4 t2, t2
-mullw4 t1, t1, t2
-div255w4 t1, t1
-convwb4 d1, t1
-#compin d1, s1, d1
+x4 convubw t1, s1
+# ORC_MULDIV_255((s),(m)), m is from dest
+x4 convubw d_wide, d1
+splatw0q t2, d_wide
+x4 mullw t1, t1, t2
+x4 div255w t1, t1
+x4 convwb d1, t1
.function orc_code_combine_out_u
.dest 4 d1
.source 4 s1
.source 4 s2
-.temp 4 t1
+.temp 8 d_wide
+.temp 8 s_wide
+.temp 8 m_wide
+.temp 8 t1
+.temp 8 t2
+
+x4 convubw t1, s1
+x4 convubw t2, s2
+splatw0q t2, t2
+x4 mullw t1, t1, t2
+x4 div255w t1, t1
+# ORC_MULDIV_255((s),(m)), m is from dest
+x4 convubw d_wide, d1
+splatw0q t2, d_wide
+x4 xorw t2, t2, 0x00ff
+x4 mullw t1, t1, t2
+x4 div255w t1, t1
+x4 convwb d1, t1
-compin t1, s1, s2
-compout d1, d1, t1
.function orc_code_combine_out_u_n
.dest 4 d1
.source 4 s1
+.temp 8 d_wide
+.temp 8 s_wide
+.temp 8 m_wide
+.temp 8 t1
+.temp 8 t2
+
+x4 convubw t1, s1
+# ORC_MULDIV_255((s),(m)), m is from dest
+x4 convubw d_wide, d1
+splatw0q t2, d_wide
+x4 xorw t2, t2, 0x00ff
+x4 mullw t1, t1, t2
+x4 div255w t1, t1
+x4 convwb d1, t1
-compout d1, d1, s1
+# atop: (ORC_DIVIDE_255((s)*(da))+ORC_DIVIDE_255((d)*(255-(sa))))
.function orc_code_combine_atop_u
.dest 4 d1
.source 4 s1
.source 4 s2
-.temp 4 t1
+.temp 8 d_wide
+.temp 8 s_wide
+.temp 8 m_wide
+.temp 8 t1
+.temp 8 t2
+.temp 8 t3
+.temp 4 t4
+.temp 4 t5
+
+x4 convubw t1, s1
+x4 convubw t2, s2
+splatw0q t2, t2
+x4 mullw t1, t1, t2
+x4 div255w t1, t1
+
+x4 convubw d_wide, d1
+splatw0q t2, d_wide
+x4 mullw t3, t1, t2
+x4 div255w t3, t3
+x4 convwb t4, t3
-compin t1, s1, s2
-compatop d1, d1, t1
+x4 convubw d_wide, d1
+splatw0q t2, t1
+x4 xorw t2, t2, 0x00ff
+x4 mullw t1, d_wide, t2
+x4 div255w t1, t1
+x4 convwb t5, t1
+
+x4 addusb d1, t4, t5
.function orc_code_combine_atop_u_n
.dest 4 d1
.source 4 s1
+.temp 8 d_wide
+.temp 8 s_wide
+.temp 8 m_wide
+.temp 8 t1
+.temp 8 t2
+.temp 8 t3
+.temp 4 t4
+.temp 4 t5
+
+x4 convubw t1, s1
+
+x4 convubw d_wide, d1
+splatw0q t2, d_wide
+x4 mullw t3, t1, t2
+x4 div255w t3, t3
+x4 convwb t4, t3
+
+x4 convubw d_wide, d1
+splatw0q t2, t1
+x4 xorw t2, t2, 0x00ff
+x4 mullw t1, d_wide, t2
+x4 div255w t1, t1
+x4 convwb t5, t1
+
+x4 addusb d1, t4, t5
-compatop d1, d1, s1
.function orc_code_combine_xor_u
.dest 4 d1
.source 4 s1
.source 4 s2
-.temp 4 t1
+.temp 8 d_wide
+.temp 8 s_wide
+.temp 8 m_wide
+.temp 8 t1
+.temp 8 t2
+.temp 8 t3
+.temp 4 t4
+.temp 4 t5
+
+x4 convubw t1, s1
+x4 convubw t2, s2
+splatw0q t2, t2
+x4 mullw t1, t1, t2
+x4 div255w t1, t1
-compin t1, s1, s2
-compxor d1, d1, t1
+x4 convubw d_wide, d1
+splatw0q t2, d_wide
+x4 xorw t2, t2, 0x00ff
+x4 mullw t3, t1, t2
+x4 div255w t3, t3
+x4 convwb t4, t3
+
+x4 convubw d_wide, d1
+splatw0q t2, t1
+x4 xorw t2, t2, 0x00ff
+x4 mullw t1, d_wide, t2
+x4 div255w t1, t1
+x4 convwb t5, t1
+
+x4 addusb d1, t4, t5
.function orc_code_combine_xor_u_n
.dest 4 d1
.source 4 s1
+.temp 8 d_wide
+.temp 8 s_wide
+.temp 8 m_wide
+.temp 8 t1
+.temp 8 t2
+.temp 8 t3
+.temp 4 t4
+.temp 4 t5
+
+x4 convubw t1, s1
+x4 convubw d_wide, d1
+splatw0q t2, d_wide
+x4 xorw t2, t2, 0x00ff
+x4 mullw t3, t1, t2
+x4 div255w t3, t3
+x4 convwb t4, t3
+
+x4 convubw d_wide, d1
+splatw0q t2, t1
+x4 xorw t2, t2, 0x00ff
+x4 mullw t1, d_wide, t2
+x4 div255w t1, t1
+x4 convwb t5, t1
+
+x4 addusb d1, t4, t5
-compxor d1, d1, s1
diff --git a/test/ds.c b/test/ds.c
index 24a0ee9..d16d98b 100644
--- a/test/ds.c
+++ b/test/ds.c
@@ -38,6 +38,8 @@
#define randu8() (random()&0xff)
+#define static
+
static uint32_t
combine_mask (const uint32_t *src, const uint32_t *mask, int i)
{
@@ -59,8 +61,46 @@ combine_mask (const uint32_t *src, const uint32_t *mask, int i)
return s;
}
+static void
+combine_add_u (pixman_implementation_t *imp,
+ pixman_op_t op,
+ uint32_t * dest,
+ const uint32_t * src,
+ const uint32_t * mask,
+ int width)
+{
+ int i;
+
+ for (i = 0; i < width; ++i)
+ {
+ uint32_t s = combine_mask (src, mask, i);
+ uint32_t d = *(dest + i);
+ UN8x4_ADD_UN8x4 (d, s);
+ *(dest + i) = d;
+ }
+}
+
void
-combine_xor_u (pixman_implementation_t *imp,
+combine_in_u (pixman_implementation_t *imp,
+ pixman_op_t op,
+ uint32_t * dest,
+ const uint32_t * src,
+ const uint32_t * mask,
+ int width)
+{
+ int i;
+
+ for (i = 0; i < width; ++i)
+ {
+ uint32_t s = combine_mask (src, mask, i);
+ uint32_t a = ALPHA_8 (*(dest + i));
+ UN8x4_MUL_UN8 (s, a);
+ *(dest + i) = s;
+ }
+}
+
+static void
+combine_over_u (pixman_implementation_t *imp,
pixman_op_t op,
uint32_t * dest,
const uint32_t * src,
@@ -73,22 +113,20 @@ combine_xor_u (pixman_implementation_t *imp,
{
uint32_t s = combine_mask (src, mask, i);
uint32_t d = *(dest + i);
- uint32_t src_ia = ALPHA_8 (~s);
- uint32_t dest_ia = ALPHA_8 (~d);
+ uint32_t ia = ALPHA_8 (~s);
- UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (s, dest_ia, d, src_ia);
- *(dest + i) = s;
+ UN8x4_MUL_UN8_ADD_UN8x4 (d, ia, s);
+ *(dest + i) = d;
}
}
-#if 0
static void
-combine_add_u (pixman_implementation_t *imp,
- pixman_op_t op,
- uint32_t * dest,
- const uint32_t * src,
- const uint32_t * mask,
- int width)
+combine_atop_u (pixman_implementation_t *imp,
+ pixman_op_t op,
+ uint32_t * dest,
+ const uint32_t * src,
+ const uint32_t * mask,
+ int width)
{
int i;
@@ -96,27 +134,32 @@ combine_add_u (pixman_implementation_t *imp,
{
uint32_t s = combine_mask (src, mask, i);
uint32_t d = *(dest + i);
- UN8x4_ADD_UN8x4 (d, s);
- *(dest + i) = d;
+ uint32_t dest_a = ALPHA_8 (d);
+ uint32_t src_ia = ALPHA_8 (~s);
+
+ UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (s, dest_a, d, src_ia);
+ *(dest + i) = s;
}
}
-#endif
static void
-combine_in_u (pixman_implementation_t *imp,
- pixman_op_t op,
- uint32_t * dest,
- const uint32_t * src,
- const uint32_t * mask,
- int width)
+combine_xor_u (pixman_implementation_t *imp,
+ pixman_op_t op,
+ uint32_t * dest,
+ const uint32_t * src,
+ const uint32_t * mask,
+ int width)
{
int i;
for (i = 0; i < width; ++i)
{
uint32_t s = combine_mask (src, mask, i);
- uint32_t a = ALPHA_8 (*(dest + i));
- UN8x4_MUL_UN8 (s, a);
+ uint32_t d = *(dest + i);
+ uint32_t src_ia = ALPHA_8 (~s);
+ uint32_t dest_ia = ALPHA_8 (~d);
+
+ UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (s, dest_ia, d, src_ia);
*(dest + i) = s;
}
}
@@ -151,8 +194,8 @@ int main (int argc, char *argv[])
orc_init();
for(i=0;i<256;i++){
- //dest_orig[i]=ORC_ARGB(255,0,0,255);
dest_orig[i]=rand_argb();
+ //dest_orig[i]=ORC_ARGB(0,0,0,255);
dest[i]=dest_orig[i];
dest_ref[i]=dest_orig[i];
}
@@ -163,13 +206,17 @@ int main (int argc, char *argv[])
}
for(i=0;i<256;i++){
- mask[i]=ORC_ARGB(255,0,0,0);
+ mask[i]=ORC_ARGB(i,0,0,0);
}
- //combine_xor_u (NULL, 0, dest, src, mask, 256);
- //combine_xor_u (NULL, 0, dest_ref, src, NULL, 256);
- combine_in_u (NULL, 0, dest_ref, src, NULL, 256);
- orc_code_combine_in_u_n (dest, src, 256);
+ //combine_add_u (NULL, 0, dest_ref, src, mask, 256);
+ //orc_code_combine_add_u (dest, src, mask, 256);
+
+ //combine_over_u (NULL, 0, dest_ref, src, mask, 256);
+ //orc_code_combine_over_u (dest, src, mask, 256);
+
+ combine_xor_u (NULL, 0, dest_ref, src, mask, 256);
+ orc_code_combine_xor_u (dest, src, mask, 256);
#if 0
for(i=0;i<256;i++){
@@ -197,7 +244,7 @@ int main (int argc, char *argv[])
#endif
for(i=0;i<256;i++){
- printf("%02x %02x %02x %02x %02x %02x %02x %02x -> "
+ printf("%02x %02x %02x %02x %02x %02x %02x %02x %02x -> "
"%02x %02x %02x %02x %02x %02x %02x %02x %s\n",
ORC_ARGB_A(dest_orig[i]),
ORC_ARGB_R(dest_orig[i]),
@@ -207,6 +254,7 @@ int main (int argc, char *argv[])
ORC_ARGB_R(src[i]),
ORC_ARGB_G(src[i]),
ORC_ARGB_B(src[i]),
+ ORC_ARGB_A(mask[i]),
ORC_ARGB_A(dest_ref[i]),
ORC_ARGB_R(dest_ref[i]),
ORC_ARGB_G(dest_ref[i]),