diff options
author | David Schleef <ds@schleef.org> | 2010-08-07 17:57:22 -0700 |
---|---|---|
committer | David Schleef <ds@schleef.org> | 2011-05-17 10:23:01 -0700 |
commit | 2d5fbcdf55bcd49f03b48e70b42ecf664a3492f8 (patch) | |
tree | 1ae53881fa16ebfadce5d605e0d9dfab6d3939c7 | |
parent | 63bbdef0ca2c7b6f67a673b07c35de5a82a92248 (diff) |
update for current orc
-rw-r--r-- | pixman/pixman-orccode.orc | 272 | ||||
-rw-r--r-- | test/ds.c | 108 |
2 files changed, 302 insertions, 78 deletions
diff --git a/pixman/pixman-orccode.orc b/pixman/pixman-orccode.orc index 969a334..0d4d850 100644 --- a/pixman/pixman-orccode.orc +++ b/pixman/pixman-orccode.orc @@ -50,7 +50,7 @@ copyl d1, s1 .dest 4 d1 .source 4 s1 -addusb4 d1, d1, s1 +x4 addusb d1, d1, s1 .function orc_composite_add_8_8_line @@ -64,133 +64,309 @@ addusb d1, d1, s1 .dest 1 d1 .source 1 s1 .param 1 p1 -.temp 1 t1 +.temp 2 t1 +.temp 1 t2 -compina t1, p1, s1 -addusb d1, d1, t1 +#compina t1, p1, s1 +convubw t1, s1 +mullw t1, t1, p1 +div255w t1, t1 +convwb t2, t1 +addusb d1, d1, t2 .function orc_code_combine_add_u .dest 4 d1 .source 4 s1 .source 4 s2 -.temp 4 t1 +.temp 8 t1 +.temp 8 t2 +.temp 4 t3 -compin t1, s1, s2 -addusb4 d1, d1, t1 +x4 convubw t1, s1 +x4 convubw t2, s2 +splatw0q t2, t2 +x4 mullw t1, t1, t2 +x4 div255w t1, t1 +x4 convwb t3, t1 +x4 addusb d1, d1, t3 .function orc_code_combine_add_u_n .dest 4 d1 .source 4 s1 -addusb4 d1, d1, s1 +x4 addusb d1, d1, s1 .function orc_code_combine_over_u .dest 4 d1 .source 4 s1 .source 4 s2 -.temp 4 t1 +.temp 8 t1 +.temp 8 t2 +.temp 4 t3 +.temp 4 d +.temp 8 d_wide -compin t1, s1, s2 -compover d1, d1, t1 +#compin t1, s1, s2 +#compover d1, d1, t1 +x4 convubw t1, s1 +x4 convubw t2, s2 +splatw0q t2, t2 +x4 mullw t1, t1, t2 +x4 div255w t1, t1 +x4 convwb t3, t1 +# ((d) + (s) - ORC_MULDIV_255((d),(m))) +loadl d, d1 +x4 convubw d_wide, d +x4 xorw t1, t1, 0x00ff +splatw0q t2, t1 +x4 mullw t1, d_wide, t2 +x4 div255w t1, t1 +x4 convwb d, t1 +x4 addusb d1, d, t3 .function orc_code_combine_over_u_n .dest 4 d1 .source 4 s1 - -compover d1, d1, s1 +.temp 8 t1 +.temp 8 t2 +.temp 4 d +.temp 4 s +.temp 8 d_wide + +loadl s, s1 +x4 convubw t1, s +loadl d, d1 +x4 convubw d_wide, d +x4 xorw t1, t1, 0x00ff +splatw0q t2, t1 +x4 mullw t1, d_wide, t2 +x4 div255w t1, t1 +x4 convwb d, t1 +x4 addusb d1, d, s .function orc_code_combine_in_u .dest 4 d1 .source 4 s1 .source 4 s2 -#.temp 4 t1 +.temp 8 d_wide +.temp 8 s_wide +.temp 8 m_wide +.temp 8 t1 .temp 8 t2 -.temp 8 t3 -#compin t1, s1, s2 -convubw4 t2, s1 -convubw4 t3, s2 -splat0w4 t3, t3 -mullw4 t2, t2, t3 -div255w4 t2, t2 -#convwb4 t1, t2 -#compin d1, t1, d1 -#convubw4 t2, t1 -convubw4 t3, d1 -splat0w4 t3, t3 -mullw4 t2, t2, t3 -div255w4 t2, t2 -convwb4 d1, t2 +x4 convubw t1, s1 +x4 convubw t2, s2 +splatw0q t2, t2 +x4 mullw t1, t1, t2 +x4 div255w t1, t1 +# ORC_MULDIV_255((s),(m)), m is from dest +x4 convubw d_wide, d1 +splatw0q t2, d_wide +x4 mullw t1, t1, t2 +x4 div255w t1, t1 +x4 convwb d1, t1 + .function orc_code_combine_in_u_n .dest 4 d1 .source 4 s1 +.temp 8 d_wide +.temp 8 s_wide +.temp 8 m_wide .temp 8 t1 .temp 8 t2 -convubw4 t1, s1 -convubw4 t2, d1 -splat0w4 t2, t2 -mullw4 t1, t1, t2 -div255w4 t1, t1 -convwb4 d1, t1 -#compin d1, s1, d1 +x4 convubw t1, s1 +# ORC_MULDIV_255((s),(m)), m is from dest +x4 convubw d_wide, d1 +splatw0q t2, d_wide +x4 mullw t1, t1, t2 +x4 div255w t1, t1 +x4 convwb d1, t1 .function orc_code_combine_out_u .dest 4 d1 .source 4 s1 .source 4 s2 -.temp 4 t1 +.temp 8 d_wide +.temp 8 s_wide +.temp 8 m_wide +.temp 8 t1 +.temp 8 t2 + +x4 convubw t1, s1 +x4 convubw t2, s2 +splatw0q t2, t2 +x4 mullw t1, t1, t2 +x4 div255w t1, t1 +# ORC_MULDIV_255((s),(m)), m is from dest +x4 convubw d_wide, d1 +splatw0q t2, d_wide +x4 xorw t2, t2, 0x00ff +x4 mullw t1, t1, t2 +x4 div255w t1, t1 +x4 convwb d1, t1 -compin t1, s1, s2 -compout d1, d1, t1 .function orc_code_combine_out_u_n .dest 4 d1 .source 4 s1 +.temp 8 d_wide +.temp 8 s_wide +.temp 8 m_wide +.temp 8 t1 +.temp 8 t2 + +x4 convubw t1, s1 +# ORC_MULDIV_255((s),(m)), m is from dest +x4 convubw d_wide, d1 +splatw0q t2, d_wide +x4 xorw t2, t2, 0x00ff +x4 mullw t1, t1, t2 +x4 div255w t1, t1 +x4 convwb d1, t1 -compout d1, d1, s1 +# atop: (ORC_DIVIDE_255((s)*(da))+ORC_DIVIDE_255((d)*(255-(sa)))) .function orc_code_combine_atop_u .dest 4 d1 .source 4 s1 .source 4 s2 -.temp 4 t1 +.temp 8 d_wide +.temp 8 s_wide +.temp 8 m_wide +.temp 8 t1 +.temp 8 t2 +.temp 8 t3 +.temp 4 t4 +.temp 4 t5 + +x4 convubw t1, s1 +x4 convubw t2, s2 +splatw0q t2, t2 +x4 mullw t1, t1, t2 +x4 div255w t1, t1 + +x4 convubw d_wide, d1 +splatw0q t2, d_wide +x4 mullw t3, t1, t2 +x4 div255w t3, t3 +x4 convwb t4, t3 -compin t1, s1, s2 -compatop d1, d1, t1 +x4 convubw d_wide, d1 +splatw0q t2, t1 +x4 xorw t2, t2, 0x00ff +x4 mullw t1, d_wide, t2 +x4 div255w t1, t1 +x4 convwb t5, t1 + +x4 addusb d1, t4, t5 .function orc_code_combine_atop_u_n .dest 4 d1 .source 4 s1 +.temp 8 d_wide +.temp 8 s_wide +.temp 8 m_wide +.temp 8 t1 +.temp 8 t2 +.temp 8 t3 +.temp 4 t4 +.temp 4 t5 + +x4 convubw t1, s1 + +x4 convubw d_wide, d1 +splatw0q t2, d_wide +x4 mullw t3, t1, t2 +x4 div255w t3, t3 +x4 convwb t4, t3 + +x4 convubw d_wide, d1 +splatw0q t2, t1 +x4 xorw t2, t2, 0x00ff +x4 mullw t1, d_wide, t2 +x4 div255w t1, t1 +x4 convwb t5, t1 + +x4 addusb d1, t4, t5 -compatop d1, d1, s1 .function orc_code_combine_xor_u .dest 4 d1 .source 4 s1 .source 4 s2 -.temp 4 t1 +.temp 8 d_wide +.temp 8 s_wide +.temp 8 m_wide +.temp 8 t1 +.temp 8 t2 +.temp 8 t3 +.temp 4 t4 +.temp 4 t5 + +x4 convubw t1, s1 +x4 convubw t2, s2 +splatw0q t2, t2 +x4 mullw t1, t1, t2 +x4 div255w t1, t1 -compin t1, s1, s2 -compxor d1, d1, t1 +x4 convubw d_wide, d1 +splatw0q t2, d_wide +x4 xorw t2, t2, 0x00ff +x4 mullw t3, t1, t2 +x4 div255w t3, t3 +x4 convwb t4, t3 + +x4 convubw d_wide, d1 +splatw0q t2, t1 +x4 xorw t2, t2, 0x00ff +x4 mullw t1, d_wide, t2 +x4 div255w t1, t1 +x4 convwb t5, t1 + +x4 addusb d1, t4, t5 .function orc_code_combine_xor_u_n .dest 4 d1 .source 4 s1 +.temp 8 d_wide +.temp 8 s_wide +.temp 8 m_wide +.temp 8 t1 +.temp 8 t2 +.temp 8 t3 +.temp 4 t4 +.temp 4 t5 + +x4 convubw t1, s1 +x4 convubw d_wide, d1 +splatw0q t2, d_wide +x4 xorw t2, t2, 0x00ff +x4 mullw t3, t1, t2 +x4 div255w t3, t3 +x4 convwb t4, t3 + +x4 convubw d_wide, d1 +splatw0q t2, t1 +x4 xorw t2, t2, 0x00ff +x4 mullw t1, d_wide, t2 +x4 div255w t1, t1 +x4 convwb t5, t1 + +x4 addusb d1, t4, t5 -compxor d1, d1, s1 @@ -38,6 +38,8 @@ #define randu8() (random()&0xff) +#define static + static uint32_t combine_mask (const uint32_t *src, const uint32_t *mask, int i) { @@ -59,8 +61,46 @@ combine_mask (const uint32_t *src, const uint32_t *mask, int i) return s; } +static void +combine_add_u (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * dest, + const uint32_t * src, + const uint32_t * mask, + int width) +{ + int i; + + for (i = 0; i < width; ++i) + { + uint32_t s = combine_mask (src, mask, i); + uint32_t d = *(dest + i); + UN8x4_ADD_UN8x4 (d, s); + *(dest + i) = d; + } +} + void -combine_xor_u (pixman_implementation_t *imp, +combine_in_u (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * dest, + const uint32_t * src, + const uint32_t * mask, + int width) +{ + int i; + + for (i = 0; i < width; ++i) + { + uint32_t s = combine_mask (src, mask, i); + uint32_t a = ALPHA_8 (*(dest + i)); + UN8x4_MUL_UN8 (s, a); + *(dest + i) = s; + } +} + +static void +combine_over_u (pixman_implementation_t *imp, pixman_op_t op, uint32_t * dest, const uint32_t * src, @@ -73,22 +113,20 @@ combine_xor_u (pixman_implementation_t *imp, { uint32_t s = combine_mask (src, mask, i); uint32_t d = *(dest + i); - uint32_t src_ia = ALPHA_8 (~s); - uint32_t dest_ia = ALPHA_8 (~d); + uint32_t ia = ALPHA_8 (~s); - UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (s, dest_ia, d, src_ia); - *(dest + i) = s; + UN8x4_MUL_UN8_ADD_UN8x4 (d, ia, s); + *(dest + i) = d; } } -#if 0 static void -combine_add_u (pixman_implementation_t *imp, - pixman_op_t op, - uint32_t * dest, - const uint32_t * src, - const uint32_t * mask, - int width) +combine_atop_u (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * dest, + const uint32_t * src, + const uint32_t * mask, + int width) { int i; @@ -96,27 +134,32 @@ combine_add_u (pixman_implementation_t *imp, { uint32_t s = combine_mask (src, mask, i); uint32_t d = *(dest + i); - UN8x4_ADD_UN8x4 (d, s); - *(dest + i) = d; + uint32_t dest_a = ALPHA_8 (d); + uint32_t src_ia = ALPHA_8 (~s); + + UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (s, dest_a, d, src_ia); + *(dest + i) = s; } } -#endif static void -combine_in_u (pixman_implementation_t *imp, - pixman_op_t op, - uint32_t * dest, - const uint32_t * src, - const uint32_t * mask, - int width) +combine_xor_u (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * dest, + const uint32_t * src, + const uint32_t * mask, + int width) { int i; for (i = 0; i < width; ++i) { uint32_t s = combine_mask (src, mask, i); - uint32_t a = ALPHA_8 (*(dest + i)); - UN8x4_MUL_UN8 (s, a); + uint32_t d = *(dest + i); + uint32_t src_ia = ALPHA_8 (~s); + uint32_t dest_ia = ALPHA_8 (~d); + + UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (s, dest_ia, d, src_ia); *(dest + i) = s; } } @@ -151,8 +194,8 @@ int main (int argc, char *argv[]) orc_init(); for(i=0;i<256;i++){ - //dest_orig[i]=ORC_ARGB(255,0,0,255); dest_orig[i]=rand_argb(); + //dest_orig[i]=ORC_ARGB(0,0,0,255); dest[i]=dest_orig[i]; dest_ref[i]=dest_orig[i]; } @@ -163,13 +206,17 @@ int main (int argc, char *argv[]) } for(i=0;i<256;i++){ - mask[i]=ORC_ARGB(255,0,0,0); + mask[i]=ORC_ARGB(i,0,0,0); } - //combine_xor_u (NULL, 0, dest, src, mask, 256); - //combine_xor_u (NULL, 0, dest_ref, src, NULL, 256); - combine_in_u (NULL, 0, dest_ref, src, NULL, 256); - orc_code_combine_in_u_n (dest, src, 256); + //combine_add_u (NULL, 0, dest_ref, src, mask, 256); + //orc_code_combine_add_u (dest, src, mask, 256); + + //combine_over_u (NULL, 0, dest_ref, src, mask, 256); + //orc_code_combine_over_u (dest, src, mask, 256); + + combine_xor_u (NULL, 0, dest_ref, src, mask, 256); + orc_code_combine_xor_u (dest, src, mask, 256); #if 0 for(i=0;i<256;i++){ @@ -197,7 +244,7 @@ int main (int argc, char *argv[]) #endif for(i=0;i<256;i++){ - printf("%02x %02x %02x %02x %02x %02x %02x %02x -> " + printf("%02x %02x %02x %02x %02x %02x %02x %02x %02x -> " "%02x %02x %02x %02x %02x %02x %02x %02x %s\n", ORC_ARGB_A(dest_orig[i]), ORC_ARGB_R(dest_orig[i]), @@ -207,6 +254,7 @@ int main (int argc, char *argv[]) ORC_ARGB_R(src[i]), ORC_ARGB_G(src[i]), ORC_ARGB_B(src[i]), + ORC_ARGB_A(mask[i]), ORC_ARGB_A(dest_ref[i]), ORC_ARGB_R(dest_ref[i]), ORC_ARGB_G(dest_ref[i]), |