diff options
-rw-r--r-- | configure.ac | 1 | ||||
-rw-r--r-- | testsuite/Makefile.am | 4 | ||||
-rw-r--r-- | testsuite/benchmorc/Makefile.am | 6 | ||||
-rw-r--r-- | testsuite/benchmorc/bench10.orc | 4294 | ||||
-rw-r--r-- | testsuite/benchmorc/benchmorc.c | 693 |
5 files changed, 4996 insertions, 2 deletions
diff --git a/configure.ac b/configure.ac index 81a48c8..35e06fd 100644 --- a/configure.ac +++ b/configure.ac @@ -189,6 +189,7 @@ orc/Makefile orc-test/Makefile testsuite/Makefile testsuite/orcc/Makefile +testsuite/benchmorc/Makefile examples/Makefile tools/Makefile orc-uninstalled.pc diff --git a/testsuite/Makefile.am b/testsuite/Makefile.am index e6fc972..2e0044b 100644 --- a/testsuite/Makefile.am +++ b/testsuite/Makefile.am @@ -1,7 +1,7 @@ -DIST_SUBDIRS = orcc +DIST_SUBDIRS = orcc benchmorc -SUBDIRS = orcc +SUBDIRS = orcc benchmorc TESTS_ENVIRONMENT = \ testfile="$(srcdir)/test.orc" diff --git a/testsuite/benchmorc/Makefile.am b/testsuite/benchmorc/Makefile.am new file mode 100644 index 0000000..8bc1152 --- /dev/null +++ b/testsuite/benchmorc/Makefile.am @@ -0,0 +1,6 @@ + +noinst_PROGRAMS = benchmorc + +AM_CFLAGS = $(ORC_CFLAGS) +LIBS = $(ORC_LIBS) $(top_builddir)/orc-test/liborc-test-@ORC_MAJORMINOR@.la + diff --git a/testsuite/benchmorc/bench10.orc b/testsuite/benchmorc/bench10.orc new file mode 100644 index 0000000..cec8860 --- /dev/null +++ b/testsuite/benchmorc/bench10.orc @@ -0,0 +1,4294 @@ +.init bench10_init + +#.init gst_volume_orc_init + +.function orc_scalarmultiply_f32_ns +.dest 4 d1 float +.floatparam 4 p1 + +mulf d1, d1, p1 + + +.function orc_process_int16 +.dest 2 d1 gint16 +.param 2 p1 +.temp 4 t1 + +mulswl t1, d1, p1 +shrsl t1, t1, 13 +convlw d1, t1 + + +.function orc_process_int16_clamp +.dest 2 d1 gint16 +.param 2 p1 +.temp 4 t1 + +mulswl t1, d1, p1 +shrsl t1, t1, 13 +convssslw d1, t1 + + +.function orc_process_int8 +.dest 1 d1 gint8 +.param 1 p1 +.temp 2 t1 + +mulsbw t1, d1, p1 +shrsw t1, t1, 5 +convwb d1, t1 + + +.function orc_process_int8_clamp +.dest 1 d1 gint8 +.param 1 p1 +.temp 2 t1 + +mulsbw t1, d1, p1 +shrsw t1, t1, 5 +convssswb d1, t1 + + + +#.init gst_audio_convert_orc_init + +.function orc_audio_convert_unpack_u8 +.dest 4 d1 gint32 +.source 1 s1 guint8 +.param 4 p1 +.const 4 c1 0x80000000 +.temp 2 t2 +.temp 4 t3 + +convubw t2, s1 
+convuwl t3, t2 +shll t3, t3, p1 +xorl d1, t3, c1 + + +.function orc_audio_convert_unpack_s8 +.dest 4 d1 gint32 +.source 1 s1 guint8 +.param 4 p1 +.temp 2 t2 +.temp 4 t3 + +convubw t2, s1 +convuwl t3, t2 +shll d1, t3, p1 + + +.function orc_audio_convert_unpack_u16 +.dest 4 d1 gint32 +.source 2 s1 guint8 +.param 4 p1 +.const 4 c1 0x80000000 +.temp 4 t2 + +convuwl t2, s1 +shll t2, t2, p1 +xorl d1, t2, c1 + + +.function orc_audio_convert_unpack_s16 +.dest 4 d1 gint32 +.source 2 s1 guint8 +.param 4 p1 +.temp 4 t2 + +convuwl t2, s1 +shll d1, t2, p1 + + +.function orc_audio_convert_unpack_u16_swap +.dest 4 d1 gint32 +.source 2 s1 guint8 +.param 4 p1 +.const 4 c1 0x80000000 +.temp 2 t1 +.temp 4 t2 + +swapw t1, s1 +convuwl t2, t1 +shll t2, t2, p1 +xorl d1, t2, c1 + + +.function orc_audio_convert_unpack_s16_swap +.dest 4 d1 gint32 +.source 2 s1 guint8 +.param 4 p1 +.temp 2 t1 +.temp 4 t2 + +swapw t1, s1 +convuwl t2, t1 +shll d1, t2, p1 + + +.function orc_audio_convert_unpack_u32 +.dest 4 d1 gint32 +.source 4 s1 guint8 +.param 4 p1 +.const 4 c1 0x80000000 +.temp 4 t1 + +shll t1, s1, p1 +xorl d1, t1, c1 + + +.function orc_audio_convert_unpack_s32 +.dest 4 d1 gint32 +.source 4 s1 guint8 +.param 4 p1 + +shll d1, s1, p1 + + +.function orc_audio_convert_unpack_u32_swap +.dest 4 d1 gint32 +.source 4 s1 guint8 +.param 4 p1 +.const 4 c1 0x80000000 +.temp 4 t1 + +swapl t1, s1 +shll t1, t1, p1 +xorl d1, t1, c1 + + +.function orc_audio_convert_unpack_s32_swap +.dest 4 d1 gint32 +.source 4 s1 guint8 +.param 4 p1 +.temp 4 t1 + +swapl t1, s1 +shll d1, t1, p1 + +.function orc_audio_convert_unpack_float_s32 +.source 4 s1 gfloat +.dest 4 d1 guint32 +.temp 4 t1 + +loadl t1, s1 +# multiply with 2147483647.0 +mulf t1, t1, 0x4F000000 +# add 0.5 for rounding +addf t1, t1, 0x3F000000 +convfl d1, t1 + +.function orc_audio_convert_unpack_float_s32_swap +.source 4 s1 gfloat +.dest 4 d1 guint32 +.temp 4 t1 + +swapl t1, s1 +# multiply with 2147483647.0 +mulf t1, t1, 0x4F000000 +# add 0.5 for rounding 
+addf t1, t1, 0x3F000000 +convfl d1, t1 + +.function orc_audio_convert_unpack_float_double +.dest 8 d1 gdouble +.source 4 s1 gfloat + +convfd d1, s1 + +.function orc_audio_convert_unpack_float_double_swap +.dest 8 d1 gdouble +.source 4 s1 gfloat +.temp 4 t1 + +swapl t1, s1 +convfd d1, t1 + +.function orc_audio_convert_unpack_double_double +.dest 8 d1 gdouble +.source 8 s1 gdouble + +copyq d1, s1 + +.function orc_audio_convert_unpack_double_double_swap +.dest 8 d1 gdouble +.source 8 s1 gdouble + +swapq d1, s1 + +.function orc_audio_convert_unpack_u8_double +.dest 8 d1 gdouble +.source 1 s1 guint8 +.param 4 p1 +.const 4 c1 0x80000000 +.temp 2 t2 +.temp 4 t3 + +convubw t2, s1 +convuwl t3, t2 +shll t3, t3, p1 +xorl t3, t3, c1 +convld d1, t3 + +.function orc_audio_convert_unpack_s8_double +.dest 8 d1 gdouble +.source 1 s1 guint8 +.param 4 p1 +.temp 2 t2 +.temp 4 t3 + +convubw t2, s1 +convuwl t3, t2 +shll t3, t3, p1 +convld d1, t3 + +.function orc_audio_convert_unpack_u16_double +.dest 8 d1 gdouble +.source 2 s1 guint8 +.param 4 p1 +.const 4 c1 0x80000000 +.temp 4 t2 + +convuwl t2, s1 +shll t2, t2, p1 +xorl t2, t2, c1 +convld d1, t2 + +.function orc_audio_convert_unpack_s16_double +.dest 8 d1 gdouble +.source 2 s1 guint8 +.param 4 p1 +.temp 4 t2 + +convuwl t2, s1 +shll t2, t2, p1 +convld d1, t2 + +.function orc_audio_convert_unpack_u16_double_swap +.dest 8 d1 gdouble +.source 2 s1 guint8 +.param 4 p1 +.const 4 c1 0x80000000 +.temp 2 t1 +.temp 4 t2 + +swapw t1, s1 +convuwl t2, t1 +shll t2, t2, p1 +xorl t2, t2, c1 +convld d1, t2 + +.function orc_audio_convert_unpack_s16_double_swap +.dest 8 d1 gdouble +.source 2 s1 guint8 +.param 4 p1 +.temp 2 t1 +.temp 4 t2 + +swapw t1, s1 +convuwl t2, t1 +shll t2, t2, p1 +convld d1, t2 + +.function orc_audio_convert_unpack_u32_double +.dest 8 d1 gdouble +.source 4 s1 guint8 +.param 4 p1 +.const 4 c1 0x80000000 +.temp 4 t1 + +shll t1, s1, p1 +xorl t1, t1, c1 +convld d1, t1 + +.function orc_audio_convert_unpack_s32_double +.dest 8 d1 
gdouble +.source 4 s1 guint8 +.param 4 p1 +.temp 4 t1 + +shll t1, s1, p1 +convld d1, t1 + +.function orc_audio_convert_unpack_u32_double_swap +.dest 8 d1 gdouble +.source 4 s1 guint8 +.param 4 p1 +.const 4 c1 0x80000000 +.temp 4 t1 + +swapl t1, s1 +shll t1, t1, p1 +xorl t1, t1, c1 +convld d1, t1 + +.function orc_audio_convert_unpack_s32_double_swap +.dest 8 d1 gdouble +.source 4 s1 guint8 +.param 4 p1 +.temp 4 t1 + +swapl t1, s1 +shll t1, t1, p1 +convld d1, t1 + +.function orc_audio_convert_pack_u8 +.dest 1 d1 guint8 +.source 4 s1 gint32 +.param 4 p1 +.const 4 c1 0x80000000 +.temp 4 t1 +.temp 2 t2 + +xorl t1, s1, c1 +shrul t1, t1, p1 +convlw t2, t1 +convwb d1, t2 + + +.function orc_audio_convert_pack_s8 +.dest 1 d1 guint8 +.source 4 s1 gint32 +.param 4 p1 +.temp 4 t1 +.temp 2 t2 + +shrsl t1, s1, p1 +convlw t2, t1 +convwb d1, t2 + + + +.function orc_audio_convert_pack_u16 +.dest 2 d1 guint8 +.source 4 s1 gint32 +.param 4 p1 +.const 4 c1 0x80000000 +.temp 4 t1 + +xorl t1, s1, c1 +shrul t1, t1, p1 +convlw d1, t1 + + +.function orc_audio_convert_pack_s16 +.dest 2 d1 guint8 +.source 4 s1 gint32 +.param 4 p1 +.temp 4 t1 + +shrsl t1, s1, p1 +convlw d1, t1 + + +.function orc_audio_convert_pack_u16_swap +.dest 2 d1 guint8 +.source 4 s1 gint32 +.param 4 p1 +.const 4 c1 0x80000000 +.temp 4 t1 +.temp 2 t2 + +xorl t1, s1, c1 +shrul t1, t1, p1 +convlw t2, t1 +swapw d1, t2 + + +.function orc_audio_convert_pack_s16_swap +.dest 2 d1 guint8 +.source 4 s1 gint32 +.param 4 p1 +.temp 4 t1 +.temp 2 t2 + +shrsl t1, s1, p1 +convlw t2, t1 +swapw d1, t2 + + + +.function orc_audio_convert_pack_u32 +.dest 4 d1 guint8 +.source 4 s1 gint32 +.param 4 p1 +.const 4 c1 0x80000000 +.temp 4 t1 + +xorl t1, s1, c1 +shrul d1, t1, p1 + + +.function orc_audio_convert_pack_s32 +.dest 4 d1 guint8 +.source 4 s1 gint32 +.param 4 p1 + +shrsl d1, s1, p1 + + +.function orc_audio_convert_pack_u32_swap +.dest 4 d1 guint8 +.source 4 s1 gint32 +.param 4 p1 +.const 4 c1 0x80000000 +.temp 4 t1 + +xorl t1, s1, c1 
+shrul t1, t1, p1 +swapl d1, t1 + + +.function orc_audio_convert_pack_s32_swap +.dest 4 d1 guint8 +.source 4 s1 gint32 +.param 4 p1 +.temp 4 t1 + +shrsl t1, s1, p1 +swapl d1, t1 + +.function orc_audio_convert_pack_s32_float +.dest 4 d1 gfloat +.source 4 s1 gint32 +.temp 4 t1 + +convlf t1, s1 +# divide by 2147483647.0 +divf t1, t1, 0x4F000000 +storel d1, t1 + +.function orc_audio_convert_pack_s32_float_swap +.dest 4 d1 gfloat +.source 4 s1 gint32 +.temp 4 t1 + +convlf t1, s1 +# divide by 2147483647.0 +divf t1, t1, 0x4F000000 +swapl d1, t1 + +.function orc_audio_convert_pack_double_float +.dest 4 d1 gfloat +.source 8 s1 gdouble + +convdf d1, s1 + +.function orc_audio_convert_pack_double_float_swap +.dest 4 d1 gfloat +.source 8 s1 gdouble +.temp 4 t1 + +convdf t1, s1 +swapl d1, t1 + +.function orc_audio_convert_pack_double_s8 +.dest 1 d1 guint8 +.source 8 s1 gdouble +.param 4 p1 +.temp 4 t1 +.temp 2 t2 + +convdl t1, s1 +shrsl t1, t1, p1 +convlw t2, t1 +convwb d1, t2 + +.function orc_audio_convert_pack_double_s16 +.dest 2 d1 guint8 +.source 8 s1 gdouble +.param 4 p1 +.temp 4 t1 + +convdl t1, s1 +shrsl t1, t1, p1 +convlw d1, t1 + +.function orc_audio_convert_pack_double_s16_swap +.dest 2 d1 guint8 +.source 8 s1 gdouble +.param 4 p1 +.temp 4 t1 +.temp 2 t2 + +convdl t1, s1 +shrsl t1, t1, p1 +convlw t2, t1 +swapw d1, t2 + +.function orc_audio_convert_pack_double_s32 +.dest 4 d1 guint8 +.source 8 s1 gdouble +.param 4 p1 +.temp 4 t1 + +convdl t1, s1 +shrsl d1, t1, p1 + +.function orc_audio_convert_pack_double_s32_swap +.dest 4 d1 guint8 +.source 8 s1 gdouble +.param 4 p1 +.temp 4 t1 + +convdl t1, s1 +shrsl t1, t1, p1 +swapl d1, t1 + + +#.init gst_videotestsrc_orc_init + +.function gst_orc_splat_u8 +.dest 1 d1 guint8 +.param 1 p1 + +copyb d1, p1 + + +.function gst_orc_splat_s16 +.dest 2 d1 gint8 +.param 2 p1 + +copyw d1, p1 + + +.function gst_orc_splat_u16 +.dest 2 d1 guint8 +.param 2 p1 + +copyw d1, p1 + + +.function gst_orc_splat_u32 +.dest 4 d1 guint8 +.param 4 p1 + 
+copyl d1, p1 + + + +#.init gst_videoscale_orc_init + +.function orc_merge_linear_u8 +.dest 1 d1 +.source 1 s1 +.source 1 s2 +.param 1 p1 +.temp 2 t1 +.temp 2 t2 +.temp 1 a +.temp 1 t + +loadb a, s1 +convubw t1, s1 +convubw t2, s2 +subw t2, t2, t1 +mullw t2, t2, p1 +addw t2, t2, 128 +convhwb t, t2 +addb d1, t, a + + + +.function orc_merge_linear_u16 +.dest 2 d1 +.source 2 s1 +.source 2 s2 +.param 2 p1 +.param 2 p2 +.temp 4 t1 +.temp 4 t2 + +# This is slightly different than the u8 case, since muluwl +# tends to be much faster than mulll +muluwl t1, s1, p1 +muluwl t2, s2, p2 +addl t1, t1, t2 +shrul t1, t1, 16 +convlw d1, t1 + + +.function orc_splat_u16 +.dest 2 d1 +.param 2 p1 + +copyw d1, p1 + + +.function orc_splat_u32 +.dest 4 d1 +.param 4 p1 + +copyl d1, p1 + + +.function orc_downsample_u8 +.dest 1 d1 guint8 +.source 2 s1 guint8 +.temp 1 t1 +.temp 1 t2 + +splitwb t1, t2, s1 +avgub d1, t1, t2 + + +.function orc_downsample_u16 +.dest 2 d1 guint16 +.source 4 s1 guint16 +.temp 2 t1 +.temp 2 t2 + +splitlw t1, t2, s1 +avguw d1, t1, t2 + + +.function gst_videoscale_orc_downsample_u32 +.dest 4 d1 guint8 +.source 8 s1 guint8 +.temp 4 t1 +.temp 4 t2 + +splitql t1, t2, s1 +x4 avgub d1, t1, t2 + + +.function gst_videoscale_orc_downsample_yuyv +.dest 4 d1 guint8 +.source 8 s1 guint8 +.temp 4 yyyy +.temp 4 uvuv +.temp 2 t1 +.temp 2 t2 +.temp 2 yy +.temp 2 uv + +x4 splitwb yyyy, uvuv, s1 +x2 splitwb t1, t2, yyyy +x2 avgub yy, t1, t2 +splitlw t1, t2, uvuv +x2 avgub uv, t1, t2 +x2 mergebw d1, yy, uv + + + +.function gst_videoscale_orc_resample_nearest_u8 +.dest 1 d1 guint8 +.source 1 s1 guint8 +.param 4 p1 +.param 4 p2 + +ldresnearb d1, s1, p1, p2 + + +.function gst_videoscale_orc_resample_bilinear_u8 +.dest 1 d1 guint8 +.source 1 s1 guint8 +.param 4 p1 +.param 4 p2 + +ldreslinb d1, s1, p1, p2 + + +.function gst_videoscale_orc_resample_nearest_u32 +.dest 4 d1 guint8 +.source 4 s1 guint8 +.param 4 p1 +.param 4 p2 + +ldresnearl d1, s1, p1, p2 + + +.function 
gst_videoscale_orc_resample_bilinear_u32 +.dest 4 d1 guint8 +.source 4 s1 guint8 +.param 4 p1 +.param 4 p2 + +ldreslinl d1, s1, p1, p2 + + +.function gst_videoscale_orc_resample_merge_bilinear_u32 +.dest 4 d1 guint8 +.dest 4 d2 guint8 +.source 4 s1 guint8 +.source 4 s2 guint8 +.temp 4 a +.temp 4 b +.temp 4 t +.temp 8 t1 +.temp 8 t2 +.param 4 p1 +.param 4 p2 +.param 4 p3 + +ldreslinl b, s2, p2, p3 +storel d2, b +loadl a, s1 +x4 convubw t1, a +x4 convubw t2, b +x4 subw t2, t2, t1 +x4 mullw t2, t2, p1 +x4 convhwb t, t2 +x4 addb d1, t, a + + + +.function gst_videoscale_orc_merge_bicubic_u8 +.dest 1 d1 guint8 +.source 1 s1 guint8 +.source 1 s2 guint8 +.source 1 s3 guint8 +.source 1 s4 guint8 +.param 4 p1 +.param 4 p2 +.param 4 p3 +.param 4 p4 +.temp 2 t1 +.temp 2 t2 + +mulubw t1, s2, p2 +mulubw t2, s3, p3 +addw t1, t1, t2 +mulubw t2, s1, p1 +subw t1, t1, t2 +mulubw t2, s4, p4 +subw t1, t1, t2 +addw t1, t1, 32 +shrsw t1, t1, 6 +convsuswb d1, t1 + + + +#.init gst_adder_orc_init + +.function add_int32 +.dest 4 d1 gint32 +.source 4 s1 gint32 + +addssl d1, d1, s1 + + +.function add_int16 +.dest 2 d1 gint16 +.source 2 s1 gint16 + +addssw d1, d1, s1 + + +.function add_int8 +.dest 1 d1 gint8 +.source 1 s1 gint8 + +addssb d1, d1, s1 + + +.function add_uint32 +.dest 4 d1 guint32 +.source 4 s1 guint32 + +addusl d1, d1, s1 + + +.function add_uint16 +.dest 2 d1 guint16 +.source 2 s1 guint16 + +addusw d1, d1, s1 + + +.function add_uint8 +.dest 1 d1 guint8 +.source 1 s1 guint8 + +addusb d1, d1, s1 + + +.function add_float32 +.dest 4 d1 float +.source 4 s1 float + +addf d1, d1, s1 + + +.function orc_splat_u32 +.dest 4 d1 guint32 +.param 4 p1 guint32 + +copyl d1, p1 + +.function orc_memcpy_u32 +.dest 4 d1 guint32 +.source 4 s1 guint32 + +copyl d1, s1 + +.function orc_blend_u8 +.flags 2d +.dest 1 d1 guint8 +.source 1 s1 guint8 +.param 2 p1 +.temp 2 t1 +.temp 2 t2 +.const 1 c1 8 + +convubw t1, d1 +convubw t2, s1 +subw t2, t2, t1 +mullw t2, t2, p1 +shlw t1, t1, c1 +addw t2, t1, t2 +shruw 
t2, t2, c1 +convsuswb d1, t2 + + +.function orc_blend_argb +.flags 2d +.dest 4 d guint8 +.source 4 s guint8 +.param 2 alpha +.temp 4 t +.temp 2 tw +.temp 1 tb +.temp 4 a +.temp 8 d_wide +.temp 8 s_wide +.temp 8 a_wide +.const 4 a_alpha 0x000000ff + +loadl t, s +convlw tw, t +convwb tb, tw +splatbl a, tb +x4 convubw a_wide, a +x4 mullw a_wide, a_wide, alpha +x4 shruw a_wide, a_wide, 8 +x4 convubw s_wide, t +loadl t, d +x4 convubw d_wide, t +x4 subw s_wide, s_wide, d_wide +x4 mullw s_wide, s_wide, a_wide +x4 div255w s_wide, s_wide +x4 addw d_wide, d_wide, s_wide +x4 convwb t, d_wide +orl t, t, a_alpha +storel d, t + +.function orc_blend_bgra +.flags 2d +.dest 4 d guint8 +.source 4 s guint8 +.param 2 alpha +.temp 4 t +.temp 4 t2 +.temp 2 tw +.temp 1 tb +.temp 4 a +.temp 8 d_wide +.temp 8 s_wide +.temp 8 a_wide +.const 4 a_alpha 0xff000000 + +loadl t, s +shrul t2, t, 24 +convlw tw, t2 +convwb tb, tw +splatbl a, tb +x4 convubw a_wide, a +x4 mullw a_wide, a_wide, alpha +x4 shruw a_wide, a_wide, 8 +x4 convubw s_wide, t +loadl t, d +x4 convubw d_wide, t +x4 subw s_wide, s_wide, d_wide +x4 mullw s_wide, s_wide, a_wide +x4 div255w s_wide, s_wide +x4 addw d_wide, d_wide, s_wide +x4 convwb t, d_wide +orl t, t, a_alpha +storel d, t + + +.function orc_splat_u32 +.dest 4 d1 guint32 +.param 4 p1 + +copyl d1, p1 + + +.function deinterlace_line_vfir +.dest 1 d1 guint8 +.source 1 s1 guint8 +.source 1 s2 guint8 +.source 1 s3 guint8 +.source 1 s4 guint8 +.source 1 s5 guint8 +.temp 2 t1 +.temp 2 t2 +.temp 2 t3 + +convubw t1, s1 +convubw t2, s5 +addw t1, t1, t2 +convubw t2, s2 +convubw t3, s4 +addw t2, t2, t3 +shlw t2, t2, 2 +convubw t3, s3 +shlw t3, t3, 1 +subw t2, t2, t1 +addw t2, t2, t3 +addw t2, t2, 4 +shrsw t2, t2, 3 +convsuswb d1, t2 + + +.function deinterlace_line_linear +.dest 1 d1 guint8 +.source 1 s1 guint8 +.source 1 s2 guint8 + +avgub d1, s1, s2 + + +.function deinterlace_line_linear_blend +.dest 1 d1 guint8 +.source 1 s1 guint8 +.source 1 s2 guint8 +.source 1 s3 guint8 
+.temp 2 t1 +.temp 2 t2 +.temp 2 t3 + +convubw t1, s1 +convubw t2, s2 +convubw t3, s3 +addw t1, t1, t2 +addw t3, t3, t3 +addw t1, t1, t3 +addw t1, t1, 2 +shrsw t1, t1, 2 +convsuswb d1, t1 + + +.function deinterlace_line_greedy +.dest 1 d1 +.source 1 m0 +.source 1 t1 +.source 1 b1 +.source 1 m2 +.param 1 max_comb +.temp 1 tm0 +.temp 1 tm2 +.temp 1 tb1 +.temp 1 tt1 +.temp 1 avg +.temp 1 l2_diff +.temp 1 lp2_diff +.temp 1 t2 +.temp 1 t3 +.temp 1 best +.temp 1 min +.temp 1 max + + +loadb tm0, m0 +loadb tm2, m2 + +loadb tb1, b1 +loadb tt1, t1 +avgub avg, tt1, tb1 + +maxub t2, tm0, avg +minub t3, tm0, avg +subb l2_diff, t2, t3 + +maxub t2, tm2, avg +minub t3, tm2, avg +subb lp2_diff, t2, t3 + +xorb l2_diff, l2_diff, 0x80 +xorb lp2_diff, lp2_diff, 0x80 +cmpgtsb t3, l2_diff, lp2_diff + +andb t2, tm2, t3 +andnb t3, t3, tm0 +orb best, t2, t3 + +maxub max, tt1, tb1 +minub min, tt1, tb1 +addusb max, max, max_comb +subusb min, min, max_comb +minub best, best, max +maxub d1, best, min + + + + +.function cogorc_memcpy_2d +.flags 2d +.dest 1 d1 guint8 +.source 1 s1 guint8 + +copyb d1, s1 + + +.function cogorc_downsample_horiz_cosite_1tap +.dest 1 d1 guint8 +.source 2 s1 guint8 + +select0wb d1, s1 + + +.function cogorc_downsample_horiz_cosite_3tap +.dest 1 d1 guint8 +.source 2 s1 guint8 +.source 2 s2 guint8 +.temp 1 t1 +.temp 1 t2 +.temp 1 t3 +.temp 2 t4 +.temp 2 t5 +.temp 2 t6 + +copyw t4, s1 +select0wb t1, t4 +select1wb t2, t4 +select0wb t3, s2 +convubw t4, t1 +convubw t5, t2 +convubw t6, t3 +mullw t5, t5, 2 +addw t4, t4, t6 +addw t4, t4, t5 +addw t4, t4, 2 +shrsw t4, t4, 2 +convsuswb d1, t4 + + +.function cogorc_downsample_420_jpeg +.dest 1 d1 guint8 +.source 2 s1 guint8 +.source 2 s2 guint8 +.temp 2 t1 +.temp 1 t2 +.temp 1 t3 +.temp 1 t4 +.temp 1 t5 + +copyw t1, s1 +select0wb t2, t1 +select1wb t3, t1 +avgub t2, t2, t3 +copyw t1, s2 +select0wb t4, t1 +select1wb t5, t1 +avgub t4, t4, t5 +avgub d1, t2, t4 + + +.function cogorc_downsample_vert_halfsite_2tap +.dest 1 d1 guint8 
+.source 1 s1 guint8 +.source 1 s2 guint8 + +avgub d1, s1, s2 + + +.function cogorc_downsample_vert_cosite_3tap +.dest 1 d1 guint8 +.source 1 s1 guint8 +.source 1 s2 guint8 +.source 1 s3 guint8 +.temp 2 t1 +.temp 2 t2 +.temp 2 t3 + +convubw t1, s1 +convubw t2, s2 +convubw t3, s3 +mullw t2, t2, 2 +addw t1, t1, t3 +addw t1, t1, t2 +addw t1, t1, 2 +shrsw t1, t1, 2 +convsuswb d1, t1 + + + +.function cogorc_downsample_vert_halfsite_4tap +.dest 1 d1 guint8 +.source 1 s1 guint8 +.source 1 s2 guint8 +.source 1 s3 guint8 +.source 1 s4 guint8 +.temp 2 t1 +.temp 2 t2 +.temp 2 t3 +.temp 2 t4 + +convubw t1, s1 +convubw t2, s2 +convubw t3, s3 +convubw t4, s4 +addw t2, t2, t3 +mullw t2, t2, 26 +addw t1, t1, t4 +mullw t1, t1, 6 +addw t2, t2, t1 +addw t2, t2, 32 +shrsw t2, t2, 6 +convsuswb d1, t2 + + +.function cogorc_upsample_horiz_cosite_1tap +.dest 2 d1 guint8 guint8 +.source 1 s1 guint8 +.temp 1 t1 + +copyb t1, s1 +mergebw d1, t1, t1 + + +.function cogorc_upsample_horiz_cosite +.dest 2 d1 guint8 guint8 +.source 1 s1 guint8 +.source 1 s2 guint8 +.temp 1 t1 +.temp 1 t2 + +copyb t1, s1 +avgub t2, t1, s2 +mergebw d1, t1, t2 + + +.function cogorc_upsample_vert_avgub +.dest 1 d1 guint8 +.source 1 s1 guint8 +.source 1 s2 guint8 + +avgub d1, s1, s2 + + + + +.function orc_unpack_yuyv_y +.dest 1 d1 guint8 +.source 2 s1 guint8 + +select0wb d1, s1 + + +.function orc_unpack_yuyv_u +.dest 1 d1 guint8 +.source 4 s1 guint8 +.temp 2 t1 + +select0lw t1, s1 +select1wb d1, t1 + + +.function orc_unpack_yuyv_v +.dest 1 d1 guint8 +.source 4 s1 guint8 +.temp 2 t1 + +select1lw t1, s1 +select1wb d1, t1 + + +.function orc_pack_yuyv +.dest 4 d1 guint8 +.source 2 s1 guint8 guint8 +.source 1 s2 guint8 +.source 1 s3 guint8 +.temp 1 t1 +.temp 1 t2 +.temp 2 t3 +.temp 2 t4 +.temp 2 t5 + +copyw t5, s1 +select0wb t1, t5 +select1wb t2, t5 +mergebw t3, t1, s2 +mergebw t4, t2, s3 +mergewl d1, t3, t4 + + +.function orc_unpack_uyvy_y +.dest 1 d1 guint8 +.source 2 s1 guint8 + +select1wb d1, s1 + + +.function 
orc_unpack_uyvy_u +.dest 1 d1 guint8 +.source 4 s1 guint8 +.temp 2 t1 + +select0lw t1, s1 +select0wb d1, t1 + + +.function orc_unpack_uyvy_v +.dest 1 d1 guint8 +.source 4 s1 guint8 +.temp 2 t1 + +select1lw t1, s1 +select0wb d1, t1 + + +.function orc_pack_uyvy +.dest 4 d1 guint8 +.source 2 s1 guint8 guint8 +.source 1 s2 guint8 +.source 1 s3 guint8 +.temp 1 t1 +.temp 1 t2 +.temp 2 t3 +.temp 2 t4 +.temp 2 t5 + +copyw t5, s1 +select0wb t1, t5 +select1wb t2, t5 +mergebw t3, s2, t1 +mergebw t4, s3, t2 +mergewl d1, t3, t4 + + +.function orc_matrix2_u8 +.dest 1 d1 guint8 guint8 +.source 1 s1 guint8 guint8 +.source 1 s2 guint8 guint8 +.param 2 p1 +.param 2 p2 +.param 2 p3 +.temp 2 t1 +.temp 2 t2 + +convubw t1, s1 +mullw t1, t1, p1 +convubw t2, s2 +mullw t2, t2, p2 +addw t1, t1, t2 +addw t1, t1, p3 +shrsw t1, t1, 6 +convsuswb d1, t1 + + +.function orc_matrix2_11_u8 +.dest 1 d1 guint8 guint8 +.source 1 s1 guint8 guint8 +.source 1 s2 guint8 guint8 +.param 2 p1 +.param 2 p2 +.temp 2 t1 +.temp 2 t2 +.temp 2 t3 +.temp 2 t4 + +convubw t1, s1 +subw t1, t1, 16 +mullw t3, t1, p1 +convubw t2, s2 +subw t2, t2, 128 +mullw t4, t2, p2 +addw t3, t3, t4 +addw t3, t3, 128 +shrsw t3, t3, 8 +addw t3, t3, t1 +addw t3, t3, t2 +convsuswb d1, t3 + + +.function orc_matrix2_12_u8 +.dest 1 d1 guint8 guint8 +.source 1 s1 guint8 guint8 +.source 1 s2 guint8 guint8 +.param 2 p1 +.param 2 p2 +.temp 2 t1 +.temp 2 t2 +.temp 2 t3 +.temp 2 t4 + +convubw t1, s1 +subw t1, t1, 16 +mullw t3, t1, p1 +convubw t2, s2 +subw t2, t2, 128 +mullw t4, t2, p2 +addw t3, t3, t4 +addw t3, t3, 128 +shrsw t3, t3, 8 +addw t3, t3, t1 +addw t3, t3, t2 +addw t3, t3, t2 +convsuswb d1, t3 + + +.function orc_matrix3_u8 +.dest 1 d1 guint8 guint8 +.source 1 s1 guint8 guint8 +.source 1 s2 guint8 guint8 +.source 1 s3 guint8 guint8 +.param 2 p1 +.param 2 p2 +.param 2 p3 +.param 2 p4 +.temp 2 t1 +.temp 2 t2 + +convubw t1, s1 +mullw t1, t1, p1 +convubw t2, s2 +mullw t2, t2, p2 +addw t1, t1, t2 +convubw t2, s3 +mullw t2, t2, p3 +addw t1, t1, 
t2 +addw t1, t1, p4 +shrsw t1, t1, 6 +convsuswb d1, t1 + + +.function orc_matrix3_100_u8 +.dest 1 d1 guint8 guint8 +.source 1 s1 guint8 guint8 +.source 1 s2 guint8 guint8 +.source 1 s3 guint8 guint8 +.param 2 p1 +.param 2 p2 +.param 2 p3 +.temp 2 t1 +.temp 2 t2 +.temp 2 t3 +#.temp 2 t4 + +convubw t1, s1 +subw t1, t1, 16 +mullw t3, t1, p1 +convubw t2, s2 +subw t2, t2, 128 +mullw t2, t2, p2 +addw t3, t3, t2 +convubw t2, s3 +subw t2, t2, 128 +mullw t2, t2, p3 +addw t3, t3, t2 +addw t3, t3, 128 +shrsw t3, t3, 8 +addw t3, t3, t1 +convsuswb d1, t3 + + +.function orc_matrix3_100_offset_u8 +.dest 1 d1 guint8 guint8 +.source 1 s1 guint8 guint8 +.source 1 s2 guint8 guint8 +.source 1 s3 guint8 guint8 +.param 2 p1 +.param 2 p2 +.param 2 p3 +.param 2 p4 +.param 2 p5 +#.param 2 p6 +.temp 2 t1 +.temp 2 t2 +.temp 2 t3 +#.temp 2 t3 +#.temp 2 t4 + +convubw t3, s1 +mullw t1, t3, p1 +convubw t2, s2 +mullw t2, t2, p2 +addw t1, t1, t2 +convubw t2, s3 +mullw t2, t2, p3 +addw t1, t1, t2 +addw t1, t1, p4 +shrsw t1, t1, p5 +#addw t1, t1, p6 +addw t1, t1, t3 +convsuswb d1, t1 + + + +.function orc_matrix3_000_u8 +.dest 1 d1 guint8 guint8 +.source 1 s1 guint8 guint8 +.source 1 s2 guint8 guint8 +.source 1 s3 guint8 guint8 +.param 2 p1 +.param 2 p2 +.param 2 p3 +.param 2 p4 +.param 2 p5 +#.param 2 p6 +.temp 2 t1 +.temp 2 t2 +#.temp 2 t3 +#.temp 2 t4 + +convubw t1, s1 +mullw t1, t1, p1 +convubw t2, s2 +mullw t2, t2, p2 +addw t1, t1, t2 +convubw t2, s3 +mullw t2, t2, p3 +addw t1, t1, t2 +addw t1, t1, p4 +shrsw t1, t1, p5 +#addw t1, t1, p6 +convwb d1, t1 + + + +.function orc_pack_123x +.dest 4 d1 guint32 guint8 +.source 1 s1 guint8 +.source 1 s2 guint8 +.source 1 s3 guint8 +.param 1 p1 +.temp 2 t1 +.temp 2 t2 + +mergebw t1, s1, s2 +mergebw t2, s3, p1 +mergewl d1, t1, t2 + + +.function orc_pack_x123 +.dest 4 d1 guint32 guint8 +.source 1 s1 guint8 +.source 1 s2 guint8 +.source 1 s3 guint8 +.param 1 p1 +.temp 2 t1 +.temp 2 t2 + +mergebw t1, p1, s1 +mergebw t2, s2, s3 +mergewl d1, t1, t2 + + +.function 
cogorc_combine2_u8 +.dest 1 d1 guint8 +.source 1 s1 guint8 +.source 1 s2 guint8 +.param 2 p1 +.param 2 p2 +.temp 2 t1 +.temp 2 t2 + +convubw t1, s1 +mullw t1, t1, p1 +convubw t2, s2 +mullw t2, t2, p2 +addw t1, t1, t2 +shruw t1, t1, 8 +convsuswb d1, t1 + + +.function cogorc_convert_I420_UYVY +.dest 4 d1 guint8 +.dest 4 d2 guint8 +.source 2 y1 guint8 +.source 2 y2 guint8 +.source 1 u guint8 +.source 1 v guint8 +.temp 2 uv + +mergebw uv, u, v +x2 mergebw d1, uv, y1 +x2 mergebw d2, uv, y2 + + +.function cogorc_convert_I420_YUY2 +.dest 4 d1 guint8 +.dest 4 d2 guint8 +.source 2 y1 guint8 +.source 2 y2 guint8 +.source 1 u guint8 +.source 1 v guint8 +.temp 2 uv + +mergebw uv, u, v +x2 mergebw d1, y1, uv +x2 mergebw d2, y2, uv + + + +.function cogorc_convert_I420_AYUV +.dest 4 d1 guint8 +.dest 4 d2 guint8 +.source 1 y1 guint8 +.source 1 y2 guint8 +.source 1 u guint8 +.source 1 v guint8 +.const 1 c255 255 +.temp 2 uv +.temp 2 ay +.temp 1 tu +.temp 1 tv + +loadupdb tu, u +loadupdb tv, v +mergebw uv, tu, tv +mergebw ay, c255, y1 +mergewl d1, ay, uv +mergebw ay, c255, y2 +mergewl d2, ay, uv + + +.function cogorc_convert_YUY2_I420 +.dest 2 y1 guint8 +.dest 2 y2 guint8 +.dest 1 u guint8 +.dest 1 v guint8 +.source 4 yuv1 guint8 +.source 4 yuv2 guint8 +.temp 2 t1 +.temp 2 t2 +.temp 2 ty + +x2 splitwb t1, ty, yuv1 +storew y1, ty +x2 splitwb t2, ty, yuv2 +storew y2, ty +x2 avgub t1, t1, t2 +splitwb v, u, t1 + + +.function cogorc_convert_UYVY_YUY2 +.flags 2d +.dest 4 yuy2 guint8 +.source 4 uyvy guint8 + +x2 swapw yuy2, uyvy + + +.function cogorc_planar_chroma_420_422 +.flags 2d +.dest 1 d1 guint8 +.dest 1 d2 guint8 +.source 1 s guint8 + +copyb d1, s +copyb d2, s + + +.function cogorc_planar_chroma_420_444 +.flags 2d +.dest 2 d1 guint8 +.dest 2 d2 guint8 +.source 1 s guint8 +.temp 2 t + +splatbw t, s +storew d1, t +storew d2, t + + +.function cogorc_planar_chroma_422_444 +.flags 2d +.dest 2 d1 guint8 +.source 1 s guint8 +.temp 2 t + +splatbw t, s +storew d1, t + + +.function 
cogorc_planar_chroma_444_422 +.flags 2d +.dest 1 d guint8 +.source 2 s guint8 +.temp 1 t1 +.temp 1 t2 + +splitwb t1, t2, s +avgub d, t1, t2 + + +.function cogorc_planar_chroma_444_420 +.flags 2d +.dest 1 d guint8 +.source 2 s1 guint8 +.source 2 s2 guint8 +.temp 2 t +.temp 1 t1 +.temp 1 t2 + +x2 avgub t, s1, s2 +splitwb t1, t2, t +avgub d, t1, t2 + + +.function cogorc_planar_chroma_422_420 +.flags 2d +.dest 1 d guint8 +.source 1 s1 guint8 +.source 1 s2 guint8 + +avgub d, s1, s2 + + +.function cogorc_convert_YUY2_AYUV +.flags 2d +.dest 8 ayuv guint8 +.source 4 yuy2 guint8 +.const 2 c255 0xff +.temp 2 yy +.temp 2 uv +.temp 4 ayay +.temp 4 uvuv + +x2 splitwb uv, yy, yuy2 +x2 mergebw ayay, c255, yy +mergewl uvuv, uv, uv +x2 mergewl ayuv, ayay, uvuv + + +.function cogorc_convert_UYVY_AYUV +.flags 2d +.dest 8 ayuv guint8 +.source 4 uyvy guint8 +.const 2 c255 0xff +.temp 2 yy +.temp 2 uv +.temp 4 ayay +.temp 4 uvuv + +x2 splitwb yy, uv, uyvy +x2 mergebw ayay, c255, yy +mergewl uvuv, uv, uv +x2 mergewl ayuv, ayay, uvuv + + +.function cogorc_convert_YUY2_Y42B +.flags 2d +.dest 2 y guint8 +.dest 1 u guint8 +.dest 1 v guint8 +.source 4 yuy2 guint8 +.temp 2 uv + +x2 splitwb uv, y, yuy2 +splitwb v, u, uv + + +.function cogorc_convert_UYVY_Y42B +.flags 2d +.dest 2 y guint8 +.dest 1 u guint8 +.dest 1 v guint8 +.source 4 uyvy guint8 +.temp 2 uv + +x2 splitwb y, uv, uyvy +splitwb v, u, uv + + +.function cogorc_convert_YUY2_Y444 +.flags 2d +.dest 2 y guint8 +.dest 2 uu guint8 +.dest 2 vv guint8 +.source 4 yuy2 guint8 +.temp 2 uv +.temp 1 u +.temp 1 v + +x2 splitwb uv, y, yuy2 +splitwb v, u, uv +splatbw uu, u +splatbw vv, v + + +.function cogorc_convert_UYVY_Y444 +.flags 2d +.dest 2 y guint8 +.dest 2 uu guint8 +.dest 2 vv guint8 +.source 4 uyvy guint8 +.temp 2 uv +.temp 1 u +.temp 1 v + +x2 splitwb y, uv, uyvy +splitwb v, u, uv +splatbw uu, u +splatbw vv, v + + +.function cogorc_convert_UYVY_I420 +.dest 2 y1 guint8 +.dest 2 y2 guint8 +.dest 1 u guint8 +.dest 1 v guint8 +.source 4 yuv1 
guint8 +.source 4 yuv2 guint8 +.temp 2 t1 +.temp 2 t2 +.temp 2 ty + +x2 splitwb ty, t1, yuv1 +storew y1, ty +x2 splitwb ty, t2, yuv2 +storew y2, ty +x2 avgub t1, t1, t2 +splitwb v, u, t1 + + + +.function cogorc_convert_AYUV_I420 +.flags 2d +.dest 2 y1 guint8 +.dest 2 y2 guint8 +.dest 1 u guint8 +.dest 1 v guint8 +.source 8 ayuv1 guint8 +.source 8 ayuv2 guint8 +.temp 4 ay +.temp 4 uv1 +.temp 4 uv2 +.temp 4 uv +.temp 2 uu +.temp 2 vv +.temp 1 t1 +.temp 1 t2 + +x2 splitlw uv1, ay, ayuv1 +x2 select1wb y1, ay +x2 splitlw uv2, ay, ayuv2 +x2 select1wb y2, ay +x4 avgub uv, uv1, uv2 +x2 splitwb vv, uu, uv +splitwb t1, t2, uu +avgub u, t1, t2 +splitwb t1, t2, vv +avgub v, t1, t2 + + + +.function cogorc_convert_AYUV_YUY2 +.flags 2d +.dest 4 yuy2 guint8 +.source 8 ayuv guint8 +.temp 2 yy +.temp 2 uv1 +.temp 2 uv2 +.temp 4 ayay +.temp 4 uvuv + +x2 splitlw uvuv, ayay, ayuv +splitlw uv1, uv2, uvuv +x2 avgub uv1, uv1, uv2 +x2 select1wb yy, ayay +x2 mergebw yuy2, yy, uv1 + + +.function cogorc_convert_AYUV_UYVY +.flags 2d +.dest 4 yuy2 guint8 +.source 8 ayuv guint8 +.temp 2 yy +.temp 2 uv1 +.temp 2 uv2 +.temp 4 ayay +.temp 4 uvuv + +x2 splitlw uvuv, ayay, ayuv +splitlw uv1, uv2, uvuv +x2 avgub uv1, uv1, uv2 +x2 select1wb yy, ayay +x2 mergebw yuy2, uv1, yy + + + +.function cogorc_convert_AYUV_Y42B +.flags 2d +.dest 2 y guint8 +.dest 1 u guint8 +.dest 1 v guint8 +.source 8 ayuv guint8 +.temp 4 ayay +.temp 4 uvuv +.temp 2 uv1 +.temp 2 uv2 + +x2 splitlw uvuv, ayay, ayuv +splitlw uv1, uv2, uvuv +x2 avgub uv1, uv1, uv2 +splitwb v, u, uv1 +x2 select1wb y, ayay + + +.function cogorc_convert_AYUV_Y444 +.flags 2d +.dest 1 y guint8 +.dest 1 u guint8 +.dest 1 v guint8 +.source 4 ayuv guint8 +.temp 2 ay +.temp 2 uv + +splitlw uv, ay, ayuv +splitwb v, u, uv +select1wb y, ay + + +.function cogorc_convert_Y42B_YUY2 +.flags 2d +.dest 4 yuy2 guint8 +.source 2 y guint8 +.source 1 u guint8 +.source 1 v guint8 +.temp 2 uv + +mergebw uv, u, v +x2 mergebw yuy2, y, uv + + +.function 
cogorc_convert_Y42B_UYVY +.flags 2d +.dest 4 uyvy guint8 +.source 2 y guint8 +.source 1 u guint8 +.source 1 v guint8 +.temp 2 uv + +mergebw uv, u, v +x2 mergebw uyvy, uv, y + + +.function cogorc_convert_Y42B_AYUV +.flags 2d +.dest 8 ayuv guint8 +.source 2 yy guint8 +.source 1 u guint8 +.source 1 v guint8 +.const 1 c255 255 +.temp 2 uv +.temp 2 ay +.temp 4 uvuv +.temp 4 ayay + +mergebw uv, u, v +x2 mergebw ayay, c255, yy +mergewl uvuv, uv, uv +x2 mergewl ayuv, ayay, uvuv + + +.function cogorc_convert_Y444_YUY2 +.flags 2d +.dest 4 yuy2 guint8 +.source 2 y guint8 +.source 2 u guint8 +.source 2 v guint8 +.temp 2 uv +.temp 4 uvuv +.temp 2 uv1 +.temp 2 uv2 + +x2 mergebw uvuv, u, v +splitlw uv1, uv2, uvuv +x2 avgub uv, uv1, uv2 +x2 mergebw yuy2, y, uv + + +.function cogorc_convert_Y444_UYVY +.flags 2d +.dest 4 uyvy guint8 +.source 2 y guint8 +.source 2 u guint8 +.source 2 v guint8 +.temp 2 uv +.temp 4 uvuv +.temp 2 uv1 +.temp 2 uv2 + +x2 mergebw uvuv, u, v +splitlw uv1, uv2, uvuv +x2 avgub uv, uv1, uv2 +x2 mergebw uyvy, uv, y + + +.function cogorc_convert_Y444_AYUV +.flags 2d +.dest 4 ayuv guint8 +.source 1 yy guint8 +.source 1 u guint8 +.source 1 v guint8 +.const 1 c255 255 +.temp 2 uv +.temp 2 ay + +mergebw uv, u, v +mergebw ay, c255, yy +mergewl ayuv, ay, uv + + + +.function cogorc_convert_AYUV_ARGB +.flags 2d +.dest 4 argb guint8 +.source 4 ayuv guint8 +.temp 2 t1 +.temp 2 t2 +.temp 1 a +.temp 1 y +.temp 1 u +.temp 1 v +.temp 2 wy +.temp 2 wu +.temp 2 wv +.temp 2 wr +.temp 2 wg +.temp 2 wb +.temp 1 r +.temp 1 g +.temp 1 b +.temp 4 x +.const 1 c8 8 + +x4 subb x, ayuv, 128 +splitlw t1, t2, x +splitwb y, a, t2 +splitwb v, u, t1 +convsbw wy, y +convsbw wu, u +convsbw wv, v + +mullw t1, wy, 42 +shrsw t1, t1, c8 +addssw wy, wy, t1 + +addssw wr, wy, wv +mullw t1, wv, 103 +shrsw t1, t1, c8 +subssw wr, wr, t1 +addssw wr, wr, wv + +addssw wb, wy, wu +addssw wb, wb, wu +mullw t1, wu, 4 +shrsw t1, t1, c8 +addssw wb, wb, t1 + +mullw t1, wu, 100 +shrsw t1, t1, c8 +subssw wg, wy, t1 
+mullw t1, wv, 104 +shrsw t1, t1, c8 +subssw wg, wg, t1 +subssw wg, wg, t1 + +convssswb r, wr +convssswb g, wg +convssswb b, wb + +mergebw t1, a, r +mergebw t2, g, b +mergewl x, t1, t2 +x4 addb argb, x, 128 + + + +.function cogorc_convert_AYUV_BGRA +.flags 2d +.dest 4 argb guint8 +.source 4 ayuv guint8 +.temp 2 t1 +.temp 2 t2 +.temp 1 a +.temp 1 y +.temp 1 u +.temp 1 v +.temp 2 wy +.temp 2 wu +.temp 2 wv +.temp 2 wr +.temp 2 wg +.temp 2 wb +.temp 1 r +.temp 1 g +.temp 1 b +.temp 4 x +.const 1 c8 8 + +x4 subb x, ayuv, 128 +splitlw t1, t2, x +splitwb y, a, t2 +splitwb v, u, t1 +convsbw wy, y +convsbw wu, u +convsbw wv, v + +mullw t1, wy, 42 +shrsw t1, t1, c8 +addssw wy, wy, t1 + +addssw wr, wy, wv +mullw t1, wv, 103 +shrsw t1, t1, c8 +subssw wr, wr, t1 +addssw wr, wr, wv + +addssw wb, wy, wu +addssw wb, wb, wu +mullw t1, wu, 4 +shrsw t1, t1, c8 +addssw wb, wb, t1 + +mullw t1, wu, 100 +shrsw t1, t1, c8 +subssw wg, wy, t1 +mullw t1, wv, 104 +shrsw t1, t1, c8 +subssw wg, wg, t1 +subssw wg, wg, t1 + +convssswb r, wr +convssswb g, wg +convssswb b, wb + +mergebw t1, b, g +mergebw t2, r, a +mergewl x, t1, t2 +x4 addb argb, x, 128 + + + + +.function cogorc_convert_AYUV_ABGR +.flags 2d +.dest 4 argb guint8 +.source 4 ayuv guint8 +.temp 2 t1 +.temp 2 t2 +.temp 1 a +.temp 1 y +.temp 1 u +.temp 1 v +.temp 2 wy +.temp 2 wu +.temp 2 wv +.temp 2 wr +.temp 2 wg +.temp 2 wb +.temp 1 r +.temp 1 g +.temp 1 b +.temp 4 x +.const 1 c8 8 + +x4 subb x, ayuv, 128 +splitlw t1, t2, x +splitwb y, a, t2 +splitwb v, u, t1 +convsbw wy, y +convsbw wu, u +convsbw wv, v + +mullw t1, wy, 42 +shrsw t1, t1, c8 +addssw wy, wy, t1 + +addssw wr, wy, wv +mullw t1, wv, 103 +shrsw t1, t1, c8 +subssw wr, wr, t1 +addssw wr, wr, wv + +addssw wb, wy, wu +addssw wb, wb, wu +mullw t1, wu, 4 +shrsw t1, t1, c8 +addssw wb, wb, t1 + +mullw t1, wu, 100 +shrsw t1, t1, c8 +subssw wg, wy, t1 +mullw t1, wv, 104 +shrsw t1, t1, c8 +subssw wg, wg, t1 +subssw wg, wg, t1 + +convssswb r, wr +convssswb g, wg +convssswb b, wb + 
+mergebw t1, a, b
+mergebw t2, g, r
+mergewl x, t1, t2
+x4 addb argb, x, 128
+
+
+
+# AYUV -> RGBA, one pixel per iteration.
+# All four input bytes are biased by -128, y/u/v are widened to 16 bits,
+# and the colour matrix is evaluated in 8.8 fixed point:
+#   wy += (wy * 42) >> 8
+#   wr  = wy + 2*wv - ((wv * 103) >> 8)
+#   wb  = wy + 2*wu + ((wu * 4) >> 8)
+#   wg  = wy - ((wu * 100) >> 8) - 2 * ((wv * 104) >> 8)
+# The results are narrowed with signed saturation, packed in the order
+# r, g, b, a, and the bias is added back to every byte at the end.
+.function cogorc_convert_AYUV_RGBA
+.flags 2d
+.dest 4 argb guint8
+.source 4 ayuv guint8
+.temp 2 t1
+.temp 2 t2
+.temp 1 a
+.temp 1 y
+.temp 1 u
+.temp 1 v
+.temp 2 wy
+.temp 2 wu
+.temp 2 wv
+.temp 2 wr
+.temp 2 wg
+.temp 2 wb
+.temp 1 r
+.temp 1 g
+.temp 1 b
+.temp 4 x
+.const 1 c8 8
+
+x4 subb x, ayuv, 128
+splitlw t1, t2, x
+splitwb y, a, t2
+splitwb v, u, t1
+convsbw wy, y
+convsbw wu, u
+convsbw wv, v
+
+mullw t1, wy, 42
+shrsw t1, t1, c8
+addssw wy, wy, t1
+
+addssw wr, wy, wv
+mullw t1, wv, 103
+shrsw t1, t1, c8
+subssw wr, wr, t1
+addssw wr, wr, wv
+
+addssw wb, wy, wu
+addssw wb, wb, wu
+mullw t1, wu, 4
+shrsw t1, t1, c8
+addssw wb, wb, t1
+
+mullw t1, wu, 100
+shrsw t1, t1, c8
+subssw wg, wy, t1
+mullw t1, wv, 104
+shrsw t1, t1, c8
+subssw wg, wg, t1
+subssw wg, wg, t1
+
+convssswb r, wr
+convssswb g, wg
+convssswb b, wb
+
+mergebw t1, r, g
+mergebw t2, b, a
+mergewl x, t1, t2
+x4 addb argb, x, 128
+
+
+
+# Planar y/u/v -> packed BGRA for one row.
+# y is read directly; u and v are read through loadupib.  Each sample is
+# biased by -128 (c128) and widened, then the same 8.8 fixed-point matrix
+# as in the AYUV converters is applied.  The output bytes are packed in
+# the order b, g, r, alpha and the bias is added back with the final addb.
+.function cogorc_convert_I420_BGRA
+.dest 4 argb guint8
+.source 1 y guint8
+.source 1 u guint8
+.source 1 v guint8
+.temp 2 t1
+.temp 2 t2
+.temp 1 t3
+.temp 2 wy
+.temp 2 wu
+.temp 2 wv
+.temp 2 wr
+.temp 2 wg
+.temp 2 wb
+.temp 1 r
+.temp 1 g
+.temp 1 b
+.temp 4 x
+.const 1 c8 8
+.const 1 c128 128
+
+subb t3, y, c128
+convsbw wy, t3
+loadupib t3, u
+subb t3, t3, c128
+convsbw wu, t3
+loadupib t3, v
+subb t3, t3, c128
+convsbw wv, t3
+
+mullw t1, wy, 42
+shrsw t1, t1, c8
+addssw wy, wy, t1
+
+addssw wr, wy, wv
+mullw t1, wv, 103
+shrsw t1, t1, c8
+subssw wr, wr, t1
+addssw wr, wr, wv
+
+addssw wb, wy, wu
+addssw wb, wb, wu
+mullw t1, wu, 4
+shrsw t1, t1, c8
+addssw wb, wb, t1
+
+mullw t1, wu, 100
+shrsw t1, t1, c8
+subssw wg, wy, t1
+mullw t1, wv, 104
+shrsw t1, t1, c8
+subssw wg, wg, t1
+subssw wg, wg, t1
+
+convssswb r, wr
+convssswb g, wg
+convssswb b, wb
+
+mergebw t1, b, g
+# Alpha is the constant 127, not 255: the addb below adds c128 to every
+# byte of x, alpha included, so 127 + 128 = 255 (opaque).  A constant of
+# 255 would wrap around to 127.
+mergebw t2, r, 127
+mergewl x, t1, t2
+x4 addb argb, x, c128
+
+
+
+.function 
cogorc_convert_I420_BGRA_avg
+# Planar y/u/v -> packed BGRA for one row, with chroma taken as the
+# avgub of two chroma sources (u1/u2 and v1/v2), each read through
+# loadupib.  Samples are biased by -128 (c128) before the 8.8 fixed-point
+# matrix and the bias is added back to every output byte at the end.
+.dest 4 argb guint8
+.source 1 y guint8
+.source 1 u1 guint8
+.source 1 u2 guint8
+.source 1 v1 guint8
+.source 1 v2 guint8
+.temp 2 t1
+.temp 2 t2
+.temp 1 t3
+.temp 1 t4
+.temp 2 wy
+.temp 2 wu
+.temp 2 wv
+.temp 2 wr
+.temp 2 wg
+.temp 2 wb
+.temp 1 r
+.temp 1 g
+.temp 1 b
+.temp 4 x
+.const 1 c8 8
+.const 1 c128 128
+
+subb t3, y, c128
+convsbw wy, t3
+loadupib t3, u1
+loadupib t4, u2
+avgub t3, t3, t4
+subb t3, t3, c128
+convsbw wu, t3
+loadupib t3, v1
+loadupib t4, v2
+avgub t3, t3, t4
+subb t3, t3, c128
+convsbw wv, t3
+
+mullw t1, wy, 42
+shrsw t1, t1, c8
+addssw wy, wy, t1
+
+addssw wr, wy, wv
+mullw t1, wv, 103
+shrsw t1, t1, c8
+subssw wr, wr, t1
+addssw wr, wr, wv
+
+addssw wb, wy, wu
+addssw wb, wb, wu
+mullw t1, wu, 4
+shrsw t1, t1, c8
+addssw wb, wb, t1
+
+mullw t1, wu, 100
+shrsw t1, t1, c8
+subssw wg, wy, t1
+mullw t1, wv, 104
+shrsw t1, t1, c8
+subssw wg, wg, t1
+subssw wg, wg, t1
+
+convssswb r, wr
+convssswb g, wg
+convssswb b, wb
+
+mergebw t1, b, g
+# Alpha is the constant 127, not 255: the addb below adds c128 to every
+# byte of x, alpha included, so 127 + 128 = 255 (opaque).  A constant of
+# 255 would wrap around to 127.
+mergebw t2, r, 127
+mergewl x, t1, t2
+x4 addb argb, x, c128
+
+
+
+# Planar y/u/v -> packed 4-byte pixels: alpha forced to 255, chroma read
+# through loadupdb.
+.function cogorc_getline_I420
+.dest 4 d guint8
+.source 1 y guint8
+.source 1 u guint8
+.source 1 v guint8
+.const 1 c255 255
+.temp 2 uv
+.temp 2 ay
+.temp 1 tu
+.temp 1 tv
+
+loadupdb tu, u
+loadupdb tv, v
+mergebw uv, tu, tv
+mergebw ay, c255, y
+mergewl d, ay, uv
+
+
+# YUY2 -> two AYUV pixels per iteration.  Luma is the low byte of each
+# 16-bit word, chroma the high byte; both output pixels share one uv pair.
+.function cogorc_getline_YUY2
+.dest 8 ayuv guint8
+.source 4 yuy2 guint8
+.const 2 c255 0xff
+.temp 2 yy
+.temp 2 uv
+.temp 4 ayay
+.temp 4 uvuv
+
+x2 splitwb uv, yy, yuy2
+x2 mergebw ayay, c255, yy
+mergewl uvuv, uv, uv
+x2 mergewl ayuv, ayay, uvuv
+
+
+# UYVY -> two AYUV pixels per iteration.  Chroma is the low byte of each
+# 16-bit word, luma the high byte.
+.function cogorc_getline_UYVY
+.dest 8 ayuv guint8
+.source 4 uyvy guint8
+.const 2 c255 0xff
+.temp 2 yy
+.temp 2 uv
+.temp 4 ayay
+.temp 4 uvuv
+
+x2 splitwb yy, uv, uyvy
+x2 mergebw ayay, c255, yy
+mergewl uvuv, uv, uv
+x2 mergewl ayuv, ayay, uvuv
+
+
+# YVYU -> two AYUV pixels per iteration.
+# This is the inverse of cogorc_putline_YVYU, which writes luma to the low
+# byte of each word and the byte-swapped chroma pair to the high byte.
+# So luma must be taken from the low bytes (as for YUY2) and the chroma
+# pair, which arrives as v,u, must be swapped back to u,v.
+.function cogorc_getline_YVYU
+.dest 8 ayuv guint8
+.source 4 uyvy guint8
+.const 2 c255 0xff
+.temp 2 yy
+.temp 2 uv
+.temp 4 ayay
+.temp 4 uvuv
+
+x2 splitwb uv, yy, uyvy
+swapw uv, uv
+x2 mergebw ayay, c255, yy
+mergewl uvuv, uv, uv
+x2 mergewl ayuv, ayay, uvuv
+
+
+# Planar 4:2:2 -> two AYUV pixels per iteration; both pixels share one
+# u,v pair and alpha is forced to 255.
+.function cogorc_getline_Y42B
+.dest 8 ayuv guint8
+.source 2 yy guint8
+.source 1 u guint8
+.source 1 v guint8
+.const 1 c255 255
+.temp 2 uv
+.temp 2 ay
+.temp 4 uvuv
+.temp 4 ayay
+
+mergebw uv, u, v
+x2 mergebw ayay, c255, yy
+mergewl uvuv, uv, uv
+x2 mergewl ayuv, ayay, uvuv
+
+
+# Planar 4:4:4 -> one AYUV pixel per iteration, alpha forced to 255.
+.function cogorc_getline_Y444
+.dest 4 ayuv guint8
+.source 1 y guint8
+.source 1 u guint8
+.source 1 v guint8
+.const 1 c255 255
+.temp 2 uv
+.temp 2 ay
+
+mergebw uv, u, v
+mergebw ay, c255, y
+mergewl ayuv, ay, uv
+
+
+# Luma-only -> AYUV.  Alpha is forced to 255 and both chroma bytes are
+# set to 0x80, the neutral (no colour) value; 0xff would tint the output.
+.function cogorc_getline_Y800
+.dest 4 ayuv guint8
+.source 1 y guint8
+.const 1 c255 255
+.const 2 c0x8080 0x8080
+.temp 2 ay
+
+mergebw ay, c255, y
+mergewl ayuv, ay, c0x8080
+
+
+# BGRA -> ARGB: reverse the four bytes of each pixel.
+.function cogorc_getline_BGRA
+.dest 4 argb guint8
+.source 4 bgra guint8
+
+swapl argb, bgra
+
+
+# ABGR -> ARGB: split into single bytes and re-merge as a, r, g, b.
+.function cogorc_getline_ABGR
+.dest 4 argb guint8
+.source 4 abgr guint8
+.temp 1 a
+.temp 1 r
+.temp 1 g
+.temp 1 b
+.temp 2 gr
+.temp 2 ab
+.temp 2 ar
+.temp 2 gb
+
+splitlw gr, ab, abgr
+splitwb r, g, gr
+splitwb b, a, ab
+mergebw ar, a, r
+mergebw gb, g, b
+mergewl argb, ar, gb
+
+
+# RGBA -> ARGB: split into single bytes and re-merge as a, r, g, b.
+.function cogorc_getline_RGBA
+.dest 4 argb guint8
+.source 4 rgba guint8
+.temp 1 a
+.temp 1 r
+.temp 1 g
+.temp 1 b
+.temp 2 rg
+.temp 2 ba
+.temp 2 ar
+.temp 2 gb
+
+splitlw ba, rg, rgba
+splitwb g, r, rg
+splitwb a, b, ba
+mergebw ar, a, r
+mergebw gb, g, b
+mergewl argb, ar, gb
+
+
+# NV12 -> two 4-byte pixels per iteration; the interleaved uv pair is
+# duplicated for both pixels and alpha is forced to 255.
+.function cogorc_getline_NV12
+.dest 8 d guint8
+.source 2 y guint8
+.source 2 uv guint8
+.const 1 c255 255
+.temp 4 ay
+.temp 4 uvuv
+
+mergewl uvuv, uv, uv
+x2 mergebw ay, c255, y
+x2 mergewl d, ay, uvuv
+
+
+# NV21 -> two 4-byte pixels per iteration; same as NV12 after the
+# interleaved vu pair has been byte-swapped to uv.
+.function cogorc_getline_NV21
+.dest 8 d guint8
+.source 2 y guint8
+.source 2 vu guint8
+.const 1 c255 255
+.temp 2 uv
+.temp 4 ay
+.temp 4 uvuv
+
+swapw uv, vu
+mergewl uvuv, uv, uv
+x2 mergebw ay, c255, y
+x2 mergewl d, ay, uvuv
+
+
+.function cogorc_putline_I420
+.dest 2 y guint8
+.dest 
1 u guint8 +.dest 1 v guint8 +.source 8 ayuv guint8 +.temp 4 ay +.temp 4 uv +.temp 2 uu +.temp 2 vv +.temp 1 t1 +.temp 1 t2 + +x2 splitlw uv, ay, ayuv +x2 select1wb y, ay +x2 splitwb vv, uu, uv +splitwb t1, t2, uu +avgub u, t1, t2 +splitwb t1, t2, vv +avgub v, t1, t2 + + + +.function cogorc_putline_YUY2 +.dest 4 yuy2 guint8 +.source 8 ayuv guint8 +.temp 2 yy +.temp 2 uv1 +.temp 2 uv2 +.temp 4 ayay +.temp 4 uvuv + +x2 splitlw uvuv, ayay, ayuv +splitlw uv1, uv2, uvuv +x2 avgub uv1, uv1, uv2 +x2 select1wb yy, ayay +x2 mergebw yuy2, yy, uv1 + + +.function cogorc_putline_YVYU +.dest 4 yuy2 guint8 +.source 8 ayuv guint8 +.temp 2 yy +.temp 2 uv1 +.temp 2 uv2 +.temp 4 ayay +.temp 4 uvuv + +x2 splitlw uvuv, ayay, ayuv +splitlw uv1, uv2, uvuv +x2 avgub uv1, uv1, uv2 +x2 select1wb yy, ayay +swapw uv1, uv1 +x2 mergebw yuy2, yy, uv1 + + +.function cogorc_putline_UYVY +.dest 4 yuy2 guint8 +.source 8 ayuv guint8 +.temp 2 yy +.temp 2 uv1 +.temp 2 uv2 +.temp 4 ayay +.temp 4 uvuv + +x2 splitlw uvuv, ayay, ayuv +splitlw uv1, uv2, uvuv +x2 avgub uv1, uv1, uv2 +x2 select1wb yy, ayay +x2 mergebw yuy2, uv1, yy + + + +.function cogorc_putline_Y42B +.dest 2 y guint8 +.dest 1 u guint8 +.dest 1 v guint8 +.source 8 ayuv guint8 +.temp 4 ayay +.temp 4 uvuv +.temp 2 uv1 +.temp 2 uv2 + +x2 splitlw uvuv, ayay, ayuv +splitlw uv1, uv2, uvuv +x2 avgub uv1, uv1, uv2 +splitwb v, u, uv1 +x2 select1wb y, ayay + + +.function cogorc_putline_Y444 +.dest 1 y guint8 +.dest 1 u guint8 +.dest 1 v guint8 +.source 4 ayuv guint8 +.temp 2 ay +.temp 2 uv + +splitlw uv, ay, ayuv +splitwb v, u, uv +select1wb y, ay + + +.function cogorc_putline_Y800 +.dest 1 y guint8 +.source 4 ayuv guint8 +.temp 2 ay + +select0lw ay, ayuv +select1wb y, ay + + +.function cogorc_putline_BGRA +.dest 4 bgra guint8 +.source 4 argb guint8 + +swapl bgra, argb + + +.function cogorc_putline_ABGR +.dest 4 abgr guint8 +.source 4 argb guint8 +.temp 1 a +.temp 1 r +.temp 1 g +.temp 1 b +.temp 2 gr +.temp 2 ab +.temp 2 ar +.temp 2 gb + +splitlw gb, 
ar, argb +splitwb b, g, gb +splitwb r, a, ar +mergebw ab, a, b +mergebw gr, g, r +mergewl abgr, ab, gr + + +.function cogorc_putline_RGBA +.dest 4 rgba guint8 +.source 4 argb guint8 +.temp 1 a +.temp 1 r +.temp 1 g +.temp 1 b +.temp 2 rg +.temp 2 ba +.temp 2 ar +.temp 2 gb + +splitlw gb, ar, argb +splitwb b, g, gb +splitwb r, a, ar +mergebw ba, b, a +mergebw rg, r, g +mergewl rgba, rg, ba + + +.function cogorc_putline_NV12 +.dest 2 y guint8 +.dest 2 uv guint8 +.source 8 ayuv guint8 +.temp 4 ay +.temp 4 uvuv +.temp 2 uv1 +.temp 2 uv2 + +x2 splitlw uvuv, ay, ayuv +x2 select1wb y, ay +splitlw uv1, uv2, uvuv +x2 avgub uv, uv1, uv2 + + +.function cogorc_putline_NV21 +.dest 2 y guint8 +.dest 2 vu guint8 +.source 8 ayuv guint8 +.temp 4 ay +.temp 4 uvuv +.temp 2 uv1 +.temp 2 uv2 +.temp 2 uv + +x2 splitlw uvuv, ay, ayuv +x2 select1wb y, ay +splitlw uv1, uv2, uvuv +x2 avgub uv, uv1, uv2 +swapw vu, uv + + + +#.init schro_orc_init + +.function orc_add2_rshift_add_s16_22_op +.dest 2 d1 int16_t +.source 2 s1 int16_t +.source 2 s2 int16_t +.source 2 s3 int16_t +.temp 2 t1 + +addw t1, s2, s3 +addw t1, t1, 2 +shrsw t1, t1, 2 +addw d1, s1, t1 + + +.function orc_add2_rshift_add_s16_22 +.dest 2 d1 int16_t +.source 2 s1 int16_t +.source 2 s2 int16_t +.temp 2 t1 + +addw t1, s1, s2 +addw t1, t1, 2 +shrsw t1, t1, 2 +addw d1, d1, t1 + + +.function orc_add2_rshift_sub_s16_22_op +.dest 2 d1 int16_t +.source 2 s1 int16_t +.source 2 s2 int16_t +.source 2 s3 int16_t +.temp 2 t1 + +addw t1, s2, s3 +addw t1, t1, 2 +shrsw t1, t1, 2 +subw d1, s1, t1 + + +.function orc_add2_rshift_sub_s16_22 +.dest 2 d1 int16_t +.source 2 s1 int16_t +.source 2 s2 int16_t +.temp 2 t1 + +addw t1, s1, s2 +addw t1, t1, 2 +shrsw t1, t1, 2 +subw d1, d1, t1 + + +.function orc_add2_rshift_add_s16_11_op +.dest 2 d1 int16_t +.source 2 s1 int16_t +.source 2 s2 int16_t +.source 2 s3 int16_t +.temp 2 t1 + +avgsw t1, s2, s3 +addw d1, s1, t1 + + +.function orc_add2_rshift_add_s16_11 +.dest 2 d1 int16_t +.source 2 s1 int16_t 
+.source 2 s2 int16_t +.temp 2 t1 + +avgsw t1, s1, s2 +addw d1, d1, t1 + + +.function orc_add2_rshift_sub_s16_11_op +.dest 2 d1 int16_t +.source 2 s1 int16_t +.source 2 s2 int16_t +.source 2 s3 int16_t +.temp 2 t1 + +avgsw t1, s2, s3 +subw d1, s1, t1 + + +.function orc_add2_rshift_sub_s16_11 +.dest 2 d1 int16_t +.source 2 s1 int16_t +.source 2 s2 int16_t +.temp 2 t1 + +avgsw t1, s1, s2 +subw d1, d1, t1 + + +.function orc_add_const_rshift_s16_11 +.dest 2 d1 int16_t +.source 2 s1 int16_t +.temp 2 t1 + +addw t1, s1, 1 +shrsw d1, t1, 1 + + +.function orc_add_const_rshift_s16 +.dest 2 d1 int16_t +.param 2 p1 +.param 2 p2 +.temp 2 t1 + +addw t1, d1, p1 +shrsw d1, t1, p2 + + +.function orc_add_s16 +.dest 2 d1 int16_t +.source 2 s1 int16_t +.source 2 s2 int16_t + +addw d1, s1, s2 + + +.function orc_add_s16_2d +.flags 2d +.dest 2 d1 int16_t +.source 2 s1 int16_t + +addw d1, d1, s1 + + +.function orc_addc_rshift_s16 +.dest 2 d1 int16_t +.source 2 s1 int16_t +.source 2 s2 int16_t +.temp 2 t1 +.param 2 p1 + +addw t1, s1, s2 +shrsw d1, t1, p1 + + +.function orc_lshift1_s16 +.dest 2 d1 int16_t +.source 2 s1 int16_t + +shlw d1, s1, 1 + + +.function orc_lshift2_s16 +.dest 2 d1 int16_t +.source 2 s1 int16_t + +shlw d1, s1, 2 + + +.function orc_lshift_s16_ip +.dest 2 d1 int16_t +.param 2 p1 + +shlw d1, d1, p1 + + +.function orc_mas2_add_s16_op +.dest 2 d1 int16_t +.source 2 s0 int16_t +.source 2 s1 int16_t +.source 2 s2 int16_t +.temp 2 t1 +.temp 4 t2 +.param 2 p1 +.param 4 p2 +.param 4 p3 + +addw t1, s1, s2 +mulswl t2, t1, p1 +addl t2, t2, p2 +shrsl t2, t2, p3 +convlw t1, t2 +addw d1, s0, t1 + + +.function orc_mas2_add_s16_ip +.dest 2 d1 int16_t +.source 2 s1 int16_t +.source 2 s2 int16_t +.temp 2 t1 +.temp 4 t2 +.param 2 p1 +.param 4 p2 +.param 4 p3 + +addw t1, s1, s2 +mulswl t2, t1, p1 +addl t2, t2, p2 +shrsl t2, t2, p3 +convlw t1, t2 +addw d1, d1, t1 + + +.function orc_mas2_sub_s16_op +.dest 2 d1 int16_t +.source 2 s0 int16_t +.source 2 s1 int16_t +.source 2 s2 int16_t +.temp 2 
t1 +.temp 4 t2 +.param 2 p1 +.param 4 p2 +.param 4 p3 + +addw t1, s1, s2 +mulswl t2, t1, p1 +addl t2, t2, p2 +shrsl t2, t2, p3 +convlw t1, t2 +subw d1, s0, t1 + + +.function orc_mas2_sub_s16_ip +.dest 2 d1 int16_t +.source 2 s1 int16_t +.source 2 s2 int16_t +.temp 2 t1 +.temp 4 t2 +.param 2 p1 +.param 4 p2 +.param 4 p3 + +addw t1, s1, s2 +mulswl t2, t1, p1 +addl t2, t2, p2 +shrsl t2, t2, p3 +convlw t1, t2 +subw d1, d1, t1 + + +.function orc_mas4_across_add_s16_1991_op +.dest 2 d1 int16_t +.source 2 s0 int16_t +.source 2 s1 int16_t +.source 2 s2 int16_t +.source 2 s3 int16_t +.source 2 s4 int16_t +.param 4 p1 +.param 4 p2 +.temp 2 t1 +.temp 2 t2 +.temp 4 t3 +.temp 4 t4 + +addw t1, s2, s3 +mulswl t3, t1, 9 +addw t2, s1, s4 +convswl t4, t2 +subl t3, t3, t4 +addl t3, t3, p1 +shrsl t3, t3, p2 +convlw t1, t3 +addw d1, s0, t1 + + +.function orc_mas4_across_add_s16_1991_ip +.dest 2 d1 int16_t +.source 2 s1 int16_t +.param 4 p1 +.param 4 p2 +.temp 2 t1 +.temp 2 t2 +.temp 4 t3 +.temp 4 t4 + +loadoffw t1, s1, 1 +loadoffw t2, s1, 2 +addw t1, t1, t2 +mulswl t3, t1, 9 +loadw t1, s1 +loadoffw t2, s1, 3 +addw t2, t1, t2 +convswl t4, t2 +subl t3, t3, t4 +addl t3, t3, p1 +shrsl t3, t3, p2 +convlw t1, t3 +addw d1, d1, t1 + + +.function orc_mas4_across_sub_s16_1991_op +.dest 2 d1 int16_t +.source 2 s0 int16_t +.source 2 s1 int16_t +.source 2 s2 int16_t +.source 2 s3 int16_t +.source 2 s4 int16_t +.param 4 p1 +.param 4 p2 +.temp 2 t1 +.temp 2 t2 +.temp 4 t3 +.temp 4 t4 + +addw t1, s2, s3 +mulswl t3, t1, 9 +addw t2, s1, s4 +convswl t4, t2 +subl t3, t3, t4 +addl t3, t3, p1 +shrsl t3, t3, p2 +convlw t1, t3 +subw d1, s0, t1 + + +.function orc_mas4_across_sub_s16_1991_ip +.dest 2 d1 int16_t +.source 2 s1 int16_t +.param 4 p1 +.param 4 p2 +.temp 2 t1 +.temp 2 t2 +.temp 4 t3 +.temp 4 t4 + +loadoffw t1, s1, 1 +loadoffw t2, s1, 2 +addw t1, t1, t2 +mulswl t3, t1, 9 +loadw t1, s1 +loadoffw t2, s1, 3 +addw t2, t1, t2 +convswl t4, t2 +subl t3, t3, t4 +addl t3, t3, p1 +shrsl t3, t3, p2 +convlw t1, 
t3 +subw d1, d1, t1 + + +.function orc_subtract_s16 +.dest 2 d1 int16_t +.source 2 s1 int16_t +.source 2 s2 int16_t + +subw d1, s1, s2 + + +.function orc_add_s16_u8 +.dest 2 d1 int16_t +.source 2 s1 int16_t +.source 1 s2 +.temp 2 t1 + +convubw t1, s2 +addw d1, t1, s1 + + +.function orc_add_s16_u8_2d +.flags 2d +.dest 2 d1 int16_t +.source 1 s1 +.temp 2 t1 + +convubw t1, s1 +addw d1, d1, t1 + + +.function orc_convert_s16_u8 +.dest 2 d1 +.source 1 s1 + +convubw d1, s1 + + +.function orc_convert_u8_s16 +.dest 1 d1 +.source 2 s1 int16_t + +convsuswb d1, s1 + + +.function orc_offsetconvert_u8_s16 +.dest 1 d1 +.source 2 s1 int16_t +.temp 2 t1 + +addw t1, s1, 128 +convsuswb d1, t1 + + +.function orc_offsetconvert_s16_u8 +.dest 2 d1 int16_t +.source 1 s1 +.temp 2 t1 + +convubw t1, s1 +subw d1, t1, 128 + + +.function orc_subtract_s16_u8 +.dest 2 d1 int16_t +.source 2 s1 int16_t +.source 1 s2 +.temp 2 t1 + +convubw t1, s2 +subw d1, s1, t1 + + +.function orc_multiply_and_add_s16_u8 +.dest 2 d1 int16_t +.source 2 s1 int16_t +.source 1 s2 +.temp 2 t1 + +convubw t1, s2 +mullw t1, t1, s1 +addw d1, d1, t1 + + +.function orc_splat_s16_ns +.dest 2 d1 int16_t +.param 2 p1 + +copyw d1, p1 + + +.function orc_splat_s16_2d_4xn +.n 4 +.flags 2d +.dest 2 d1 int16_t +.param 2 p1 + +copyw d1, p1 + + +.function orc_splat_s16_2d_8xn +.n 8 +.flags 2d +.dest 2 d1 int16_t +.param 2 p1 + +copyw d1, p1 + + +.function orc_splat_s16_2d +.flags 2d +.dest 2 d1 int16_t +.param 2 p1 + +copyw d1, p1 + + +.function orc_splat_u8_ns +.dest 1 d1 +.param 1 p1 + +copyb d1, p1 + + +.function orc_splat_u8_2d +.flags 2d +.dest 1 d1 +.param 1 p1 + +copyb d1, p1 + + +.function orc_average_u8 +.dest 1 d1 +.source 1 s1 +.source 1 s2 + +avgub d1, s1, s2 + + +.function orc_rrshift6_add_s16_2d +.flags 2d +.dest 1 d1 uint8_t +.source 2 s1 int16_t +.source 2 s2 int16_t +.temp 2 t1 + +addw t1, s2, 32 +shrsw t1, t1, 6 +addw t1, s1, t1 +convsuswb d1, t1 + + +.function orc_rrshift6_sub_s16_2d +.flags 2d +.dest 2 d1 int16_t 
+.dest 2 d2 int16_t +.temp 2 t1 + +subw t1, d2, 8160 +shrsw t1, t1, 6 +copyw d2, t1 +subw d1, d1, t1 + + +.function orc_rrshift6_s16_ip_2d +.flags 2d +.dest 2 d1 int16_t +.temp 2 t1 + +subw t1, d1, 8160 +shrsw d1, t1, 6 + + +.function orc_rrshift6_s16_ip +.dest 2 d1 int16_t +.temp 2 t1 + +subw t1, d1, 8160 +shrsw d1, t1, 6 + + +.function orc_unpack_yuyv_y +.dest 1 d1 +.source 2 s1 + +select0wb d1, s1 + + +.function orc_unpack_yuyv_u +.dest 1 d1 +.source 4 s1 +.temp 2 t1 + +select0lw t1, s1 +select1wb d1, t1 + + +.function orc_unpack_yuyv_v +.dest 1 d1 +.source 4 s1 +.temp 2 t1 + +select1lw t1, s1 +select1wb d1, t1 + + +.function orc_packyuyv +.dest 4 d1 +.source 2 s1 uint8_t +.source 1 s2 +.source 1 s3 +.temp 1 t1 +.temp 1 t2 +.temp 2 t3 +.temp 2 t4 +.temp 2 t5 + +copyw t5, s1 +select0wb t1, t5 +select1wb t2, t5 +mergebw t3, t1, s2 +mergebw t4, t2, s3 +mergewl d1, t3, t4 + + +.function orc_unpack_uyvy_y +.dest 1 d1 +.source 2 s1 + +select1wb d1, s1 + + +.function orc_unpack_uyvy_u +.dest 1 d1 +.source 4 s1 +.temp 2 t1 + +select0lw t1, s1 +select0wb d1, t1 + + +.function orc_unpack_uyvy_v +.dest 1 d1 +.source 4 s1 +.temp 2 t1 + +select1lw t1, s1 +select0wb d1, t1 + + +.function orc_interleave2_s16 +.dest 4 d1 int16_t +.source 2 s1 int16_t +.source 2 s2 int16_t + +mergewl d1, s1, s2 + + +.function orc_interleave2_rrshift1_s16 +.dest 4 d1 int16_t +.source 2 s1 int16_t +.source 2 s2 int16_t +.temp 2 t1 +.temp 2 t2 + +addw t1, s1, 1 +shrsw t1, t1, 1 +addw t2, s2, 1 +shrsw t2, t2, 1 +mergewl d1, t1, t2 + + +.function orc_deinterleave2_s16 +.dest 2 d1 int16_t +.dest 2 d2 int16_t +.source 4 s1 int16_t +.temp 4 t1 + +copyl t1, s1 +select0lw d1, t1 +select1lw d2, t1 + + +.function orc_deinterleave2_lshift1_s16 +.dest 2 d1 int16_t +.dest 2 d2 int16_t +.source 4 s1 int16_t +.temp 4 t1 +.temp 2 t2 +.temp 2 t3 + +copyl t1, s1 +select0lw t2, t1 +shlw d1, t2, 1 +select1lw t3, t1 +shlw d2, t3, 1 + + +.function orc_haar_deint_lshift1_split_s16 +.dest 2 d1 int16_t +.dest 2 d2 int16_t 
+.source 4 s1 int16_t +.temp 2 t1 +.temp 2 t2 +.temp 4 t3 + +copyl t3, s1 +select0lw t1, t3 +select1lw t2, t3 +shlw t1, t1, 1 +shlw t2, t2, 1 +subw t2, t2, t1 +copyw d2, t2 +avgsw t2, t2, 0 +addw d1, t1, t2 + + +.function orc_haar_deint_split_s16 +.dest 2 d1 int16_t +.dest 2 d2 int16_t +.source 4 s1 int16_t +.temp 2 t1 +.temp 2 t2 +.temp 4 t3 + +copyl t3, s1 +select0lw t1, t3 +select1lw t2, t3 +subw t2, t2, t1 +copyw d2, t2 +avgsw t2, t2, 0 +addw d1, t1, t2 + + +.function orc_haar_split_s16_lo +.dest 2 d1 int16_t +.source 2 s1 int16_t +.source 2 s2 int16_t +.temp 2 t1 +.temp 2 t2 + +copyw t1, s1 +subw t2, s2, t1 +avgsw t2, t2, 0 +addw d1, t1, t2 + + +.function orc_haar_split_s16_hi +.dest 2 d1 int16_t +.source 2 s1 int16_t +.source 2 s2 int16_t + +subw d1, s2, s1 + + +.function orc_haar_split_s16_op +.dest 2 d1 int16_t +.dest 2 d2 int16_t +.source 2 s1 int16_t +.source 2 s2 int16_t +.temp 2 t1 +.temp 2 t2 + +copyw t1, s1 +subw t2, s2, t1 +copyw d2, t2 +avgsw t2, t2, 0 +addw d1, t1, t2 + + +.function orc_haar_split_s16 +.dest 2 d1 int16_t +.dest 2 d2 int16_t +.temp 2 t1 +.temp 2 t2 + +copyw t1, d1 +copyw t2, d2 +subw t2, t2, t1 +copyw d2, t2 +avgsw t2, t2, 0 +addw d1, t1, t2 + + +.function orc_haar_synth_s16_lo +.dest 2 d1 int16_t +.source 2 s1 int16_t +.source 2 s2 int16_t +.temp 2 t1 + +avgsw t1, s2, 0 +subw d1, s1, t1 + + +.function orc_haar_synth_s16_hi +.dest 2 d1 int16_t +.source 2 s1 int16_t +.source 2 s2 int16_t +.temp 2 t1 +.temp 2 t2 +.temp 2 t3 + +copyw t2, s2 +avgsw t3, t2, 0 +subw t1, s1, t3 +addw d1, t2, t1 + + +.function orc_haar_synth_s16_op +.dest 2 d1 int16_t +.dest 2 d2 int16_t +.source 2 s1 int16_t +.source 2 s2 int16_t +.temp 2 t1 +.temp 2 t2 +.temp 2 t3 + +copyw t2, s2 +avgsw t3, t2, 0 +subw t1, s1, t3 +copyw d1, t1 +addw d2, t2, t1 + + +.function orc_haar_synth_s16 +.dest 2 d1 int16_t +.dest 2 d2 int16_t +.temp 2 t1 +.temp 2 t2 +.temp 2 t3 + +copyw t1, d1 +copyw t2, d2 +avgsw t3, t2, 0 +subw t1, t1, t3 +copyw d1, t1 +addw d2, t2, t1 + + 
+.function orc_haar_synth_rrshift1_int_s16 +.dest 4 d1 int16_t +.source 2 s1 int16_t +.source 2 s2 int16_t +.temp 2 t1 +.temp 2 t2 + +copyw t2, s2 +avgsw t1, t2, 0 +subw t1, s1, t1 +addw t2, t2, t1 +avgsw t1, t1, 0 +avgsw t2, t2, 0 +mergewl d1, t1, t2 + + +.function orc_haar_synth_int_s16 +.dest 4 d1 int16_t +.source 2 s1 int16_t +.source 2 s2 int16_t +.temp 2 t1 +.temp 2 t2 + +copyw t2, s2 +avgsw t1, t2, 0 +subw t1, s1, t1 +addw t2, t2, t1 +mergewl d1, t1, t2 + + +.function orc_haar_sub_s16 +.dest 2 d1 int16_t +.source 2 s1 int16_t + +subw d1, d1, s1 + + +.function orc_haar_add_half_s16 +.dest 2 d1 int16_t +.source 2 s1 int16_t +.temp 2 t1 + +avgsw t1, s1, 0 +addw d1, d1, t1 + + +.function orc_haar_add_s16 +.dest 2 d1 int16_t +.source 2 s1 int16_t + +addw d1, d1, s1 + + +.function orc_haar_sub_half_s16 +.dest 2 d1 int16_t +.source 2 s1 int16_t +.temp 2 t1 + +avgsw t1, s1, 0 +subw d1, d1, t1 + + +.function orc_sum_u8 +.accumulator 4 a1 int32_t +.source 1 s1 +.temp 2 t1 +.temp 4 t2 + +convubw t1, s1 +convuwl t2, t1 +accl a1, t2 + + +.function orc_sum_s16 +.accumulator 4 a1 int32_t +.source 2 s1 int16_t +.temp 4 t1 + +convswl t1, s1 +accl a1, t1 + + +.function orc_sum_square_diff_u8 +.accumulator 4 a1 int32_t +.source 1 s1 +.source 1 s2 +.temp 2 t1 +.temp 2 t2 +.temp 4 t3 + +convubw t1, s1 +convubw t2, s2 +subw t1, t1, t2 +mullw t1, t1, t1 +convuwl t3, t1 +accl a1, t3 + + +.function orc_dequantise_s16_2d_4xn +.n 4 +.flags 2d +.dest 2 d1 int16_t +.source 2 s1 int16_t +.param 2 p1 +.param 2 p2 +.temp 2 t1 +.temp 2 t2 + +copyw t1, s1 +signw t2, t1 +absw t1, t1 +mullw t1, t1, p1 +addw t1, t1, p2 +shrsw t1, t1, 2 +mullw d1, t1, t2 + + +.function orc_dequantise_s16_2d_8xn +.n 8 +.flags 2d +.dest 2 d1 int16_t +.source 2 s1 int16_t +.param 2 p1 +.param 2 p2 +.temp 2 t1 +.temp 2 t2 + +copyw t1, s1 +signw t2, t1 +absw t1, t1 +mullw t1, t1, p1 +addw t1, t1, p2 +shrsw t1, t1, 2 +mullw d1, t1, t2 + + +.function orc_dequantise_s16_ip_2d_8xn +.n 8 +.flags 2d +.dest 2 d1 int16_t 
+.param 2 p1 +.param 2 p2 +.temp 2 t1 +.temp 2 t2 + +copyw t1, d1 +signw t2, t1 +absw t1, t1 +mullw t1, t1, p1 +addw t1, t1, p2 +shrsw t1, t1, 2 +mullw d1, t1, t2 + + +.function orc_dequantise_s16_ip_2d +.flags 2d +.dest 2 d1 int16_t +.param 2 p1 +.param 2 p2 +.temp 2 t1 +.temp 2 t2 + +copyw t1, d1 +signw t2, t1 +absw t1, t1 +mullw t1, t1, p1 +addw t1, t1, p2 +shrsw t1, t1, 2 +mullw d1, t1, t2 + + +.function orc_dequantise_s16_ip +.dest 2 d1 int16_t +.param 2 p1 +.param 2 p2 +.temp 2 t1 +.temp 2 t2 + +copyw t1, d1 +signw t2, t1 +absw t1, t1 +mullw t1, t1, p1 +addw t1, t1, p2 +shrsw t1, t1, 2 +mullw d1, t1, t2 + + +.function orc_dequantise_s16 +.dest 2 d1 int16_t +.source 2 s1 int16_t +.param 2 p1 +.param 2 p2 +.temp 2 t1 +.temp 2 t2 + +copyw t1, s1 +signw t2, t1 +absw t1, t1 +mullw t1, t1, p1 +addw t1, t1, p2 +shrsw t1, t1, 2 +mullw d1, t1, t2 + + +.function orc_dequantise_var_s16_ip +.dest 2 d1 int16_t +.source 2 s1 int16_t +.source 2 s2 int16_t +.temp 2 t1 +.temp 2 t2 + +copyw t1, d1 +signw t2, t1 +absw t1, t1 +mullw t1, t1, s1 +addw t1, t1, s2 +shrsw t1, t1, 2 +mullw d1, t1, t2 + + +# only works for values between -16384 and 16384 +.function orc_quantise1_s16 +.dest 2 d1 int16_t +.source 2 s1 int16_t +.param 2 p1 +.param 2 p2 +.param 2 p3 +.temp 2 t1 +.temp 2 t2 + +copyw t1, s1 +signw t2, t1 +absw t1, t1 +shlw t1, t1, 2 +subw t1, t1, p2 +mulhuw t1, t1, p1 +shruw t1, t1, p3 +mullw d1, t1, t2 + + +# only works for values between -16384 and 16384 +.function orc_quantise2_s16 +.dest 2 d1 int16_t +.source 2 s1 int16_t +.param 2 p1 +.param 2 p2 +.temp 2 t1 +.temp 2 t2 + +copyw t1, s1 +signw t2, t1 +absw t1, t1 +shlw t1, t1, 2 +subw t1, t1, p2 +shruw t1, t1, p1 +mullw d1, t1, t2 + + +# only works for values between -16384 and 16384 +.function orc_quantdequant1_s16 +.dest 2 d1 int16_t +.dest 2 d2 int16_t +.param 2 p1 +.param 2 p2 +.param 2 p3 +.param 2 p4 +.param 2 p5 +.temp 2 t1 +.temp 2 t2 + +copyw t1, d2 +signw t2, t1 +absw t1, t1 +shlw t1, t1, 2 +subw t1, t1, p2 
+mulhuw t1, t1, p1 +shruw t1, t1, p3 +mullw t2, t1, t2 +copyw d1, t2 +signw t2, t2 +mullw t1, t1, p4 +addw t1, t1, p5 +shrsw t1, t1, 2 +mullw d2, t1, t2 + + +# only works for values between -16384 and 16384 +.function orc_quantdequant3_s16 +.dest 2 d1 int16_t +.dest 2 d2 int16_t +.param 2 p1 +.param 2 p2 +.param 2 p3 +.param 2 p4 +.param 2 p5 +.param 4 p6 +.temp 2 t1 +.temp 2 t2 +.temp 4 t3 + +copyw t1, d2 +signw t2, t1 +absw t1, t1 +shlw t1, t1, 2 +subw t1, t1, p2 +muluwl t3, t1, p1 +addl t3, t3, p6 +shrul t3, t3, p3 +convlw t1, t3 +mullw t2, t1, t2 +copyw d1, t2 +signw t2, t2 +mullw t1, t1, p4 +addw t1, t1, p5 +shrsw t1, t1, 2 +mullw d2, t1, t2 + + +# only works for values between -16384 and 16384 +.function orc_quantdequant2_s16 +.dest 2 d1 int16_t +.dest 2 d2 int16_t +.param 2 p1 +.param 2 p2 +.param 2 p4 +.param 2 p5 +.temp 2 t1 +.temp 2 t2 + +copyw t1, d2 +signw t2, t1 +absw t1, t1 +shlw t1, t1, 2 +subw t1, t1, p2 +shruw t1, t1, p1 +mullw t2, t1, t2 +copyw d1, t2 +signw t2, t2 +mullw t1, t1, p4 +addw t1, t1, p5 +shrsw t1, t1, 2 +mullw d2, t1, t2 + + + +.function orc_downsample_vert_u8 +.dest 1 d1 +.source 1 s1 +.source 1 s2 +.source 1 s3 +.source 1 s4 +.temp 2 t1 +.temp 2 t2 +.temp 2 t3 + +convubw t1, s1 +convubw t2, s4 +addw t1, t1, t2 +mullw t1, t1, 6 +convubw t2, s2 +convubw t3, s3 +addw t2, t2, t3 +mullw t2, t2, 26 +addw t2, t2, t1 +addw t2, t2, 32 +shruw t2, t2, 6 +convwb d1, t2 + + +.function orc_downsample_horiz_u8 +.dest 1 d1 +.source 2 s1 uint8_t +.temp 2 a +.temp 2 b +.temp 2 t1 +.temp 1 t2 +.temp 1 t3 +.temp 2 c + +loadw t1, s1 +select1wb t2, t1 +convubw a, t2 +loadoffw t1, s1, 2 +select0wb t2, t1 +convubw b, t2 +addw c, a, b +mullw c, c, 6 + +loadoffw t1, s1, 1 +splitwb t2, t3, t1 +convubw a, t2 +convubw b, t3 +addw a, a, b +mullw a, a, 26 +addw c, c, a +addw c, c, 32 +shruw c, c, 6 +convwb d1, c + + +.function orc_stats_moment_s16 +.source 2 s1 int16_t +.accumulator 4 a1 int32_t +.temp 2 t1 +.temp 4 t2 + +absw t1, s1 +subw t1, t1, 2 +maxsw t1, 
t1, 0
+convuwl t2, t1
+accl a1, t2
+
+
+# Accumulates min(max(|s1| - 1, 0), 1) for every sample, i.e. adds 1 for
+# each sample whose absolute value is greater than 1.
+.function orc_stats_above_s16
+.source 2 s1 int16_t
+.accumulator 4 a1 int32_t
+.temp 2 t1
+.temp 4 t2
+
+absw t1, s1
+subw t1, t1, 1
+maxsw t1, t1, 0
+minsw t1, t1, 1
+convuwl t2, t1
+accl a1, t2
+
+
+# Accumulates |s1| into a 16-bit accumulator.
+.function orc_accw
+.accumulator 2 a1 int
+.source 2 s1 int16_t
+.temp 2 t1
+
+absw t1, s1
+accw a1, t1
+
+
+# The orc_avg2_* programs below are all the same avgub of two byte
+# sources; the variants differ only in the fixed row width given by .n.
+.function orc_avg2_8xn_u8
+.flags 2d
+.n 8
+.dest 1 d1 uint8_t
+.source 1 s1 uint8_t
+.source 1 s2 uint8_t
+
+avgub d1, s1, s2
+
+
+.function orc_avg2_12xn_u8
+.flags 2d
+.n 12
+.dest 1 d1 uint8_t
+.source 1 s1 uint8_t
+.source 1 s2 uint8_t
+
+avgub d1, s1, s2
+
+
+.function orc_avg2_16xn_u8
+.flags 2d
+.n 16
+.dest 1 d1 uint8_t
+.source 1 s1 uint8_t
+.source 1 s2 uint8_t
+
+avgub d1, s1, s2
+
+
+.function orc_avg2_32xn_u8
+.flags 2d
+.n 32
+.dest 1 d1 uint8_t
+.source 1 s1 uint8_t
+.source 1 s2 uint8_t
+
+avgub d1, s1, s2
+
+
+.function orc_avg2_nxm_u8
+.flags 2d
+.dest 1 d1 uint8_t
+.source 1 s1 uint8_t
+.source 1 s2 uint8_t
+
+avgub d1, s1, s2
+
+
+# The orc_combine4_* programs compute a weighted sum of four byte sources:
+#   d1 = sat_u8((s1*p1 + s2*p2 + s3*p3 + s4*p4 + 8) >> 4)
+# The "+ 8" term is the rounding offset for the ">> 4", so every variant
+# performs the shift, exactly as orc_combine4_nxm_u8 does.  The variants
+# differ only in the fixed row width given by .n.
+.function orc_combine4_8xn_u8
+.flags 2d
+.n 8
+.dest 1 d1 uint8_t
+.source 1 s1 uint8_t
+.source 1 s2 uint8_t
+.source 1 s3 uint8_t
+.source 1 s4 uint8_t
+.param 2 p1
+.param 2 p2
+.param 2 p3
+.param 2 p4
+.temp 2 t1
+.temp 2 t2
+
+convubw t1, s1
+mullw t2, t1, p1
+convubw t1, s2
+mullw t1, t1, p2
+addw t2, t2, t1
+convubw t1, s3
+mullw t1, t1, p3
+addw t2, t2, t1
+convubw t1, s4
+mullw t1, t1, p4
+addw t2, t2, t1
+addw t2, t2, 8
+shrsw t2, t2, 4
+convsuswb d1, t2
+
+
+.function orc_combine4_12xn_u8
+.flags 2d
+.n 12
+.dest 1 d1 uint8_t
+.source 1 s1 uint8_t
+.source 1 s2 uint8_t
+.source 1 s3 uint8_t
+.source 1 s4 uint8_t
+.param 2 p1
+.param 2 p2
+.param 2 p3
+.param 2 p4
+.temp 2 t1
+.temp 2 t2
+
+convubw t1, s1
+mullw t2, t1, p1
+convubw t1, s2
+mullw t1, t1, p2
+addw t2, t2, t1
+convubw t1, s3
+mullw t1, t1, p3
+addw t2, t2, t1
+convubw t1, s4
+mullw t1, t1, p4
+addw t2, t2, t1
+addw t2, t2, 8
+shrsw t2, t2, 4
+convsuswb d1, t2
+
+
+.function orc_combine4_16xn_u8
+.flags 2d
+.n 16
+.dest 1 d1 uint8_t
+.source 1 s1 uint8_t
+.source 1 s2 uint8_t
+.source 1 s3 uint8_t
+.source 1 s4 uint8_t
+.param 2 p1
+.param 2 p2
+.param 2 p3
+.param 2 p4
+.temp 2 t1
+.temp 2 t2
+
+convubw t1, s1
+mullw t2, t1, p1
+convubw t1, s2
+mullw t1, t1, p2
+addw t2, t2, t1
+convubw t1, s3
+mullw t1, t1, p3
+addw t2, t2, t1
+convubw t1, s4
+mullw t1, t1, p4
+addw t2, t2, t1
+addw t2, t2, 8
+shrsw t2, t2, 4
+convsuswb d1, t2
+
+
+.function orc_combine4_24xn_u8
+.flags 2d
+.n 24
+.dest 1 d1 uint8_t
+.source 1 s1 uint8_t
+.source 1 s2 uint8_t
+.source 1 s3 uint8_t
+.source 1 s4 uint8_t
+.param 2 p1
+.param 2 p2
+.param 2 p3
+.param 2 p4
+.temp 2 t1
+.temp 2 t2
+
+convubw t1, s1
+mullw t2, t1, p1
+convubw t1, s2
+mullw t1, t1, p2
+addw t2, t2, t1
+convubw t1, s3
+mullw t1, t1, p3
+addw t2, t2, t1
+convubw t1, s4
+mullw t1, t1, p4
+addw t2, t2, t1
+addw t2, t2, 8
+shrsw t2, t2, 4
+convsuswb d1, t2
+
+
+.function orc_combine4_32xn_u8
+.flags 2d
+.n 32
+.dest 1 d1 uint8_t
+.source 1 s1 uint8_t
+.source 1 s2 uint8_t
+.source 1 s3 uint8_t
+.source 1 s4 uint8_t
+.param 2 p1
+.param 2 p2
+.param 2 p3
+.param 2 p4
+.temp 2 t1
+.temp 2 t2
+
+convubw t1, s1
+mullw t2, t1, p1
+convubw t1, s2
+mullw t1, t1, p2
+addw t2, t2, t1
+convubw t1, s3
+mullw t1, t1, p3
+addw t2, t2, t1
+convubw t1, s4
+mullw t1, t1, p4
+addw t2, t2, t1
+addw t2, t2, 8
+shrsw t2, t2, 4
+convsuswb d1, t2
+
+
+.function orc_combine4_nxm_u8
+.flags 2d
+.dest 1 d1 uint8_t
+.source 1 s1 uint8_t
+.source 1 s2 uint8_t
+.source 1 s3 uint8_t
+.source 1 s4 uint8_t
+.param 2 p1
+.param 2 p2
+.param 2 p3
+.param 2 p4
+.temp 2 t1
+.temp 2 t2
+
+convubw t1, s1
+mullw t2, t1, p1
+convubw t1, s2
+mullw t1, t1, p2
+addw t2, t2, t1
+convubw t1, s3
+mullw t1, t1, p3
+addw t2, t2, t1
+convubw t1, s4
+mullw t1, t1, p4
+addw t2, t2, t1
+addw t2, t2, 8
+shrsw t2, t2, 4
+convsuswb d1, t2
+
+
+.function orc_combine2_8xn_u8
+.flags 2d
+.n 8
+.dest 1 d1 uint8_t
+.source 1 s1 uint8_t
+.source 1 s2 uint8_t
+.param 2 p1
+.param 2 p2
+.param 2 p3
+.param 2 p4
+.temp 2 t1
+.temp 2 t2
+
+convubw t1, s1
+convubw 
t2, s2 +mullw t1, t1, p1 +mullw t2, t2, p2 +addw t1, t1, t2 +addw t1, t1, p3 +shrsw t1, t1, p4 +convsuswb d1, t1 + + + +.function orc_combine2_12xn_u8 +.flags 2d +.n 12 +.dest 1 d1 uint8_t +.source 1 s1 uint8_t +.source 1 s2 uint8_t +.param 2 p1 +.param 2 p2 +.param 2 p3 +.param 2 p4 +.temp 2 t1 +.temp 2 t2 + +convubw t1, s1 +convubw t2, s2 +mullw t1, t1, p1 +mullw t2, t2, p2 +addw t1, t1, t2 +addw t1, t1, p3 +shrsw t1, t1, p4 +convsuswb d1, t1 + + + +.function orc_combine2_16xn_u8 +.flags 2d +.n 16 +.dest 1 d1 uint8_t +.source 1 s1 uint8_t +.source 1 s2 uint8_t +.param 2 p1 +.param 2 p2 +.param 2 p3 +.param 2 p4 +.temp 2 t1 +.temp 2 t2 + +convubw t1, s1 +convubw t2, s2 +mullw t1, t1, p1 +mullw t2, t2, p2 +addw t1, t1, t2 +addw t1, t1, p3 +shrsw t1, t1, p4 +convsuswb d1, t1 + + + +.function orc_combine2_nxm_u8 +.flags 2d +.dest 1 d1 uint8_t +.source 1 s1 uint8_t +.source 1 s2 uint8_t +.param 2 p1 +.param 2 p2 +.param 2 p3 +.param 2 p4 +.temp 2 t1 +.temp 2 t2 + +convubw t1, s1 +convubw t2, s2 +mullw t1, t1, p1 +mullw t2, t2, p2 +addw t1, t1, t2 +addw t1, t1, p3 +shrsw t1, t1, p4 +convsuswb d1, t1 + + + +.function orc_sad_nxm_u8 +.flags 2d +.accumulator 4 a1 uint32_t +.source 1 s1 uint8_t +.source 1 s2 uint8_t + +accsadubl a1, s1, s2 + + +.function orc_sad_8x8_u8 +.flags 2d +.n 8 +.m 8 +.accumulator 4 a1 uint32_t +.source 1 s1 uint8_t +.source 1 s2 uint8_t + +accsadubl a1, s1, s2 + + + +.function orc_sad_12x12_u8 +.flags 2d +.n 12 +.m 12 +.accumulator 4 a1 uint32_t +.source 1 s1 uint8_t +.source 1 s2 uint8_t + +accsadubl a1, s1, s2 + + + +.function orc_sad_16xn_u8 +.flags 2d +.n 16 +.accumulator 4 a1 uint32_t +.source 1 s1 uint8_t +.source 1 s2 uint8_t + +accsadubl a1, s1, s2 + + + +.function orc_sad_32xn_u8 +.flags 2d +.n 32 +.accumulator 4 a1 uint32_t +.source 1 s1 uint8_t +.source 1 s2 uint8_t + +accsadubl a1, s1, s2 + + + diff --git a/testsuite/benchmorc/benchmorc.c b/testsuite/benchmorc/benchmorc.c new file mode 100644 index 0000000..fc72d91 --- /dev/null +++ 
b/testsuite/benchmorc/benchmorc.c @@ -0,0 +1,693 @@ + +#include <orc/orc.h> +#include <orc-test/orctest.h> +#include <orc/orcparse.h> + +#include <stdio.h> +#include <stdlib.h> + +static char * read_file (const char *filename); +void output_code (OrcProgram *p, FILE *output); +void output_code_header (OrcProgram *p, FILE *output); +void output_code_test (OrcProgram *p, FILE *output); + +int error = FALSE; + +double weights_ginger[]; +//double weights_preston[]; +double weights_n900[]; + +int +main (int argc, char *argv[]) +{ + char *code; + int n; + int i; + OrcProgram **programs; + const char *filename = NULL; + double sum; + + orc_init (); + orc_test_init (); + + filename = "bench10.orc"; + code = read_file (filename); + if (!code) { + printf("benchmorc needs bench10.orc file in current directory\n"); + exit(1); + } + + n = orc_parse (code, &programs); + +#if 0 + sum = 0; + for(i=0;i<n;i++){ + double perf; + double weight; + + perf = orc_test_performance_full (programs[i], 0, NULL); + + if (perf == 0) { + weight = 0; + } else { + weight = 1.0/perf/241.0; + sum++; + } + printf(" %g, /* %s */\n", weight, programs[i]->name); + } + printf("sum = %g\n", sum); +#else + sum = 0; + for(i=0;i<n;i++){ + double perf; + double weight; + + perf = orc_test_performance_full (programs[i], 0, NULL); + //weight = weights_ginger[i]; + weight = weights_n900[i]; + + sum += weight * perf; + } + printf("score %g\n", 100.0/sum); +#endif + + if (error) return 1; + return 0; +} + + +static char * +read_file (const char *filename) +{ + FILE *file = NULL; + char *contents = NULL; + long size; + int ret; + + file = fopen (filename, "r"); + if (file == NULL) return NULL; + + ret = fseek (file, 0, SEEK_END); + if (ret < 0) goto bail; + + size = ftell (file); + if (size < 0) goto bail; + + ret = fseek (file, 0, SEEK_SET); + if (ret < 0) goto bail; + + contents = malloc (size + 1); + if (contents == NULL) goto bail; + + ret = fread (contents, size, 1, file); + if (ret < 0) goto bail; + + 
contents[size] = 0; + + return contents; +bail: + /* something failed */ + if (file) fclose (file); + if (contents) free (contents); + + return NULL; +} + + + +/* tables */ + +/* ginger Intel(R) Core(TM)2 CPU T7600 @ 2.33GHz */ + +double weights_ginger[] = { + 0.00539898, /* orc_scalarmultiply_f32_ns */ + 0.00173034, /* orc_process_int16 */ + 0.00229296, /* orc_process_int16_clamp */ + 0.00238334, /* orc_process_int8 */ + 0.00286, /* orc_process_int8_clamp */ + 0.00224671, /* orc_audio_convert_unpack_u8 */ + 0.00223485, /* orc_audio_convert_unpack_s8 */ + 0.00261931, /* orc_audio_convert_unpack_u16 */ + 0.0026756, /* orc_audio_convert_unpack_s16 */ + 0.00187791, /* orc_audio_convert_unpack_u16_swap */ + 0.00188964, /* orc_audio_convert_unpack_s16_swap */ + 0.0018846, /* orc_audio_convert_unpack_u32 */ + 0.00208672, /* orc_audio_convert_unpack_s32 */ + 0.00158413, /* orc_audio_convert_unpack_u32_swap */ + 0.0016592, /* orc_audio_convert_unpack_s32_swap */ + 0.00113724, /* orc_audio_convert_unpack_float_s32 */ + 0.000966394, /* orc_audio_convert_unpack_float_s32_swap */ + 0.00163051, /* orc_audio_convert_unpack_float_double */ + 0.00129049, /* orc_audio_convert_unpack_float_double_swap */ + 0.00328124, /* orc_audio_convert_unpack_double_double */ + 0.0019506, /* orc_audio_convert_unpack_double_double_swap */ + 0.000854422, /* orc_audio_convert_unpack_u8_double */ + 0.000841177, /* orc_audio_convert_unpack_s8_double */ + 0.0013211, /* orc_audio_convert_unpack_u16_double */ + 0.0012878, /* orc_audio_convert_unpack_s16_double */ + 0.000888871, /* orc_audio_convert_unpack_u16_double_swap */ + 0.00113332, /* orc_audio_convert_unpack_s16_double_swap */ + 0.00125976, /* orc_audio_convert_unpack_u32_double */ + 0.00204625, /* orc_audio_convert_unpack_s32_double */ + 0.0010244, /* orc_audio_convert_unpack_u32_double_swap */ + 0.00148207, /* orc_audio_convert_unpack_s32_double_swap */ + 0.00135233, /* orc_audio_convert_pack_u8 */ + 0.0013869, /* orc_audio_convert_pack_s8 */ + 
0.0021164, /* orc_audio_convert_pack_u16 */ + 0.00211852, /* orc_audio_convert_pack_s16 */ + 0.00183715, /* orc_audio_convert_pack_u16_swap */ + 0.00200417, /* orc_audio_convert_pack_s16_swap */ + 0.00193889, /* orc_audio_convert_pack_u32 */ + 0.00208307, /* orc_audio_convert_pack_s32 */ + 0.00159261, /* orc_audio_convert_pack_u32_swap */ + 0.00167437, /* orc_audio_convert_pack_s32_swap */ + 0.00143194, /* orc_audio_convert_pack_s32_float */ + 0.00118178, /* orc_audio_convert_pack_s32_float_swap */ + 0.00268428, /* orc_audio_convert_pack_double_float */ + 0.0014616, /* orc_audio_convert_pack_double_float_swap */ + 0.000483737, /* orc_audio_convert_pack_double_s8 */ + 0.000686549, /* orc_audio_convert_pack_double_s16 */ + 0.000577306, /* orc_audio_convert_pack_double_s16_swap */ + 0.00100781, /* orc_audio_convert_pack_double_s32 */ + 0.000784434, /* orc_audio_convert_pack_double_s32_swap */ + 0.0172065, /* gst_orc_splat_u8 */ + 0.0121632, /* gst_orc_splat_s16 */ + 0.01221, /* gst_orc_splat_u16 */ + 0.00740001, /* gst_orc_splat_u32 */ + 0.00205476, /* orc_merge_linear_u8 */ + 0.000841177, /* orc_merge_linear_u16 */ + 0.0120937, /* orc_splat_u16 */ + 0.00740001, /* orc_splat_u32 */ + 0.00367431, /* orc_downsample_u8 */ + 0.00148717, /* orc_downsample_u16 */ + 0.00182448, /* gst_videoscale_orc_downsample_u32 */ + 0.000360484, /* gst_videoscale_orc_downsample_yuyv */ + 0, /* gst_videoscale_orc_resample_nearest_u8 */ + 0, /* gst_videoscale_orc_resample_bilinear_u8 */ + 0.0011403, /* gst_videoscale_orc_resample_nearest_u32 */ + 0.000404898, /* gst_videoscale_orc_resample_bilinear_u32 */ + 0.00027372, /* gst_videoscale_orc_resample_merge_bilinear_u32 */ + 0.000888871, /* gst_videoscale_orc_merge_bicubic_u8 */ + 0.00122619, /* add_int32 */ + 0.0044839, /* add_int16 */ + 0.00820311, /* add_int8 */ + 0.00138106, /* add_uint32 */ + 0.0044839, /* add_uint16 */ + 0.00807787, /* add_uint8 */ + 0.00177998, /* add_float32 */ + 0.00737422, /* orc_splat_u32 */ + 0.00221845, /* 
orc_memcpy_u32 */ + 0.00294308, /* orc_blend_u8 */ + 0.000216663, /* orc_blend_argb */ + 0.000213342, /* orc_blend_bgra */ + 0.00737422, /* orc_splat_u32 */ + 0.000987128, /* deinterlace_line_vfir */ + 0.00386205, /* deinterlace_line_linear */ + 0.00178901, /* deinterlace_line_linear_blend */ + 0, /* deinterlace_line_greedy */ + 0.00824266, /* cogorc_memcpy_2d */ + 0.00483197, /* cogorc_downsample_horiz_cosite_1tap */ + 0.00139512, /* cogorc_downsample_horiz_cosite_3tap */ + 0.00157236, /* cogorc_downsample_420_jpeg */ + 0.00387619, /* cogorc_downsample_vert_halfsite_2tap */ + 0.00175199, /* cogorc_downsample_vert_cosite_3tap */ + 0.00131699, /* cogorc_downsample_vert_halfsite_4tap */ + 0.00594495, /* cogorc_upsample_horiz_cosite_1tap */ + 0.003575, /* cogorc_upsample_horiz_cosite */ + 0.00371299, /* cogorc_upsample_vert_avgub */ + 0.00495645, /* orc_unpack_yuyv_y */ + 0.00172345, /* orc_unpack_yuyv_u */ + 0.0019524, /* orc_unpack_yuyv_v */ + 0.000834873, /* orc_pack_yuyv */ + 0.0059283, /* orc_unpack_uyvy_y */ + 0.0015303, /* orc_unpack_uyvy_u */ + 0.00172345, /* orc_unpack_uyvy_v */ + 0.000837184, /* orc_pack_uyvy */ + 0.00245522, /* orc_matrix2_u8 */ + 0, /* orc_matrix2_11_u8 */ + 0, /* orc_matrix2_12_u8 */ + 0.00145158, /* orc_matrix3_u8 */ + 0, /* orc_matrix3_100_u8 */ + 0.00138146, /* orc_matrix3_100_offset_u8 */ + 0.00134289, /* orc_matrix3_000_u8 */ + 0.00164957, /* orc_pack_123x */ + 0.00160943, /* orc_pack_x123 */ + 0.00254681, /* cogorc_combine2_u8 */ + 0.000633112, /* cogorc_convert_I420_UYVY */ + 0.000635284, /* cogorc_convert_I420_YUY2 */ + 0.000541003, /* cogorc_convert_I420_AYUV */ + 0.00055886, /* cogorc_convert_YUY2_I420 */ + 0.00187425, /* cogorc_convert_UYVY_YUY2 */ + 0.00277488, /* cogorc_planar_chroma_420_422 */ + 0.00147587, /* cogorc_planar_chroma_420_444 */ + 0.00624044, /* cogorc_planar_chroma_422_444 */ + 0.0038428, /* cogorc_planar_chroma_444_422 */ + 0.00203705, /* cogorc_planar_chroma_444_420 */ + 0.00434352, /* 
cogorc_planar_chroma_422_420 */ + 0.0005254, /* cogorc_convert_YUY2_AYUV */ + 0.000538103, /* cogorc_convert_UYVY_AYUV */ + 0.00100143, /* cogorc_convert_YUY2_Y42B */ + 0.00103223, /* cogorc_convert_UYVY_Y42B */ + 0.000770722, /* cogorc_convert_YUY2_Y444 */ + 0.000778117, /* cogorc_convert_UYVY_Y444 */ + 0.00055886, /* cogorc_convert_UYVY_I420 */ + 0, /* cogorc_convert_AYUV_I420 */ + 0.000341915, /* cogorc_convert_AYUV_YUY2 */ + 0.000348717, /* cogorc_convert_AYUV_UYVY */ + 0.000312522, /* cogorc_convert_AYUV_Y42B */ + 0.000873979, /* cogorc_convert_AYUV_Y444 */ + 0.00161992, /* cogorc_convert_Y42B_YUY2 */ + 0.00165063, /* cogorc_convert_Y42B_UYVY */ + 0.000588503, /* cogorc_convert_Y42B_AYUV */ + 0.000805998, /* cogorc_convert_Y444_YUY2 */ + 0.000815565, /* cogorc_convert_Y444_UYVY */ + 0.0014937, /* cogorc_convert_Y444_AYUV */ + 0, /* cogorc_convert_AYUV_ARGB */ + 0, /* cogorc_convert_AYUV_BGRA */ + 0, /* cogorc_convert_AYUV_ABGR */ + 0, /* cogorc_convert_AYUV_RGBA */ + 0, /* cogorc_convert_I420_BGRA */ + 0, /* cogorc_convert_I420_BGRA_avg */ + 0.000819403, /* cogorc_getline_I420 */ + 0.00057497, /* cogorc_getline_YUY2 */ + 0.000551146, /* cogorc_getline_UYVY */ + 0.000562873, /* cogorc_getline_YVYU */ + 0.000552297, /* cogorc_getline_Y42B */ + 0.00156539, /* cogorc_getline_Y444 */ + 0.00260962, /* cogorc_getline_Y800 */ + 0.00185722, /* cogorc_getline_BGRA */ + 0.000454554, /* cogorc_getline_ABGR */ + 0.000452223, /* cogorc_getline_RGBA */ + 0.000903587, /* cogorc_getline_NV12 */ + 0.000800455, /* cogorc_getline_NV21 */ + 0.000286517, /* cogorc_putline_I420 */ + 0.000349472, /* cogorc_putline_YUY2 */ + 0.000391564, /* cogorc_putline_YVYU */ + 0.00035675, /* cogorc_putline_UYVY */ + 0.000320473, /* cogorc_putline_Y42B */ + 0.000845209, /* cogorc_putline_Y444 */ + 0.00172426, /* cogorc_putline_Y800 */ + 0.00185867, /* cogorc_putline_BGRA */ + 0.000454814, /* cogorc_putline_ABGR */ + 0.000451794, /* cogorc_putline_RGBA */ + 0.000369853, /* cogorc_putline_NV12 */ + 
0.000414899, /* cogorc_putline_NV21 */ + 0.00161681, /* orc_add2_rshift_add_s16_22_op */ + 0.00209961, /* orc_add2_rshift_add_s16_22 */ + 0.00161557, /* orc_add2_rshift_sub_s16_22_op */ + 0.00209961, /* orc_add2_rshift_sub_s16_22 */ + 0.00156654, /* orc_add2_rshift_add_s16_11_op */ + 0.00201776, /* orc_add2_rshift_add_s16_11 */ + 0.00156654, /* orc_add2_rshift_sub_s16_11_op */ + 0.00205078, /* orc_add2_rshift_sub_s16_11 */ + 0.00428421, /* orc_add_const_rshift_s16_11 */ + 0.00863838, /* orc_add_const_rshift_s16 */ + 0.00281436, /* orc_add_s16 */ + 0.00372344, /* orc_add_s16_2d */ + 0.0022491, /* orc_addc_rshift_s16 */ + 0.00466168, /* orc_lshift1_s16 */ + 0.00469616, /* orc_lshift2_s16 */ + 0.010175, /* orc_lshift_s16_ip */ + 0.000855112, /* orc_mas2_add_s16_op */ + 0.000850644, /* orc_mas2_add_s16_ip */ + 0.000853044, /* orc_mas2_sub_s16_op */ + 0.000851328, /* orc_mas2_sub_s16_ip */ + 0.000639672, /* orc_mas4_across_add_s16_1991_op */ + 0.000673799, /* orc_mas4_across_add_s16_1991_ip */ + 0.000640558, /* orc_mas4_across_sub_s16_1991_op */ + 0.000673584, /* orc_mas4_across_sub_s16_1991_ip */ + 0.00275573, /* orc_subtract_s16 */ + 0.00267222, /* orc_add_s16_u8 */ + 0.00544385, /* orc_add_s16_u8_2d */ + 0.00653211, /* orc_convert_s16_u8 */ + 0.00678334, /* orc_convert_u8_s16 */ + 0.00689382, /* orc_offsetconvert_u8_s16 */ + 0.00584641, /* orc_offsetconvert_s16_u8 */ + 0.00273437, /* orc_subtract_s16_u8 */ + 0.00238334, /* orc_multiply_and_add_s16_u8 */ + 0.012025, /* orc_splat_s16_ns */ + 0.000952381, /* orc_splat_s16_2d_4xn */ + 0.00172953, /* orc_splat_s16_2d_8xn */ + 0.0121284, /* orc_splat_s16_2d */ + 0.0176367, /* orc_splat_u8_ns */ + 0.0286862, /* orc_splat_u8_2d */ + 0.00372606, /* orc_average_u8 */ + 0.00242965, /* orc_rrshift6_add_s16_2d */ + 0.00107453, /* orc_rrshift6_sub_s16_2d */ + 0.0139063, /* orc_rrshift6_s16_ip_2d */ + 0.011565, /* orc_rrshift6_s16_ip */ + 0.00493334, /* orc_unpack_yuyv_y */ + 0.00171647, /* orc_unpack_yuyv_u */ + 0.00194363, /* 
orc_unpack_yuyv_v */ + 0.000834543, /* orc_packyuyv */ + 0.00591174, /* orc_unpack_uyvy_y */ + 0.00153214, /* orc_unpack_uyvy_u */ + 0.00172627, /* orc_unpack_uyvy_v */ + 0.0029935, /* orc_interleave2_s16 */ + 0.00201094, /* orc_interleave2_rrshift1_s16 */ + 0.00140531, /* orc_deinterleave2_s16 */ + 0.0011584, /* orc_deinterleave2_lshift1_s16 */ + 0.00131151, /* orc_haar_deint_lshift1_split_s16 */ + 0.00136132, /* orc_haar_deint_split_s16 */ + 0.00202333, /* orc_haar_split_s16_lo */ + 0.00278474, /* orc_haar_split_s16_hi */ + 0.00127417, /* orc_haar_split_s16_op */ + 0.00123128, /* orc_haar_split_s16 */ + 0.0020668, /* orc_haar_synth_s16_lo */ + 0.00175199, /* orc_haar_synth_s16_hi */ + 0.00124788, /* orc_haar_synth_s16_op */ + 0.0010582, /* orc_haar_synth_s16 */ + 0.00130965, /* orc_haar_synth_rrshift1_int_s16 */ + 0.00173192, /* orc_haar_synth_int_s16 */ + 0.00444622, /* orc_haar_sub_s16 */ + 0.00300625, /* orc_haar_add_half_s16 */ + 0.00446498, /* orc_haar_add_s16 */ + 0.00297619, /* orc_haar_sub_half_s16 */ + 0.00296415, /* orc_sum_u8 */ + 0.00353913, /* orc_sum_s16 */ + 0.00195601, /* orc_sum_square_diff_u8 */ + 0.000907029, /* orc_dequantise_s16_2d_4xn */ + 0.0010582, /* orc_dequantise_s16_2d_8xn */ + 0.0010582, /* orc_dequantise_s16_ip_2d_8xn */ + 0.00607507, /* orc_dequantise_s16_ip_2d */ + 0.00544062, /* orc_dequantise_s16_ip */ + 0.002788, /* orc_dequantise_s16 */ + 0.00177551, /* orc_dequantise_var_s16_ip */ + 0.00250462, /* orc_quantise1_s16 */ + 0.00286, /* orc_quantise2_s16 */ + 0.000653211, /* orc_quantdequant1_s16 */ + 0.00033615, /* orc_quantdequant3_s16 */ + 0.000700332, /* orc_quantdequant2_s16 */ + 0.00117709, /* orc_downsample_vert_u8 */ + 0.000427844, /* orc_downsample_horiz_u8 */ + 0.00341907, /* orc_stats_moment_s16 */ + 0.00300625, /* orc_stats_above_s16 */ + 0.012025, /* orc_accw */ + 0.000979968, /* orc_avg2_8xn_u8 */ + 0.000970018, /* orc_avg2_12xn_u8 */ + 0.0012075, /* orc_avg2_16xn_u8 */ + 0.00257509, /* orc_avg2_32xn_u8 */ + 
0.0042909, /* orc_avg2_nxm_u8 */ + 0.00106312, /* orc_combine4_8xn_u8 */ + 0.00116315, /* orc_combine4_12xn_u8 */ + 0.00147629, /* orc_combine4_16xn_u8 */ + 0.0013289, /* orc_combine4_24xn_u8 */ + 0.00158025, /* orc_combine4_32xn_u8 */ + 0.00119665, /* orc_combine4_nxm_u8 */ + 0.00118816, /* orc_combine2_8xn_u8 */ + 0.00167189, /* orc_combine2_12xn_u8 */ + 0.00178851, /* orc_combine2_16xn_u8 */ + 0.00229296, /* orc_combine2_nxm_u8 */ + 0.00304518, /* orc_sad_nxm_u8 */ + 0.0010836, /* orc_sad_8x8_u8 */ + 0.00171215, /* orc_sad_12x12_u8 */ + 0.00228437, /* orc_sad_16xn_u8 */ + 0.00310406, /* orc_sad_32xn_u8 */ +}; + +double weights_n900[] = { + 0.00189692, /* orc_scalarmultiply_f32_ns */ + 0.000655569, /* orc_process_int16 */ + 0.000607111, /* orc_process_int16_clamp */ + 0.00130039, /* orc_process_int8 */ + 0.00121357, /* orc_process_int8_clamp */ + 0, /* orc_audio_convert_unpack_u8 */ + 0.000886761, /* orc_audio_convert_unpack_s8 */ + 0, /* orc_audio_convert_unpack_u16 */ + 0.000885403, /* orc_audio_convert_unpack_s16 */ + 0, /* orc_audio_convert_unpack_u16_swap */ + 0.000884163, /* orc_audio_convert_unpack_s16_swap */ + 0, /* orc_audio_convert_unpack_u32 */ + 0.000886388, /* orc_audio_convert_unpack_s32 */ + 0, /* orc_audio_convert_unpack_u32_swap */ + 0.000885463, /* orc_audio_convert_unpack_s32_swap */ + 0.000887546, /* orc_audio_convert_unpack_float_s32 */ + 0.00088498, /* orc_audio_convert_unpack_float_s32_swap */ + 0, /* orc_audio_convert_unpack_float_double */ + 0, /* orc_audio_convert_unpack_float_double_swap */ + 0.000453781, /* orc_audio_convert_unpack_double_double */ + 0.000455403, /* orc_audio_convert_unpack_double_double_swap */ + 0, /* orc_audio_convert_unpack_u8_double */ + 0, /* orc_audio_convert_unpack_s8_double */ + 0, /* orc_audio_convert_unpack_u16_double */ + 0, /* orc_audio_convert_unpack_s16_double */ + 0, /* orc_audio_convert_unpack_u16_double_swap */ + 0, /* orc_audio_convert_unpack_s16_double_swap */ + 0, /* 
orc_audio_convert_unpack_u32_double */ + 0, /* orc_audio_convert_unpack_s32_double */ + 0, /* orc_audio_convert_unpack_u32_double_swap */ + 0, /* orc_audio_convert_unpack_s32_double_swap */ + 0, /* orc_audio_convert_pack_u8 */ + 0.000923392, /* orc_audio_convert_pack_s8 */ + 0, /* orc_audio_convert_pack_u16 */ + 0.0010451, /* orc_audio_convert_pack_s16 */ + 0, /* orc_audio_convert_pack_u16_swap */ + 0.000928539, /* orc_audio_convert_pack_s16_swap */ + 0, /* orc_audio_convert_pack_u32 */ + 0.000889152, /* orc_audio_convert_pack_s32 */ + 0, /* orc_audio_convert_pack_u32_swap */ + 0.000878315, /* orc_audio_convert_pack_s32_swap */ + 0, /* orc_audio_convert_pack_s32_float */ + 0, /* orc_audio_convert_pack_s32_float_swap */ + 0, /* orc_audio_convert_pack_double_float */ + 0, /* orc_audio_convert_pack_double_float_swap */ + 0, /* orc_audio_convert_pack_double_s8 */ + 0, /* orc_audio_convert_pack_double_s16 */ + 0, /* orc_audio_convert_pack_double_s16_swap */ + 0, /* orc_audio_convert_pack_double_s32 */ + 0, /* orc_audio_convert_pack_double_s32_swap */ + 0.00364483, /* gst_orc_splat_u8 */ + 0.00176666, /* gst_orc_splat_s16 */ + 0.00176698, /* gst_orc_splat_u16 */ + 0.000887025, /* gst_orc_splat_u32 */ + 0.00120903, /* orc_merge_linear_u8 */ + 0.000840928, /* orc_merge_linear_u16 */ + 0.00176687, /* orc_splat_u16 */ + 0.000886406, /* orc_splat_u32 */ + 0.00206071, /* orc_downsample_u8 */ + 0.00111783, /* orc_downsample_u16 */ + 0.00057776, /* gst_videoscale_orc_downsample_u32 */ + 0.000282225, /* gst_videoscale_orc_downsample_yuyv */ + 0, /* gst_videoscale_orc_resample_nearest_u8 */ + 0, /* gst_videoscale_orc_resample_bilinear_u8 */ + 0, /* gst_videoscale_orc_resample_nearest_u32 */ + 0, /* gst_videoscale_orc_resample_bilinear_u32 */ + 0, /* gst_videoscale_orc_resample_merge_bilinear_u32 */ + 0.000810787, /* gst_videoscale_orc_merge_bicubic_u8 */ + 0.00190264, /* add_int32 */ + 0.00361624, /* add_int16 */ + 0.00507259, /* add_int8 */ + 0.00191598, /* add_uint32 */ + 
0.00361182, /* add_uint16 */ + 0.00507259, /* add_uint8 */ + 0.00153987, /* add_float32 */ + 0.00124016, /* orc_splat_u32 */ + 0.00113814, /* orc_memcpy_u32 */ + 0.000908623, /* orc_blend_u8 */ + 0.000133396, /* orc_blend_argb */ + 0, /* orc_blend_bgra */ + 0.000901581, /* orc_splat_u32 */ + 0.000844679, /* deinterlace_line_vfir */ + 0.00413285, /* deinterlace_line_linear */ + 0.00123704, /* deinterlace_line_linear_blend */ + 0.00109371, /* deinterlace_line_greedy */ + 0.00381793, /* cogorc_memcpy_2d */ + 0.00377314, /* cogorc_downsample_horiz_cosite_1tap */ + 0.0010716, /* cogorc_downsample_horiz_cosite_3tap */ + 0.00183775, /* cogorc_downsample_420_jpeg */ + 0.00354723, /* cogorc_downsample_vert_halfsite_2tap */ + 0.00110979, /* cogorc_downsample_vert_cosite_3tap */ + 0.000901003, /* cogorc_downsample_vert_halfsite_4tap */ + 0.00170136, /* cogorc_upsample_horiz_cosite_1tap */ + 0.00165625, /* cogorc_upsample_horiz_cosite */ + 0.00447476, /* cogorc_upsample_vert_avgub */ + 0.00463618, /* orc_unpack_yuyv_y */ + 0.00171047, /* orc_unpack_yuyv_u */ + 0.00154902, /* orc_unpack_yuyv_v */ + 0.000832261, /* orc_pack_yuyv */ + 0.00380119, /* orc_unpack_uyvy_y */ + 0.00190989, /* orc_unpack_uyvy_u */ + 0.00170967, /* orc_unpack_uyvy_v */ + 0.000832185, /* orc_pack_uyvy */ + 0.00104047, /* orc_matrix2_u8 */ + 0.000862238, /* orc_matrix2_11_u8 */ + 0.00082022, /* orc_matrix2_12_u8 */ + 0.00078049, /* orc_matrix3_u8 */ + 0.000649602, /* orc_matrix3_100_u8 */ + 0.000727013, /* orc_matrix3_100_offset_u8 */ + 0.000760795, /* orc_matrix3_000_u8 */ + 0.000790078, /* orc_pack_123x */ + 0.000792433, /* orc_pack_x123 */ + 0.00117391, /* cogorc_combine2_u8 */ + 0.000196525, /* cogorc_convert_I420_UYVY */ + 0.00019252, /* cogorc_convert_I420_YUY2 */ + 0, /* cogorc_convert_I420_AYUV */ + 0.000114783, /* cogorc_convert_YUY2_I420 */ + 0.000977855, /* cogorc_convert_UYVY_YUY2 */ + 0.000740782, /* cogorc_planar_chroma_420_422 */ + 0.000600582, /* cogorc_planar_chroma_420_444 */ + 0.0018566, 
/* cogorc_planar_chroma_422_444 */ + 0.00219326, /* cogorc_planar_chroma_444_422 */ + 0.00130591, /* cogorc_planar_chroma_444_420 */ + 0.00211275, /* cogorc_planar_chroma_422_420 */ + 0.000442738, /* cogorc_convert_YUY2_AYUV */ + 0.000448218, /* cogorc_convert_UYVY_AYUV */ + 0.000126284, /* cogorc_convert_YUY2_Y42B */ + 0.00013561, /* cogorc_convert_UYVY_Y42B */ + 0.000141577, /* cogorc_convert_YUY2_Y444 */ + 0.000186874, /* cogorc_convert_UYVY_Y444 */ + 9.1806e-05, /* cogorc_convert_UYVY_I420 */ + 4.92179e-05, /* cogorc_convert_AYUV_I420 */ + 0.000387252, /* cogorc_convert_AYUV_YUY2 */ + 0.000386643, /* cogorc_convert_AYUV_UYVY */ + 7.10203e-05, /* cogorc_convert_AYUV_Y42B */ + 0.000121778, /* cogorc_convert_AYUV_Y444 */ + 0.000693003, /* cogorc_convert_Y42B_YUY2 */ + 0.00068451, /* cogorc_convert_Y42B_UYVY */ + 0.000366249, /* cogorc_convert_Y42B_AYUV */ + 0.000602631, /* cogorc_convert_Y444_YUY2 */ + 0.00059332, /* cogorc_convert_Y444_UYVY */ + 0.000742439, /* cogorc_convert_Y444_AYUV */ + 0.00016071, /* cogorc_convert_AYUV_ARGB */ + 0.00015913, /* cogorc_convert_AYUV_BGRA */ + 0.000159045, /* cogorc_convert_AYUV_ABGR */ + 0.000159024, /* cogorc_convert_AYUV_RGBA */ + 0, /* cogorc_convert_I420_BGRA */ + 0, /* cogorc_convert_I420_BGRA_avg */ + 0, /* cogorc_getline_I420 */ + 0.000447772, /* cogorc_getline_YUY2 */ + 0.000449395, /* cogorc_getline_UYVY */ + 0.000448533, /* cogorc_getline_YVYU */ + 0.000385634, /* cogorc_getline_Y42B */ + 0.000794574, /* cogorc_getline_Y444 */ + 0, /* cogorc_getline_Y800 */ + 0.000886484, /* cogorc_getline_BGRA */ + 0.000554285, /* cogorc_getline_ABGR */ + 0.000558078, /* cogorc_getline_RGBA */ + 0.000405223, /* cogorc_getline_NV12 */ + 0.000448563, /* cogorc_getline_NV21 */ + 8.13877e-05, /* cogorc_putline_I420 */ + 0.000387218, /* cogorc_putline_YUY2 */ + 0.00035472, /* cogorc_putline_YVYU */ + 0.000388082, /* cogorc_putline_UYVY */ + 7.53593e-05, /* cogorc_putline_Y42B */ + 0.000140356, /* cogorc_putline_Y444 */ + 0.0017132, /* 
cogorc_putline_Y800 */ + 0.000887432, /* cogorc_putline_BGRA */ + 0.000554304, /* cogorc_putline_ABGR */ + 0.000558475, /* cogorc_putline_RGBA */ + 0.000325961, /* cogorc_putline_NV12 */ + 0.000302852, /* cogorc_putline_NV21 */ + 0.00165479, /* orc_add2_rshift_add_s16_22_op */ + 0.00203387, /* orc_add2_rshift_add_s16_22 */ + 0.00178764, /* orc_add2_rshift_sub_s16_22_op */ + 0.00191923, /* orc_add2_rshift_sub_s16_22 */ + 0.00240863, /* orc_add2_rshift_add_s16_11_op */ + 0.00236533, /* orc_add2_rshift_add_s16_11 */ + 0.00238607, /* orc_add2_rshift_sub_s16_11_op */ + 0.00215832, /* orc_add2_rshift_sub_s16_11 */ + 0.00181178, /* orc_add_const_rshift_s16_11 */ + 0.00276915, /* orc_add_const_rshift_s16 */ + 0.00281042, /* orc_add_s16 */ + 0.00166994, /* orc_add_s16_2d */ + 0.00213569, /* orc_addc_rshift_s16 */ + 0.00285013, /* orc_lshift1_s16 */ + 0.00245598, /* orc_lshift2_s16 */ + 0.00370756, /* orc_lshift_s16_ip */ + 0.000576921, /* orc_mas2_add_s16_op */ + 0.000574115, /* orc_mas2_add_s16_ip */ + 0.000577379, /* orc_mas2_sub_s16_op */ + 0.000572396, /* orc_mas2_sub_s16_ip */ + 0.000463221, /* orc_mas4_across_add_s16_1991_op */ + 0.000454726, /* orc_mas4_across_add_s16_1991_ip */ + 0.000459198, /* orc_mas4_across_sub_s16_1991_op */ + 0.000454726, /* orc_mas4_across_sub_s16_1991_ip */ + 0.00228059, /* orc_subtract_s16 */ + 0.00221536, /* orc_add_s16_u8 */ + 0.00207322, /* orc_add_s16_u8_2d */ + 0.00292092, /* orc_convert_s16_u8 */ + 0.0036697, /* orc_convert_u8_s16 */ + 0.00262931, /* orc_offsetconvert_u8_s16 */ + 0.00169662, /* orc_offsetconvert_s16_u8 */ + 0.00238882, /* orc_subtract_s16_u8 */ + 0.00214581, /* orc_multiply_and_add_s16_u8 */ + 0.00171415, /* orc_splat_s16_ns */ + 0.000393839, /* orc_splat_s16_2d_4xn */ + 0.00108717, /* orc_splat_s16_2d_8xn */ + 0.00238507, /* orc_splat_s16_2d */ + 0.00710336, /* orc_splat_u8_ns */ + 0.00403526, /* orc_splat_u8_2d */ + 0.0050664, /* orc_average_u8 */ + 0.00134097, /* orc_rrshift6_add_s16_2d */ + 0.000807761, /* 
orc_rrshift6_sub_s16_2d */ + 0.00307394, /* orc_rrshift6_s16_ip_2d */ + 0.00316505, /* orc_rrshift6_s16_ip */ + 0.00414213, /* orc_unpack_yuyv_y */ + 0.00171048, /* orc_unpack_yuyv_u */ + 0.00152237, /* orc_unpack_yuyv_v */ + 0.000832094, /* orc_packyuyv */ + 0.00399968, /* orc_unpack_uyvy_y */ + 0.00191216, /* orc_unpack_uyvy_u */ + 0.00171008, /* orc_unpack_uyvy_v */ + 0.000897314, /* orc_interleave2_s16 */ + 0.000901892, /* orc_interleave2_rrshift1_s16 */ + 0.00076676, /* orc_deinterleave2_s16 */ + 0.000698548, /* orc_deinterleave2_lshift1_s16 */ + 0.00052955, /* orc_haar_deint_lshift1_split_s16 */ + 0.000541159, /* orc_haar_deint_split_s16 */ + 0.00193144, /* orc_haar_split_s16_lo */ + 0.00199633, /* orc_haar_split_s16_hi */ + 0.00100157, /* orc_haar_split_s16_op */ + 0.00110899, /* orc_haar_split_s16 */ + 0.00250888, /* orc_haar_synth_s16_lo */ + 0.00237495, /* orc_haar_synth_s16_hi */ + 0.00102733, /* orc_haar_synth_s16_op */ + 0.000903217, /* orc_haar_synth_s16 */ + 0.000876559, /* orc_haar_synth_rrshift1_int_s16 */ + 0.000887662, /* orc_haar_synth_int_s16 */ + 0.00410694, /* orc_haar_sub_s16 */ + 0.0032058, /* orc_haar_add_half_s16 */ + 0.00388831, /* orc_haar_add_s16 */ + 0.00291108, /* orc_haar_sub_half_s16 */ + 0.000801367, /* orc_sum_u8 */ + 0.00114909, /* orc_sum_s16 */ + 0.000370646, /* orc_sum_square_diff_u8 */ + 0.00013075, /* orc_dequantise_s16_2d_4xn */ + 0.000133912, /* orc_dequantise_s16_2d_8xn */ + 9.03212e-05, /* orc_dequantise_s16_ip_2d_8xn */ + 0.00113719, /* orc_dequantise_s16_ip_2d */ + 0.00110444, /* orc_dequantise_s16_ip */ + 0.0010195, /* orc_dequantise_s16 */ + 0.00102223, /* orc_dequantise_var_s16_ip */ + 0.000730524, /* orc_quantise1_s16 */ + 0.00110444, /* orc_quantise2_s16 */ + 0.000335265, /* orc_quantdequant1_s16 */ + 0.000192182, /* orc_quantdequant3_s16 */ + 0.000417592, /* orc_quantdequant2_s16 */ + 0.000898304, /* orc_downsample_vert_u8 */ + 0.000362124, /* orc_downsample_horiz_u8 */ + 0.000544868, /* orc_stats_moment_s16 */ 
+ 0.000453732, /* orc_stats_above_s16 */ + 0.00292441, /* orc_accw */ + 0.000359527, /* orc_avg2_8xn_u8 */ + 0.000354736, /* orc_avg2_12xn_u8 */ + 0.00149989, /* orc_avg2_16xn_u8 */ + 0.00233937, /* orc_avg2_32xn_u8 */ + 0.00314972, /* orc_avg2_nxm_u8 */ + 0.00035942, /* orc_combine4_8xn_u8 */ + 0.000194902, /* orc_combine4_12xn_u8 */ + 0.000479254, /* orc_combine4_16xn_u8 */ + 0.000570636, /* orc_combine4_24xn_u8 */ + 0.00024299, /* orc_combine4_32xn_u8 */ + 0.00065699, /* orc_combine4_nxm_u8 */ + 0.000471688, /* orc_combine2_8xn_u8 */ + 0.000324071, /* orc_combine2_12xn_u8 */ + 0.000253177, /* orc_combine2_16xn_u8 */ + 0.000954874, /* orc_combine2_nxm_u8 */ + 0.000585976, /* orc_sad_nxm_u8 */ + 0.000462648, /* orc_sad_8x8_u8 */ + 0.000516877, /* orc_sad_12x12_u8 */ + 0.000539588, /* orc_sad_16xn_u8 */ + 0.000534326, /* orc_sad_32xn_u8 */ +}; + |