summary refs log tree commit diff
diff options
context:
space:
mode:
author Wim Taymans <wtaymans@redhat.com> 2014-12-06 21:00:23 +0100
committer Wim Taymans <wtaymans@redhat.com> 2014-12-09 09:27:23 +0100
commit cce306405a3ed440170a5da03f876f53a376a807 (patch)
tree 72e8063c3e182c9697b70198f60b12ef077232b3
parent a0b8e96b77da5c6148b5d0cb16f9ccdafe901bf9 (diff)
wip experiment with mulas4wlworkmulas4wl
-rw-r--r-- gst-libs/gst/video/video-converter.c 41
-rw-r--r-- gst-libs/gst/video/video-orc.orc 62
2 files changed, 96 insertions, 7 deletions
diff --git a/gst-libs/gst/video/video-converter.c b/gst-libs/gst/video/video-converter.c
index c49c265b8..13a93fece 100644
--- a/gst-libs/gst/video/video-converter.c
+++ b/gst-libs/gst/video/video-converter.c
@@ -834,6 +834,11 @@ color_matrix_RGB_to_XYZ (MatrixData * dst, double Rx, double Ry, double Gx,
color_matrix_copy (dst, &m);
}
+#if 1
+//void
+//_custom_video_orc_matrix8 (guint8 * ORC_RESTRICT d1,
+// const guint8 * ORC_RESTRICT s1, orc_int64 p1, orc_int64 p2, orc_int64 p3,
+// orc_int64 p4, int n);
void
_custom_video_orc_matrix8 (guint8 * ORC_RESTRICT d1,
const guint8 * ORC_RESTRICT s1, orc_int64 p1, orc_int64 p2, orc_int64 p3,
@@ -847,15 +852,15 @@ _custom_video_orc_matrix8 (guint8 * ORC_RESTRICT d1,
gint a20, a21, a22, a23;
a00 = (gint16) (p1 >> 16);
- a01 = (gint16) (p2 >> 16);
- a02 = (gint16) (p3 >> 16);
+ a01 = (gint16) (p1 >> 32);
+ a02 = (gint16) (p1 >> 48);
a03 = (gint16) (p4 >> 16);
- a10 = (gint16) (p1 >> 32);
+ a10 = (gint16) (p2 >> 16);
a11 = (gint16) (p2 >> 32);
- a12 = (gint16) (p3 >> 32);
+ a12 = (gint16) (p2 >> 48);
a13 = (gint16) (p4 >> 32);
- a20 = (gint16) (p1 >> 48);
- a21 = (gint16) (p2 >> 48);
+ a20 = (gint16) (p3 >> 16);
+ a21 = (gint16) (p3 >> 32);
a22 = (gint16) (p3 >> 48);
a23 = (gint16) (p4 >> 48);
@@ -873,6 +878,7 @@ _custom_video_orc_matrix8 (guint8 * ORC_RESTRICT d1,
d1[i * 4 + 3] = CLAMP (v, 0, 255);
}
}
+#endif
static void
video_converter_matrix8 (MatrixData * data, gpointer pixels)
@@ -948,6 +954,7 @@ prepare_matrix (GstVideoConverter * convert, MatrixData * data)
GST_DEBUG ("use 8bit matrix");
data->matrix_func = video_converter_matrix8;
+#if 0
data->orc_p1 = (((guint64) (guint16) data->im[2][0]) << 48) |
(((guint64) (guint16) data->im[1][0]) << 32) |
(((guint64) (guint16) data->im[0][0]) << 16);
@@ -957,13 +964,32 @@ prepare_matrix (GstVideoConverter * convert, MatrixData * data)
data->orc_p3 = (((guint64) (guint16) data->im[2][2]) << 48) |
(((guint64) (guint16) data->im[1][2]) << 32) |
(((guint64) (guint16) data->im[0][2]) << 16);
+#else
+ data->orc_p1 = (((guint64) (guint16) data->im[0][2]) << 48) |
+ (((guint64) (guint16) data->im[0][1]) << 32) |
+ (((guint64) (guint16) data->im[0][0]) << 16);
+ data->orc_p2 = (((guint64) (guint16) data->im[1][2]) << 48) |
+ (((guint64) (guint16) data->im[1][1]) << 32) |
+ (((guint64) (guint16) data->im[1][0]) << 16);
+ data->orc_p3 = (((guint64) (guint16) data->im[2][2]) << 48) |
+ (((guint64) (guint16) data->im[2][1]) << 32) |
+ (((guint64) (guint16) data->im[2][0]) << 16);
+#endif
+ GST_DEBUG ("%08llx %08llx %08llx", (long long unsigned int) data->orc_p1,
+ (long long unsigned int) data->orc_p2,
+ (long long unsigned int) data->orc_p3);
a03 = data->im[0][3] >> SCALE;
a13 = data->im[1][3] >> SCALE;
a23 = data->im[2][3] >> SCALE;
+#if 1
data->orc_p4 = (((guint64) (guint16) a23) << 48) |
(((guint64) (guint16) a13) << 32) | (((guint64) (guint16) a03) << 16);
+#else
+ data->orc_p4 = (((guint64) (guint16) a03) << 32) |
+ (((guint64) (guint16) a13) << 16) | (((guint64) (guint16) a23) << 0);
+#endif
}
} else {
GST_DEBUG ("use 16bit matrix");
@@ -1362,7 +1388,6 @@ chain_convert (GstVideoConverter * convert, GstLineCache * prev)
color_matrix_scale_components (&convert->convert_matrix,
(float) scale, (float) scale, (float) scale);
}
- convert->current_bits = MAX (convert->in_bits, convert->out_bits);
do_conversion = TRUE;
if (!same_matrix || !same_primaries)
@@ -3125,6 +3150,8 @@ video_converter_lookup_fastpath (GstVideoConverter * convert)
if (width != convert->out_width || height != convert->out_height)
return FALSE;
+ return FALSE;
+
if (GET_OPT_DITHER_QUANTIZATION (convert) != 1)
return FALSE;
diff --git a/gst-libs/gst/video/video-orc.orc b/gst-libs/gst/video/video-orc.orc
index 862f7d35d..963228593 100644
--- a/gst-libs/gst/video/video-orc.orc
+++ b/gst-libs/gst/video/video-orc.orc
@@ -1329,6 +1329,68 @@ x4 addw aq, aq, q1
x4 convssswb ayuv2, aq
x4 addb ayuv, ayuv2, c128
+.function video_orc_matrix8_2
+.source 4 argb guint8
+.dest 4 ayuv guint8
+.longparam 8 p1
+.longparam 8 p2
+.longparam 8 p3
+.longparam 8 p4
+.temp 2 a
+.temp 2 w1
+.temp 2 w2
+.temp 2 y
+.temp 2 u
+.temp 2 v
+.temp 4 l1
+.temp 4 l2
+.temp 8 aq
+.temp 8 q1
+.temp 8 q2
+.temp 8 q3
+.temp 8 pr1
+.temp 8 pr2
+.temp 8 pr3
+.temp 8 pr4
+
+loadpq pr1, p1
+loadpq pr2, p2
+loadpq pr3, p3
+loadpq pr4, p4
+
+x4 convubw aq, argb
+select0ql l1, aq
+select0lw a, l1
+
+mulas4wl l1, aq, pr1
+shrsl l1, l1, 8
+convssslw y, l1
+
+mulas4wl l1, aq, pr2
+mulas4wl l2, aq, pr3
+mergelq q1, l1, l2
+x2 shrsl q1, q1, 8
+x2 convssslw l2, q1
+
+mergewl l1, a, y
+mergelq q3, l1, l2
+
+x4 addw q3, q3, pr4
+x4 convsuswb ayuv, q3
+
+
+.function video_orc_test_mul
+.source 4 argb guint8
+.dest 4 ayuv guint8
+.longparam 8 p1
+.temp 8 t
+.temp 8 pr1
+
+loadpq pr1, p1
+
+x4 convubw t, argb
+mulas4wl ayuv, t, pr1
+
#.function video_orc_resample_h_near_u32
#.source 4 src guint32
#.source 4 idx