From 20ed9e823731cc93f8583440d2718385f40258b0 Mon Sep 17 00:00:00 2001 From: Nicola Murino Date: Mon, 21 Nov 2016 11:48:29 +0100 Subject: matroskaparse: add support for skipping invalid data https://bugzilla.gnome.org/show_bug.cgi?id=774566 --- gst/matroska/matroska-parse.c | 47 ++++++++++++++++++++++++++++++++++++++----- 1 file changed, 42 insertions(+), 5 deletions(-) (limited to 'gst/matroska/matroska-parse.c') diff --git a/gst/matroska/matroska-parse.c b/gst/matroska/matroska-parse.c index 096547bb6..006a46d29 100644 --- a/gst/matroska/matroska-parse.c +++ b/gst/matroska/matroska-parse.c @@ -77,6 +77,8 @@ GST_DEBUG_CATEGORY_STATIC (matroskaparse_debug); GST_DEBUG_OBJECT (parse, "Parsing " element " element " \ " finished with '%s'", gst_flow_get_name (ret)) +#define INVALID_DATA_THRESHOLD (2 * 1024 * 1024) + enum { PROP_0 @@ -2574,8 +2576,15 @@ gst_matroska_parse_parse_id (GstMatroskaParse * parse, guint32 id, break; case GST_MATROSKA_READ_STATE_SCANNING: if (id != GST_MATROSKA_ID_CLUSTER && - id != GST_MATROSKA_ID_CLUSTERTIMECODE) + id != GST_MATROSKA_ID_CLUSTERTIMECODE) { + /* we need to skip byte per byte if we are scanning for a new cluster */ + read = 1; goto skip; + } else { + GST_LOG_OBJECT (parse, "Resync done, new cluster found!"); + parse->common.start_resync_offset = -1; + parse->common.state = parse->common.state_to_restore; + } /* fall-through */ case GST_MATROSKA_READ_STATE_HEADER: case GST_MATROSKA_READ_STATE_DATA: @@ -3017,9 +3026,36 @@ next: if (G_UNLIKELY (ret != GST_FLOW_OK && ret != GST_FLOW_EOS)) { if (parse->common.ebml_segment_length != G_MAXUINT64 && parse->common.offset >= - parse->common.ebml_segment_start + parse->common.ebml_segment_length) - ret = GST_FLOW_EOS; - return ret; + parse->common.ebml_segment_start + parse->common.ebml_segment_length) { + return GST_FLOW_EOS; + } else { + /* + * parsing error: we need to flush a byte from the adapter if the id is + * not a cluster and so on until we find a new cluster or the + * 
INVALID_DATA_THRESHOLD is exceeded, we reuse gst_matroska_parse_parse_id + * setting the state to GST_MATROSKA_READ_STATE_SCANNING so the bytes + * are skipped until a new cluster is found + */ + gint64 bytes_scanned; + if (parse->common.start_resync_offset == -1) { + parse->common.start_resync_offset = parse->common.offset; + parse->common.state_to_restore = parse->common.state; + } + bytes_scanned = parse->common.offset - parse->common.start_resync_offset; + if (bytes_scanned <= INVALID_DATA_THRESHOLD) { + GST_WARNING_OBJECT (parse, + "parse error, looking for next cluster, actual offset %" + G_GUINT64_FORMAT ", start resync offset %" G_GUINT64_FORMAT, + parse->common.offset, parse->common.start_resync_offset); + parse->common.state = GST_MATROSKA_READ_STATE_SCANNING; + ret = GST_FLOW_OK; + } else { + GST_WARNING_OBJECT (parse, + "unrecoverable parse error, next cluster not found and threshold " + "exceeded, bytes scanned %" G_GINT64_FORMAT, bytes_scanned); + return ret; + } + } } GST_LOG_OBJECT (parse, "Offset %" G_GUINT64_FORMAT ", Element id 0x%x, " @@ -3094,7 +3130,8 @@ gst_matroska_parse_handle_sink_event (GstPad * pad, GstObject * parent, } case GST_EVENT_EOS: { - if (parse->common.state != GST_MATROSKA_READ_STATE_DATA) { + if (parse->common.state != GST_MATROSKA_READ_STATE_DATA + && parse->common.state != GST_MATROSKA_READ_STATE_SCANNING) { gst_event_unref (event); GST_ELEMENT_ERROR (parse, STREAM, DEMUX, (NULL), ("got eos and didn't receive a complete header object")); -- cgit v1.2.3