diff options
author | Edward Hervey <edward@centricular.com> | 2017-12-18 09:48:21 +0100 |
---|---|---|
committer | Edward Hervey <bilboed@bilboed.com> | 2017-12-18 09:48:21 +0100 |
commit | cb04515cbd8cd7c8dc0650360b1a3672228507e9 (patch) | |
tree | 302a21cf6b88987e0b69f6bd0bf4f24291dfd48b | |
parent | aa8e27f2a33f7ae8aa0178eddcfee409782e3419 (diff) |
validate/baseclasses: Don't leak several hundred MB of XML
The XML-based MediaDescriptors were keeping the XML file and the
associated ElementTree structures open, resulting in memory usage of
several hundred megabytes.
Instead, cache the information we need immediately and release the
XML structure.
-rw-r--r-- | validate/launcher/baseclasses.py | 64 |
1 files changed, 37 insertions, 27 deletions
diff --git a/validate/launcher/baseclasses.py b/validate/launcher/baseclasses.py index 1fe340d..8c16bd3 100644 --- a/validate/launcher/baseclasses.py +++ b/validate/launcher/baseclasses.py @@ -2024,18 +2024,40 @@ class GstValidateMediaDescriptor(MediaDescriptor): self._xml_path = xml_path try: - self.media_xml = ET.parse(xml_path).getroot() + media_xml = ET.parse(xml_path).getroot() except xml.etree.ElementTree.ParseError: printc("Could not parse %s" % xml_path, Colors.FAIL) raise - # Sanity checks - self.media_xml.attrib["duration"] - self.media_xml.attrib["seekable"] + self._extract_data (media_xml) self.set_protocol(urllib.parse.urlparse(urllib.parse.urlparse(self.get_uri()).scheme).scheme) + def _extract_data(self, media_xml): + # Extract the information we need from the xml + self._caps = media_xml.findall("streams")[0].attrib["caps"] + self._track_caps = [] + try: + streams = media_xml.findall("streams")[0].findall("stream") + except IndexError: + pass + else: + for stream in streams: + self._track_caps.append((stream.attrib["type"], stream.attrib["caps"])) + self._uri = media_xml.attrib["uri"] + self._duration = int(media_xml.attrib["duration"]) + self._protocol = media_xml.get("protocol", None) + self._is_seekable = media_xml.attrib["seekable"].lower() == "true" + self._is_live = media_xml.get("live", "false").lower() == "true" + self._is_image = False + for stream in media_xml.findall("streams")[0].findall("stream"): + if stream.attrib["type"] == "image": + self._is_image = True + self._track_types = [] + for stream in media_xml.findall("streams")[0].findall("stream"): + self._track_types.append(stream.attrib["type"]) + @staticmethod def new_from_uri(uri, verbose=False, include_frames=False): """ @@ -2100,51 +2122,39 @@ class GstValidateMediaDescriptor(MediaDescriptor): return self._xml_path.replace("." 
+ self.STREAM_INFO_EXT, "") def get_caps(self): - return self.media_xml.findall("streams")[0].attrib["caps"] + return self._caps def get_tracks_caps(self): - res = [] - try: - streams = self.media_xml.findall("streams")[0].findall("stream") - except IndexError: - return res - - for stream in streams: - res.append((stream.attrib["type"], stream.attrib["caps"])) - - return res + return self._track_caps def get_uri(self): - return self.media_xml.attrib["uri"] + return self._uri def get_duration(self): - return int(self.media_xml.attrib["duration"]) + return self._duration def set_protocol(self, protocol): - self.media_xml.attrib["protocol"] = protocol + self._protocol = protocol def get_protocol(self): - return self.media_xml.attrib["protocol"] + return self._protocol def is_seekable(self): - return self.media_xml.attrib["seekable"].lower() == "true" + return self._is_seekable def is_live(self): - return self.media_xml.get("live", "false").lower() == "true" + return self._is_live def can_play_reverse(self): return True def is_image(self): - for stream in self.media_xml.findall("streams")[0].findall("stream"): - if stream.attrib["type"] == "image": - return True - return False + return self._is_image def get_num_tracks(self, track_type): n = 0 - for stream in self.media_xml.findall("streams")[0].findall("stream"): - if stream.attrib["type"] == track_type: + for t in self._track_types: + if t == track_type: n += 1 return n |