diff options
author | Caolán McNamara <caolanm@redhat.com> | 2011-11-14 10:37:01 +0000 |
---|---|---|
committer | Caolán McNamara <caolanm@redhat.com> | 2011-11-14 10:37:01 +0000 |
commit | b863767bd1ddc2af18900fa1df0cd61ef2fa6edb (patch) | |
tree | 9c6590556dd58601861c31215360e017ef0f49bc /bin | |
parent | ff6af93716bca3956b46e1c5940d4fdb92ceb7eb (diff) |
add script to download documents from various bugzillas
Diffstat (limited to 'bin')
-rwxr-xr-x | bin/get-bugzilla-attachments-by-mimetype | 141 |
1 files changed, 141 insertions, 0 deletions
diff --git a/bin/get-bugzilla-attachments-by-mimetype b/bin/get-bugzilla-attachments-by-mimetype new file mode 100755 index 000000000000..72157649b100 --- /dev/null +++ b/bin/get-bugzilla-attachments-by-mimetype @@ -0,0 +1,141 @@ +#!/usr/bin/env python +# -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- +#************************************************************************* +# Version: MPL 1.1 / GPLv3+ / LGPLv3+ +# +# The contents of this file are subject to the Mozilla Public License Version +# 1.1 (the "License"); you may not use this file except in compliance with +# the License or as specified alternatively below. You may obtain a copy of +# the License at http://www.mozilla.org/MPL/ +# +# Software distributed under the License is distributed on an "AS IS" basis, +# WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License +# for the specific language governing rights and limitations under the +# License. +# +# The Initial Developer of the Original Code is +# Caolán McNamara, Red Hat, Inc. <caolanm@redhat.com> +# Portions created by the Initial Developer are Copyright (C) 2011 the +# Initial Developer. All Rights Reserved. +# +# Major Contributor(s): +# +# For minor contributions see the git repository. +# +# Alternatively, the contents of this file may be used under the terms of +# either the GNU General Public License Version 3 or later (the "GPLv3+"), or +# the GNU Lesser General Public License Version 3 or later (the "LGPLv3+"), +# in which case the provisions of the GPLv3+ or the LGPLv3+ are applicable +# instead of those above. +#************************************************************************* + +#This digs through a pile of bugzilla's and populates the cwd with a big +#collection of bug-docs in per-filetype dirs with bug-ids as names with +#prefixes to indicate which bug-tracker, e.g. 
#
#fdo-bugid-X.suffix
#rhbz-bugid-X.suffix
#moz-bugid-X.suffix
#
#where X is the n'th attachment of that type in the bug

import base64
import errno
import os
import os.path
from contextlib import closing
from xml.dom import minidom

# The script was written for Python 2; try the Python 3 module names first
# and fall back to the original ones so it runs on either interpreter.
try:
    from urllib.parse import quote
    from urllib.request import urlopen
    import xmlrpc.client as xmlrpclib
except ImportError:
    from urllib import quote, urlopen
    import xmlrpclib


def _ensure_directory(path):
    """Create the directory *path* unless it already exists.

    Any failure other than "already exists as a directory" is re-raised
    instead of being silently swallowed.
    """
    try:
        os.mkdir(path)
    except OSError as err:
        if err.errno != errno.EEXIST or not os.path.isdir(path):
            raise


def _node_text(node):
    """Return the character data directly inside *node* ('' when empty)."""
    return ''.join(child.data for child in node.childNodes
                   if child.nodeType in (child.TEXT_NODE,
                                         child.CDATA_SECTION_NODE))


def _attachment_path(prefix, suffix, bug_id, attachmentid):
    """Return the relative path <suffix>/<prefix><bug_id>-<n>.<suffix>."""
    return suffix + '/' + prefix + bug_id + '-' + str(attachmentid) + '.' + suffix


def get_from_bug_url_via_xml(url, mimetype, prefix, suffix):
    """Download the attachments of one bug that have the given MIME type.

    url      -- show_bug URL; the bug id is the text after its last '='
    mimetype -- MIME type to keep (compared case-insensitively)
    prefix   -- bug-tracker tag that starts each file name (e.g. "fdo")
    suffix   -- file extension, also the name of the target directory

    The bug is fetched as XML (url + "&ctype=xml") and every matching
    attachment is base64-decoded into <suffix>/<prefix><id>-<n>.<suffix>,
    where n counts the saved attachments from 1.  If the "-1" file is
    already present the bug is assumed to be up to date and nothing is
    fetched.
    """
    # Take everything after the LAST '=' so URLs carrying several query
    # parameters still yield the bug id.
    bug_id = url.rsplit('=', 1)[-1]
    print("id is %s %s %s" % (prefix, bug_id, suffix))
    if os.path.isfile(_attachment_path(prefix, suffix, bug_id, 1)):
        print("assuming %s is up to date" % bug_id)
        return

    print("parsing %s" % bug_id)
    # closing() releases the connection even when the XML fails to parse.
    with closing(urlopen(url + "&ctype=xml")) as sock:
        dom = minidom.parse(sock)

    wanted = mimetype.lower()
    attachmentid = 1
    for attachment in dom.getElementsByTagName('attachment'):
        status = [" mimetype is"]
        payload = None
        # Children are examined in document order: a non-matching <type>
        # rejects the attachment, the first <data> element selects it.
        for node in attachment.childNodes:
            if node.nodeName == 'type':
                found = _node_text(node)
                status.append(found)
                if found.lower() != wanted:
                    status.append('skipping')
                    break
            elif node.nodeName == 'data':
                payload = node
                break
        if payload is None:
            print(' '.join(status))
            continue

        download = _attachment_path(prefix, suffix, bug_id, attachmentid)
        status.extend(['downloading as', download])
        print(' '.join(status))
        # Every caller can reach this point, so make sure the target
        # directory exists here rather than relying on a previous call.
        _ensure_directory(suffix)
        # The payload is binary document data: write it in binary mode.
        with open(download, 'wb') as out:
            out.write(base64.b64decode(_node_text(payload)))
        attachmentid += 1


def get_through_rpc_query(rpcurl, showurl, mimetype, prefix, suffix):
    """Find bugs through the XML-RPC Bug.search call and fetch attachments.

    rpcurl  -- URL of the XML-RPC endpoint
    showurl -- show_bug URL ending in "id=", to which each bug id is appended

    An XML-RPC fault is reported on stdout and ends this query; it is not
    re-raised.
    """
    query = {
        'column_list': 'bug_id',
        'query_format': 'advanced',
        'field0-0-0': 'attachments.mimetype',
        'type0-0-0': 'equals',
        'value0-0-0': mimetype,
    }
    try:
        proxy = xmlrpclib.ServerProxy(rpcurl)
        result = proxy.Bug.search(query)
    except xmlrpclib.Fault as err:
        print("A fault occurred")
        print("Fault code: %s" % err.faultCode)
        print(err.faultString)
        return

    bugs = result['bugs']
    print("%d bugs to process" % len(bugs))
    for bug in bugs:
        url = showurl + str(bug['bug_id'])
        get_from_bug_url_via_xml(url, mimetype, prefix, suffix)


def get_through_rss_query_url(url, mimetype, prefix, suffix):
    """Fetch attachments for every bug listed in the RSS feed at *url*.

    Each feed entry's 'id' is passed on as the URL of the bug.
    """
    # Third-party dependency, imported lazily so the XML-RPC path works
    # even when feedparser is not installed.
    import feedparser

    _ensure_directory(suffix)
    d = feedparser.parse(url)
    for entry in d['entries']:
        get_from_bug_url_via_xml(entry['id'], mimetype, prefix, suffix)


def get_through_rss_query(queryurl, mimetype, prefix, suffix):
    """Build the buglist RSS query for *mimetype* and process its results."""
    # The MIME type becomes part of a query string, so it needs URL quoting
    # (not XML escaping); '/' stays literal, as in the URLs used so far.
    url = (queryurl
           + '?query_format=advanced&field0-0-0=attachments.mimetype'
           + '&type0-0-0=equals&value0-0-0=' + quote(mimetype)
           + '&ctype=rss')
    print('url is %s' % url)
    get_through_rss_query_url(url, mimetype, prefix, suffix)


freedesktop = 'http://bugs.freedesktop.org/buglist.cgi'
openoffice = 'http://openoffice.org/bugzilla/buglist.cgi'
redhatrpc = 'https://bugzilla.redhat.com/xmlrpc.cgi'
redhatbug = 'https://bugzilla.redhat.com/show_bug.cgi?id='
novell = 'https://bugzilla.novell.com/buglist.cgi'
mozilla = 'https://bugzilla.mozilla.org/buglist.cgi'

# (MIME type, file suffix) pairs requested from every tracker, in order.
MIMETYPE_SUFFIXES = (
    ('application/msword', "doc"),
    ('application/rtf', "rtf"),
    ('text/rtf', "rtf"),
    ('text/spreadsheet', "slk"),
    ('application/vnd.ms-powerpoint', "ppt"),
)


def main():
    """Run every query: freedesktop, then Red Hat, then OpenOffice.org."""
    for mimetype, suffix in MIMETYPE_SUFFIXES:
        get_through_rss_query(freedesktop, mimetype, "fdo", suffix)

    for mimetype, suffix in MIMETYPE_SUFFIXES:
        get_through_rpc_query(redhatrpc, redhatbug, mimetype, "rhbz", suffix)

    #to-do, get attachments some other way, not inline in xml
    #get_through_rss_query(novell, 'application/msword', "n", "doc")

    for mimetype, suffix in MIMETYPE_SUFFIXES:
        get_through_rss_query(openoffice, mimetype, "ooo", suffix)


if __name__ == '__main__':
    main()

# vim:set shiftwidth=4 softtabstop=4 expandtab: