diff options
Diffstat (limited to 'bin')
-rwxr-xr-x | bin/get-bugzilla-attachments-by-mimetype | 278 |
1 files changed, 191 insertions, 87 deletions
diff --git a/bin/get-bugzilla-attachments-by-mimetype b/bin/get-bugzilla-attachments-by-mimetype
index 0b1806085002..7316f0a921f2 100755
--- a/bin/get-bugzilla-attachments-by-mimetype
+++ b/bin/get-bugzilla-attachments-by-mimetype
@@ -39,6 +39,7 @@
 import urllib
 import feedparser
 import base64
+import re
 import os, os.path
 import xmlrpclib
 from xml.dom import minidom
@@ -83,6 +84,42 @@ def get_from_bug_url_via_xml(url, mimetype, prefix, suffix):
                     break
             attachmentid += 1
 
+def get_novell_bug_via_xml(url, mimetype, prefix, suffix):
+    """Save every attachment of type `mimetype` of the Novell bug at `url` into the `suffix` directory."""
+    bug_id = url.rsplit('=', 1)[1]  # the bug number is whatever follows the last '='
+    print "id is", prefix, bug_id, suffix
+    if os.path.isfile(suffix + '/' + prefix + bug_id + '-1.' + suffix):
+        print "assuming", bug_id, "is up to date"
+        return
+    print "parsing", bug_id
+    sock = urlopen_retry(url + "&ctype=xml")
+    dom = minidom.parse(sock)
+    sock.close()
+    attachmentid = 1
+    for comment in dom.getElementsByTagName('thetext'):
+        # An empty comment element has no text node, so firstChild may be None.
+        commentText = comment.firstChild.nodeValue if comment.firstChild else ''
+        match = re.search(r".*Created an attachment \(id=([0-9]+)\)", commentText)
+        if not match:
+            continue
+        realAttachmentId = match.group(1)
+        handle = urlopen_retry(novellattach + realAttachmentId)
+        if not handle:
+            print "attachment %s is not accessible" % realAttachmentId
+            continue
+        # Keep only attachments whose served content type is the requested one.
+        remoteMime = handle.info().gettype()
+        if remoteMime != mimetype:
+            print "skipping"
+            continue
+        # Files are numbered in download order; binary mode keeps the bytes intact.
+        download = suffix + '/' + prefix + bug_id + '-' + str(attachmentid) + '.' + suffix
+        print 'downloading as', download
+        f = open(download, 'wb')
+        f.write(handle.read())
+        f.close()
+        attachmentid += 1
+
 def get_through_rpc_query(rpcurl, showurl, mimetype, prefix, suffix):
     try:
         proxy = xmlrpclib.ServerProxy(rpcurl)
@@ -109,108 +146,164 @@ def get_through_rss_query_url(url, mimetype, prefix, suffix):
     except:
         pass
     d = feedparser.parse(url)
+
+    # Novell Bugzilla serves neither detailed bug information nor attachment bodies without a login,
+    # so entries of the "novell" prefix go through the get_novell_bug_via_xml workaround instead.
+    get_bug_function = get_novell_bug_via_xml if prefix == "novell" else get_from_bug_url_via_xml
+
     for entry in d['entries']:
-        get_from_bug_url_via_xml(entry['id'], mimetype, prefix, suffix)
+        get_bug_function(entry['id'], mimetype, prefix, suffix)
 
 def get_through_rss_query(queryurl, mimetype, prefix, suffix):
     url = queryurl + '?query_format=advanced&field0-0-0=attachments.mimetype&type0-0-0=equals&value0-0-0=' + escape(mimetype) + '&ctype=rss'
     print 'url is', url
     get_through_rss_query_url(url, mimetype, prefix, suffix)
 
+def get_launchpad_bugs(prefix):
+    """Download the attachments with a known mimetype from the Ubuntu libreoffice bugs on Launchpad."""
+    # launchpadlib python module is required to download launchpad attachments
+    from launchpadlib.launchpad import Launchpad
+
+    launchpad = Launchpad.login_anonymously("attachmentdownload", "production")
+    ubuntu = launchpad.distributions["ubuntu"]
+
+    # The Launchpad API cannot search for bugs by attachment mimetype,
+    # so we iterate over all bugs of the libreoffice source package.
+    libo = ubuntu.getSourcePackage(name="libreoffice")
+    libobugs = libo.getBugTasks()
+
+    for bugtask in libobugs:
+        bug = bugtask.bug
+        bug_id = str(bug.id)
+        print "parsing ", bug_id, "status:", bugtask.status, "title:", bug.title[:50]
+        attachmentid = 1
+        for attachment in bug.attachments:
+            handle = attachment.data.open()
+            if handle.content_type not in mimetypes:
+                continue
+
+            suffix = mimetypes[handle.content_type]
+            if not os.path.isdir(suffix):
+                try:
+                    os.mkdir(suffix)
+                except OSError:
+                    pass  # best effort: open() below still fails if the directory is missing
+
+            download = suffix + '/' + prefix + bug_id + '-' + str(attachmentid) + '.' + suffix
+
+            if os.path.isfile(download):
+                print "assuming", bug_id, "is up to date"
+                break
+
+            print 'mimetype is', handle.content_type, 'downloading as', download
+            # Attachments are binary documents, so write them in binary mode.
+            f = open(download, "wb")
+            f.write(handle.read())
+            f.close()
+            attachmentid += 1
 
 freedesktop = 'http://bugs.freedesktop.org/buglist.cgi'
+abisource = 'http://bugzilla.abisource.com/buglist.cgi'  # added for abiword
+gnome = 'http://bugzilla.gnome.org/buglist.cgi'  # added for gnumeric
+kde = 'http://bugs.kde.org/buglist.cgi'  # added for koffice/calligra
 openoffice = 'https://issues.apache.org/ooo/buglist.cgi'
 redhatrpc = 'https://bugzilla.redhat.com/xmlrpc.cgi'
 redhatbug = 'https://bugzilla.redhat.com/show_bug.cgi?id='
-novell = 'https://bugzilla.novell.com/buglist.cgi'
 mozilla = 'https://bugzilla.mozilla.org/buglist.cgi'
 
-mimetypes = [
+# Novell Bugzilla requires users to log in in order to get details of the bugs such as attachment bodies etc.
+# As a dirty workaround, we parse comments containing "Created an attachment (id=xxxxxx)" and download attachments manually.
+# python-bugzilla claims that it supports Novell bugzilla login, but it's not working right now and the novell bugzilla login
+# system is a nightmare.
+novellattach = 'https://bugzilla.novell.com/attachment.cgi?id='
+novell = 'https://bugzilla.novell.com/buglist.cgi'
+
+mimetypes = {
 # ODF
-    ('application/vnd.oasis.opendocument.base', 'odb'),
-    ('application/vnd.oasis.opendocument.database', 'odb'),
-    ('application/vnd.oasis.opendocument.chart', 'odc'),
-    ('application/vnd.oasis.opendocument.chart-template', 'otc'),
-    ('application/vnd.oasis.opendocument.formula', 'odf'),
-    ('application/vnd.oasis.opendocument.formula-template', 'otf'),
-    ('application/vnd.oasis.opendocument.graphics', 'odg'),
-    ('application/vnd.oasis.opendocument.graphics-template', 'otg'),
-    ('application/vnd.oasis.opendocument.graphics-flat-xml', 'fodg'),
-    ('application/vnd.oasis.opendocument.presentation', 'odp'),
-    ('application/vnd.oasis.opendocument.presentation-template', 'otp'),
-    ('application/vnd.oasis.opendocument.presentation-flat-xml', 'fodp'),
-    ('application/vnd.oasis.opendocument.spreadsheet', 'ods'),
-    ('application/vnd.oasis.opendocument.spreadsheet-template', 'ots'),
-    ('application/vnd.oasis.opendocument.spreadsheet-flat-xml', 'fods'),
-    ('application/vnd.oasis.opendocument.text', 'odt'),
-    ('application/vnd.oasis.opendocument.text-flat-xml', 'fodt'),
-    ('application/vnd.oasis.opendocument.text-master', 'odm'),
-    ('application/vnd.oasis.opendocument.text-template', 'ott'),
-    ('application/vnd.oasis.opendocument.text-web', 'oth'),
+    'application/vnd.oasis.opendocument.base': 'odb',
+    'application/vnd.oasis.opendocument.database': 'odb',
+    'application/vnd.oasis.opendocument.chart': 'odc',
+    'application/vnd.oasis.opendocument.chart-template': 'otc',
+    'application/vnd.oasis.opendocument.formula': 'odf',
+    'application/vnd.oasis.opendocument.formula-template': 'otf',
+    'application/vnd.oasis.opendocument.graphics': 'odg',
+    'application/vnd.oasis.opendocument.graphics-template': 'otg',
+    'application/vnd.oasis.opendocument.graphics-flat-xml': 'fodg',
+    'application/vnd.oasis.opendocument.presentation': 'odp',
+    'application/vnd.oasis.opendocument.presentation-template': 'otp',
+    'application/vnd.oasis.opendocument.presentation-flat-xml': 'fodp',
+    'application/vnd.oasis.opendocument.spreadsheet': 'ods',
+    'application/vnd.oasis.opendocument.spreadsheet-template': 'ots',
+    'application/vnd.oasis.opendocument.spreadsheet-flat-xml': 'fods',
+    'application/vnd.oasis.opendocument.text': 'odt',
+    'application/vnd.oasis.opendocument.text-flat-xml': 'fodt',
+    'application/vnd.oasis.opendocument.text-master': 'odm',
+    'application/vnd.oasis.opendocument.text-template': 'ott',
+    'application/vnd.oasis.opendocument.text-web': 'oth',
 # OOo XML
-    ('application/vnd.sun.xml.base', 'odb'),
-    ('application/vnd.sun.xml.calc', 'sxc'),
-    ('application/vnd.sun.xml.calc.template', 'stc'),
-    ('application/vnd.sun.xml.chart', 'sxs'),
-    ('application/vnd.sun.xml.draw', 'sxd'),
-    ('application/vnd.sun.xml.draw.template', 'std'),
-    ('application/vnd.sun.xml.impress', 'sxi'),
-    ('application/vnd.sun.xml.impress.template', 'sti'),
-    ('application/vnd.sun.xml.math', 'sxm'),
-    ('application/vnd.sun.xml.writer', 'sxw'),
-    ('application/vnd.sun.xml.writer.global', 'sxg'),
-    ('application/vnd.sun.xml.writer.template', 'stw'),
-    ('application/vnd.sun.xml.writer.web', 'stw'),
+    'application/vnd.sun.xml.base': 'odb',
+    'application/vnd.sun.xml.calc': 'sxc',
+    'application/vnd.sun.xml.calc.template': 'stc',
+    'application/vnd.sun.xml.chart': 'sxs',
+    'application/vnd.sun.xml.draw': 'sxd',
+    'application/vnd.sun.xml.draw.template': 'std',
+    'application/vnd.sun.xml.impress': 'sxi',
+    'application/vnd.sun.xml.impress.template': 'sti',
+    'application/vnd.sun.xml.math': 'sxm',
+    'application/vnd.sun.xml.writer': 'sxw',
+    'application/vnd.sun.xml.writer.global': 'sxg',
+    'application/vnd.sun.xml.writer.template': 'stw',
+    'application/vnd.sun.xml.writer.web': 'stw',
 # MSO
-    ('application/rtf', 'rtf'),
-    ('text/rtf', 'rtf'),
-    ('application/msword', 'doc'),
-    ('application/vnd.ms-powerpoint', 'ppt'),
-    ('application/vnd.ms-excel', 'xls'),
-    ('application/vnd.openxmlformats-officedocument.spreadsheetml.sheet', 'xlsx'),
-    ('application/vnd.openxmlformats-officedocument.spreadsheetml.template', 'xltx'),
-    ('application/vnd.openxmlformats-officedocument.presentationml.presentation', 'pptx'),
-    ('application/vnd.openxmlformats-officedocument.presentationml.template', 'ppotx'),
-    ('application/vnd.openxmlformats-officedocument.presentationml.slideshow', 'ppsx'),
-    ('application/vnd.openxmlformats-officedocument.wordprocessingml.document', 'docx'),
-    ('application/vnd.openxmlformats-officedocument.wordprocessingml.template', 'dotx'),
-    ('application/vnd.visio', 'vsd'),
+    'application/rtf': 'rtf',
+    'text/rtf': 'rtf',
+    'application/msword': 'doc',
+    'application/vnd.ms-powerpoint': 'ppt',
+    'application/vnd.ms-excel': 'xls',
+    'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet': 'xlsx',
+    'application/vnd.openxmlformats-officedocument.spreadsheetml.template': 'xltx',
+    'application/vnd.openxmlformats-officedocument.presentationml.presentation': 'pptx',
+    'application/vnd.openxmlformats-officedocument.presentationml.template': 'potx',  # was 'ppotx'; PowerPoint templates use .potx
+    'application/vnd.openxmlformats-officedocument.presentationml.slideshow': 'ppsx',
+    'application/vnd.openxmlformats-officedocument.wordprocessingml.document': 'docx',
+    'application/vnd.openxmlformats-officedocument.wordprocessingml.template': 'dotx',
+    'application/vnd.visio': 'vsd',
 # W3C
-    ('application/xhtml+xml', 'xhtml'),
-    ('application/mathml+xml', 'mml'),
-    ('text/html', 'html'),
-    ('application/docbook+xml', 'docbook'),
+    'application/xhtml+xml': 'xhtml',
+    'application/mathml+xml': 'mml',
+    'text/html': 'html',
+    'application/docbook+xml': 'docbook',
 # misc
-    ('text/spreadsheet', 'slk'),
-    ('application/pdf', 'pdf'),
-    ('application/vnd.corel-draw', 'cdr'),
-    ('application/vnd.lotus-wordpro', 'lwp'),
-    ('application/vnd.lotus-1-2-3', 'wks'),
-    ('application/vnd.wordperfect', 'wpd'),
-    ('application/vnd.ms-works', 'wps'),
-    ('application/x-hwp', 'hwp'),
-    ('application/x-aportisdoc', 'pdb'),
-    ('application/x-pocket-word', 'psw'),
-    ('application/x-t602', '602'),
+    'text/spreadsheet': 'slk',
+    'application/pdf': 'pdf',
+    'application/vnd.corel-draw': 'cdr',
+    'application/vnd.lotus-wordpro': 'lwp',
+    'application/vnd.lotus-1-2-3': 'wks',
+    'application/vnd.wordperfect': 'wpd',
+    'application/vnd.ms-works': 'wps',
+    'application/x-hwp': 'hwp',
+    'application/x-aportisdoc': 'pdb',
+    'application/x-pocket-word': 'psw',
+    'application/x-t602': '602',
 # binfilter
-    ('application/x-starcalc', 'sdc'),
-    ('application/vnd.stardivision.calc', 'sdc5'),
-    ('application/x-starchart', 'sds'),
-    ('application/vnd.stardivision.chart', 'sds5'),
-    ('application/x-stardraw', 'sdd_d'),
-    ('application/vnd.stardivision.draw', 'sda5'),
-    ('application/x-starimpress', 'sdd_i'),
-    ('application/vnd.stardivision.impress', 'sdd5'),
-    ('application/vnd.stardivision.impress-packed', 'sdp5'),
-    ('application/x-starmath', 'smf'),
-    ('application/vnd.stardivision.math', 'smf5'),
-    ('application/x-starwriter', 'sdw'),
-    ('application/vnd.stardivision.writer', 'sdw5'),
-    ('application/vnd.stardivision.writer-global', 'sgl5'),
+    'application/x-starcalc': 'sdc',
+    'application/vnd.stardivision.calc': 'sdc5',
+    'application/x-starchart': 'sds',
+    'application/vnd.stardivision.chart': 'sds5',
+    'application/x-stardraw': 'sdd_d',
+    'application/vnd.stardivision.draw': 'sda5',
+    'application/x-starimpress': 'sdd_i',
+    'application/vnd.stardivision.impress': 'sdd5',
+    'application/vnd.stardivision.impress-packed': 'sdp5',
+    'application/x-starmath': 'smf',
+    'application/vnd.stardivision.math': 'smf5',
+    'application/x-starwriter': 'sdw',
+    'application/vnd.stardivision.writer': 'sdw5',
+    'application/vnd.stardivision.writer-global': 'sgl5',
 # unusual image mimetypes
-    ('image/cgm', 'cgm'),
-    ('image/x-targa', 'tga'),
-]
+    'image/cgm': 'cgm',
+    'image/x-targa': 'tga',
+}
 
 # disabled for now, this would download gigs of pngs/jpegs...
 common_image_mimetypes = [
@@ -241,16 +334,27 @@ common_image_mimetypes = [
     ('image/png', 'png'),
 ]
 
-for (mimetype,extension) in mimetypes:
+for (mimetype,extension) in mimetypes.items():
+    get_through_rss_query(novell, mimetype, "novell", extension)
+
+for (mimetype,extension) in mimetypes.items():
+    get_through_rss_query(kde, mimetype, "kde", extension)
+
+for (mimetype,extension) in mimetypes.items():
+    get_through_rss_query(gnome, mimetype, "gnome", extension)
+
+for (mimetype,extension) in mimetypes.items():
+    get_through_rss_query(abisource, mimetype, "abi", extension)
+
+for (mimetype,extension) in mimetypes.items():
     get_through_rss_query(freedesktop, mimetype, "fdo", extension)
 
-for (mimetype,extension) in mimetypes:
+for (mimetype,extension) in mimetypes.items():
     get_through_rpc_query(redhatrpc, redhatbug, mimetype, "rhbz", extension)
 
-#to-do, get attachments some other way, not inline in xml
-#get_through_rss_query(novell, 'application/msword', "n", "doc")
-
-for (mimetype,extension) in mimetypes:
+for (mimetype,extension) in mimetypes.items():
     get_through_rss_query(openoffice, mimetype, "ooo", extension)
 
+get_launchpad_bugs("lp")
+
 # vim:set shiftwidth=4 softtabstop=4 expandtab: