#!/usr/bin/env python
# -*- tab-width: 4; indent-tabs-mode: nil; py-indent-offset: 4 -*-
#
# Version: MPL 1.1 / GPLv3+ / LGPLv3+
#
# The contents of this file are subject to the Mozilla Public License Version
# 1.1 (the "License"); you may not use this file except in compliance with
# the License or as specified alternatively below. You may obtain a copy of
# the License at http://www.mozilla.org/MPL/
#
# Software distributed under the License is distributed on an "AS IS" basis,
# WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
# for the specific language governing rights and limitations under the
# License.
#
# Major Contributor(s):
# Copyright (C) 2012 Red Hat, Inc., Michael Stahl
#  (initial developer)
#
# All Rights Reserved.
#
# For minor contributions see the git repository.
#
# Alternatively, the contents of this file may be used under the terms of
# either the GNU General Public License Version 3 or later (the "GPLv3+"), or
# the GNU Lesser General Public License Version 3 or later (the "LGPLv3+"),
# in which case the provisions of the GPLv3+ or the LGPLv3+ are applicable
# instead of those above.

# Simple script to load a bunch of documents and export them as Flat ODF
#
# Personally I run it like this:
# ~/lo/master-suse/instdir/program/python ~/lo/master-suse/bin/benchmark-document-loading --soffice=path:/home/tml/lo/master-suse/instdir/program/soffice --outdir=file://$PWD/out --userdir=file:///tmp/test $PWD/docs
#

import argparse
import datetime
import os
import subprocess
import sys
import threading
import time
import urllib
try:
    from urllib.parse import quote, urlparse
except ImportError:
    # Python 2 keeps these helpers in two different modules.
    from urllib import quote
    from urlparse import urlparse
import uuid

try:
    import pyuno
    import uno
    import unohelper
except ImportError:
    print("pyuno not found: try to set PYTHONPATH and URE_BOOTSTRAP variables")
    print("PYTHONPATH=/installation/opt/program")
    print("URE_BOOTSTRAP=file:///installation/opt/program/fundamentalrc")
    raise

try:
    from com.sun.star.document import XDocumentEventListener
    from com.sun.star.io import XOutputStream
except ImportError:
    print("UNO API class not found: try to set URE_BOOTSTRAP variable")
    print("URE_BOOTSTRAP=file:///installation/opt/program/fundamentalrc")
    raise

# File name suffixes that are loaded for each component.
validCalcFileExtensions = [ ".xlsx", ".xls", ".ods", ".fods" ]
validWriterFileExtensions = [ ".docx" , ".rtf", ".odt", ".fodt", ".doc" ]
validImpressFileExtensions = [ ".ppt", ".pptx", ".odp", ".fodp" ]
validDrawFileExtensions = [ ".odg", ".fodg" ]
validReverseFileExtensions = [ ".vsd", ".vdx", ".cdr", ".pub", ".wpd" ]
validFileExtensions = {"calc": validCalcFileExtensions,
                       "writer": validWriterFileExtensions,
                       "impress": validImpressFileExtensions,
                       "draw": validDrawFileExtensions,
                       "reverse": validReverseFileExtensions}

# component -> (export file suffix, export filter name).
# There is no entry for "reverse": those documents are loaded but not exported.
flatODFTypes = {"calc": (".fods", "OpenDocument Spreadsheet Flat XML"),
                "writer": (".fodt", "OpenDocument Text Flat XML"),
                "impress": (".fodp", "OpenDocument Presentation Flat XML"),
                "draw": (".fodg", "OpenDocument Drawing Flat XML")}

# Export directory URL; assigned by runHandleFileTests().
outdir = ""


def partition(list, pred):
    """Split `list` into (matching, non-matching) lists according to `pred`.

    The relative order of the elements is preserved in both result lists.
    """
    left = []
    right = []
    for e in list:
        if pred(e):
            left.append(e)
        else:
            right.append(e)
    return (left, right)


def filelist(directory, suffix):
    """Return the regular files directly inside `directory` ending in `suffix`.

    The suffix comparison is exact (case-sensitive) and includes the dot.
    Raises Exception if `directory` is empty.
    """
    if not directory:
        raise Exception("filelist: empty directory")
    if directory[-1] != "/":
        directory += "/"
    files = [directory + f for f in os.listdir(directory)]
    return [f for f in files
            if os.path.isfile(f) and os.path.splitext(f)[1] == suffix]


def getFiles(dirs, suffix):
    """Return the concatenation of filelist(d, suffix) for every d in `dirs`."""
    files = []
    for d in dirs:
        files += filelist(d, suffix)
    return files


### UNO utilities ###

class OutputStream( unohelper.Base, XOutputStream ):
    """XOutputStream implementation that forwards written data to stdout."""

    def __init__( self ):
        self.closed = 0

    def closeOutput(self):
        self.closed = 1

    def writeBytes( self, seq ):
        # NOTE(review): seq is presumably a uno.ByteSequence whose .value is
        # a byte string. Text-mode sys.stdout.write() rejects bytes on
        # Python 3, so use the underlying binary buffer when there is one.
        out = getattr(sys.stdout, "buffer", sys.stdout)
        out.write( seq.value )

    def flush( self ):
        pass


class OfficeConnection:
    """A connection to one soffice instance, optionally spawned by us."""

    def __init__(self, args):
        self.args = args
        self.soffice = None
        self.socket = None
        self.xContext = None
        self.pro = None  # subprocess.Popen of the spawned soffice, if any

    def setUp(self):
        """Start or locate soffice as requested by --soffice and connect to it.

        "path:<location>" spawns a new soffice (requires --userdir to be a
        file URL); "connect:<socket>" attaches to an already running one.
        Raises Exception for a malformed or unsupported --soffice value.
        """
        (method, sep, rest) = self.args.soffice.partition(":")
        if sep != ":":
            raise Exception("soffice parameter does not specify method")
        if method == "path":
            socket = "pipe,name=pytest" + str(uuid.uuid1())
            userdir = self.args.userdir
            if not userdir:
                raise Exception("'path' method requires --userdir")
            if not userdir.startswith("file://"):
                raise Exception("--userdir must be file URL")
            self.soffice = self.bootstrap(rest, userdir, socket)
        elif method == "connect":
            socket = rest
        else:
            raise Exception("unsupported connection method: " + method)
        self.xContext = self.connect(socket)

    def bootstrap(self, soffice, userdir, socket):
        """Spawn a headless soffice accepting on `socket`; store it in self.pro.

        NOTE(review): this method returns None, so setUp() leaves
        self.soffice as None and the shutdown branch of tearDown() is
        skipped; the process is only ever stopped through kill(). The
        crash-recovery code in HandleFileTest.run() calls tearDown() on an
        already dead office, so returning the process here would make
        tearDown() raise on its non-zero exit status. Left unchanged.
        """
        argv = [ soffice, "--accept=" + socket + ";urp",
                "-env:UserInstallation=" + userdir,
                "--quickstart=no",
                "--norestore", "--nologo", "--headless" ]
        if self.args.valgrind:
            argv.append("--valgrind")
        # Assigning to os.environ also updates the process environment that
        # the child inherits, and keeps os.environ itself consistent.
        os.environ["SAL_LOG"] = "-INFO-WARN"
        os.environ["LIBO_ONEWAY_STABLE_ODF_EXPORT"] = "YES"
        self.pro = subprocess.Popen(argv)

    def connect(self, socket):
        """Resolve and return the remote component context for `socket`.

        Retries once per second for as long as the resolver raises
        NoConnectException; there is no upper bound on the number of retries.
        """
        xLocalContext = uno.getComponentContext()
        xUnoResolver = xLocalContext.ServiceManager.createInstanceWithContext(
            "com.sun.star.bridge.UnoUrlResolver", xLocalContext)
        url = "uno:" + socket + ";urp;StarOffice.ComponentContext"
        while True:
            try:
                xContext = xUnoResolver.resolve(url)
                return xContext
            except pyuno.getClass("com.sun.star.connection.NoConnectException"):
                time.sleep(1)

    def tearDown(self):
        """Shut down the soffice referenced by self.soffice, if there is one.

        Raises Exception if the process exits with a non-zero status.
        """
        if self.soffice:
            if self.xContext:
                try:
                    xMgr = self.xContext.ServiceManager
                    xDesktop = xMgr.createInstanceWithContext(
                        "com.sun.star.frame.Desktop", self.xContext)
                    xDesktop.terminate()
                except pyuno.getClass("com.sun.star.beans.UnknownPropertyException"):
                    pass # ignore, also means disposed
                except pyuno.getClass("com.sun.star.lang.DisposedException"):
                    pass # ignore
            else:
                self.soffice.terminate()
            ret = self.soffice.wait()
            self.xContext = None
            self.socket = None
            self.soffice = None
            if ret != 0:
                raise Exception("Exit status indicates failure: " + str(ret))

    def kill(self):
        """Send the termination signal to the soffice process we spawned.

        The action is appended to killFile.log. Does nothing when no process
        was spawned (the "connect" method).
        """
        if self.pro is None:
            return
        command = "kill " + str(self.pro.pid)
        with open("killFile.log", "a") as killFile:
            killFile.write(command + "\n")
        try:
            self.pro.terminate()
        except OSError:
            # The process is already gone; nothing left to kill.
            pass


class PersistentConnection:
    """Keeps a single OfficeConnection alive across many tests."""

    def __init__(self, args):
        self.args = args
        self.connection = None

    def getContext(self):
        return self.connection.xContext

    def setUp(self):
        assert(not self.connection)
        conn = OfficeConnection(self.args)
        conn.setUp()
        self.connection = conn

    def preTest(self):
        assert(self.connection)

    def postTest(self):
        assert(self.connection)

    def tearDown(self):
        if self.connection:
            try:
                self.connection.tearDown()
            finally:
                # Forget the connection even if tearing it down failed.
                self.connection = None

    def kill(self):
        if self.connection:
            self.connection.kill()


def simpleInvoke(connection, test):
    """Run one test between the connection's preTest() and postTest() hooks."""
    try:
        connection.preTest()
        test.run(connection.getContext(), connection)
    finally:
        connection.postTest()


def runConnectionTests(connection, invoker, tests):
    """Set up `connection` and pass each test in `tests` to `invoker`.

    The connection is not torn down here; that is left to the caller.
    """
    connection.setUp()
    for test in tests:
        invoker(connection, test)


class EventListener(XDocumentEventListener,unohelper.Base):
    """Document event listener that records when "OnLayoutFinished" is seen."""

    def __init__(self):
        self.layoutFinished = False

    def documentEventOccured(self, event):
        if event.EventName == "OnLayoutFinished":
            self.layoutFinished = True

    def disposing(self, event):
        pass


def mkPropertyValue(name, value):
    """Build a com.sun.star.beans.PropertyValue struct for `name`/`value`."""
    return uno.createUnoStruct("com.sun.star.beans.PropertyValue",
            name, 0, value, 0)


### tests ###

def logTimeSpent(url, startTime):
    """Print "<file name>\\t<seconds elapsed since startTime>" for `url`."""
    print(os.path.basename(urlparse(url).path) + "\t" + str(time.time()-startTime))


def loadFromURL(xContext, url, t, component):
    """Load `url` hidden and read-only, log the load time, return the document.

    `t` is the caller's watchdog timer; it is cancelled once loading (and,
    for calc, recalculation) is done. Writer documents are additionally
    given up to 30 seconds to report "OnLayoutFinished".

    Returns None when the office raises IllegalArgumentException.
    UnknownPropertyException and DisposedException are re-raised for the
    caller to handle; on any other exception the document is closed first.
    """
    xDesktop = xContext.ServiceManager.createInstanceWithContext(
        "com.sun.star.frame.Desktop", xContext)
    props = [("Hidden", True), ("ReadOnly", True)] # FilterName?
    loadProps = tuple([mkPropertyValue(name, value) for (name, value) in props])
    xListener = None
    xGEB = None
    if component == "writer":
        xListener = EventListener()
        xGEB = xContext.getValueByName(
            "/singletons/com.sun.star.frame.theGlobalEventBroadcaster")
        xGEB.addDocumentEventListener(xListener)
    try:
        xDoc = None
        startTime = time.time()
        xDoc = xDesktop.loadComponentFromURL(url, "_blank", 0, loadProps)
        if component == "calc":
            try:
                if xDoc:
                    xDoc.calculateAll()
            except AttributeError:
                pass
            t.cancel()
            logTimeSpent(url, startTime)
            return xDoc
        elif component == "writer":
            time_ = 0
            t.cancel()
            while time_ < 30:
                if xListener.layoutFinished:
                    logTimeSpent(url, startTime)
                    return xDoc
                time_ += 1
                time.sleep(1)
        else:
            t.cancel()
            logTimeSpent(url, startTime)
            return xDoc
        # Only reached for writer documents whose layout never finished.
        with open("file.log", "a") as fh:
            fh.write("layout did not finish\n")
        return xDoc
    except pyuno.getClass("com.sun.star.beans.UnknownPropertyException"):
        # Clearing the listener skips the removal call in the finally clause.
        xListener = None
        raise # means crashed, handle it later
    except pyuno.getClass("com.sun.star.lang.DisposedException"):
        xListener = None
        raise # means crashed, handle it later
    except pyuno.getClass("com.sun.star.lang.IllegalArgumentException"):
        pass # means could not open the file, ignore it
    except:
        if xDoc:
            xDoc.close(True)
        raise
    finally:
        if xListener:
            xGEB.removeDocumentEventListener(xListener)


def exportToODF(xContext, xDoc, baseName, t, component):
    """Store `xDoc` below `outdir` using the flat ODF filter for `component`.

    `component` must be a key of flatODFTypes. `xContext` and `t` are unused.
    """
    exportFileName = outdir + "/" + os.path.splitext(baseName)[0] + flatODFTypes[component][0]
    print("exportToODF " + baseName + " => " + exportFileName)
    props = [("FilterName", flatODFTypes[component][1]),
             ("Overwrite", True)]
    storeProps = tuple([mkPropertyValue(name, value) for (name, value) in props])
    xDoc.storeToURL(exportFileName, tuple(storeProps))


def handleCrash(file, disposed):
    """Append a record for a crashed `file` to crashlog.txt.

    `disposed` == 1 marks a crash reported through DisposedException.
    """
    with open("crashlog.txt", "a") as crashLog:
        crashLog.write('Crash:' + file + ' ')
        if disposed == 1:
            crashLog.write('through disposed\n')
        else:
            # Terminate the record so that entries do not run together.
            crashLog.write('\n')
#    crashed_files.append(file)
# add here the remaining handling code for crashed files


def alarm_handler(args):
    """Timer callback: kill the office behind the given connection."""
    args.kill()


class HandleFileTest:
    """Loads one file, exports it as flat ODF and records the outcome."""

    def __init__(self, file, state, component):
        self.file = file
        self.state = state
        self.component = component

    def run(self, xContext, connection):
        """Load and export self.file, classifying it in self.state.

        A 60 second watchdog timer kills the office if loading hangs. When
        the office raises UnknownPropertyException or DisposedException the
        file is recorded as timed out (timer no longer alive) or as crashed,
        and the connection is re-established.
        """
        t = None
        args = None
        # Bound before the try block so that the finally clause can always
        # test it, even if something fails before the document is loaded.
        xDoc = None
        try:
            url = "file://" + quote(self.file)
            with open("file.log", "a") as fh:
                fh.write(url + "\n")
            args = [connection]
            t = threading.Timer(60, alarm_handler, args)
            t.start()
            xDoc = loadFromURL(xContext, url, t, self.component)
            self.state.goodFiles.append(self.file)
            # Export only when a document was actually returned and a flat
            # ODF type exists for the component ("reverse" has none);
            # otherwise exportToODF() would abort the whole run.
            if xDoc is not None and self.component in flatODFTypes:
                exportToODF(xContext, xDoc,
                            os.path.basename(urlparse(url).path),
                            t, self.component)
        except pyuno.getClass("com.sun.star.beans.UnknownPropertyException"):
            if not t.is_alive():
                self.state.timeoutFiles.append(self.file)
            else:
                t.cancel()
                handleCrash(self.file, 0)
                self.state.badPropertyFiles.append(self.file)
            connection.tearDown()
            connection.setUp()
        except pyuno.getClass("com.sun.star.lang.DisposedException"):
            if not t.is_alive():
                self.state.timeoutFiles.append(self.file)
            else:
                t.cancel()
                handleCrash(self.file, 1)
                self.state.badDisposedFiles.append(self.file)
            connection.tearDown()
            connection.setUp()
        finally:
            if t is not None and t.is_alive():
                t.cancel()
            try:
                if xDoc:
                    # Closing gets its own, shorter watchdog.
                    t = threading.Timer(10, alarm_handler, args)
                    t.start()
                    xDoc.close(True)
                    t.cancel()
            except pyuno.getClass("com.sun.star.beans.UnknownPropertyException"):
                print("caught UnknownPropertyException while closing")
                self.state.badPropertyFiles.append(self.file)
                connection.tearDown()
                connection.setUp()
            except pyuno.getClass("com.sun.star.lang.DisposedException"):
                print("caught DisposedException while closing")
                if t.is_alive():
                    t.cancel()
                else:
                    self.state.badDisposedFiles.append(self.file)
                connection.tearDown()
                connection.setUp()


class State:
    """Accumulates the per-category file lists of a benchmark run."""

    def __init__(self):
        self.goodFiles = []
        self.badDisposedFiles = []
        self.badPropertyFiles = []
        self.timeoutFiles = []


def write_state_report(files_list, start_time, report_filename, description):
    """Overwrite `report_filename` with a header and one line per file."""
    with open(report_filename, "w") as fh:
        fh.write("%s:\n" % description)
        fh.write("Starttime: %s\n" % start_time.isoformat())
        for f in files_list:
            fh.write("%s\n" % f)


def writeReport(state, startTime):
    """Write one report file for each category collected in `state`."""
    write_state_report(state.goodFiles, startTime, "goodFiles.log",
                       "Files which loaded perfectly")
    write_state_report(state.badDisposedFiles, startTime, "badDisposedFiles.log",
                       "Files which crashed with DisposedException")
    write_state_report(state.badPropertyFiles, startTime, "badPropertyFiles.log",
                       "Files which crashed with UnknownPropertyException")
    write_state_report(state.timeoutFiles, startTime, "timeoutFiles.log",
                       "Files which timed out")


def runHandleFileTests(opts):
    """Run the benchmark over every matching file found in opts.dirs.

    Sets the global `outdir` to a timestamped location below opts.outdir.
    The office is killed and the reports are written even if the run aborts.
    """
    global outdir
    startTime = datetime.datetime.now()
    connection = PersistentConnection(opts)
    outdir = os.path.join(opts.outdir, startTime.strftime('%Y%m%d.%H%M%S'))
    # Created before the try block so the report can always be written.
    state = State()
    try:
        tests = []
        for component, validExtension in validFileExtensions.items():
            files = []
            for suffix in validExtension:
                files.extend(getFiles(opts.dirs, suffix))
            files.sort()
            tests.extend( (HandleFileTest(file, state, component) for file in files) )
        runConnectionTests(connection, simpleInvoke, tests)
    finally:
        try:
            connection.kill()
        finally:
            # Write the reports even if killing the office failed.
            writeReport(state, startTime)


def parseArgs(argv):
    """Parse the command line (`argv` includes the program name)."""
    epilog = "'location' is a pathname, not a URL. 'outdir' and 'userdir' are URLs.\n" \
             "The 'directory' parameters should be full absolute pathnames, not URLs."

    parser = argparse.ArgumentParser(formatter_class=argparse.RawTextHelpFormatter,
                                     epilog=epilog)
    parser.add_argument('--soffice', metavar='method:location', required=True,
                        help="specify soffice instance to connect to\n"
                             "supported methods: 'path', 'connect'")
    parser.add_argument('--outdir', metavar='URL', required=True,
                        help="specify the output directory for flat ODF exports")
    parser.add_argument('--userdir', metavar='URL',
                        help="specify user installation directory for 'path' method")
    parser.add_argument('--valgrind', action='store_true',
                        help="pass --valgrind to soffice for 'path' method")
    parser.add_argument('dirs', metavar='directory', nargs='+')

    args = parser.parse_args(argv[1:])

    return args


if __name__ == "__main__":
    opts = parseArgs(sys.argv)
    runHandleFileTests(opts)

# vim:set shiftwidth=4 softtabstop=4 expandtab: