diff options
Diffstat (limited to 'wiki-to-help/convert.py')
-rwxr-xr-x | wiki-to-help/convert.py | 222 |
1 files changed, 222 insertions, 0 deletions
diff --git a/wiki-to-help/convert.py b/wiki-to-help/convert.py new file mode 100755 index 0000000000..323a834d19 --- /dev/null +++ b/wiki-to-help/convert.py @@ -0,0 +1,222 @@ +#!/usr/bin/env python +#!/usr/bin/python -i +""" +Convert an XML-Dump to platformspecific help files. +Copyright 2011 Timo Richter + +This program depends on: +mwlib +python +python-lxml +xsltproc +Microsoft HHC: http://go.microsoft.com/fwlink/?LinkId=14188 + + +""" + +import subprocess, tempfile, os, shutil, argparse + +#import mwlib_mods # is being imported. see below +from hhc import HHC +from mw import MW +from metabook_translated import MetabookTranslated +from metabook_translated import LanguageSeparator +from executor import Executor + +scriptpath=os.path.dirname(os.path.realpath(__file__) ) + +class Main(object): + ''' Defines program parameters and returns them as a dictionary ''' + def parseArgs(self): + parser = argparse.ArgumentParser(description='Conversion from a mediawiki xml-dumpfile to helpfiles') + parser.add_argument("--startpage", metavar="PATH", dest="startpage", default=None, type=str, help="Sets a HTML-file as the start page") + parser.add_argument("--images", metavar="PATH", dest="imgPath", default=None, type=str, help="Uses images from PATH. PATH is a zipfile or a directory.") + parser.add_argument("--keep", dest="keepTmp", default=False, action='store_true', help="Keeps temporary files in /tmp") + parser.add_argument("--only-en", dest="onlyEn", action='store_true', default=False, help="Converts only English articles") + parser.add_argument("--no-chm", dest="createChm", default=True, action='store_false', help="Avoids creation of a CHM file at the end") + parser.add_argument("-v", dest="verbose", default=False, action='store_true', help="Verbose") + parser.add_argument("input", type=str, help="XML input") + parser.add_argument("output", type=str, help="Directory for output") + + return parser.parse_args() + + def __init__(self): + args = self.parseArgs() + import mwlib_mods + r = Converter( + keepTmp=args.keepTmp, + createChm=args.createChm, + source=args.input, + dest=args.output, + startpage=args.startpage, + onlyEn=args.onlyEn, + imgPath=args.imgPath, + verbose=args.verbose, + )() + exit(int(not r)) + + +class Converter(object): + verbose=False + createChm = None # + keepTmp = None # + #style=os.path.join(scriptpath,'xsl/htmlhelp/htmlhelp.xsl') # final + style=os.path.join(scriptpath,'htmlhelp.xsl') # final + title="Book" # final + + tmp=None + includeFiles=[] + + def __init__(self,source,dest,onlyEn,imgPath,verbose, + keepTmp=False,createChm=True,startpage=None): + """ + Parameters are documented in Main.parseArgs() + """ + self.createChm = createChm + self.keepTmp=keepTmp + self.tmp = tempfile.mkdtemp() + self.style = os.path.abspath(self.style) + source = os.path.abspath(source) + dest = os.path.abspath(dest) + if startpage is not None: + startpage = os.path.abspath(startpage) + self.source=source + self.dest=dest + self.startpage=startpage + self.onlyEn = onlyEn + self.imgPath = imgPath + self.verbose = verbose + self.ex = Executor(showErr=verbose,showOutput=True,showCmd=verbose) + self.hhc = HHC(showErr=True,showOutput=verbose,showCmd=verbose) + self.title = self.getTitle(self.title) + + def getTitle(self,default=None): + """ + If given, return TEXT from <siteinfo><sitename>TEXT</sitename></siteinfo> + in xml file self.source. + Otherwise return @default + """ + import xml.dom.minidom + print "Loading title" + dom = xml.dom.minidom.parse(self.source) + try: + siteinfo = dom.getElementsByTagName("siteinfo")[0] + sitename = siteinfo.getElementsByTagName("sitename")[0] + name = sitename.childNodes[0].data + except IndexError: + return default + else: + return name + + def createDir(self,path): + try: + os.mkdir(path) + except OSError: + pass + + def setupImgPath(self): + """ + If --images is not given, the path will be in the format "images/name.jpg". + If --images is given a zipfile, it is being extracted to "images/". + If --images is a directory, it is being copied to "images/". + The filenames in images/ are being stored to self.includeFiles. + """ + imgDest = "images" # puts images to output/imgDest/ + if not self.imgPath: + self.imgPath = os.path.join(imgDest,"IMAGENAME") + return + extension = os.path.splitext(self.imgPath)[1].lower() + imgTmp = os.path.join(self.tmp,imgDest) + print "Copying images..." + if extension == ".zip": + self.ex("unzip","-q","-o","-j","-d",imgTmp,self.imgPath) + else: + shutil.copytree(self.imgPath,imgTmp) + shutil.copytree(imgTmp, os.path.join(self.dest,imgDest) ) + self.imgPath = os.path.join(imgDest,"IMAGENAME") + # Save filenames for inclusion in chm + for fname in os.listdir(imgTmp): + fpath = os.path.join(imgDest,fname) + self.includeFiles.append(fpath) + + def writeHhp(self): + """ + Writes changes to the .hhp-file. + self.includeFiles will be flushed to the hhp. + """ + hhp=os.path.join(self.tmp,"htmlhelp.hhp") + with open(hhp,"a") as f: + f.write("\n".join(self.includeFiles)) + + def __call__(self): + """ + Create the environment for conversion and call convert() + @return boolean Success + """ + tmp = self.tmp + self.createDir(self.dest) + + print "Working directory: "+tmp + + self.setupImgPath() + + shutil.copy(os.path.join(scriptpath,"nfo.json"),tmp) + metabook_template = os.path.join(scriptpath,"metabook.json") + ls = LanguageSeparator.fromFileToFiles(metabook_template,self.source,tmp) + MW.buildcdb(self.source,tmp) + + if self.onlyEn: + return self.convert("en",ls["en"]) + else: + + for lang, metabook in ls.iteritems(): + if not self.convert(lang,metabook): return False + + def convert(self,lang,metabook): + """ + Private. + This function executes the programs for the conversion. + @lang Language of book + @metabook Path to metabook-json-file + """ + print "Rendering language "+lang + tmp = self.tmp + docbookfile = os.path.join(tmp,"%s.xml"%lang) + chmDest = os.path.join(self.dest,lang+".chm") + + renderArgs = ("-L",lang,"-W","imagesrcresolver=%s"%self.imgPath, + "--config=%s/wikiconf.txt"%(tmp), + "-w","docbook","-o",docbookfile,"-m",metabook,"--title",self.title) + MW.quietCall(MW.render,renderArgs,showErr=self.verbose) + shutil.copy(docbookfile,self.dest) + print "Parsing docbook" + xsltreturn = self.ex("/usr/bin/xsltproc","--nonet","--novalid","-o",tmp+'/',self.style,docbookfile) + if not xsltreturn: return False + self.setStartpage(self.startpage) + self.writeHhp() + if self.createChm: + print("Compiling chm...") + self.hhc(tmp) + shutil.copy(os.path.join(tmp,'htmlhelp.chm'),chmDest) + return True + + def setStartpage(self,startpage): + """ + Private. + Copies @startpage to our tmp dir so that it will be used as the start page. + @return False if @startpage doesnt exist, otherwise True. + """ + if startpage is None: return True + filename="index.html" + if not os.path.exists(startpage): return False + os.remove(os.path.join(self.tmp,filename)) + shutil.copy(startpage, os.path.join(self.tmp,filename)) + return True + + def __del__(self): + if not self.keepTmp: + shutil.rmtree(self.tmp) # remove temp files + +if __name__ == '__main__': + Main() + |