From 4c61129db43e2c32ba3f184aa85b3e9f369f3839 Mon Sep 17 00:00:00 2001 From: Jan Holesovsky Date: Fri, 26 Nov 2010 11:47:51 +0100 Subject: wikihelp: Improve the titles of the pages. --- helpcontent2/to-wiki/getalltitles.py | 62 +++++++++++++++++++----------------- 1 file changed, 33 insertions(+), 29 deletions(-) diff --git a/helpcontent2/to-wiki/getalltitles.py b/helpcontent2/to-wiki/getalltitles.py index f4034ab34d..1e80584c2c 100755 --- a/helpcontent2/to-wiki/getalltitles.py +++ b/helpcontent2/to-wiki/getalltitles.py @@ -4,9 +4,6 @@ import sys import os import xml.parsers.expat -title="" -parsing=True -istitle=False alltitles=[] def is_present(title): @@ -27,8 +24,8 @@ def make_unique(title): return t replace_text_list = [ - ["$[officename]","LibreOffice"], - ["%PRODUCTNAME","LibreOffice"], + ["$[officename]",""], # to fit both LibreOffice and BrOffice + ["%PRODUCTNAME",""], # to fit both LibreOffice and BrOffice ['"+"',"Plus"], ['"*"',"Star"], ['"-"',"Minus"], @@ -36,6 +33,8 @@ replace_text_list = [ ['"^"',"Cap"], [')','_'], ['(','_'], + [']','_'], + ['[','_'], ['\\','_'], ['/','_'], ['&',"and"], @@ -71,41 +70,41 @@ def replace_text(text, replace_list): return text def wiki_text(text): - return replace_text(text, replace_text_list) + t = replace_text(text, replace_text_list) + if t == '': + t = 'LibreOffice' # hardcoded fallback + return t.strip() def readable_text(text): return replace_text(text, replace_readable_list) -def start_element(name, attrs): - global parsing, istitle, title - if not parsing: - return - if name == 'title': - istitle=True - title = "" +class TitleParser: + title = '' + is_title = False -def end_element(name): - global parsing, istitle - if not parsing: - return - if name == 'title': - parsing = False - istitle = False + def start_element(self, name, attrs): + if name == 'title': + self.is_title = True -def char_data(data): - global title, parsing - if not istitle: - return - title = title + data + def end_element(self, name): + if name == 'title': + self.is_title = False + + def char_data(self, data): + if self.is_title: + self.title = self.title + data + + def get_title(self): + return self.title.strip() def parsexhp(filename): - global parsing, title parsing = True file=open(filename,"r") p = xml.parsers.expat.ParserCreate() - p.StartElementHandler = start_element - p.EndElementHandler = end_element - p.CharacterDataHandler = char_data + tp = TitleParser() + p.StartElementHandler = tp.start_element + p.EndElementHandler = tp.end_element + p.CharacterDataHandler = tp.char_data buf = file.read() try: p.Parse(buf) @@ -114,10 +113,13 @@ def parsexhp(filename): file.close() return file.close() + title = tp.get_title() if len(title): readable_title = readable_text(title) title = get_module(filename) + "/" + wiki_text(title) title = title.replace(" ", "_") + title = title.replace("___", "_") + title = title.replace("__", "_") title = make_unique(title) alltitles.append(title) print filename + ";" + title + ";" + readable_title @@ -133,3 +135,5 @@ for root, dirs, files in os.walk(sys.argv[1]): for i in files: if i.find(pattern) >= 0: parsexhp(root+"/"+i) + +# vim:set shiftwidth=4 softtabstop=4 expandtab: -- cgit