diff options
author | Jan Holesovsky <kendy@suse.cz> | 2011-01-18 00:00:16 +0100 |
---|---|---|
committer | Jan Holesovsky <kendy@suse.cz> | 2011-01-18 00:00:16 +0100 |
commit | 4f1c38ea3869f42e1ec299224e2ef468e17d2778 (patch) | |
tree | e7b70f28512b52318006a90196bbc0b023ae3f2a /helpcontent2 | |
parent | 7e5c805ecda3967bb251305f0da78f005254511e (diff) |
wikihelp: Simplification of the localization process.
Diffstat (limited to 'helpcontent2')
-rwxr-xr-x | helpcontent2/to-wiki/wikiconv2.py | 164 |
1 files changed, 60 insertions, 104 deletions
diff --git a/helpcontent2/to-wiki/wikiconv2.py b/helpcontent2/to-wiki/wikiconv2.py index 719ea120c4..9d9ab57b0c 100755 --- a/helpcontent2/to-wiki/wikiconv2.py +++ b/helpcontent2/to-wiki/wikiconv2.py @@ -199,7 +199,7 @@ def replace_gt_lt(str,char,replace): str = str[:index]+replace+str[index+1:] return str[1:] -def get_localized_text(id, text): +def get_localized_text(id): # Note: The order is important replace_localized_strs = [ ["\\\"","\""], @@ -223,10 +223,6 @@ def get_localized_text(id, text): return str return "" -def get_localized_objects(parser, loc_text, attrs): - p = LocalizedText(parser, loc_text, attrs) - return p.parse() - def href_to_fname_id(href): link = href.replace('"', '') fname = link @@ -310,9 +306,10 @@ class ElementBase: (id, fname)) def unhandled_element(self, parser, name): - filename = "Localization File" - if parser: + try: filename = parser.filename + except: + filename = "Localized paragraph" sys.stderr.write('Warning: Unhandled element "%s" in "%s" (%s)\n'% \ (name, self.name, filename)) @@ -372,55 +369,6 @@ class XhpFile(ElementBase): else: self.unhandled_element(parser, name) -class LocalizedText(ElementBase): - def __init__(self, parser, data, attrs): - # Initialized with some 'tag' such that the parser - # never needs to access the parent (which in this - # case is null) - ElementBase.__init__(self, 'localizedtext', None) - header = u'<?xml version="1.0" encoding="UTF-8"?><paragraph>' - self.data = header + data #+ '</paragraph>' - self.xml = self.data.encode('utf-8') - self.follow_embed = True - self.head_obj = None - self.attrs = attrs - self.parser = parser - #print self.data.encode('utf-8') - - def parse(self): - p = xml.parsers.expat.ParserCreate() - p.StartElementHandler = self.start_element - p.EndElementHandler = self.end_element - p.CharacterDataHandler = self.char_data - try: - p.Parse(self.xml) - except: - # TODO: Check different exceptions - sys.stderr.write('Trying to parse: '+self.xml+'\n') - print self.xml - raise - return self.objects[0].objects - - def start_element(self, name, attrs): - if name == 'paragraph': - self.parse_child(Paragraph(self.attrs, self)) - else: - if self.child_parsing: - self.get_curobj().start_element(self.parser, name, attrs) - else: - self.unhandled_element(None, name) - - def char_data(self, data): - if self.child_parsing: - self.get_curobj().char_data(self, data) - else: - # Should never occur - self.unhandled_element(None,"Unhandled Data:"+data) - - def end_element(self, name): - if self.child_parsing: - self.get_curobj().end_element(self, name) - class Bookmark(ElementBase): def __init__(self, attrs, parent, type, parser): ElementBase.__init__(self, 'bookmark', parent) @@ -975,7 +923,6 @@ class Paragraph(ElementBase): self.level = 0 self.is_first = (len(self.parent.objects) == 0) - self.localized_objects = [] def start_element(self, parser, name, attrs): if name == 'ahelp': @@ -1002,6 +949,10 @@ class Paragraph(ElementBase): self.parse_child(Item(attrs, self)) elif name == 'link': self.parse_child(Link(attrs, self)) + elif name == 'localized': + # we ignore this tag, it is added arbitrary for the paragraphs + # that come from .sdf files + pass elif name == 'switchinline': self.parse_child(SwitchInline(attrs, self, parser.embedding_app)) elif name == 'variable': @@ -1009,9 +960,6 @@ class Paragraph(ElementBase): else: self.unhandled_element(parser, name) - def end_element(self, parser, name): - ElementBase.end_element(self, parser, name) - def char_data(self, parser, data): if self.role == 'paragraph' or self.role == 'heading' or \ self.role == 'listitem' or self.role == 'variable': @@ -1019,23 +967,10 @@ class Paragraph(ElementBase): data = ' ' + data.lstrip() data = data.replace('\n', ' ') - if len(self.localized_objects): - return - loc_text = u'' - if len(self.id): - loc_text = get_localized_text(self.id, data) - if len(loc_text): - attrs = {'role':self.role, - 'level':self.level} - self.localized_objects = get_localized_objects(parser, loc_text, attrs) - elif len(data): + if len(data): self.objects.append(Text(data)) def get_all(self): - # Localization objects present, drop the other objects - if len(self.localized_objects): - self.objects = self.localized_objects - role = self.role if role == 'heading': if self.level <= 0: @@ -1130,44 +1065,20 @@ class TableContentParagraph(Paragraph): else: self.role = 'tablecontent' -class XhpParser: - def __init__(self, filename, follow_embed, embedding_app, wiki_page_name): - self.head_obj = XhpFile() - self.filename = filename +class ParserBase: + def __init__(self, follow_embed, embedding_app, current_app, wiki_page_name, head_object, buffer): + self.head_obj = head_object self.follow_embed = follow_embed + self.embedding_app = embedding_app + self.current_app = current_app self.wiki_page_name = wiki_page_name - # we want to ignore the 1st level="1" heading, because in most of the - # cases, it is the only level="1" heading in the file, and it is the - # same as the page title - self.ignore_heading = True - - self.current_app = '' - self.current_app_raw = '' - for i in [['sbasic', 'BASIC'], ['scalc', 'CALC'], \ - ['sdatabase', 'DATABASE'], ['sdraw', 'DRAW'], \ - ['schart', 'CHART'], ['simpress', 'IMPRESS'], \ - ['smath', 'MATH'], ['swriter', 'WRITER']]: - if filename.find('/%s/'% i[0]) >= 0: - self.current_app_raw = i[0] - self.current_app = i[1] - break - - if embedding_app != '': - self.embedding_app = embedding_app - else: - self.embedding_app = self.current_app - - file = codecs.open(filename, "r", "utf-8") p = xml.parsers.expat.ParserCreate() - p.StartElementHandler = self.start_element p.EndElementHandler = self.end_element p.CharacterDataHandler = self.char_data - buf = file.read() - p.Parse(buf.encode('utf-8')) - file.close() + p.Parse(buffer) def start_element(self, name, attrs): self.head_obj.get_curobj().start_element(self, name, attrs) @@ -1193,11 +1104,56 @@ class XhpParser: ignore_this = True except: pass + + try: + localized_text = get_localized_text(attrs['id']) + except: + pass + if ignore_this: obj.parse_child(Ignore(attrs, obj, 'paragraph')) + elif localized_text != '': + # parse the localized text + localized_para = Paragraph(attrs, obj) + text = u'<?xml version="1.0" encoding="UTF-8"?><localized>' + localized_text + '</localized>' + ParserBase(self.follow_embed, self.embedding_app, self.current_app, self.wiki_page_name, \ + localized_para, text.encode('utf-8')) + # add it to the overall structure + obj.objects.append(localized_para) + # and ignore the original text + obj.parse_child(Ignore(attrs, obj, 'paragraph')) else: obj.parse_child(Paragraph(attrs, obj)) +class XhpParser(ParserBase): + def __init__(self, filename, follow_embed, embedding_app, wiki_page_name): + self.filename = filename + + # we want to ignore the 1st level="1" heading, because in most of the + # cases, it is the only level="1" heading in the file, and it is the + # same as the page title + self.ignore_heading = True + + current_app = '' + self.current_app_raw = '' + for i in [['sbasic', 'BASIC'], ['scalc', 'CALC'], \ + ['sdatabase', 'DATABASE'], ['sdraw', 'DRAW'], \ + ['schart', 'CHART'], ['simpress', 'IMPRESS'], \ + ['smath', 'MATH'], ['swriter', 'WRITER']]: + if filename.find('/%s/'% i[0]) >= 0: + self.current_app_raw = i[0] + current_app = i[1] + break + + if embedding_app == '': + embedding_app = current_app + + file = codecs.open(filename, "r", "utf-8") + buf = file.read() + file.close() + + ParserBase.__init__(self, follow_embed, embedding_app, current_app, wiki_page_name, XhpFile(), buf.encode('utf-8')) + def loadallfiles(filename): global titles file = codecs.open(filename, "r", "utf-8") |