wikihelp: Simplification of the localization process.

author: Jan Holesovsky <kendy@suse.cz> 2011-01-18 00:00:16 +0100
committer: Jan Holesovsky <kendy@suse.cz> 2011-01-18 00:00:16 +0100
commit: 4f1c38ea3869f42e1ec299224e2ef468e17d2778 (patch)
tree: e7b70f28512b52318006a90196bbc0b023ae3f2a
parent: 7e5c805ecda3967bb251305f0da78f005254511e (diff)
1 files changed, 60 insertions, 104 deletions
diff --git a/helpcontent2/to-wiki/wikiconv2.py b/helpcontent2/to-wiki/wikiconv2.py
index 719ea120c4..9d9ab57b0c 100755
--- a/helpcontent2/to-wiki/wikiconv2.py
+++ b/helpcontent2/to-wiki/wikiconv2.py
@@ -199,7 +199,7 @@ def replace_gt_lt(str,char,replace):
             str = str[:index]+replace+str[index+1:]
     return str[1:]
 
-def get_localized_text(id, text):
+def get_localized_text(id):
     # Note: The order is important
     replace_localized_strs = [
             ["\\\"","\""],
@@ -223,10 +223,6 @@ def get_localized_text(id, text):
             return str
     return ""
 
-def get_localized_objects(parser, loc_text, attrs):
-    p = LocalizedText(parser, loc_text, attrs)
-    return p.parse()
-
 def href_to_fname_id(href):
     link = href.replace('"', '')
     fname = link
@@ -310,9 +306,10 @@ class ElementBase:
                     (id, fname))
 
     def unhandled_element(self, parser, name):
-        filename = "Localization File"
-        if parser:
+        try:
             filename = parser.filename
+        except:
+            filename = "Localized paragraph"
         sys.stderr.write('Warning: Unhandled element "%s" in "%s" (%s)\n'% \
                         (name, self.name, filename))
 
@@ -372,55 +369,6 @@ class XhpFile(ElementBase):
         else:
             self.unhandled_element(parser, name)
 
-class LocalizedText(ElementBase):
-    def __init__(self, parser, data, attrs):
-        # Initialized with some 'tag' such that the parser
-        # never needs to access the parent (which in this
-        # case is null)
-        ElementBase.__init__(self, 'localizedtext', None)
-        header = u'<?xml version="1.0" encoding="UTF-8"?><paragraph>'
-        self.data = header + data #+ '</paragraph>'
-        self.xml = self.data.encode('utf-8')
-        self.follow_embed = True
-        self.head_obj = None
-        self.attrs = attrs
-        self.parser = parser
-        #print self.data.encode('utf-8')
-
-    def parse(self):
-        p = xml.parsers.expat.ParserCreate()
-        p.StartElementHandler = self.start_element
-        p.EndElementHandler = self.end_element
-        p.CharacterDataHandler = self.char_data
-        try:
-            p.Parse(self.xml)
-        except:
-            # TODO: Check different exceptions
-            sys.stderr.write('Trying to parse: '+self.xml+'\n')
-            print self.xml
-            raise
-        return self.objects[0].objects
-
-    def start_element(self, name, attrs):
-        if name == 'paragraph':
-            self.parse_child(Paragraph(self.attrs, self))
-        else:
-            if self.child_parsing:
-                self.get_curobj().start_element(self.parser, name, attrs)
-            else:
-                self.unhandled_element(None, name)
-
-    def char_data(self, data):
-        if self.child_parsing:
-            self.get_curobj().char_data(self, data)
-        else:
-            # Should never occur
-            self.unhandled_element(None,"Unhandled Data:"+data)
-
-    def end_element(self, name):
-        if self.child_parsing:
-            self.get_curobj().end_element(self, name)
-
 class Bookmark(ElementBase):
     def __init__(self, attrs, parent, type, parser):
         ElementBase.__init__(self, 'bookmark', parent)
@@ -975,7 +923,6 @@ class Paragraph(ElementBase):
             self.level = 0
 
         self.is_first = (len(self.parent.objects) == 0)
-        self.localized_objects = []
 
     def start_element(self, parser, name, attrs):
         if name == 'ahelp':
@@ -1002,6 +949,10 @@ class Paragraph(ElementBase):
             self.parse_child(Item(attrs, self))
         elif name == 'link':
             self.parse_child(Link(attrs, self))
+        elif name == 'localized':
+            # we ignore this tag, it is added arbitrary for the paragraphs
+            # that come from .sdf files
+            pass
         elif name == 'switchinline':
             self.parse_child(SwitchInline(attrs, self, parser.embedding_app))
         elif name == 'variable':
@@ -1009,9 +960,6 @@ class Paragraph(ElementBase):
         else:
             self.unhandled_element(parser, name)
 
-    def end_element(self, parser, name):
-        ElementBase.end_element(self, parser, name)
-
     def char_data(self, parser, data):
         if self.role == 'paragraph' or self.role == 'heading' or \
                 self.role == 'listitem' or self.role == 'variable':
@@ -1019,23 +967,10 @@ class Paragraph(ElementBase):
                 data = ' ' + data.lstrip()
             data = data.replace('\n', ' ')
 
-        if len(self.localized_objects):
-            return
-        loc_text = u''
-        if len(self.id):
-            loc_text = get_localized_text(self.id, data)
-        if len(loc_text):
-            attrs = {'role':self.role,
-                     'level':self.level}
-            self.localized_objects = get_localized_objects(parser, loc_text, attrs)
-        elif len(data):
+        if len(data):
             self.objects.append(Text(data))
 
     def get_all(self):
-        # Localization objects present, drop the other objects
-        if len(self.localized_objects):
-            self.objects = self.localized_objects
-
         role = self.role
         if role == 'heading':
             if self.level <= 0:
@@ -1130,44 +1065,20 @@ class TableContentParagraph(Paragraph):
             else:
                 self.role = 'tablecontent'
 
-class XhpParser:
-    def __init__(self, filename, follow_embed, embedding_app, wiki_page_name):
-        self.head_obj = XhpFile()
-        self.filename = filename
+class ParserBase:
+    def __init__(self, follow_embed, embedding_app, current_app, wiki_page_name, head_object, buffer):
+        self.head_obj = head_object
         self.follow_embed = follow_embed
+        self.embedding_app = embedding_app
+        self.current_app = current_app
         self.wiki_page_name = wiki_page_name
 
-        # we want to ignore the 1st level="1" heading, because in most of the
-        # cases, it is the only level="1" heading in the file, and it is the
-        # same as the page title
-        self.ignore_heading = True
-
-        self.current_app = ''
-        self.current_app_raw = ''
-        for i in [['sbasic', 'BASIC'], ['scalc', 'CALC'], \
-                  ['sdatabase', 'DATABASE'], ['sdraw', 'DRAW'], \
-                  ['schart', 'CHART'], ['simpress', 'IMPRESS'], \
-                  ['smath', 'MATH'], ['swriter', 'WRITER']]:
-            if filename.find('/%s/'% i[0]) >= 0:
-                self.current_app_raw = i[0]
-                self.current_app = i[1]
-                break
-
-        if embedding_app != '':
-            self.embedding_app = embedding_app
-        else:
-            self.embedding_app = self.current_app
-
-        file = codecs.open(filename, "r", "utf-8")
         p = xml.parsers.expat.ParserCreate()
-
         p.StartElementHandler = self.start_element
         p.EndElementHandler = self.end_element
         p.CharacterDataHandler = self.char_data
 
-        buf = file.read()
-        p.Parse(buf.encode('utf-8'))
-        file.close()
+        p.Parse(buffer)
 
     def start_element(self, name, attrs):
         self.head_obj.get_curobj().start_element(self, name, attrs)
@@ -1193,11 +1104,56 @@ class XhpParser:
                 ignore_this = True
         except:
             pass
+
+        try:
+            localized_text = get_localized_text(attrs['id'])
+        except:
+            pass
+
         if ignore_this:
             obj.parse_child(Ignore(attrs, obj, 'paragraph'))
+        elif localized_text != '':
+            # parse the localized text
+            localized_para = Paragraph(attrs, obj)
+            text = u'<?xml version="1.0" encoding="UTF-8"?><localized>' + localized_text + '</localized>'
+            ParserBase(self.follow_embed, self.embedding_app, self.current_app, self.wiki_page_name, \
+                    localized_para, text.encode('utf-8'))
+            # add it to the overall structure
+            obj.objects.append(localized_para)
+            # and ignore the original text
+            obj.parse_child(Ignore(attrs, obj, 'paragraph'))
         else:
             obj.parse_child(Paragraph(attrs, obj))
 
+class XhpParser(ParserBase):
+    def __init__(self, filename, follow_embed, embedding_app, wiki_page_name):
+        self.filename = filename
+
+        # we want to ignore the 1st level="1" heading, because in most of the
+        # cases, it is the only level="1" heading in the file, and it is the
+        # same as the page title
+        self.ignore_heading = True
+
+        current_app = ''
+        self.current_app_raw = ''
+        for i in [['sbasic', 'BASIC'], ['scalc', 'CALC'], \
+                  ['sdatabase', 'DATABASE'], ['sdraw', 'DRAW'], \
+                  ['schart', 'CHART'], ['simpress', 'IMPRESS'], \
+                  ['smath', 'MATH'], ['swriter', 'WRITER']]:
+            if filename.find('/%s/'% i[0]) >= 0:
+                self.current_app_raw = i[0]
+                current_app = i[1]
+                break
+
+        if embedding_app == '':
+            embedding_app = current_app
+
+        file = codecs.open(filename, "r", "utf-8")
+        buf = file.read()
+        file.close()
+
+        ParserBase.__init__(self, follow_embed, embedding_app, current_app, wiki_page_name, XhpFile(), buf.encode('utf-8'))
+
 def loadallfiles(filename):
     global titles
     file = codecs.open(filename, "r", "utf-8")
author	Jan Holesovsky <kendy@suse.cz>	2011-01-18 00:00:16 +0100
committer	Jan Holesovsky <kendy@suse.cz>	2011-01-18 00:00:16 +0100
commit	4f1c38ea3869f42e1ec299224e2ef468e17d2778 (patch)
tree	e7b70f28512b52318006a90196bbc0b023ae3f2a
parent	7e5c805ecda3967bb251305f0da78f005254511e (diff)