wikihelp: Larger update to fix the behavior of tables.

author: Jan Holesovsky <kendy@suse.cz> 2010-11-12 12:31:43 +0100
committer: Jan Holesovsky <kendy@suse.cz> 2010-11-12 12:31:43 +0100
commit: 0e5e9fa9419bfe375db7d8c66712a1c7434bac36 (patch)
tree: d0c7a1d157940ef489fa62057242e228fdc509f1
parent: f9f3b89b2103d5d6868b022b170cb228045027c1 (diff)
1 files changed, 145 insertions, 188 deletions
diff --git a/helpcontent2/to-wiki/wikiconv2.py b/helpcontent2/to-wiki/wikiconv2.py
index 05edbc983e..0d4979ca41 100755
--- a/helpcontent2/to-wiki/wikiconv2.py
+++ b/helpcontent2/to-wiki/wikiconv2.py
@@ -8,21 +8,47 @@ root="source/"
 titles = []
 
 # list of elements that we can directly convert to wiki text
-replace_start_list = [
-        ["emph","'''"],
-        ["comment","<!-- "]
-        ]
-
-replace_end_list = [
-        ["emph","'''"],
-        ["comment"," -->"]
-        ]
+replace_element = \
+    {'start':{'emph': "'''",
+              'comment': "<!-- "
+             },
+     'end':  {'emph': "'''",
+              'comment': " -->"
+             }
+    }
+
+replace_paragraph_role = \
+    {'start':{'heading1': '= ',
+              'heading2': '== ',
+              'heading3': '=== ',
+              'heading4': '==== ',
+              'heading5': '===== ',
+              'heading6': '====== ',
+              'paragraph': '',
+              'tablecontent': '| ',
+              'tablehead': '! scope="col" | ',
+              'tip': '{{Tip|',
+              'warning': '{{Warning|',
+             },
+     'end':{'heading1': ' =\n\n',
+            'heading2': ' ==\n\n',
+            'heading3': ' ===\n\n',
+            'heading4': ' ====\n\n',
+            'heading5': ' =====\n\n',
+            'heading6': ' ======\n\n',
+            'paragraph': '\n\n',
+            'tablecontent': '\n\n',
+            'tablehead': '\n\n',
+            'tip': '}}\n\n',
+            'warning': '}}\n\n',
+           }
+    }
 
 # text snippets that we need to convert
-replace_text_list = [
-        ["$[officename]","{{ProductName}}"],
-        ["%PRODUCTNAME","{{ProductName}}"]
-        ]
+replace_text_list = \
+    [["$[officename]", "{{ProductName}}"],
+     ["%PRODUCTNAME", "{{ProductName}}"]
+    ]
 
 help_id_patterns = [
         "HID",
@@ -106,20 +132,20 @@ class cxml:
             self.objects.append(para)
         if not self.parser_state:
             return
-        if name == 'embed':
-            link=attrs['href'].replace('"','')
-            fname=link
-            section=""
-            if link.find("#") >= 0:
-                fname = link[:link.find("#")]
-                section = link[link.find("#")+1:]
-
-            my_attrs = {}
-            my_attrs['href'] = fname
-            my_attrs['name'] = get_link_name(fname)
-            self.objects.append(clink(my_attrs, self))
-            # add a '\n' after each of the links
-            self.objects.append(ctext(""))
+        #if name == 'embed':
+        #    link=attrs['href'].replace('"','')
+        #    fname=link
+        #    section=""
+        #    if link.find("#") >= 0:
+        #        fname = link[:link.find("#")]
+        #        section = link[link.find("#")+1:]
+
+        #    my_attrs = {}
+        #    my_attrs['href'] = fname
+        #    my_attrs['name'] = get_link_name(fname)
+        #    self.objects.append(clink(my_attrs, self))
+        #    # add a '\n' after each of the links
+        #    self.objects.append(ctext(""))
 
         if name == 'table':
             child = ctable(attrs, self)
@@ -142,19 +168,14 @@ class cxml:
 
     def get_curobj(self):
         if self.child_parsing:
-            #try:
-            #    raise self.objects[len(self.objects)-1]
-            #except cxml:
             return self.objects[len(self.objects)-1].get_curobj()
-            #except:
-            #    return self.objects[len(self.objects)-1]
-        else:
-            return self
+        return self
 
-    def print_all(self):
+    def get_all(self):
+        text = ""
         for i in self.objects:
-            i.print_all()
-
+            text = text + i.get_all()
+        return text
 
 class cbookmark:
     bookmarks_list   = []
@@ -185,10 +206,6 @@ class cbookmark:
         cbookmark.current_bookmark = self.bookmark
         return ""
 
-    def print_all(self):
-        self.get_all()
-        pass
-
     def get_curobj(self):
         return self
 
@@ -248,9 +265,6 @@ class cimage:
         wikitext = wikitext + self.alttext+"]]"
         return wikitext
 
-    def print_all(self):
-        print self.get_all()
-
     def get_curobj(self):
         return self
 
@@ -258,8 +272,8 @@ class ctext:
     def __init__(self, text):
         self.wikitext = replace_text(text)
 
-    def print_all(self):
-        print self.wikitext
+    def get_all(self):
+        return self.wikitext
 
 class ctabcell:
     def __init__(self, attrs, parent):
@@ -267,39 +281,24 @@ class ctabcell:
         self.objects = []
         self.child_parsing = False
         self.parent = parent
-        self.header = False
-        pass
 
     def start_element(self, name, attrs):
         if name == 'paragraph':
-            if attrs['role'] == 'tablehead':
-                self.header = True
-            para=cparagraph(attrs, self, '', 0)
-            self.child_parsing=True
+            para = cparagraph(attrs, self, '', 0)
+            self.child_parsing = True
             self.objects.append(para)
-        pass
 
     def end_element(self, name):
         if name == 'tablecell':
             self.parent.child_parsing = False
-        pass
 
     def char_data(self, data):
-        pass
-
-    def print_all(self):
-        for i in self.objects:
-            i.print_all()
+        return
 
     def get_all(self):
         text = ""
-        first = True
         for i in self.objects:
-            if first:
-                text = i.get_all()
-                first = False
-            else:
-                text = text + "\n" + i.get_all()
+            text = text + i.get_all()
         return text
 
     def get_curobj(self):
@@ -307,73 +306,65 @@ class ctabcell:
             return self.objects[len(self.objects)-1].get_curobj()
         return self
 
-
-class ctable:
+class ctabrow:
     def __init__(self, attrs, parent):
-        # TODO/Check: Might Require filtering too...
-        try:
-            self.tableid    = attrs['id']
-        except:
-            self.tableid    = 0
-        self.header         = []
-        self.crow           = []
-        self.content        = [[]]
-        self.child_parsing  = False
-        self.child          = None
-        self.parent         = parent
-
-    def check_add_cell(self):
-        if self.child:
-            self.crow.append(self.child)
-            self.child = None
-
-    def check_add_row(self):
-        if len(self.crow):
-            if self.crow[0].header:
-                self.header = self.crow
-            else:
-                self.content.append(self.crow)
-            self.crow = []
+        self.objects = []
+        self.child_parsing = False
+        self.parent = parent
 
     def start_element(self, name, attrs):
         if name == 'tablecell':
-            self.check_add_cell()
-            self.child = ctabcell(attrs, self)
+            tabcell = ctabcell(attrs, self)
             self.child_parsing = True
+            self.objects.append(tabcell)
+
+    def end_element(self, name):
+        if name == 'tablerow':
+            self.parent.child_parsing = False
+
+    def char_data(self, data):
+        return
+
+    def get_all(self):
+        text = '|-\n'
+        for i in self.objects:
+            text = text + i.get_all()
+        return text
+
+    def get_curobj(self):
+        if self.child_parsing:
+            return self.objects[len(self.objects)-1].get_curobj()
+        return self
+
+class ctable:
+    def __init__(self, attrs, parent):
+        self.objects = []
+        self.child_parsing = False
+        self.parent = parent
+
+    def start_element(self, name, attrs):
         if name == 'tablerow':
-            self.check_add_cell()
-            self.check_add_row()
+            tabrow = ctabrow(attrs, self)
+            self.child_parsing = True
+            self.objects.append(tabrow)
 
     def end_element(self, name):
         if name == 'table':
-            # the following checks may be unnecessary
-            self.check_add_cell()
-            self.check_add_row()
             self.parent.child_parsing = False
 
     def char_data(self, data):
-        pass
+        return
 
     def get_all(self):
         text = '{| border="1"\n' # + ' align="left"'
-        if len(self.header):
-            # text = text + "\n|+ caption"
-            text = text +"|-\n"
-            for i in self.header:
-                text = text + '! scope="col" | ' + i.get_all()
-        for i in self.content:
-            text = text + "|-\n"
-            for j in i:
-                text = text + "| "+j.get_all()
-        text = text + "|}\n"
+        for i in self.objects:
+            text = text + i.get_all()
+        text = text + '|}\n\n'
         return text
 
-    def print_all(self):
-        print self.get_all().encode('ascii','replace')
-
     def get_curobj(self):
         if self.child_parsing:
-            return self.child.get_curobj()
+            return self.objects[len(self.objects)-1].get_curobj()
         return self
 
 class clink:
@@ -410,9 +401,6 @@ class clink:
             text = "[["+self.lname+"|"+self.wikitext+"]]"
         return text
 
-    def print_all(self):
-        print self.get_all()
-
     def get_curobj(self):
         return self
 
@@ -432,18 +420,13 @@ class cvariable:
         if name == 'variable':
             parent.child_parsing = False
 
-    def print_all(self):
-        print self.wikitext
-
 class cparagraph:
     def __init__(self, attrs, parent, sectionid, depth):
         self.child_parsing = False
-        self.heading=False
         try:
-            if attrs['role'] == "heading":
-                self.heading = True
+            self.role = attrs['role']
         except:
-            pass
+            self.role = 'paragraph'
 
         #try:
         #    self.level=parent.level+1
@@ -463,6 +446,7 @@ class cparagraph:
         self.wikitext=""
         if sectionid != "":
             self.parser_state = False
+        self.is_first = (len(self.parent.objects) == 0)
 
     def __del__(self):
         pass
@@ -471,13 +455,6 @@ class cparagraph:
         if name == 'variable':
             if attrs['id'] == self.filter_section:
                 self.parser_state=True
-        if name == 'paragraph':
-            if not self.parser_state:
-                child = cparagraph(attrs, self, self.filter_section, self.depth+1)
-            else:
-                child = cparagraph(attrs, self, "", self.depth+1)
-            self.child_parsing = True
-            self.objects.append(child)
 
         if not self.parser_state:
             return
@@ -496,11 +473,11 @@ class cparagraph:
             # This shouldn't occur
             print "Warning: Unhandled bookmark content!!!"
 
-        global replace_start_list
-        for n in replace_start_list:
-            if n[0] == name:
-                self.objects.append(ctext(n[1]))
-                break
+        try:
+            global replace_element
+            self.objects.append(ctext(replace_element['start'][name]))
+        except:
+            pass
 
     def end_element(self, name):
         if name == 'paragraph':
@@ -510,11 +487,11 @@ class cparagraph:
         if self.filter_section != "" and name == 'variable':
             self.parser_state = False
 
-        global replace_end_list
-        for n in replace_end_list:
-            if n[0] == name:
-                self.objects.append(ctext(n[1]))
-                break
+        try:
+            global replace_element
+            self.objects.append(ctext(replace_element['end'][name]))
+        except:
+            pass
 
     def char_data(self, data):
         if not self.parser_state or not len(data.strip()):
@@ -522,66 +499,46 @@ class cparagraph:
         self.objects.append(ctext(data))
         #self.wikitext = self.wikitext + text
 
-    def print_all(self):
-        #if self.wikitext != "":
-        #    print self.wikitext
-        text = self.get_all()
-        if len(text):
-            print text.encode('ascii','replace')
-        return
+    def get_all(self):
+        role = self.role
+        if role == 'heading':
+            if self.depth < 6:
+                role = 'heading%d'% self.depth
+            else:
+                role = 'heading6'
+        if ( role == 'tablecontent' or role == 'tablehead' ) and not self.is_first:
+            role = 'paragraph'
 
-        for i in self.objects:
+        # prepend the markup according to the role
+        if len(self.objects) > 0:
             try:
-                raise i
-            except ctext:
-                self.wikitext = self.wikitext + i.wikitext
-            except clink:
-                self.wikitext = self.wikitext + i.get_all() + " "
+                self.wikitext = self.wikitext + replace_paragraph_role['start'][role]
             except:
-                if len(self.wikitext):
-                    print self.wikitext
-                    self.wikitext=""
-                i.print_all()
-        if len(self.wikitext):
-            print self.wikitext
-
-    def get_all(self):
-        # mark this as the heading
-        if len(self.objects) > 0 and self.heading:
-            self.wikitext = self.wikitext + heading(self.depth) + " "
+                sys.stderr.write( "Unknown paragraph role start: " + role + "\n" )
 
+        # the text itself
+        text = ""
         for i in self.objects:
+            text = text + i.get_all()
+        self.wikitext = self.wikitext + text
+
+        # set bookmark info
+        if self.role == "heading":
+            cbookmark.set_heading(text)
+
+        # append the markup according to the role
+        if len(self.objects) > 0:
             try:
-                raise i
-            except ctext:
-                self.wikitext = self.wikitext + i.wikitext
-            except clink:
-                self.wikitext = self.wikitext + i.get_all() + " "
+                self.wikitext = self.wikitext + replace_paragraph_role['end'][role]
             except:
-                if len(self.wikitext):
-                    self.wikitext = self.wikitext + "\n"
-                self.wikitext = self.wikitext + "\n" + i.get_all()
-
-        # end of the heading mark
-        if len(self.objects) > 0 and self.heading:
-            self.wikitext = self.wikitext + " " + heading(self.depth)
-            # Set bookmark info
-            head_txt = self.wikitext
-            head_txt = head_txt[head_txt.find("= ")+2:]
-            head_txt = head_txt[:head_txt.find(" =")]
-            cbookmark.set_heading(head_txt)
-
-        # write an additional \n at the end of paragraph
-        if len(self.objects) > 0:
-            self.wikitext = self.wikitext + "\n"
+                sys.stderr.write( "Unknown paragraph role end: " + role + "\n" )
 
         return self.wikitext
 
     def get_curobj(self):
         if self.child_parsing:
             return self.objects[len(self.objects)-1].get_curobj()
-        else:
-            return self
+        return self
 
 head_obj=cxml("")
 def start_element(name, attrs):
@@ -624,5 +581,5 @@ if len(sys.argv) > 2:
 
 loadallfiles("alltitles.csv")
 parsexhp(sys.argv[1])
-head_obj.print_all()
+print head_obj.get_all().encode('ascii','replace')
 cbookmark.save_bookmarks()
author	Jan Holesovsky <kendy@suse.cz>	2010-11-12 12:31:43 +0100
committer	Jan Holesovsky <kendy@suse.cz>	2010-11-12 12:31:43 +0100
commit	0e5e9fa9419bfe375db7d8c66712a1c7434bac36 (patch)
tree	d0c7a1d157940ef489fa62057242e228fdc509f1
parent	f9f3b89b2103d5d6868b022b170cb228045027c1 (diff)