From d1db994e9fd850ef4163921efde7804f2b074520 Mon Sep 17 00:00:00 2001 From: Korrawit Pruegsanusak Date: Fri, 2 Dec 2011 22:27:03 +0700 Subject: fdo#42924 don't ignore empty TableCell Element, also check if it's a header Checking whether an empty TableCell Element is a header row is now in 2 cases: * | e | h | h | h | h | ... => The empty cell should be a header. This case occurs when this row is the first row in the table. * | h | p | p | e | p | ... => The empty cell should not be a header. This case occurs when this row is not the first row in the table, but the first column is the row header. where | is a column separator, e is an empty cell, h is a header cell (which has role="tablehead" attribute), and p is not a header cell. Note that parsing occurs left-to-right, so isTableHeader depends on the last TableCell Element in that row. I assume that if the last element is a header, that row should be a header row. Currently this code gives correct behaviour, but checking whether a row is the first row might be more correct. 
--- helpcontent2/to-wiki/wikiconv2.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'helpcontent2/to-wiki') diff --git a/helpcontent2/to-wiki/wikiconv2.py b/helpcontent2/to-wiki/wikiconv2.py index acc1b457b7..7772030aca 100755 --- a/helpcontent2/to-wiki/wikiconv2.py +++ b/helpcontent2/to-wiki/wikiconv2.py @@ -475,8 +475,10 @@ class Text: class TableCell(ElementBase): def __init__(self, attrs, parent): ElementBase.__init__(self, 'tablecell', parent) + self.cellHasChildElement = False def start_element(self, parser, name, attrs): + self.cellHasChildElement = True if name == 'bookmark': self.parse_child(Bookmark(attrs, self, 'div', parser)) elif name == 'comment': @@ -492,6 +494,18 @@ class TableCell(ElementBase): else: self.unhandled_element(parser, name) + def get_all(self): + text = '' + if not self.cellHasChildElement: # an empty element + if self.parent.isTableHeader: # get from TableRow Element + role = 'tablehead' + else: + role = 'tablecontent' + text = text + replace_paragraph_role['start'][role] + text = text + replace_paragraph_role['end'][role] + text = text + ElementBase.get_all(self) + return text + class TableRow(ElementBase): def __init__(self, attrs, parent): ElementBase.__init__(self, 'tablerow', parent) @@ -1089,6 +1103,10 @@ class TableContentParagraph(Paragraph): self.role = 'tablecontentcode' else: self.role = 'tablecontent' + if self.role == 'tablehead': + self.parent.parent.isTableHeader = True # self.parent.parent is TableRow Element + else: + self.parent.parent.isTableHeader = False class ParserBase: def __init__(self, filename, follow_embed, embedding_app, current_app, wiki_page_name, lang, head_object, buffer): -- cgit