From ba8241b6b1ddd1db8f597152b440c947a8702cd6 Mon Sep 17 00:00:00 2001 From: Dávid Vastag Date: Wed, 6 Feb 2013 19:10:25 +0100 Subject: More error detecting capability has been added to the help-to-wiki tool Change-Id: I0581e808fe9fd53c4a5b9c77139d71f006c4b507 --- to-wiki/wikiconv2.py | 49 +++++++++++++++++++++++++++++++++---------------- 1 file changed, 33 insertions(+), 16 deletions(-) (limited to 'to-wiki/wikiconv2.py') diff --git a/to-wiki/wikiconv2.py b/to-wiki/wikiconv2.py index 37bf0f5dbf..0e3bb5bf97 100755 --- a/to-wiki/wikiconv2.py +++ b/to-wiki/wikiconv2.py @@ -7,7 +7,7 @@ # file, You can obtain one at http://mozilla.org/MPL/2.0/. # -import os, sys, thread, threading, time, re +import os, sys, thread, threading, time, re, copy import xml.parsers.expat import codecs from threading import Thread @@ -298,6 +298,9 @@ def href_to_fname_id(href): return [fname, id] +# Exception classes +class UnhandledItemType(Exception): + pass # Base class for all the elements # # self.name - name of the element, to drop the self.child_parsing flag @@ -548,7 +551,7 @@ class TableCell(ElementBase): if parser.follow_embed: self.embed_href(parser, fname, id) elif name == 'paragraph': - parser.parse_localized_paragraph(TableContentParagraph(attrs, self), attrs, self) + parser.parse_localized_paragraph(TableContentParagraph, attrs, self) elif name == 'section': self.parse_child(Section(attrs, self)) elif name == 'bascode': @@ -589,7 +592,7 @@ class BasicCode(ElementBase): def start_element(self, parser, name, attrs): if name == 'paragraph': - parser.parse_localized_paragraph(BasicCodeParagraph(attrs, self), attrs, self) + parser.parse_localized_paragraph(BasicCodeParagraph, attrs, self) else: self.unhandled_element(parser, name) @@ -628,7 +631,7 @@ class ListItem(ElementBase): if parser.follow_embed: self.embed_href(parser, fname, id) elif name == 'paragraph': - parser.parse_localized_paragraph(ListItemParagraph(attrs, self), attrs, self) + parser.parse_localized_paragraph(ListItemParagraph, attrs, self) elif name == 'list': self.parse_child(List(attrs, self)) else: @@ -1018,8 +1021,12 @@ class Item(ElementBase): text + \ self.replace_type['end'][self.type] except: - sys.stderr.write('Unhandled item type "%s".\n'% self.type) - + try: + sys.stderr.write('Unhandled item type "%s".\n'% self.type) + except: + sys.stderr.write('Unhandled item type. Possibly type has been localized.\n') + finally: + raise UnhandledItemType return replace_text(self.text) @@ -1110,7 +1117,10 @@ class Paragraph(ElementBase): role = 'tablenextpara' # the text itself - children = ElementBase.get_all(self) + try: + children = ElementBase.get_all(self) + except UnhandledItemType: + raise UnhandledItemType('Paragraph id: '+str(self.id)) if self.role != 'emph' and self.role != 'bascode' and self.role != 'logocode': children = children.strip() @@ -1244,23 +1254,30 @@ class ParserBase: def get_variable(self, id): return self.head_obj.get_variable(id) - def parse_localized_paragraph(self, paragraph, attrs, obj): + def parse_localized_paragraph(self, Paragraph_type, attrs, obj): localized_text = '' try: localized_text = get_localized_text(self.filename, attrs['id']) except: pass + paragraph = Paragraph_type(attrs, obj) if localized_text != '': # parse the localized text text = u'' + localized_text + '' - ParserBase(self.filename, self.follow_embed, self.embedding_app, \ - self.current_app, self.wiki_page_name, self.lang, \ - paragraph, text.encode('utf-8')) - # add it to the overall structure - obj.objects.append(paragraph) - # and ignore the original text - obj.parse_child(Ignore(attrs, obj, 'paragraph')) + try: + ParserBase(self.filename, self.follow_embed, self.embedding_app, \ + self.current_app, self.wiki_page_name, self.lang, \ + paragraph, text.encode('utf-8')) + except xml.parsers.expat.ExpatError: + sys.stderr.write( 'Invalid XML in translated text. Using the original text. Error location:\n'\ + + 'Curren xhp: ' + self.filename + '\nParagraph id: ' + attrs['id'] + '\n') + obj.parse_child(Paragraph_type(attrs, obj)) # new paragraph must be created because "paragraph" is corrupted by "ParserBase" + else: + # add it to the overall structure + obj.objects.append(paragraph) + # and ignore the original text + obj.parse_child(Ignore(attrs, obj, 'paragraph')) else: obj.parse_child(paragraph) @@ -1277,7 +1294,7 @@ class ParserBase: if ignore_this: obj.parse_child(Ignore(attrs, obj, 'paragraph')) else: - self.parse_localized_paragraph(Paragraph(attrs, obj), attrs, obj) + self.parse_localized_paragraph(Paragraph, attrs, obj) class XhpParser(ParserBase): def __init__(self, filename, follow_embed, embedding_app, wiki_page_name, lang): -- cgit