diff options
Diffstat (limited to 'wiki-to-help/metabook_translated.py')
-rw-r--r-- | wiki-to-help/metabook_translated.py | 118 |
1 files changed, 118 insertions, 0 deletions
# wiki-to-help/metabook_translated.py
"""
Metabook variants for articles whose titles carry a language code and whose
comments may carry a translated title.
"""
import os
import re

import metabook

# Matches a "{Lang|translatedTitle}" tag and captures the translated title.
# Compiled once here so it is not rebuilt for every article.
_LANG_TAG = re.compile(r"\{Lang\|([^\}]*)\}")


class ArticleTranslated(metabook.Article):
    """
    An article whose title may encode a category and a language code and whose
    comment may contain the translated title.
    Recognised title forms:
        englishTitle
        Category/englishTitle
        Category/englishTitle/langCode
    """
    lang = "en"  # default language code
    trans = ""  # translated title

    def __init__(self, attributes):
        """
        Derive title, language and translated title from the raw attributes.
        @attributes Dict with the keys "title" and "comment"
        Sets self.title, self.lang, self.trans and self.attributes.
        """
        title = attributes["title"]
        parts = title.split("/")
        if len(parts) == 2:
            # title = "Category/englishTitle"
            self.title = parts[1]
        elif len(parts) == 3:
            # title = "Category/englishTitle/langCode"
            self.title = parts[1]
            self.lang = parts[2]
        else:
            # title = "blabla" (no category at all).
            # NOTE(review): titles with more than three segments also end up
            # here and keep their full text, so that self.title is always
            # set -- confirm this is the wanted handling for such titles.
            self.title = title

        # Treat a None comment like an empty one instead of failing below.
        comment = attributes["comment"] or ""
        match = _LANG_TAG.search(comment)
        if match is not None:
            # Language tag exists: use the translated title it carries.
            self.trans = match.group(1)
        else:
            # No (complete) language tag: fall back to the plain title.
            self.trans = self.title

        self.attributes = {
            "title": attributes["title"],
            "displaytitle": self.trans,
            "lang": self.lang,
        }


class MetabookTranslated(metabook.Metabook):
    """
    This metabook contains all articles with translated titles.
    This concrete metabook expects article titles in this form:
    Category/Title/lang
    Comments include this:
    {{Lang|translatedTitle}}
    """
    ArticleClass = ArticleTranslated
    artTags = ["title", "comment"]


class LanguageSeparator(object):
    """
    A translated metabook is a metabook where all titles are in the destination
    language.
    This class splits a translated metabook into many books with homogeneous
    languages.
    """
    # Class-level placeholders kept for compatibility only; every instance
    # gets its own containers in __init__ so no state is shared between
    # separators.
    books = {}  # Dict<Str lang, Metabook>
    sortedItems = {}  # Dict<Str lang, List<ArticleTranslated>>
    items = []  # List<ArticleTranslated>

    def __init__(self, book):
        """
        @book Metabook whose items are to be split by language
        """
        self.book = book
        self.items = book.items
        # Fresh per-instance dicts: createBooksByLanguage() writes into
        # self.books in place and must not touch the class-level dict.
        self.books = {}
        self.sortedItems = {}

    def splitItemsByLanguage(self):
        """
        Sort the articles in self.items by language and put them to
        self.sortedItems.
        """
        grouped = {}
        for item in self.items:
            grouped.setdefault(item.lang, []).append(item)
        self.sortedItems = grouped

    def createBooksByLanguage(self):
        """
        Generate metabooks to self.books.
        Create a metabook for each language from self.sortedItems.
        """
        # .items() instead of .iteritems() so this runs on Python 2 and 3.
        for lang, items in self.sortedItems.items():
            clone = self.book.getClone()
            clone.items = items
            clone.lang = lang
            self.books[lang] = clone

    @staticmethod
    def fromFileToFiles(jsonStructFile, xmldump, output):
        """
        Creates a Metabook from a file and writes it to one file per language.
        Shortcut function. This loads a metabook template file, creates the
        metabook content from @xmldump and writes one book per language
        into @output as "metabook_<lang>.json".
        @jsonStructFile String path to Metabook template
        @xmldump String path
        @output String path to output directory
        @return Dict<String lang, String output>
        """
        m = MetabookTranslated()
        with open(jsonStructFile, "r") as f:
            m.loadTemplate(f)

        m.loadArticles(xmldump)
        ls = LanguageSeparator(m)
        ls.splitItemsByLanguage()
        ls.createBooksByLanguage()

        pathlist = {}
        for lang, book in ls.books.items():
            book.createBook()
            dest = os.path.join(output, "metabook_%s.json" % lang)
            pathlist[lang] = dest
            with open(dest, "w") as f:
                book.write(f)
        return pathlist