1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
|
import metabook
import re, os
class ArticleTranslated(metabook.Article):
    """
    Article whose raw title may encode a category and a language code.

    Recognised title shapes (split on "/"):
        "Title"                     -> title "Title", default language
        "Category/Title"            -> title "Title", default language
        "Category/Title/langCode"   -> title "Title", language "langCode"
    Any other shape falls back to the full raw title and the default
    language, so that self.title is always set by this constructor.

    The displayed (translated) title is taken from a "{Lang|...}" tag in the
    article comment; when no complete tag is present the title is used.

    NOTE(review): the base-class constructor is not called here, exactly as
    before -- confirm against metabook.Article that this is intended.
    """

    lang = "en"  # default language code
    trans = ""  # translated title

    def __init__(self, attributes):
        """
        @attributes Dict with at least the keys "title" and "comment"
        Raises KeyError when one of those keys is missing.
        """
        title = attributes["title"]
        parts = title.split("/")
        if len(parts) == 2:
            # title = "Category/englishTitle"
            self.title = parts[1]
        elif len(parts) == 3:
            # title = "Category/englishTitle/langCode"
            self.lang = parts[2]
            self.title = parts[1]
        else:
            # Plain title, or an unexpected number of segments: keep the raw
            # title instead of leaving self.title unassigned.
            self.title = title

        # Tolerate an empty/None comment; a missing key still raises KeyError.
        comment = attributes["comment"] or ""
        # The search alone decides whether a complete tag exists; an opening
        # "{Lang|" without a closing "}" yields no match and must not crash.
        match = re.search(r"\{Lang\|([^\}]*)\}", comment)
        if match:
            self.trans = match.group(1)
        else:
            self.trans = self.title

        self.attributes = {
            "title": attributes["title"],
            "displaytitle": self.trans,
            "lang": self.lang,
        }
class MetabookTranslated(metabook.Metabook):
    """
    Metabook whose articles carry translated titles.

    Article titles are expected in the form
        Category/Title/lang
    and article comments are expected to contain a tag of the form
        {{Lang|translatedTitle}}
    """

    # Article class associated with this metabook type.
    ArticleClass = ArticleTranslated

    # Article attribute names of interest; presumably consumed by
    # metabook.Metabook when loading articles -- confirm against the base class.
    artTags = ["title", "comment"]
class LanguageSeparator(object):
    """
    Split a translated metabook into one metabook per language.

    A translated metabook is a metabook where all titles are in the
    destination language. This class groups its articles by their ``lang``
    attribute and builds a separate metabook for every language found.
    """

    # Class-level defaults are kept for backward compatibility only. They are
    # never mutated: every instance gets its own containers in __init__, and
    # the methods below always assign freshly built dicts.
    books = {}  # Dict<Str lang, Metabook>
    sortedItems = {}  # Dict<Str lang, List<TranslatedArticle>>
    items = []  # List<TranslatedArticle>

    def __init__(self, book):
        """
        @book Metabook providing ``items`` and ``getClone()``
        """
        self.book = book
        self.items = book.items
        # Per-instance state, so separate separators never share results.
        self.books = {}
        self.sortedItems = {}

    def splitItemsByLanguage(self):
        """
        Group the articles in self.items by language into self.sortedItems.
        Articles keep their original relative order within each language.
        """
        grouped = {}
        for item in self.items:
            grouped.setdefault(item.lang, []).append(item)
        self.sortedItems = grouped

    def createBooksByLanguage(self):
        """
        Generate metabooks to self.books.
        Create a metabook for each language from self.sortedItems; each one is
        a clone of the source book with its ``items`` and ``lang`` replaced.
        """
        books = {}
        # .items() instead of .iteritems(): available on Python 2 and 3.
        for lang, items in self.sortedItems.items():
            clone = self.book.getClone()
            clone.items = items
            clone.lang = lang
            books[lang] = clone
        # Assign a fresh dict rather than writing into a shared one.
        self.books = books

    @staticmethod
    def fromFileToFiles(jsonStructFile, xmldump, output):
        """
        Creates a Metabook from a file and writes it to one file per language.
        Short cut function. This loads a metabook template file, creates the
        metabook content from @xmldump and writes the books to @output as
        "metabook_<lang>.json".
        @jsonStructFile String path to Metabook template
        @xmldump String path
        @output String path to output directory
        @return Dict<String lang, String output>
        """
        m = MetabookTranslated()
        with open(jsonStructFile, "r") as f:
            m.loadTemplate(f)
        m.loadArticles(xmldump)

        ls = LanguageSeparator(m)
        ls.splitItemsByLanguage()
        ls.createBooksByLanguage()

        pathlist = {}
        for lang, book in ls.books.items():
            book.createBook()
            dest = os.path.join(output, "metabook_%s.json" % lang)
            pathlist[lang] = dest
            with open(dest, "w") as f:
                book.write(f)
        return pathlist
|