summaryrefslogtreecommitdiff
path: root/wiki-to-help/metabook.py
blob: c0ce855bf749902e0297ab6a88aa985f98b53f1c (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
import json
import xml.dom.minidom as minidom

class Article(object):
    """
    One article entry of a metabook.
    The entry is built from the fixed keys in itemTag plus the attributes
    handed to the constructor.
    """
    # Keys every article entry starts out with.
    itemTag = {"content_type":"text/x-wiki","type":"article"}
    # Class-level default only; each instance gets its own value in __init__.
    attributes = {}
    # True if this article should be included in the metabook.
    include = True

    def __init__(self,attributes):
        """ @attributes Dict that toDict() merges over itemTag """
        self.attributes = attributes

    def getInclude(self):
        """ @return True if this article should be included in the metabook """
        return self.include

    def toDict(self):
        """
        @return new Dict: a copy of itemTag updated with self.attributes, so
            keys present in both take the value from self.attributes
        """
        entry = self.itemTag.copy()
        # update() works on the copy, the shared itemTag stays untouched
        entry.update(self.attributes)
        return entry

class Metabook(object):
    """ 
    I am your metabook and wish you a pleasant evening. 
    I build a metabook (a dict that is written out as JSON) from an optional
    JSON template and the <page> elements of an XML file.
    Sequence of usage:
        m = Metabook()
        m.loadTemplate(...)
        m.loadArticles(xml input)
        m.createBook()
        m.write(output)
    If template, in- and output are files, use fromFileToFile()
    """
    ArticleClass = Article # final
    artTags = ["title"] # final

    # Class-level defaults. The methods below only ever rebind these names on
    # the instance (self.m = ..., self.items = ...), they never mutate the
    # shared objects in place.
    m = {} # Dict metabook
    template = None
    items = []

    def getClone(self):
        """
        @return a new Metabook that shares template, ArticleClass and artTags
            with this one; the loaded articles and the built book are not
            carried over
        """
        m = Metabook()
        m.template = self.template # No copy() necessary here
        m.ArticleClass = self.ArticleClass
        m.artTags = self.artTags
        return m

    def getArtTags(self,filename,tagnames):
        """ 
        Get Article Tags
        Reads all specified tags from an xml file and returns a list of all tags.
        Only the first occurrence of a tag inside a <page> is used; a tag that
        is missing or has no content yields "".
        @filename XML-file
        @tagnames List of String Tagnames
        @return List of Dict<String Tagname, String Value>, one per <page>
        """
        dom = minidom.parse(filename)
        out = []

        for page in dom.getElementsByTagName("page"):
            tagdict = {}
            for tagname in tagnames:
                tags = page.getElementsByTagName(tagname)
                if tags:
                    tagdict[tagname] = self.getText(tags[0])
                else:
                    tagdict[tagname] = ""
            out.append(tagdict)
        return out

    def getText(self,element):
        """
        @element xml Node
        @return String data of the first child node, "" if the element has
            no child nodes (e.g. <title></title>)
        """
        children = element.childNodes
        if not children:
            # An empty element has no text node; treat it like a missing tag
            # instead of failing with an IndexError.
            return ""
        return children[0].data

    def load_data(self,filename):
        """
        Unserialize data from jsonfile
        @filename String path to a json file
        @return the decoded json content
        """
        with open(filename, "r") as infile:
            outdict = json.load(infile)
        return outdict

    def loadTemplate(self,jsonStruct):
        """ 
        Loads an existing json file at the beginning 
        The decoded content is stored in self.template.
        @jsonStruct File object
        """
        self.template = json.load(jsonStruct)

    def loadArticles(self,source):
        """
        Loads the articles and saves them as objects to self.items
        @source xml input, handed to getArtTags()
        """
        pages = self.getArtTags(source,self.artTags)
        self.items = [self.ArticleClass(page) for page in pages]

    def createBook(self):
        """
        Convert all article objects to dicts and merge them with the template.
        Articles whose getInclude() is false are left out.
        The result is saved to self.m
        """
        if self.template is None:
            # Without a template the book starts empty. It has to be a dict,
            # a list cannot take the "items" key assigned below.
            book = {}
        else:
            # Shallow copy: adding "items" must not alter the template.
            book = self.template.copy()
        book["items"] = [item.toDict() for item in self.items
                         if item.getInclude()]
        self.m = book

    def __call__(self,source):
        """
        Creates a metabook for @source and writes it to self.m. To continue, 
            use write()
        @source xml-dump
        """
        self.loadArticles(source)
        self.createBook()

    def write(self,dest):
        """
        Serialize the metabook self.m as json.
        @dest writable File object
        """
        json.dump(self.m,dest)
        
    def fromFileToFile(self,jsonStructFile,xmldump,output): 
        """
        Creates a Metabook from a file and writes it to a file.
        Short cut Function. This loads a metabook template file, creates the 
            metabook content from @xmldump and writes the book to @output.
        @jsonStructFile String path to Metabook template
        @xmldump String path
        @output String path
        """
        with open(jsonStructFile,"r") as f:
            self.loadTemplate(f)
        self(xmldump)
        with open(output,"w") as f:
            self.write(f)