summaryrefslogtreecommitdiff
path: root/helpcontent2/to-wiki/getalltitles.py
blob: 97e2649ee8f30a654c414b9d2cb5e5aec8055522 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
#!/usr/bin/env python

import sys
import os
import xml.parsers.expat

# Parser state shared between parsexhp() and the expat callbacks below.
title = ""         # text collected from the <title> element of the current file
parsing = True     # switched off once the first </title> has been reached
istitle = False    # True only while the parser is inside a <title> element
alltitles = []     # every title emitted so far; consulted to keep titles unique

def is_present(title):
    """Return True if *title* is already recorded in ``alltitles``.

    Both sides are compared with surrounding whitespace stripped.  A *title*
    that has no ``strip`` method can never match and yields False.
    """
    try:
        # Strip once up front instead of on every loop iteration.
        wanted = title.strip()
    except AttributeError:
        return False
    for existing in alltitles:
        try:
            if existing.strip() == wanted:
                return True
        except AttributeError:
            # A non-string entry cannot match; skip it rather than abandoning
            # the scan, so later entries are still checked.  Only this
            # specific error is tolerated -- KeyboardInterrupt and genuine
            # bugs propagate instead of being silently swallowed.
            continue
    return False

def make_unique(title):
    """Return *title*, or *title* with a ``_<n>`` suffix if it is already taken.

    The suffix counter starts at 1 and increases until is_present() reports
    that the candidate is not in use.
    """
    candidate = title
    counter = 0
    while is_present(candidate):
        counter += 1
        candidate = title + "_" + str(counter)
    return candidate

# [search, replacement] pairs applied in this order by replace_text().
# The quoted operator forms come before the single-character rules so that
# e.g. '"/"' becomes "slash" before the bare '/' rule can rewrite it.
replace_text_list = [
    # product-name placeholders
    ["$[officename]", "LibreOffice"],
    ["%PRODUCTNAME", "LibreOffice"],
    # quoted operators get a spelled-out name
    ['"+"', "plus"],
    ['"*"', "star"],
    ['"-"', "minus"],
    ['"/"', "slash"],
    ['"^"', "cap"],
    # remaining brackets and slashes become underscores
    [")", "_"],
    ["(", "_"],
    ["\\", "_"],
    ["/", "_"],
]

# [path fragment, module name] pairs; get_module() returns the name of the
# first pair whose fragment occurs in the text it is given.
modules_list = [
    ["sbasic", "Basic"],
    ["scalc", "Calc"],
    ["schart", "Chart"],
    ["sdraw", "Draw"],
    ["shared", "All"],
    ["simpress", "Impress"],
    ["smath", "Math"],
    ["swriter", "Writer"],
]

def get_module(text):
    """Return the module name for the first ``modules_list`` fragment in *text*.

    Entries are tried in list order; an empty string is returned when no
    fragment occurs in *text*.
    """
    for entry in modules_list:
        fragment = entry[0]
        if fragment in text:
            return entry[1]
    return ""

def replace_text(text):
    """Apply every substitution in ``replace_text_list`` to *text*, in order.

    Each rule sees the result of the rules before it.  Returns the rewritten
    text.
    """
    for rule in replace_text_list:
        search, substitute = rule[0], rule[1]
        if search in text:
            text = text.replace(search, substitute)
    return text

def start_element(name, attrs):
    """Expat start-tag handler: note when a <title> element is entered.

    Does nothing once ``parsing`` has been switched off.  *attrs* is unused.
    """
    global parsing, istitle
    if parsing and name == 'title':
        istitle = True

def end_element(name):
    """Expat end-tag handler: stop collecting after the first </title>.

    Clears both ``parsing`` and ``istitle`` so that the remaining callbacks
    for this document become no-ops.
    """
    global parsing, istitle
    if parsing and name == 'title':
        istitle = False
        parsing = False

def char_data(data):
    """Expat character-data handler: collect the text of the <title> element.

    Text outside a <title> element is ignored.  Each chunk is passed through
    replace_text() and appended to the global ``title``.
    """
    global title
    if not istitle:
        return
    # Expat may report a single text node in several pieces (for example
    # around entity references or line breaks), so accumulate the chunks;
    # plain assignment would keep only the last one.
    # NOTE(review): a replacement pattern that straddles two chunks is not
    # substituted, because each chunk is rewritten on its own.
    title = title + replace_text(data)

def parsexhp(filename):
    """Extract the title of one help file and print ``<filename>;<title>``.

    The file is fed to an expat parser whose callbacks store the text of the
    first <title> element in the global ``title``.  When a title was found it
    is prefixed with the module name derived from *filename*, spaces are
    turned into underscores, it is made unique against ``alltitles``,
    recorded there and printed.  Files without a title produce no output.

    Errors from opening the file or from the parser propagate to the caller.
    """
    global parsing, title
    parsing = True
    p = xml.parsers.expat.ParserCreate()
    p.StartElementHandler = start_element
    p.EndElementHandler = end_element
    p.CharacterDataHandler = char_data
    # Read raw bytes so expat applies the encoding declared by the document
    # instead of a locale-dependent text decoding; the with-block closes the
    # handle even when parsing raises.
    with open(filename, "rb") as xhp:
        p.Parse(xhp.read())
    if title:
        title = get_module(filename) + "/" + title
        title = title.replace(" ", "_")
        title = make_unique(title)
        alltitles.append(title)
        # A single parenthesised argument prints identically on Python 2 and 3.
        print(filename + ";" + title)
    # Reset so the next file starts with an empty title.
    title = ""

# Script body: walk the directory given on the command line and emit one
# "<filename>;<title>" line for every help file found below it.
if len(sys.argv) < 2:
    print("getalltitles.py <directory>")
    print("e.g. getalltitles.py helpcontent2/source/text/scalc")
    sys.exit(1)

# File extension (without the dot) of the help files to process.
pattern = "xhp"

for root, dirs, files in os.walk(sys.argv[1]):
    for i in files:
        # Match the extension rather than any occurrence of "xhp" in the
        # name, so backup or swap files such as "foo.xhp~" are not parsed.
        if i.endswith("." + pattern):
            parsexhp(root+"/"+i)