1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
|
#!/usr/bin/python
import sys, getopt, os, pprint, ast, difflib
# Directory given with -o/--original; assigned by main().
original_results_dir = ''
# Directory given with -s/--saved; assigned by main() and read by
# get_corresponding_file() to locate the counterpart of an original file.
saved_results_dir = ''
def main(argv):
    """Parse the command line and compare original results with saved ones.

    Args:
        argv: Command-line arguments without the program name. Recognised
            options are ``-h``, ``-o/--original <dir>`` and
            ``-s/--saved <dir>``.

    The usage line is printed and the process exits with status 2 when the
    options cannot be parsed or when either directory option is missing.
    ``-h`` prints the usage line and exits with the default status.
    """
    global original_results_dir, saved_results_dir

    usage = 'compare-ooxml-analyze-results.py -o <original results> -s <saved results>'

    # read the arguments
    try:
        opts, _ = getopt.getopt(argv, "ho:s:", ["original=", "saved="])
    except getopt.GetoptError:
        print(usage)
        sys.exit(2)

    original_dir = ''
    saved_dir = ''
    for opt, arg in opts:
        if opt == '-h':
            print(usage)
            sys.exit()
        elif opt in ("-o", "--original"):
            original_dir = arg
        elif opt in ("-s", "--saved"):
            saved_dir = arg

    # Both directories are required; without this check os.listdir('') below
    # would abort the script with a traceback instead of a usage hint.
    if not original_dir or not saved_dir:
        print(usage)
        sys.exit(2)

    original_results_dir = original_dir
    saved_results_dir = saved_dir

    # takes the result file list produced by the ooxml-analyze.py tool: <filename>.result
    original_result_files = get_list_of_result_files(original_results_dir)
    saved_result_files = get_list_of_result_files(saved_results_dir)
    compare_results(original_result_files, saved_result_files)

    # takes the concatenated text file list produced by the ooxml-analyze.py tool: <filename>.text
    original_text_files = get_list_of_text_files(original_results_dir)
    saved_text_files = get_list_of_text_files(saved_results_dir)
    compare_texts(original_text_files, saved_text_files)
# collects <filename>.result files
def get_list_of_result_files(directory_name):
    """Return the paths of all ``.result`` files below *directory_name*.

    Sub-directories are searched recursively. Every returned path starts
    with *directory_name*; entries appear in the order os.listdir() yields
    them.
    """
    collected = []
    for entry in os.listdir(directory_name):
        entry_path = os.path.join(directory_name, entry)
        if os.path.isdir(entry_path):
            # descend and keep whatever the sub-directory contributes
            collected.extend(get_list_of_result_files(entry_path))
        elif entry.endswith(".result"):
            collected.append(entry_path)
    return collected
# collects <filename>.text files
def get_list_of_text_files(directory_name):
    """Return the paths of all ``.text`` files below *directory_name*.

    Sub-directories are searched recursively. Every returned path starts
    with *directory_name*; entries appear in the order os.listdir() yields
    them.
    """
    all_files = []
    for filename in os.listdir(directory_name):
        full_path = os.path.join(directory_name, filename)
        if os.path.isdir(full_path):
            # Recurse with this function: recursing with the ".result"
            # collector would return the wrong kind of file for every
            # sub-directory.
            all_files.extend(get_list_of_text_files(full_path))
        elif filename.endswith(".text"):
            all_files.append(full_path)
    return all_files
# compares the elements of the original results with those of the results after saving.
def compare_results(original_result_files, saved_result_files):
    """Check every original ``.result`` file against its saved counterpart.

    The counterpart is looked up with get_corresponding_file(); when there
    is none a message is printed and the file is skipped. Otherwise both
    files are parsed and handed to check_text_contents().

    Args:
        original_result_files: Paths of the original ``.result`` files.
        saved_result_files: Accepted for symmetry; not consulted here.
    """
    for original_path in original_result_files:
        saved_path = get_corresponding_file(original_path)
        if saved_path != '':
            check_text_contents(
                create_list_from_result_file(original_path),
                create_list_from_result_file(saved_path),
                original_path,
            )
        else:
            print("No result file after roundtrip for " + original_path)
def compare_texts(original_texts_file, saved_texts_file):
    """Diff every original ``.text`` file against its saved counterpart.

    The counterpart is looked up with get_corresponding_file(); when there
    is none a message is printed and the file is skipped. Otherwise the
    unified diff of the two files is appended to the file ``result`` in the
    current working directory. Nothing is written for identical files.

    Args:
        original_texts_file: Paths of the original ``.text`` files.
        saved_texts_file: Accepted for symmetry; not consulted here.
    """
    for original_filepath in original_texts_file:
        saved_filepath = get_corresponding_file(original_filepath)
        if saved_filepath == '':
            print("No result text file after roundtrip for " + original_filepath)
            continue

        with open(original_filepath) as original_file:
            original_file_text = original_file.readlines()
        with open(saved_filepath) as saved_file:
            saved_file_text = saved_file.readlines()

        diff_lines = list(difflib.unified_diff(
            original_file_text, saved_file_text,
            fromfile=original_filepath, tofile=saved_filepath, lineterm=''))
        if not diff_lines:
            continue

        # NOTE: the log is opened in append mode, so output left over from an
        # earlier run is kept, not cleared.
        # Open the log once per file pair instead of once per diff line.
        with open("result", "a") as log_file:
            for line in diff_lines:
                # Header lines carry no terminator (lineterm='') while content
                # lines keep the newline read from the file; emit exactly one
                # newline per diff line so the log is not double-spaced.
                log_file.write(line if line.endswith('\n') else line + '\n')
# checks if we missed any text content after saving the file (except a:t; those are compared separately by the compare_texts function).
def _owner_tag(entry):
    """Return the first key of the first dict of a parsed result entry."""
    return list(entry[0])[0]


def check_text_contents(original_result_list, saved_result_list, original_file_path):
    """Report text that was lost or added for each tag, except ``a:t``.

    Both lists hold parsed result entries; element 0 of an entry is a dict
    whose first key is the tag, element 3 maps a text to its count. A
    message is printed for every text whose count shrank, grew, vanished or
    newly appeared in an entry with the same tag.

    Args:
        original_result_list: Entries parsed from the original result file.
        saved_result_list: Entries parsed from the saved result file.
        original_file_path: Path used in the printed messages.
    """
    # Forward pass: texts of the original that were lost or whose count changed.
    for original_entry in original_result_list:
        original_texts = original_entry[3]
        if not original_texts:  # no text content for this tag
            continue
        tag = _owner_tag(original_entry)
        if tag == "a:t":  # a:t texts are compared separately
            continue
        for saved_entry in saved_result_list:
            if _owner_tag(saved_entry) != tag:
                continue
            saved_texts = saved_entry[3]
            if original_texts == saved_texts:
                continue
            for text, count in original_texts.items():
                if text not in saved_texts:
                    print('We lost %d "%s" text in %s tag in %s.' % (count, text, tag, original_file_path))
                    continue
                saved_count = saved_texts[text]
                if count > saved_count:
                    print('We lost %d "%s" text in %s tag in %s.' % (count - saved_count, text, tag, original_file_path))
                elif count < saved_count:
                    print('We added extra %d "%s" text in %s tag in %s' % (saved_count - count, text, tag, original_file_path))

    # Reverse pass: texts that exist only in the saved version.
    for saved_entry in saved_result_list:
        saved_texts = saved_entry[3]
        if not saved_texts:  # no text content for this tag
            continue
        tag = _owner_tag(saved_entry)
        if tag == "a:t":  # a:t texts are compared separately
            continue
        for original_entry in original_result_list:
            if _owner_tag(original_entry) != tag:
                continue
            original_texts = original_entry[3]
            if saved_texts == original_texts:
                continue
            for text, count in saved_texts.items():
                if text not in original_texts:
                    print('We add extra %d "%s" text in %s tag in %s.' % (count, text, tag, original_file_path))
# reads the file content and creates the result list structure from it.
# e.g. res_list = [[{}, {}, {}, {}], [{}, {}, {}, {}], ...]
def create_list_from_result_file(filepath):
    """Parse a ``.result`` file into a list of four-dict entries.

    Every non-blank line is expected to contain four dict literals. The
    first three are each cut at the next ``},`` and the last one at ``}]``.
    Each piece is evaluated with ast.literal_eval().

    Args:
        filepath: Path of the result file to read.

    Returns:
        A list with one ``[dict, dict, dict, dict]`` entry per non-blank line.

    Raises:
        SyntaxError, ValueError: From ast.literal_eval() when a line does not
            have the expected shape.
    """
    # Text that ends each of the four dict literals on a line.
    terminators = ('},', '},', '},', '}]')

    result_list = []
    # The context manager closes the handle even if parsing raises.
    with open(filepath, 'r') as result_file:
        for line in result_file:
            if not line.strip():
                # A blank line holds no entry; evaluating it would raise.
                continue
            entry = []
            remainder = line
            for terminator in terminators:
                start = remainder.find('{')
                end = remainder.find(terminator)
                entry.append(ast.literal_eval(remainder[start:end + 1]))
                remainder = remainder[end + 1:]
            result_list.append(entry)
    return result_list
# takes the original result file and returns corresponding saved one's path
def get_corresponding_file(filepath):
    """Return the path of the saved counterpart of *filepath*.

    The counterpart is the file with the same base name directly inside the
    module-level ``saved_results_dir``.

    Args:
        filepath: Path of an original result or text file.

    Returns:
        The counterpart's path, or ``''`` when no such file exists.
    """
    # os.path.basename honours the platform's separators, unlike a manual
    # search for '/'.
    filename = os.path.basename(filepath)
    saved_filepath = os.path.join(saved_results_dir, filename)
    if os.path.exists(saved_filepath):
        return saved_filepath
    return ''
# Run the comparison only when this file is executed as a script.
if __name__ == "__main__":
    cli_arguments = sys.argv[1:]  # drop the program name
    main(cli_arguments)
|