diff options
author | Gülşah Köse <gulsah.kose@collabora.com> | 2021-06-08 08:50:37 +0300 |
---|---|---|
committer | Gülşah Köse <gulsah.kose@collabora.com> | 2021-06-08 08:50:37 +0300 |
commit | 4bf4b28b2d6316a34d0966d1109b76162aa0b336 (patch) | |
tree | ceae35e7e05ff81e10490ab230f85594348d5cc1 | |
parent | 7c03a4c092d9cba10ecb22e7f97aaca851259f1f (diff) |
Compare texts
Change-Id: I0aee62ffa79dbf716bb9ec2af2bc06078e1eb566
-rw-r--r-- | bin/compare-ooxml-analyze-results.py | 87 |
1 files changed, 68 insertions, 19 deletions
diff --git a/bin/compare-ooxml-analyze-results.py b/bin/compare-ooxml-analyze-results.py index d0d89fc30a31..fdd4fa8a0243 100644 --- a/bin/compare-ooxml-analyze-results.py +++ b/bin/compare-ooxml-analyze-results.py @@ -1,6 +1,6 @@ #!/usr/bin/python -import sys, getopt, os, pprint, ast +import sys, getopt, os, pprint, ast, difflib original_results_dir = '' saved_results_dir = '' @@ -24,13 +24,18 @@ def main(argv): global saved_results_dir saved_results_dir = arg - # takes file list produced by ooxml-analyze.py tool. - original_result_files = get_list_of_files(original_results_dir) - saved_result_files = get_list_of_files(saved_results_dir) + # takes result file list produced by ooxml-analyze.py tool. <filename>.result + original_result_files = get_list_of_result_files(original_results_dir) + saved_result_files = get_list_of_result_files(saved_results_dir) + compare_results(original_result_files, saved_result_files) - compare(original_result_files, saved_result_files) + # takes concanated texts file list produced by ooxml-analyze.py tool. <filename>.text + original_text_files = get_list_of_text_files(original_results_dir) + saved_text_files = get_list_of_text_files(saved_results_dir) + compare_texts(original_text_files, saved_text_files) -def get_list_of_files(directory_name): +# collects <filename>.result files +def get_list_of_result_files(directory_name): list_of_file = os.listdir(directory_name) all_files = list() @@ -38,16 +43,32 @@ def get_list_of_files(directory_name): for filename in list_of_file: full_path = os.path.join(directory_name, filename) if os.path.isdir(full_path): - all_files = all_files + get_list_of_files(full_path) + all_files = all_files + get_list_of_result_files(full_path) else: - all_files.append(full_path) + if filename.endswith(".result"): + all_files.append(full_path) + + return all_files + +# collecsts <filename>.text files +def get_list_of_text_files(directory_name): + + list_of_file = os.listdir(directory_name) + all_files = list() + + for filename in list_of_file: + full_path = os.path.join(directory_name, filename) + if os.path.isdir(full_path): + all_files = all_files + get_list_of_result_files(full_path) + else: + if filename.endswith(".text"): + all_files.append(full_path) return all_files # compares the elements the original results and and after saved results. -def compare(original_result_files, saved_result_files): - ind = 1 +def compare_results(original_result_files, saved_result_files): for original_filepath in original_result_files: saved_filepath = get_corresponding_file(original_filepath) if saved_filepath == '': @@ -57,11 +78,31 @@ def compare(original_result_files, saved_result_files): original_result_list = create_list_from_result_file(original_filepath) saved_result_list = create_list_from_result_file(saved_filepath) - check_text_contents(original_result_list, saved_result_list) + check_text_contents(original_result_list, saved_result_list, original_filepath) + +def compare_texts(original_texts_file, saved_texts_file): + for original_filepath in original_texts_file: + saved_filepath = get_corresponding_file(original_filepath) + if saved_filepath == '': + print("No result text file after roundtrip for " + original_filepath) + continue + + with open(original_filepath) as file_1: + original_file_text = file_1.readlines() + + with open(saved_filepath) as file_2: + saved_file_text = file_2.readlines() + + # if os.path.exists("./result"): + # os.remove("result") + for line in difflib.unified_diff(original_file_text, saved_file_text, fromfile=original_filepath, tofile=saved_filepath, lineterm=''): + with open("result", "a") as log_file: + print(line, file=log_file) + log_file.close() -# checks if we missed any text content after saving the file. -def check_text_contents(original_result_list, saved_result_list): +# checks if we missed any text content after saving the file. (except a:t, We are comparing them with compare_text function) +def check_text_contents(original_result_list, saved_result_list, original_file_path): # detect if we lost or added any text on existing texts of original version. for line in original_result_list: @@ -69,6 +110,10 @@ def check_text_contents(original_result_list, saved_result_list): if not bool(text_dict): # check if text context is empty continue tag = list(line[0].keys())[0] #if there is a text context, find the owner tag. + + if tag == "a:t": # exclude text, we are comparing them seperatly + continue + for sline in saved_result_list: stag = list(sline[0].keys())[0] if stag == tag: # check if saved results has same tag too. @@ -76,18 +121,22 @@ def check_text_contents(original_result_list, saved_result_list): if text_dict != saved_text_dict: for key, val in text_dict.items(): if key not in saved_text_dict.keys(): - print ("We lost %d \"%s\" text in %s tag." % (val, key, tag)) + print ("We lost %d \"%s\" text in %s tag in %s." % (val, key, tag, original_file_path)) elif val > saved_text_dict[key]: - print ("We lost %d \"%s\" text in %s tag." % (val - saved_text_dict[key], key, tag)) + print ("We lost %d \"%s\" text in %s tag in %s." % (val - saved_text_dict[key], key, tag, original_file_path)) elif val < saved_text_dict[key]: - print("We added extra %d \"%s\" text in %s tag" % (saved_text_dict[key] - val, key, tag)) + print("We added extra %d \"%s\" text in %s tag in %s" % (saved_text_dict[key] - val, key, tag, original_file_path)) - # detect if we add any new text that not existed in original version + # detect if we add any new text that not existed in original version (Reverse comparision) for line in saved_result_list: saved_text_dict = line[3] if not bool(saved_text_dict): # check if text context is empty continue tag = list(line[0].keys())[0] #if there is a text context, find the owner tag. + + if tag == "a:t": # exclude text, we are comparing them seperatly + continue + for sline in original_result_list: stag = list(sline[0].keys())[0] if stag == tag: # check if original results has same tag too. @@ -95,9 +144,9 @@ def check_text_contents(original_result_list, saved_result_list): if saved_text_dict != text_dict: for key, val in saved_text_dict.items(): if key not in text_dict.keys(): - print ("We add extra %d \"%s\" text in %s tag." % (val, key, tag)) + print ("We add extra %d \"%s\" text in %s tag in %s." % (val, key, tag, original_file_path)) -#reads the file context and create the result list structer from. +#reads the file context and create the result list structure from. # eg res_list[[{},{},{},{}],[{},{},{},{}]...] ] def create_list_from_result_file(filepath): result_list = [] |