From 480de8e23d4fc39e33646eb11987a00c311ee28d Mon Sep 17 00:00:00 2001 From: Markus Mohrhard Date: Wed, 14 Jan 2015 12:56:44 +0100 Subject: add script to extract tooltips Change-Id: I4c79b944edf246b80a8dd5ea54c3651e3909f54b --- bin/extract-tooltip.py | 107 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 107 insertions(+) create mode 100755 bin/extract-tooltip.py (limited to 'bin') diff --git a/bin/extract-tooltip.py b/bin/extract-tooltip.py new file mode 100755 index 000000000000..5397c718ff2b --- /dev/null +++ b/bin/extract-tooltip.py @@ -0,0 +1,107 @@ +#!/usr/bin/env python +import sys +import os +import re +import urlparse + +def usage(): + message = """ usage: {program} inDir outDir +inDir: directory containing .ht files +outDir: target for the new files""" + print(message.format(program = os.path.basename(sys.argv[0]))) + +def parseFile(filename): + file = open(filename, "r") + data = file.readlines() + data = [line.rstrip('\n') for line in data] + + pairs = {} + regEx = re.compile("^(\S+)\s(\S+)\s(\S+)\s((?:\s*\S*)+)$") + old_line = None + for line in data: + if len(line) > 0: + if(old_line != None): + print filename + #print("failed to parse line") + #print(old_line) + line = old_line + line + print line + old_line = None + split_line = regEx.split(line) + #print(split_line) + #print(urlparse.unquote(split_line[2])) + #print(split_line[4]) + if(old_line == None and split_line[4] == "" and split_line[3] != "0"): + print(line) + print(split_line) + old_line = line + else: + pairs[urlparse.unquote(split_line[2])] = split_line[4] + assert(len(split_line) == 6) + #print data + #print(pairs) + return pairs + +def parseFiles(dir): + strings = [] + for files in os.listdir(dir): + if files.endswith(".ht"): + string = parseFile(os.path.join(dir,files)) + print(files) + #print string + strings.append([files, string]) + return strings + +def extractSharedEntries(strings): + first_dict = strings[0][1] + shared_dict = {} + #print(first_dict) + for key, value in first_dict.iteritems(): + # check that the entry in the same in all dics + is_in_all_dicts = True + for dict_file_pair in strings: + dict = dict_file_pair[1] + if not dict.has_key(key): + is_in_all_dicts = False + elif not dict[key] == value: + print("Element with different values") + print(key) + is_in_all_dicts = False + if is_in_all_dicts: + shared_dict[key] = value + #print(shared_dict) + for dict_file_pair in strings: + for key in shared_dict.iterkeys(): + dict_file_pair[1].pop(key) + + strings.append(["shared.ht", shared_dict]) + return strings + +def writeOutFiles(dir, strings): + for string in strings: + file_name_base = string[0] + file_name_base = file_name_base.replace(".ht", ".properties") + file_name = os.path.join(dir, file_name_base) + file = open(file_name, "w") + for key, value in string[1].iteritems(): + try: + file.write(key) + file.write("=") + file.write(value) + file.write("\n") + except UnicodeDecodeError: + print key + print value + file.close() + +def main (args): + if(len(args) != 3): + usage() + sys.exit(1) + + strings = parseFiles(args[1]) + new_strings = extractSharedEntries(strings) + writeOutFiles(args[2], new_strings) + +if __name__ == "__main__": + main(sys.argv) -- cgit