From ea20fcce1dac735a9730ab6672bf60ccec595e71 Mon Sep 17 00:00:00 2001
From: Noel Grandin
Date: Mon, 22 Jul 2019 16:10:32 +0200
Subject: close some more holes in structures and improve the pahole script so I can just run it once over the whole codebase

Change-Id: I7e1775974a3a61f8c0e40646158f01163ace60cc
Reviewed-on: https://gerrit.libreoffice.org/76122
Tested-by: Jenkins
Reviewed-by: Noel Grandin
---
 compilerplugins/clang/pahole-all-classes.py | 156 +++++++++++++++-------------
 1 file changed, 81 insertions(+), 75 deletions(-)

(limited to 'compilerplugins')

diff --git a/compilerplugins/clang/pahole-all-classes.py b/compilerplugins/clang/pahole-all-classes.py
index ec8d20e767e7..b95b92543427 100755
--- a/compilerplugins/clang/pahole-all-classes.py
+++ b/compilerplugins/clang/pahole-all-classes.py
@@ -23,10 +23,10 @@ import re

 # search for all the class names in the file produced by the unusedfields loplugin
 #a = subprocess.Popen("grep 'definition:' workdir/loplugin.unusedfields.log | sort -u", stdout=subprocess.PIPE, shell=True)
-a = subprocess.Popen("cat ../libo/n1", stdout=subprocess.PIPE, shell=True)
+a = subprocess.Popen("cat n1", stdout=subprocess.PIPE, shell=True)

-classSourceLocDict = dict()
 classSet = set()
+classSourceLocDict = dict()
 with a.stdout as txt:
     for line in txt:
         tokens = line.decode('utf8').strip().split("\t")
@@ -36,98 +36,104 @@ with a.stdout as txt:
         if "anonymous" in className: continue
         # ignore duplicates
         if className in classSet: continue
-        # for now, just check the stuff in /sc/inc
-        if srcLoc.startswith("a"):
-            classSourceLocDict[srcLoc] = className
-            classSet.add(className)
+        classSet.add(className)
+        classSourceLocDict[className] = srcLoc
 a.terminate()

-gdbProc = subprocess.Popen("gdb", stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, shell=True)
-
-stdin = io.TextIOWrapper(gdbProc.stdin, 'utf-8')
-
-# make gdb load all the debugging info
-stdin.write("set confirm off\n")
-for filename in sorted(os.listdir('instdir/program')):
-    if filename.endswith(".so"):
-        stdin.write("add-symbol-file instdir/program/" + filename + "\n")
-stdin.flush()
-
-
 # Some of the pahole commands are going to fail, and I cannot read the error stream and the input stream
 # together because python has no way of (easily) doing a non-blocking read.
 # So I have to write the commands out using a background thread, and then read the entire resulting
 # stream out below.
-def write_pahole_commands():
-    for srcLoc in sorted(classSourceLocDict.keys()):
-        className = classSourceLocDict[srcLoc]
-        stdin.write("echo " + className + " " + srcLoc + "\n")
+def write_pahole_commands(classes):
+    for className in classes:
+        stdin.write("echo " + className + " " + classSourceLocDict[className] + "\n")
         stdin.write("pahole " + className + "\n")
         stdin.flush()
     stdin.write("echo all-done\n")
     stdin.flush()
     stdin.close() # only way to make it flush the last echo command

-_thread.start_new_thread( write_pahole_commands, () )
-
 # Use generator because lines often end up merged together in gdb's output, and we need
 # to split them up, and that creates a mess in the parsing logic.
-def read_generator():
+def read_generator(gdbOutput):
     while True:
-        line = gdbProc.stdout.readline().decode('utf8').strip()
+        line = gdbOutput.readline().decode('utf8').strip()
         for split in line.split("(gdb)"):
             split = split.strip()
             if len(split) == 0: continue
             if "all-done" in split: return
             yield split

-firstLineRegex = re.compile("/\*\s+(\d+)\s+\*/ struct")
-fieldLineRegex = re.compile("/\*\s+(\d+)\s+(\d+)\s+\*/ ")
-holeLineRegex = re.compile("/\* XXX (\d+) bit hole, try to pack \*/")
-# sometimes pahole can't determine the size of a sub-struct, and then it returns bad data
-bogusLineRegex = re.compile("/\*\s+\d+\s+0\s+\*/")
-structLines = list()
-foundHole = False
-cumulativeHoleBits = 0
-structSize = 0
-foundBogusLine = False
-# pahole doesn't report space at the end of the structure, so work it out myself
-sizeOfFields = 0
-for line in read_generator():
-    structLines.append(line)
-    firstLineMatch = firstLineRegex.match(line)
-    if firstLineMatch:
-        structSize = int(firstLineMatch.group(1))
-    holeLineMatch = holeLineRegex.match(line)
-    if holeLineMatch:
-        foundHole = True
-        cumulativeHoleBits += int(holeLineMatch.group(1))
-    fieldLineMatch = fieldLineRegex.match(line)
-    if fieldLineMatch:
-        fieldSize = int(fieldLineMatch.group(2))
-        sizeOfFields = int(fieldLineMatch.group(1)) + fieldSize
-    if bogusLineRegex.match(line):
-        foundBogusLine = True
-    if line == "}":
-        # Ignore very large structs, packing those is not going to help much, and
-        # re-organising them can make them much less readable.
-        if foundHole and len(structLines) < 12 and structSize < 100 and not foundBogusLine:
-            # Verify that we have enough hole-space that removing it will result in a structure
-            # that still satisfies alignment requirements, otherwise the compiler will just put empty
-            # space at the end of the struct.
-            # TODO improve detection of the required alignment for a structure
-            potentialSpace = (cumulativeHoleBits / 8) + (sizeOfFields - structSize)
-            if potentialSpace >= 8:
-                for line in structLines:
-                    print(line)
-                if (sizeOfFields - structSize) > 0:
-                    print("hole at end of struct: " + str(sizeOfFields - structSize))
-        # reset state
-        structLines.clear()
-        foundHole = False
-        cumulativeHoleBits = 0
-        structSize = 0
-        foundBogusLine = False
-        actualStructSize = 0
+classList = sorted(classSet)
+
+# Process 200 classes at a time, otherwise gdb's memory usage blows up and kills the machine
+#
+while len(classList) > 0:
+
+    currClassList = classList[:200]
+    classList = classList[200:]
+
+    gdbProc = subprocess.Popen("gdb", stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, shell=True)
+
+    stdin = io.TextIOWrapper(gdbProc.stdin, 'utf-8')
+
+    # make gdb load all the debugging info
+    stdin.write("set confirm off\n")
+    for filename in sorted(os.listdir('instdir/program')):
+        if filename.endswith(".so"):
+            stdin.write("add-symbol-file instdir/program/" + filename + "\n")
+    stdin.flush()
+
+
+    _thread.start_new_thread( write_pahole_commands, (currClassList,) )
+
+    firstLineRegex = re.compile(r"/\*\s+(\d+)\s+\*/ struct")
+    fieldLineRegex = re.compile(r"/\*\s+(\d+)\s+(\d+)\s+\*/ ")
+    holeLineRegex = re.compile(r"/\* XXX (\d+) bit hole, try to pack \*/")
+    # sometimes pahole can't determine the size of a sub-struct, and then it returns bad data
+    bogusLineRegex = re.compile(r"/\*\s+\d+\s+0\s+\*/")
+    structLines = list()
+    foundHole = False
+    cumulativeHoleBits = 0
+    structSize = 0
+    foundBogusLine = False
+    # pahole doesn't report space at the end of the structure, so work it out myself
+    sizeOfFields = 0
+    for line in read_generator(gdbProc.stdout):
+        structLines.append(line)
+        firstLineMatch = firstLineRegex.match(line)
+        if firstLineMatch:
+            structSize = int(firstLineMatch.group(1))
+        holeLineMatch = holeLineRegex.match(line)
+        if holeLineMatch:
+            foundHole = True
+            cumulativeHoleBits += int(holeLineMatch.group(1))
+        fieldLineMatch = fieldLineRegex.match(line)
+        if fieldLineMatch:
+            fieldSize = int(fieldLineMatch.group(2))
+            sizeOfFields = int(fieldLineMatch.group(1)) + fieldSize
+        if bogusLineRegex.match(line):
+            foundBogusLine = True
+        if line == "}":
+            # Ignore very large structs, packing those is not going to help much, and
+            # re-organising them can make them much less readable.
+            if foundHole and len(structLines) < 12 and structSize < 100 and not foundBogusLine:
+                # Verify that we have enough hole-space that removing it will result in a structure
+                # that still satisfies alignment requirements, otherwise the compiler will just put empty
+                # space at the end of the struct.
+                # TODO improve detection of the required alignment for a structure
+                potentialSpace = (cumulativeHoleBits / 8) + (sizeOfFields - structSize)
+                if potentialSpace >= 8:
+                    for line in structLines:
+                        print(line)
+                    if (sizeOfFields - structSize) > 0:
+                        print("hole at end of struct: " + str(sizeOfFields - structSize))
+            # reset state
+            structLines.clear()
+            foundHole = False
+            cumulativeHoleBits = 0
+            structSize = 0
+            foundBogusLine = False
+            actualStructSize = 0

-gdbProc.terminate()
+    gdbProc.terminate()
-- 
cgit