#!/usr/bin/python
"""Analyse the log written by the unusedenumconstants loplugin.

Reads ``loplugin.unusedenumconstants.log`` from the current directory.  Every
relevant line is tab-separated and starts with ``definition:``, ``read:`` or
``write:``.  From those lines the script collects which entries are defined,
read and written, and derives the entries that are never touched or that are
only ever written.
"""

import sys
import re
import io

# Each entry is a (normalized type name, constant name) tuple.
definitionSet = set()
definitionToSourceLocationMap = dict()
readSet = set()
writeSet = set()
sourceLocationSet = set()

# clang does not always use exactly the same numbers in the type-parameter
# vars it generates, so they are replaced by a placeholder to make sure that
# definitions and uses can be matched up.
normalizeTypeParamsRegex = re.compile(r"type-parameter-\d+-\d+")


def normalizeTypeParams( line ):
    """Return line with every "type-parameter-N-M" replaced by a placeholder."""
    return normalizeTypeParamsRegex.sub("type-parameter-?-?", line)


def parseFieldInfo( tokens ):
    """Build the (type, name) key for a "read:" or "write:" line.

    tokens is the tab-split line; tokens[1] is the type.  The name is
    tokens[2] when the line has exactly three tokens, otherwise it is "".
    """
    name = tokens[2] if len(tokens) == 3 else ""
    return (normalizeTypeParams(tokens[1]), name)


# The parsing here is designed to avoid grabbing stuff which is mixed in from
# gbuild; no way of suppressing the gbuild output has been found yet.
# The file is opened in text mode so that splitting on "\t" works on
# Python 3 as well; undecodable bytes are replaced rather than aborting.
with io.open("loplugin.unusedenumconstants.log", "r", buffering=1024*1024,
             errors="replace") as txt:
    for line in txt:
        tokens = line.strip().split("\t")
        kind = tokens[0]
        # The length checks send truncated lines to the "unknown line" branch
        # instead of failing with an IndexError.
        if kind == "definition:" and len(tokens) >= 4:
            fieldInfo = (normalizeTypeParams(tokens[1]), tokens[2])
            srcLoc = tokens[3]
            # ignore external source code and the build folder
            if srcLoc.startswith(("external/", "workdir/")):
                continue
            definitionSet.add(fieldInfo)
            definitionToSourceLocationMap[fieldInfo] = srcLoc
        elif kind == "read:" and len(tokens) >= 2:
            readSet.add(parseFieldInfo(tokens))
        elif kind == "write:" and len(tokens) >= 2:
            writeSet.add(parseFieldInfo(tokens))
        else:
            print("unknown line: " + line)

# Invert the definitionToSourceLocationMap.
# If we see more than one definition at the same sourceLocation, it's being
# autogenerated as part of a template and we should just ignore it.
sourceLocationToDefinitionMap = {}
for fieldInfo, srcLoc in definitionToSourceLocationMap.items():
    sourceLocationToDefinitionMap.setdefault(srcLoc, []).append(fieldInfo)
for definitions in sourceLocationToDefinitionMap.values():
    if len(definitions) > 1:
        for d in definitions:
            definitionSet.remove(d)


def startswith_one_of( srcLoc, fileSet ):
    """Return True if srcLoc starts with any of the prefixes in fileSet."""
    # str.startswith only accepts a str or a tuple of str.
    return srcLoc.startswith(tuple(fileSet))


# Source locations (files or whole directories) whose untouched constants are
# deliberately not reported.
untouchedExclusions = (
    # this is all representations of on-disk or external data structures
    "basic/source/inc/filefmt.hxx",
    "basic/source/sbx/sbxscan.cxx",
    "cppcanvas/source/mtfrenderer/emfpbrush.hxx",
    "filter/source/graphicfilter/ipcd/ipcd.cxx",
    "filter/source/t602/t602filter.hxx",
    "include/filter/msfilter/escherex.hxx",
    "include/filter/msfilter/svdfppt.hxx",
    "hwpfilter/",
    "include/registry/types.hxx",
    "lotuswordpro/",
    "include/sot/formats.hxx",
    "include/svx/msdffdef.hxx",
    "sc/source/filter/inc/xlconst.hxx",
    "include/unotools/saveopt.hxx",
    "sw/inc/fldbas.hxx",
    "sw/source/filter/inc/wwstyles.hxx",
    "sw/source/filter/ww8/fields.hxx",
    "vcl/source/fontsubset/cff.cxx",
    "include/vcl/settings.hxx", # stored in a setting, can't remove it without potentially triggering UBSAN

    # unit test code
    "cppu/source/uno/check.cxx",

    # general weird nonsense going on
    # (these two entries used to be fused into one string by a missing comma)
    "framework/inc/helper/mischelper.hxx",
    "include/sfx2/shell.hxx",

    # Windows or OSX only
    "include/canvas/rendering/icolorbuffer.hxx",
    "include/vcl/commandevent.hxx",
    "vcl/inc/unx/gendata.hxx",

    # must match some other enum
    "include/editeng/bulletitem.hxx",
    "include/editeng/svxenum.hxx",
    "include/formula/opcode.hxx",
    "include/i18nutil/paper.hxx",
    "include/oox/drawingml/shapepropertymap.hxx",
    "include/svl/nfkeytab.hx",
    "include/svl/zforlist.hxx",

    # represents constants from an external API
    "opencl/inc/opencl_device_selection.h",
    "vcl/inc/sft.hxx",
    "vcl/inc/unx/XIM.h",
    "vcl/unx/gtk/xid_fullscreen_on_all_monitors.c",
)

# Defined constants that are neither read nor written anywhere.
untouchedSet = set()
for d in definitionSet:
    if d in readSet or d in writeSet:
        continue
    srcLoc = definitionToSourceLocationMap[d]
    if startswith_one_of(srcLoc, untouchedExclusions):
        continue
    # constants with these names are never reported as untouched
    if d[1] == "UNKNOWN" or d[1].endswith(("NONE", "None", "EQUAL_SIZE")):
        continue
    untouchedSet.add((d[0] + " " + d[1], srcLoc))

# Constants that are written but never read.
writeonlySet = set()
for d in writeSet:
    if d in readSet:
        continue
    # Uses of constants whose definition was skipped above (external/,
    # workdir/, template duplicates) have no usable source location.
    if d not in definitionSet:
        continue
    srcLoc = definitionToSourceLocationMap[d]
    writeonlySet.add((d[0] + " " + d[1], srcLoc))
# Constants that are read but never written.
readonlySet = set()
for d in readSet:
    if d in writeSet:
        continue
    # Uses of constants whose definition was skipped while parsing
    # (external/, workdir/, template duplicates) have no usable source
    # location; looking them up would raise a KeyError.
    if d not in definitionSet:
        continue
    srcLoc = definitionToSourceLocationMap[d]
    readonlySet.add((d[0] + " " + d[1], srcLoc))


# sort the results using a "natural order" so sequences like
# [item1,item2,item10] sort nicely
def natural_sort_key(s, _nsre=re.compile('([0-9]+)')):
    """Return a sort key that compares runs of digits in s numerically.

    s is split on runs of ASCII digits; digit runs become ints and all
    other pieces are lower-cased.  _nsre is the precompiled splitting regex
    and is not meant to be passed by callers.
    """
    return [int(text) if text.isdigit() else text.lower()
            for text in _nsre.split(s)]


def _write_report(path, entries):
    """Write the (description, source location) entries to path.

    Entries are sorted by source location in natural order; each one is
    emitted as the location line followed by the indented description.
    """
    ordered = sorted(entries, key=lambda v: natural_sort_key(v[1]))
    with open(path, "wt") as f:
        for description, location in ordered:
            f.write(location + "\n")
            f.write(" " + description + "\n")


# print out the results, sorted by name and line number
_write_report("loplugin.unusedenumconstants.report-untouched", untouchedSet)
_write_report("loplugin.unusedenumconstants.report-writeonly", writeonlySet)
_write_report("loplugin.unusedenumconstants.report-readonly", readonlySet)