From 22326cb4f5cbe230ef1e35d9162eefec2d8d1ada Mon Sep 17 00:00:00 2001 From: Noel Grandin Date: Fri, 3 Feb 2017 11:28:17 +0200 Subject: new loplugin unusedenumvalues Change-Id: I03d684fc35238a45a6d99855e5ee6d3a5e33740d --- compilerplugins/clang/unusedenumconstants.cxx | 198 ++++++++++++++++++++++++++ compilerplugins/clang/unusedenumconstants.py | 158 ++++++++++++++++++++ 2 files changed, 356 insertions(+) create mode 100644 compilerplugins/clang/unusedenumconstants.cxx create mode 100755 compilerplugins/clang/unusedenumconstants.py (limited to 'compilerplugins') diff --git a/compilerplugins/clang/unusedenumconstants.cxx b/compilerplugins/clang/unusedenumconstants.cxx new file mode 100644 index 000000000000..c26589493f23 --- /dev/null +++ b/compilerplugins/clang/unusedenumconstants.cxx @@ -0,0 +1,198 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + */ + +#include +#include +#include +#include +#include +#include "plugin.hxx" +#include "compat.hxx" + +/** +This looks for unused enum constants + +We search for 3 things +(a) constants that are declared but never used +(b) constants only used in a "read" fashion i.e. we compare stuff against them, but we never store a value anywhere +(c) constants only used in a "write" fashion i.e. we store a value, but never check for that value + +(a) is fairly reliable but (b) and (c) will need some checking before acting on. + +Be warned that it produces around 5G of log file. + +The process goes something like this: + $ make check + $ make FORCE_COMPILE_ALL=1 COMPILER_PLUGIN_TOOL='unusedenumconstants' check + $ ./compilerplugins/clang/unusedenumconstants.py + +Note that the actual process may involve a fair amount of undoing, hand editing, and general messing around +to get it to work :-) + +*/ + +namespace { + +struct MyFieldInfo +{ + std::string parentClass; + std::string fieldName; + std::string sourceLocation; +}; +bool operator < (const MyFieldInfo &lhs, const MyFieldInfo &rhs) +{ + return std::tie(lhs.parentClass, lhs.fieldName) + < std::tie(rhs.parentClass, rhs.fieldName); +} + + +// try to limit the voluminous output a little +static std::set definitionSet; +static std::set writeSet; +static std::set readSet; + + +class UnusedEnumConstants: + public RecursiveASTVisitor, public loplugin::Plugin +{ +public: + explicit UnusedEnumConstants(InstantiationData const & data): Plugin(data) {} + + virtual void run() override + { + TraverseDecl(compiler.getASTContext().getTranslationUnitDecl()); + + // dump all our output in one write call - this is to try and limit IO "crosstalk" between multiple processes + // writing to the same logfile + std::string output; + for (const MyFieldInfo & s : definitionSet) + output += "definition:\t" + s.parentClass + "\t" + s.fieldName + "\t" + s.sourceLocation + "\n"; + for (const MyFieldInfo & s : writeSet) + output += "write:\t" + s.parentClass + "\t" + s.fieldName + "\n"; + for (const MyFieldInfo & s : readSet) + output += "read:\t" + s.parentClass + "\t" + s.fieldName + "\n"; + ofstream myfile; + myfile.open( SRCDIR "/loplugin.unusedenumconstants.log", ios::app | ios::out); + myfile << output; + myfile.close(); + } + + bool shouldVisitTemplateInstantiations () const { return true; } + bool shouldVisitImplicitCode() const { return true; } + + bool VisitEnumConstantDecl( const EnumConstantDecl * ); + bool VisitDeclRefExpr( const DeclRefExpr * ); +private: + MyFieldInfo niceName(const EnumConstantDecl*); +}; + +MyFieldInfo UnusedEnumConstants::niceName(const EnumConstantDecl* enumConstantDecl) +{ + MyFieldInfo aInfo; + + aInfo.parentClass = enumConstantDecl->getType().getAsString(); + aInfo.fieldName = enumConstantDecl->getNameAsString(); + + SourceLocation expansionLoc = compiler.getSourceManager().getExpansionLoc( enumConstantDecl->getLocation() ); + StringRef name = compiler.getSourceManager().getFilename(expansionLoc); + aInfo.sourceLocation = std::string(name.substr(strlen(SRCDIR)+1)) + ":" + std::to_string(compiler.getSourceManager().getSpellingLineNumber(expansionLoc)); + normalizeDotDotInFilePath(aInfo.sourceLocation); + + return aInfo; +} + +bool UnusedEnumConstants::VisitEnumConstantDecl( const EnumConstantDecl* enumConstantDecl ) +{ + enumConstantDecl = enumConstantDecl->getCanonicalDecl(); + if (ignoreLocation( enumConstantDecl )) { + return true; + } + // ignore stuff that forms part of the stable URE interface + if (isInUnoIncludeFile(compiler.getSourceManager().getSpellingLoc(enumConstantDecl->getLocation()))) { + return true; + } + + definitionSet.insert(niceName(enumConstantDecl)); + return true; +} + +bool UnusedEnumConstants::VisitDeclRefExpr( const DeclRefExpr* declRefExpr ) +{ + auto enumConstantDecl = dyn_cast(declRefExpr->getDecl()); + if (!enumConstantDecl) { + return true; + } + enumConstantDecl = enumConstantDecl->getCanonicalDecl(); + if (ignoreLocation(enumConstantDecl)) { + return true; + } + // ignore stuff that forms part of the stable URE interface + if (isInUnoIncludeFile(compiler.getSourceManager().getSpellingLoc(enumConstantDecl->getLocation()))) { + return true; + } + + const Stmt * parent = declRefExpr; +try_again: + parent = parentStmt(parent); + bool bWrite = false; + bool bRead = false; + bool bDump = false; + if (!parent) + { + // Could probably do better here. + // Sometimes this is a constructor-initialiser-expression, so just make a pessimistic assumption. + bWrite = true; + } else if (isa(parent) || isa(parent) || isa(parent) + || isa(parent) || isa(parent) + || isa(parent) + || isa(parent)) + { + bWrite = true; + } else if (isa(parent) || isa(parent)) + { + bRead = true; + } else if (const BinaryOperator * binaryOp = dyn_cast(parent)) + { + if (BinaryOperator::isAssignmentOp(binaryOp->getOpcode())) { + bWrite = true; + } else { + bRead = true; + } + } else if (isa(parent) || isa(parent) + || isa(parent) || isa(parent) + || isa(parent) + || isa(parent)) + { + goto try_again; + } else { + bDump = true; + } + + // to let me know if I missed something + if (bDump) { + parent->dump(); + report( DiagnosticsEngine::Warning, + "unhandled clang AST node type", + parent->getLocStart()); + } + + if (bWrite) { + writeSet.insert(niceName(enumConstantDecl)); + } + if (bRead) { + readSet.insert(niceName(enumConstantDecl)); + } + return true; +} + +loplugin::Plugin::Registration< UnusedEnumConstants > X("unusedenumconstants", true); + +} + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/compilerplugins/clang/unusedenumconstants.py b/compilerplugins/clang/unusedenumconstants.py new file mode 100755 index 000000000000..5a9fe362edb2 --- /dev/null +++ b/compilerplugins/clang/unusedenumconstants.py @@ -0,0 +1,158 @@ +#!/usr/bin/python + +import sys +import re +import io + +definitionSet = set() +definitionToSourceLocationMap = dict() +readSet = set() +writeSet = set() +sourceLocationSet = set() + +# clang does not always use exactly the same numbers in the type-parameter vars it generates +# so I need to substitute them to ensure we can match correctly. +normalizeTypeParamsRegex = re.compile(r"type-parameter-\d+-\d+") +def normalizeTypeParams( line ): + return normalizeTypeParamsRegex.sub("type-parameter-?-?", line) + +def parseFieldInfo( tokens ): + if len(tokens) == 3: + return (normalizeTypeParams(tokens[1]), tokens[2]) + else: + return (normalizeTypeParams(tokens[1]), "") + +# The parsing here is designed to avoid grabbing stuff which is mixed in from gbuild. +# I have not yet found a way of suppressing the gbuild output. +with io.open("loplugin.unusedenumconstants.log", "rb", buffering=1024*1024) as txt: + for line in txt: + tokens = line.strip().split("\t") + if tokens[0] == "definition:": + fieldInfo = (normalizeTypeParams(tokens[1]), tokens[2]) + srcLoc = tokens[3] + # ignore external source code + if (srcLoc.startswith("external/")): + continue + # ignore build folder + if (srcLoc.startswith("workdir/")): + continue + definitionSet.add(fieldInfo) + definitionToSourceLocationMap[fieldInfo] = srcLoc + elif tokens[0] == "read:": + readSet.add(parseFieldInfo(tokens)) + elif tokens[0] == "write:": + writeSet.add(parseFieldInfo(tokens)) + else: + print( "unknown line: " + line) + +# Invert the definitionToSourceLocationMap +# If we see more than one method at the same sourceLocation, it's being autogenerated as part of a template +# and we should just ignore +sourceLocationToDefinitionMap = {} +for k, v in definitionToSourceLocationMap.iteritems(): + sourceLocationToDefinitionMap[v] = sourceLocationToDefinitionMap.get(v, []) + sourceLocationToDefinitionMap[v].append(k) +for k, definitions in sourceLocationToDefinitionMap.iteritems(): + if len(definitions) > 1: + for d in definitions: + definitionSet.remove(d) + +def startswith_one_of( srcLoc, fileSet ): + for f in fileSet: + if srcLoc.startswith(f): + return True; + return False; + +untouchedSet = set() +for d in definitionSet: + if d in readSet or d in writeSet: + continue + srcLoc = definitionToSourceLocationMap[d]; + if startswith_one_of(srcLoc, + [ + # this is all representations of on-disk or external data structures + "basic/source/inc/filefmt.hxx", + "basic/source/sbx/sbxscan.cxx", + "cppcanvas/source/mtfrenderer/emfpbrush.hxx", + "filter/source/graphicfilter/ipcd/ipcd.cxx", + "filter/source/t602/t602filter.hxx", + "include/filter/msfilter/escherex.hxx", + "include/filter/msfilter/svdfppt.hxx", + "hwpfilter/", + "include/registry/types.hxx", + "lotuswordpro/", + "include/sot/formats.hxx", + "include/svx/msdffdef.hxx", + "sc/source/filter/inc/xlconst.hxx", + "include/unotools/saveopt.hxx", + "sw/inc/fldbas.hxx", + "sw/source/filter/inc/wwstyles.hxx", + "sw/source/filter/ww8/fields.hxx", + "vcl/source/fontsubset/cff.cxx", + # unit test code + "cppu/source/uno/check.cxx", + # general weird nonsense going on + "framework/inc/helper/mischelper.hxx" + "include/sfx2/shell.hxx", + # Windows or OSX only + "include/canvas/rendering/icolorbuffer.hxx", + "include/vcl/commandevent.hxx", + # must match some other enum + "include/editeng/bulletitem.hxx", + "include/editeng/svxenum.hxx", + "include/formula/opcode.hxx", + "include/i18nutil/paper.hxx", + "include/oox/drawingml/shapepropertymap.hxx", + "include/svl/nfkeytab.hx", + "include/svl/zforlist.hxx", + # represents constants from an external API + "opencl/inc/opencl_device_selection.h", + "vcl/inc/sft.hxx", + "vcl/inc/unx/XIM.h", + "vcl/unx/gtk/xid_fullscreen_on_all_monitors.c", + ]): + continue + + if d[1] == "UNKNOWN" or d[1].endswith("NONE") or d[1].endswith("None") or d[1].endswith("EQUAL_SIZE"): continue + + untouchedSet.add((d[0] + " " + d[1], srcLoc)) + +writeonlySet = set() +for d in writeSet: + if d in readSet: + continue + srcLoc = definitionToSourceLocationMap[d]; + writeonlySet.add((d[0] + " " + d[1], srcLoc)) + +readonlySet = set() +for d in readSet: + if d in writeSet: + continue + srcLoc = definitionToSourceLocationMap[d]; + readonlySet.add((d[0] + " " + d[1], srcLoc)) + +# sort the results using a "natural order" so sequences like [item1,item2,item10] sort nicely +def natural_sort_key(s, _nsre=re.compile('([0-9]+)')): + return [int(text) if text.isdigit() else text.lower() + for text in re.split(_nsre, s)] + +# sort results by name and line number +tmp1list = sorted(untouchedSet, key=lambda v: natural_sort_key(v[1])) +tmp2list = sorted(writeonlySet, key=lambda v: natural_sort_key(v[1])) +tmp3list = sorted(readonlySet, key=lambda v: natural_sort_key(v[1])) + +# print out the results +with open("loplugin.unusedenumconstants.report-untouched", "wt") as f: + for t in tmp1list: + f.write( t[1] + "\n" ) + f.write( " " + t[0] + "\n" ) +with open("loplugin.unusedenumconstants.report-writeonly", "wt") as f: + for t in tmp2list: + f.write( t[1] + "\n" ) + f.write( " " + t[0] + "\n" ) +with open("loplugin.unusedenumconstants.report-readonly", "wt") as f: + for t in tmp3list: + f.write( t[1] + "\n" ) + f.write( " " + t[0] + "\n" ) + + -- cgit