From 8d861bd7028a6ac3cf21ca9758d1f72eef2ac05c Mon Sep 17 00:00:00 2001 From: Noel Grandin Date: Thu, 23 Jun 2016 13:12:53 +0200 Subject: new loplugin: singlevalfields look for fields that only have a single constant value assigned to them Change-Id: Iafcd37fdb8a8119bbc00f92981a1a01badf9c5a2 --- compilerplugins/clang/constantparam.cxx | 3 +- compilerplugins/clang/singlevalfields.cxx | 411 ++++++++++++++++++++++++++++++ compilerplugins/clang/singlevalfields.py | 69 +++++ 3 files changed, 481 insertions(+), 2 deletions(-) create mode 100644 compilerplugins/clang/singlevalfields.cxx create mode 100755 compilerplugins/clang/singlevalfields.py (limited to 'compilerplugins') diff --git a/compilerplugins/clang/constantparam.cxx b/compilerplugins/clang/constantparam.cxx index 99db54554dbd..54fe00c7477e 100644 --- a/compilerplugins/clang/constantparam.cxx +++ b/compilerplugins/clang/constantparam.cxx @@ -16,8 +16,7 @@ #include "compat.hxx" /* - Find methods with default params, where the callers never specify the default param i.e. - might as well remove it. + Find params on methods where the param is only ever passed as a single constant value. The process goes something like this: $ make check diff --git a/compilerplugins/clang/singlevalfields.cxx b/compilerplugins/clang/singlevalfields.cxx new file mode 100644 index 000000000000..b1c7cc80254c --- /dev/null +++ b/compilerplugins/clang/singlevalfields.cxx @@ -0,0 +1,411 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + */ + +#include +#include +#include +#include +#include +#include "plugin.hxx" +#include "compat.hxx" + +/** +Look for fields that are only ever assigned a single constant value. + +We dmp a list of values assigned to fields, and a list of field definitions. +Then we will post-process the 2 lists and find the set of interesting fields. + +Be warned that it produces around 5G of log file. + +The process goes something like this: + $ make check + $ make FORCE_COMPILE_ALL=1 COMPILER_PLUGIN_TOOL='singlevalfields' check + $ ./compilerplugins/clang/singlevalfields.py + +Note that the actual process may involve a fair amount of undoing, hand editing, and general messing around +to get it to work :-) + +@TODO we don't spot fields that have been zero-initialised via calloc or rtl_allocateZeroMemory + +*/ + +namespace { + +struct MyFieldInfo +{ + std::string parentClass; + std::string fieldName; + std::string sourceLocation; +}; +bool operator < (const MyFieldInfo &lhs, const MyFieldInfo &rhs) +{ + return std::tie(lhs.parentClass, lhs.fieldName) + < std::tie(rhs.parentClass, rhs.fieldName); +} + +struct MyFieldAssignmentInfo : public MyFieldInfo +{ + std::string value; +}; + +bool operator < (const MyFieldAssignmentInfo &lhs, const MyFieldAssignmentInfo &rhs) +{ + return std::tie(lhs.parentClass, lhs.fieldName, lhs.value) + < std::tie(rhs.parentClass, rhs.fieldName, rhs.value); +} + + +// try to limit the voluminous output a little +static std::set assignedSet; +static std::set definitionSet; + + +class SingleValFields: + public RecursiveASTVisitor, public loplugin::Plugin +{ +public: + explicit SingleValFields(InstantiationData const & data): Plugin(data) {} + + virtual void run() override + { + TraverseDecl(compiler.getASTContext().getTranslationUnitDecl()); + + // dump all our output in one write call - this is to try and limit IO "crosstalk" between multiple processes + // writing to the same logfile + std::string output; + for (const MyFieldAssignmentInfo & s : assignedSet) + output += "asgn:\t" + s.parentClass + "\t" + s.fieldName + "\t" + s.value + "\n"; + for (const MyFieldInfo & s : definitionSet) + output += "defn:\t" + s.parentClass + "\t" + s.fieldName + "\t" + s.sourceLocation + "\n"; + ofstream myfile; + myfile.open( SRCDIR "/singlevalfields.log", ios::app | ios::out); + myfile << output; + myfile.close(); + } + + bool shouldVisitTemplateInstantiations () const { return true; } + + bool VisitFieldDecl( const FieldDecl* ); + bool VisitMemberExpr( const MemberExpr* ); + bool VisitCXXConstructorDecl( const CXXConstructorDecl* ); +private: + void niceName(const FieldDecl*, MyFieldInfo&); + std::string fullyQualifiedName(const FunctionDecl*); + std::string getExprValue(const Expr*); + bool isInterestingType(const QualType&); + const FunctionDecl* get_top_FunctionDecl_from_Stmt(const Stmt&); +}; + +void SingleValFields::niceName(const FieldDecl* fieldDecl, MyFieldInfo& aInfo) +{ + aInfo.parentClass = fieldDecl->getParent()->getQualifiedNameAsString(); + aInfo.fieldName = fieldDecl->getNameAsString(); + + SourceLocation expansionLoc = compiler.getSourceManager().getExpansionLoc( fieldDecl->getLocation() ); + StringRef name = compiler.getSourceManager().getFilename(expansionLoc); + aInfo.sourceLocation = std::string(name.substr(strlen(SRCDIR)+1)) + ":" + std::to_string(compiler.getSourceManager().getSpellingLineNumber(expansionLoc)); +} + +std::string SingleValFields::fullyQualifiedName(const FunctionDecl* functionDecl) +{ + std::string ret = compat::getReturnType(*functionDecl).getCanonicalType().getAsString(); + ret += " "; + if (isa(functionDecl)) { + const CXXRecordDecl* recordDecl = dyn_cast(functionDecl)->getParent(); + ret += recordDecl->getQualifiedNameAsString(); + ret += "::"; + } + ret += functionDecl->getNameAsString() + "("; + bool bFirst = true; + for (const ParmVarDecl *pParmVarDecl : functionDecl->params()) { + if (bFirst) + bFirst = false; + else + ret += ","; + ret += pParmVarDecl->getType().getCanonicalType().getAsString(); + } + ret += ")"; + if (isa(functionDecl) && dyn_cast(functionDecl)->isConst()) { + ret += " const"; + } + + return ret; +} + +bool SingleValFields::VisitFieldDecl( const FieldDecl* fieldDecl ) +{ + fieldDecl = fieldDecl->getCanonicalDecl(); + const FieldDecl* canonicalDecl = fieldDecl; + + if( ignoreLocation( fieldDecl ) || !isInterestingType(fieldDecl->getType()) ) + return true; + + MyFieldInfo aInfo; + niceName(canonicalDecl, aInfo); + definitionSet.insert(aInfo); + return true; +} + +bool SingleValFields::VisitCXXConstructorDecl( const CXXConstructorDecl* decl ) +{ + if( ignoreLocation( decl ) ) + return true; + + // doesn't count as a write to fields because it's self->self + if (decl->isCopyOrMoveConstructor()) + return true; + + for(auto it = decl->init_begin(); it != decl->init_end(); ++it) + { + const CXXCtorInitializer* init = *it; + const FieldDecl* fieldDecl = init->getMember(); + if( !fieldDecl || !isInterestingType(fieldDecl->getType()) ) + continue; + MyFieldAssignmentInfo aInfo; + niceName(fieldDecl, aInfo); + aInfo.value = getExprValue(init->getInit()); + assignedSet.insert(aInfo); + } + return true; +} + +const Decl* get_DeclContext_from_Stmt(ASTContext& context, const Stmt& stmt) +{ + auto it = context.getParents(stmt).begin(); + + if (it == context.getParents(stmt).end()) + return nullptr; + + const Decl *aDecl = it->get(); + if (aDecl) + return aDecl; + + const Stmt *aStmt = it->get(); + if (aStmt) + return get_DeclContext_from_Stmt(context, *aStmt); + + return nullptr; +} + +const FunctionDecl* SingleValFields::get_top_FunctionDecl_from_Stmt(const Stmt& stmt) +{ + const Decl *decl = get_DeclContext_from_Stmt(compiler.getASTContext(), stmt); + if (decl) + return static_cast(decl->getNonClosureContext()); + + return nullptr; +} + + +bool SingleValFields::VisitMemberExpr( const MemberExpr* memberExpr ) +{ + const ValueDecl* decl = memberExpr->getMemberDecl(); + const FieldDecl* fieldDecl = dyn_cast(decl); + if (!fieldDecl) { + return true; + } + + if (ignoreLocation(memberExpr) || !isInterestingType(fieldDecl->getType())) + return true; + + const CXXMethodDecl* methodDecl = dyn_cast_or_null(get_top_FunctionDecl_from_Stmt(*memberExpr)); + if (methodDecl && (methodDecl->isCopyAssignmentOperator() || methodDecl->isMoveAssignmentOperator() + || dyn_cast(methodDecl))) + return true; + + // walk up the tree until we find something interesting + const Stmt* child = memberExpr; + const Stmt* parent = parentStmt(memberExpr); + bool bPotentiallyAssignedTo = false; + bool bDump = false; + std::string assignValue; + do { + // check for field being accessed by a reference variable e.g. Foo& f = m.foo; + auto parentsList = compiler.getASTContext().getParents(*child); + auto it = parentsList.begin(); + if (it != parentsList.end()) { + const VarDecl *varDecl = it->get(); + if (varDecl) { + QualType qt = varDecl->getType().getDesugaredType(compiler.getASTContext()); + if (!qt.isConstQualified() && qt->isReferenceType()) { + assignValue = "?"; + bPotentiallyAssignedTo = true; + break; + } + } + } + + if (!parent) { + return true; + } + if (isa(parent) || isa(parent) || isa(parent) || isa(parent) + || isa(parent)) + { + child = parent; + parent = parentStmt(parent); + } + else if (isa(parent)) + { + const UnaryOperator* unaryOperator = dyn_cast(parent); + int x = unaryOperator->getOpcode(); + if (x == UO_AddrOf || x == UO_PostInc || x == UO_PostDec || x == UO_PreInc || x == UO_PreDec) { + assignValue = "?"; + bPotentiallyAssignedTo = true; + break; + } + child = parent; + parent = parentStmt(parent); + } + else if (isa(parent)) + { + const CallExpr* callExpr = dyn_cast(parent); + if (callExpr->getCallee() == child) { + break; + } + const FunctionDecl* functionDecl; + if (isa(callExpr)) { + functionDecl = dyn_cast(callExpr)->getMethodDecl(); + } + else { + functionDecl = callExpr->getDirectCallee(); + } + if (!functionDecl) { + break; + } + bool bFound = false; + for (unsigned i = 0; i < callExpr->getNumArgs(); ++i) { + if (i >= functionDecl->getNumParams()) // can happen in template code + break; + if (callExpr->getArg(i) == child) { + const ParmVarDecl* parmVarDecl = functionDecl->getParamDecl(i); + QualType qt = parmVarDecl->getType().getDesugaredType(compiler.getASTContext()); + if (!qt.isConstQualified() && qt->isReferenceType()) { + assignValue = "?"; + bPotentiallyAssignedTo = true; + } + bFound = true; + break; + } + } + break; + } + else if (isa(parent)) + { + const CXXConstructExpr* consExpr = dyn_cast(parent); + const CXXConstructorDecl* consDecl = consExpr->getConstructor(); + bool bFound = false; + for (unsigned i = 0; i < consExpr->getNumArgs(); ++i) { + if (i >= consDecl->getNumParams()) // can happen in template code + break; + if (consExpr->getArg(i) == child) { + const ParmVarDecl* parmVarDecl = consDecl->getParamDecl(i); + QualType qt = parmVarDecl->getType().getDesugaredType(compiler.getASTContext()); + if (!qt.isConstQualified() && qt->isReferenceType()) { + assignValue = "?"; + bPotentiallyAssignedTo = true; + } + bFound = true; + break; + } + } + break; + } + else if (isa(parent)) + { + const BinaryOperator* binaryOp = dyn_cast(parent); + if ( binaryOp->getLHS() != child ) { + // do nothing + } + else if ( binaryOp->getOpcode() == BO_Assign ) { + assignValue = getExprValue(binaryOp->getRHS()); + bPotentiallyAssignedTo = true; + } else { + assignValue = "?"; + bPotentiallyAssignedTo = true; + } + break; + } + else if ( isa(parent) + || isa(parent) || isa(parent) || isa(parent) + || isa(parent) || isa(parent) + || isa(parent) + || isa(parent) + || isa(parent) + || isa(parent) + || isa(parent) + || isa(parent) + || isa(parent) + || isa(parent) + || isa(parent) + || isa(parent) + || isa(parent) + || isa(parent) + || isa(parent) //??? + || isa(parent) + || isa(parent) + ) + { + break; + } + else { + bPotentiallyAssignedTo = true; + bDump = true; + break; + } + } while (true); + if (bDump) + { + report( + DiagnosticsEngine::Warning, + "oh dear, what can the matter be?", + memberExpr->getLocStart()) + << memberExpr->getSourceRange(); + parent->dump(); + } + if (bPotentiallyAssignedTo) + { + MyFieldAssignmentInfo aInfo; + niceName(fieldDecl, aInfo); + aInfo.value = assignValue; + assignedSet.insert(aInfo); + } + + return true; +} + +bool SingleValFields::isInterestingType(const QualType& qt) { + return qt.isCXX11PODType(compiler.getASTContext()); +} + +std::string SingleValFields::getExprValue(const Expr* arg) +{ + if (!arg) + return "?"; + arg = arg->IgnoreParenCasts(); +// arg->dump(); + // workaround bug in clang + if (isa(arg)) + return "?"; + // ignore this, it seems to trigger an infinite recursion + if (isa(arg)) { + return "?"; + } + APSInt x1; + if (arg->EvaluateAsInt(x1, compiler.getASTContext())) + { + return x1.toString(10); + } + return "?"; +} + +loplugin::Plugin::Registration< SingleValFields > X("singlevalfields", false); + +} + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/compilerplugins/clang/singlevalfields.py b/compilerplugins/clang/singlevalfields.py new file mode 100755 index 000000000000..523ffa5520f2 --- /dev/null +++ b/compilerplugins/clang/singlevalfields.py @@ -0,0 +1,69 @@ +#!/usr/bin/python + +import sys +import re +import io + +definitionToSourceLocationMap = dict() # dict of tuple(parentClass, fieldName) to sourceLocation +fieldAssignDict = dict() # dict of tuple(parentClass, fieldName) to (set of values) + +# clang does not always use exactly the same numbers in the type-parameter vars it generates +# so I need to substitute them to ensure we can match correctly. +normalizeTypeParamsRegex = re.compile(r"type-parameter-\d+-\d+") +def normalizeTypeParams( line ): + return normalizeTypeParamsRegex.sub("type-parameter-?-?", line) + +# reading as binary (since we known it is pure ascii) is much faster than reading as unicode +with io.open("singlevalfields.log", "rb", buffering=1024*1024) as txt: + for line in txt: + if line.startswith("defn:\t"): + idx1 = line.find("\t") + idx2 = line.find("\t",idx1+1) + idx3 = line.find("\t",idx2+1) + parentClass = normalizeTypeParams(line[idx1+1:idx2]) + fieldName = normalizeTypeParams(line[idx2+1:idx3]) + sourceLocation = line[idx3+1:].strip() + fieldInfo = (parentClass, fieldName) + definitionToSourceLocationMap[fieldInfo] = sourceLocation + elif line.startswith("asgn:\t"): + idx1 = line.find("\t") + idx2 = line.find("\t",idx1+1) + idx3 = line.find("\t",idx2+1) + parentClass = normalizeTypeParams(line[idx1+1:idx2]) + fieldName = normalizeTypeParams(line[idx2+1:idx3]) + assignValue = line[idx3+1:].strip() + fieldInfo = (parentClass, fieldName) + if not fieldInfo in fieldAssignDict: + fieldAssignDict[fieldInfo] = set() + fieldAssignDict[fieldInfo].add(assignValue) + +tmp1list = list() +for fieldInfo, assignValues in fieldAssignDict.iteritems(): + if len(assignValues) != 1: + continue + if "?" in assignValues: + continue + # if it contains anything other than this set, ignore it + if len(assignValues - set(["0", "1", "-1", "nullptr"])) > 0: + continue + v0 = fieldInfo[0] + " " + fieldInfo[1] + v1 = (",".join(assignValues)) + v2 = "" + if fieldInfo in definitionToSourceLocationMap: + v2 = definitionToSourceLocationMap[fieldInfo] + tmp1list.append((v0,v1,v2)) + +# sort results by filename:lineno +def natural_sort_key(s, _nsre=re.compile('([0-9]+)')): + return [int(text) if text.isdigit() else text.lower() + for text in re.split(_nsre, s)] +tmp1list.sort(key=lambda v: natural_sort_key(v[2])) + +# print out the results +with open("loplugin.singlevalfields", "wt") as f: + for v in tmp1list: + f.write(v[2] + "\n") + f.write(" " + v[0] + "\n") + f.write(" " + v[1] + "\n") + + -- cgit