diff options
author | Noel Grandin <noel.grandin@collabora.co.uk> | 2016-10-17 15:21:00 +0200 |
---|---|---|
committer | Noel Grandin <noel.grandin@collabora.co.uk> | 2016-10-18 08:51:07 +0200 |
commit | a18d92bca8ff50a34cacdd978467dc2c61f16f31 (patch) | |
tree | 32898b1d696f117ed515a8a006b0c3d7d14c316b /compilerplugins | |
parent | 296f8a579181a4ad0555e2e5eaa84db3ec2fafd1 (diff) |
new loplugin inlineablemethods
look for methods that are:
(*) non-virtual
(*) only called once
(*) only called from inside their own class
(*) small i.e. < 40 characters
which are candidates for just having their code inlined
Change-Id: I0e9e8125d140282cdcdd2a77374059b17b2fcd7d
Diffstat (limited to 'compilerplugins')
-rw-r--r-- | compilerplugins/clang/inlineablemethods.cxx | 332 | ||||
-rwxr-xr-x | compilerplugins/clang/inlineablemethods.py | 148 |
2 files changed, 480 insertions, 0 deletions
diff --git a/compilerplugins/clang/inlineablemethods.cxx b/compilerplugins/clang/inlineablemethods.cxx new file mode 100644 index 000000000000..3dd58d352c45 --- /dev/null +++ b/compilerplugins/clang/inlineablemethods.cxx @@ -0,0 +1,332 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + */ + +#include <cassert> +#include <string> +#include <iostream> +#include <fstream> +#include <set> +#include <unordered_map> + + +#include "clang/AST/Attr.h" + +#include "plugin.hxx" +#include "compat.hxx" + +/** + Methods that are only called from inside their own class, and are only called from one spot + + + TODO if a method has only one call-site, and that call site is inside a constructor + then it's probably worth inlining, since it's probably an old method that was intended to be shared amongst + multiple constructors +*/ + +namespace { + +struct MyFuncInfo +{ + std::string access; + std::string returnType; + std::string nameAndParams; + std::string sourceLocation; + +}; +bool operator < (const MyFuncInfo &lhs, const MyFuncInfo &rhs) +{ + return std::tie(lhs.returnType, lhs.nameAndParams) + < std::tie(rhs.returnType, rhs.nameAndParams); +} + +// try to limit the voluminous output a little + +static std::unordered_map<std::string, MyFuncInfo> calledFromMap; +static std::set<MyFuncInfo> definitionSet; +static std::set<MyFuncInfo> calledFromOutsideSet; +static std::set<MyFuncInfo> largeFunctionSet; +static std::set<MyFuncInfo> addressOfSet; + + +class InlineableMethods: + public RecursiveASTVisitor<InlineableMethods>, public loplugin::Plugin +{ +public: + explicit InlineableMethods(InstantiationData const & data): Plugin(data) {} + + virtual void run() override + { + TraverseDecl(compiler.getASTContext().getTranslationUnitDecl()); + + // dump all our output in one write call - this is to try and limit IO "crosstalk" between multiple processes + // writing to the same logfile + + std::string output; + for (const MyFuncInfo & s : definitionSet) + output += "definition:\t" + s.access + "\t" + s.returnType + "\t" + s.nameAndParams + "\t" + s.sourceLocation + "\n"; + for (const MyFuncInfo & s : calledFromOutsideSet) + output += "outside:\t" + s.returnType + "\t" + s.nameAndParams + "\n"; + for (const std::pair<std::string,MyFuncInfo> & s : calledFromMap) + output += "calledFrom:\t" + s.first + + "\t" + s.second.returnType + "\t" + s.second.nameAndParams + "\n"; + for (const MyFuncInfo & s : largeFunctionSet) + output += "large:\t" + s.returnType + "\t" + s.nameAndParams + "\n"; + for (const MyFuncInfo & s : addressOfSet) + output += "addrof:\t" + s.returnType + "\t" + s.nameAndParams + "\n"; + ofstream myfile; + myfile.open( SRCDIR "/loplugin.inlineablemethods.log", ios::app | ios::out); + myfile << output; + myfile.close(); + } + + bool shouldVisitTemplateInstantiations () const { return true; } + bool shouldVisitImplicitCode() const { return true; } + + bool VisitFunctionDecl( const FunctionDecl* ); + bool VisitDeclRefExpr( const DeclRefExpr* ); + bool VisitMemberExpr( const MemberExpr* ); + // interception methods for FunctionDecl and all its subclasses + bool TraverseFunctionDecl( FunctionDecl* ); + bool TraverseCXXMethodDecl( CXXMethodDecl* ); + bool TraverseCXXConstructorDecl( CXXConstructorDecl* ); + bool TraverseCXXConversionDecl( CXXConversionDecl* ); + bool TraverseCXXDestructorDecl( CXXDestructorDecl* ); + +private: + MyFuncInfo niceName(const FunctionDecl* functionDecl); + std::string toString(SourceLocation loc); + void functionTouchedFromExpr( const FunctionDecl* calleeFunctionDecl, const Expr* expr ); + bool isCalleeFunctionInteresting( const FunctionDecl* ); + void logCalledFrom(SourceLocation calleeSite, const FunctionDecl* functionDecl); + + // I use traverse and a member variable because I cannot find a reliable way of walking back up the AST tree using the parentStmt() stuff + // TODO doesn't cope with nested functions + const FunctionDecl* mpTraversingFunction = nullptr; +}; + +MyFuncInfo InlineableMethods::niceName(const FunctionDecl* functionDecl) +{ + if (functionDecl->getInstantiatedFromMemberFunction()) + functionDecl = functionDecl->getInstantiatedFromMemberFunction(); + else if (functionDecl->getClassScopeSpecializationPattern()) + functionDecl = functionDecl->getClassScopeSpecializationPattern(); +// workaround clang-3.5 issue +#if CLANG_VERSION >= 30600 + else if (functionDecl->getTemplateInstantiationPattern()) + functionDecl = functionDecl->getTemplateInstantiationPattern(); +#endif + + MyFuncInfo aInfo; + switch (functionDecl->getAccess()) + { + case AS_public: aInfo.access = "public"; break; + case AS_private: aInfo.access = "private"; break; + case AS_protected: aInfo.access = "protected"; break; + default: aInfo.access = "unknown"; break; + } + if (!isa<CXXConstructorDecl>(functionDecl)) { + aInfo.returnType = compat::getReturnType(*functionDecl).getCanonicalType().getAsString(); + } else { + aInfo.returnType = ""; + } + + if (isa<CXXMethodDecl>(functionDecl)) { + const CXXRecordDecl* recordDecl = dyn_cast<CXXMethodDecl>(functionDecl)->getParent(); + aInfo.nameAndParams += recordDecl->getQualifiedNameAsString(); + aInfo.nameAndParams += "::"; + } + aInfo.nameAndParams += functionDecl->getNameAsString() + "("; + bool bFirst = true; + for (const ParmVarDecl *pParmVarDecl : compat::parameters(*functionDecl)) { + if (bFirst) + bFirst = false; + else + aInfo.nameAndParams += ","; + aInfo.nameAndParams += pParmVarDecl->getType().getCanonicalType().getAsString(); + } + aInfo.nameAndParams += ")"; + if (isa<CXXMethodDecl>(functionDecl) && dyn_cast<CXXMethodDecl>(functionDecl)->isConst()) { + aInfo.nameAndParams += " const"; + } + + aInfo.sourceLocation = toString( functionDecl->getLocation() ); + + return aInfo; +} + +std::string InlineableMethods::toString(SourceLocation loc) +{ + SourceLocation expansionLoc = compiler.getSourceManager().getExpansionLoc( loc ); + StringRef name = compiler.getSourceManager().getFilename(expansionLoc); + std::string sourceLocation = std::string(name.substr(strlen(SRCDIR)+1)) + ":" + std::to_string(compiler.getSourceManager().getSpellingLineNumber(expansionLoc)); + normalizeDotDotInFilePath(sourceLocation); + return sourceLocation; +} + +bool InlineableMethods::VisitFunctionDecl( const FunctionDecl* functionDecl ) +{ + const FunctionDecl* canonicalFunctionDecl = functionDecl->getCanonicalDecl(); + if (!isCalleeFunctionInteresting(canonicalFunctionDecl)) { + return true; + } + definitionSet.insert(niceName(canonicalFunctionDecl)); + + if (functionDecl->doesThisDeclarationHaveABody()) { + bool bLargeFunction = false; + if (const CompoundStmt* compoundStmt = dyn_cast<CompoundStmt>(functionDecl->getBody())) { + if (compoundStmt->size() > 1) { + bLargeFunction = true; + } + if (!bLargeFunction) { + auto s1 = compiler.getSourceManager().getCharacterData(compoundStmt->getLBracLoc()); + auto s2 = compiler.getSourceManager().getCharacterData(compoundStmt->getRBracLoc()); + bLargeFunction = (s2 - s1) > 40; + // any function that uses a parameter more than once + if (!bLargeFunction) { + StringRef bodyText(s1, s2-s1); + for (const ParmVarDecl* param : compat::parameters(*functionDecl)) { + StringRef name = param->getName(); + if (name.empty()) + continue; + size_t idx = bodyText.find(name); + if (idx != StringRef::npos && bodyText.find(name, idx+1) != StringRef::npos) { + bLargeFunction = true; + break; + } + } + } + } + } + if (bLargeFunction) { + largeFunctionSet.insert(niceName(canonicalFunctionDecl)); + } + } + return true; +} + +bool InlineableMethods::TraverseFunctionDecl( FunctionDecl* p ) +{ + mpTraversingFunction = p; + bool ret = RecursiveASTVisitor::TraverseFunctionDecl(p); + mpTraversingFunction = nullptr; + return ret; +} +bool InlineableMethods::TraverseCXXMethodDecl( CXXMethodDecl* p ) +{ + mpTraversingFunction = p; + bool ret = RecursiveASTVisitor::TraverseCXXMethodDecl(p); + mpTraversingFunction = nullptr; + return ret; +} +bool InlineableMethods::TraverseCXXConstructorDecl( CXXConstructorDecl* p ) +{ + mpTraversingFunction = p; + bool ret = RecursiveASTVisitor::TraverseCXXConstructorDecl(p); + mpTraversingFunction = nullptr; + return ret; +} +bool InlineableMethods::TraverseCXXConversionDecl( CXXConversionDecl* p ) +{ + mpTraversingFunction = p; + bool ret = RecursiveASTVisitor::TraverseCXXConversionDecl(p); + mpTraversingFunction = nullptr; + return ret; +} +bool InlineableMethods::TraverseCXXDestructorDecl( CXXDestructorDecl* p ) +{ + mpTraversingFunction = p; + bool ret = RecursiveASTVisitor::TraverseCXXDestructorDecl(p); + mpTraversingFunction = nullptr; + return ret; +} + +bool InlineableMethods::VisitMemberExpr( const MemberExpr* memberExpr ) +{ + const FunctionDecl* functionDecl = dyn_cast<FunctionDecl>(memberExpr->getMemberDecl()); + if (functionDecl) { + functionTouchedFromExpr(functionDecl, memberExpr); + } + return true; +} + +bool InlineableMethods::VisitDeclRefExpr( const DeclRefExpr* declRefExpr ) +{ + const FunctionDecl* functionDecl = dyn_cast<FunctionDecl>(declRefExpr->getDecl()); + if (functionDecl) { + functionTouchedFromExpr(functionDecl, declRefExpr); + } + return true; +} + +void InlineableMethods::functionTouchedFromExpr( const FunctionDecl* calleeFunctionDecl, const Expr* expr ) +{ + if (!mpTraversingFunction) { + return; + } + const FunctionDecl* canonicalFunctionDecl = calleeFunctionDecl->getCanonicalDecl(); + if (!isCalleeFunctionInteresting(canonicalFunctionDecl)) { + return; + } + + logCalledFrom(expr->getLocStart(), canonicalFunctionDecl); + + if (const UnaryOperator* unaryOp = dyn_cast_or_null<UnaryOperator>(parentStmt(expr))) { + if (unaryOp->getOpcode() == UO_AddrOf) { + addressOfSet.insert(niceName(canonicalFunctionDecl)); + } + } + + const CXXMethodDecl* calleeMethodDecl = dyn_cast<CXXMethodDecl>(calleeFunctionDecl); + const CXXMethodDecl* callsiteParentMethodDecl = dyn_cast<CXXMethodDecl>(mpTraversingFunction); + if (!callsiteParentMethodDecl + || calleeMethodDecl->getParent() != callsiteParentMethodDecl->getParent()) + { + calledFromOutsideSet.insert(niceName(canonicalFunctionDecl)); + } +} + +void InlineableMethods::logCalledFrom(SourceLocation calleeLoc, const FunctionDecl* functionDecl) +{ + functionDecl = functionDecl->getCanonicalDecl(); + while (functionDecl->getTemplateInstantiationPattern()) + functionDecl = functionDecl->getTemplateInstantiationPattern(); + calledFromMap.emplace(toString(calleeLoc), niceName(functionDecl)); +} + +bool InlineableMethods::isCalleeFunctionInteresting(const FunctionDecl* functionDecl) +{ + // ignore stuff that forms part of the stable URE interface + if (isInUnoIncludeFile(compiler.getSourceManager().getSpellingLoc( + functionDecl->getNameInfo().getLoc()))) { + return false; + } + if (isa<CXXDestructorDecl>(functionDecl)) { + return false; + } + if (functionDecl->isDeleted() || functionDecl->isDefaulted()) { + return false; + } + if (isa<CXXConstructorDecl>(functionDecl) && dyn_cast<CXXConstructorDecl>(functionDecl)->isCopyConstructor()) { + return false; + } + if (!functionDecl->getLocation().isValid() || ignoreLocation(functionDecl)) { + return false; + } + const CXXMethodDecl* methodDecl = dyn_cast<CXXMethodDecl>(functionDecl); + if (!methodDecl || methodDecl->isVirtual()) { + return false; + } + return true; +} + +loplugin::Plugin::Registration< InlineableMethods > X("inlineablemethods", true); + +} + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/compilerplugins/clang/inlineablemethods.py b/compilerplugins/clang/inlineablemethods.py new file mode 100755 index 000000000000..a9b75472b3a9 --- /dev/null +++ b/compilerplugins/clang/inlineablemethods.py @@ -0,0 +1,148 @@ +#!/usr/bin/python + +import sys +import re +import io + +# -------------------------------------------------------------------------------------------- +# globals +# -------------------------------------------------------------------------------------------- + +definitionSet = set() # set of tuple(return_type, name_and_params) +definitionToSourceLocationMap = dict() +calledFromDict = dict() +calledFromOutsideSet = set() +largeFunctionSet = set() # set of tuple(return_type, name_and_params) +addressOfSet = set() # set of tuple(return_type, name_and_params) + +# clang does not always use exactly the same numbers in the type-parameter vars it generates +# so I need to substitute them to ensure we can match correctly. +normalizeTypeParamsRegex = re.compile(r"type-parameter-\d+-\d+") +def normalizeTypeParams( line ): + return normalizeTypeParamsRegex.sub("type-parameter-?-?", line) + +# -------------------------------------------------------------------------------------------- +# primary input loop +# -------------------------------------------------------------------------------------------- + +# The parsing here is designed to avoid grabbing stuff which is mixed in from gbuild. +# I have not yet found a way of suppressing the gbuild output. +with io.open("loplugin.inlineablemethods.log", "rb", buffering=1024*1024) as txt: + for line in txt: + tokens = line.strip().split("\t") + if tokens[0] == "definition:": + access = tokens[1] + returnType = tokens[2] + nameAndParams = tokens[3] + sourceLocation = tokens[4] + funcInfo = (normalizeTypeParams(returnType), normalizeTypeParams(nameAndParams)) + definitionSet.add(funcInfo) + definitionToSourceLocationMap[funcInfo] = sourceLocation + elif tokens[0] == "calledFrom:": + calleeLocation = tokens[1] + returnType = tokens[2] + nameAndParams = tokens[3] + funcInfo = (normalizeTypeParams(returnType), normalizeTypeParams(nameAndParams)) + if not funcInfo in calledFromDict: + calledFromDict[funcInfo] = set() + calledFromDict[funcInfo].add(calleeLocation) + elif tokens[0] == "outside:": + returnType = tokens[1] + nameAndParams = tokens[2] + calledFromOutsideSet.add((normalizeTypeParams(returnType), normalizeTypeParams(nameAndParams))) + elif tokens[0] == "large:": + returnType = tokens[1] + nameAndParams = tokens[2] + largeFunctionSet.add((normalizeTypeParams(returnType), normalizeTypeParams(nameAndParams))) + elif tokens[0] == "addrof:": + returnType = tokens[1] + nameAndParams = tokens[2] + addressOfSet.add((normalizeTypeParams(returnType), normalizeTypeParams(nameAndParams))) + else: + print( "unknown line: " + line) + +# Invert the definitionToSourceLocationMap. +# If we see more than one method at the same sourceLocation, it's being autogenerated as part of a template +# and we should just ignore it. +sourceLocationToDefinitionMap = {} +for k, v in definitionToSourceLocationMap.iteritems(): + sourceLocationToDefinitionMap[v] = sourceLocationToDefinitionMap.get(v, []) + sourceLocationToDefinitionMap[v].append(k) +for k, definitions in sourceLocationToDefinitionMap.iteritems(): + if len(definitions) > 1: + for d in definitions: + definitionSet.remove(d) + +def isOtherConstness( d, callSet ): + method = d[0] + " " + d[1] + # if this method is const, and there is a non-const variant of it, and the non-const variant is in use, then leave it alone + if d[0].startswith("const ") and d[1].endswith(" const"): + if ((d[0][6:],d[1][:-6]) in callSet): + return True + elif method.endswith(" const"): + method2 = method[:len(method)-6] # strip off " const" + if ((d[0],method2) in callSet): + return True + if method.endswith(" const") and ("::iterator" in method): + method2 = method[:len(method)-6] # strip off " const" + method2 = method2.replace("::const_iterator", "::iterator") + if ((d[0],method2) in callSet): + return True + # if this method is non-const, and there is a const variant of it, and the const variant is in use, then leave it alone + if (not method.endswith(" const")) and ((d[0],"const " + method + " const") in callSet): + return True + if (not method.endswith(" const")) and ("::iterator" in method): + method2 = method.replace("::iterator", "::const_iterator") + " const" + if ((d[0],method2) in callSet): + return True + return False + +# sort the results using a "natural order" so sequences like [item1,item2,item10] sort nicely +def natural_sort_key(s, _nsre=re.compile('([0-9]+)')): + return [int(text) if text.isdigit() else text.lower() + for text in re.split(_nsre, s)] +def sort_set_by_natural_key(s): + return sorted(s, key=lambda v: natural_sort_key(v[1])) + + +# -------------------------------------------------------------------------------------------- +# Methods that are only called from inside their own class, and are only called from one spot +# -------------------------------------------------------------------------------------------- + +tmp4set = set() +for d in definitionSet: + if d in calledFromOutsideSet: + continue + if isOtherConstness(d, calledFromOutsideSet): + continue + if d not in definitionToSourceLocationMap: + print("warning, method has no location: " + d[0] + " " + d[1]) + continue + # ignore external code + if definitionToSourceLocationMap[d].startswith("external/"): + continue + # ignore constructors, calledFromOutsideSet does not provide accurate info for them + tokens = d[1].split("(")[0].split("::") + if len(tokens)>1 and tokens[-2] == tokens[-1]: + continue + # ignore large methods, which make the code clearer by being out of line + if d in largeFunctionSet: + continue + # ignore methods whose address we take + if d in addressOfSet: + continue + # ignore unused methods, leave them to the dedicated analysis + if d not in calledFromDict: + continue + # ignore methods called from more than one site + if len(calledFromDict[d]) > 1: + continue + + method = d[0] + " " + d[1] + tmp4set.add((method, definitionToSourceLocationMap[d])) + +# print output, sorted by name and line number +with open("loplugin.inlineablemethods.report", "wt") as f: + for t in sort_set_by_natural_key(tmp4set): + f.write(t[1] + "\n") + f.write(" " + t[0] + "\n") |