1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439
|
//===--- IncludeCleaner.cpp - Unused/Missing Headers Analysis ---*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "IncludeCleaner.h"
#include "Config.h"
#include "Headers.h"
#include "ParsedAST.h"
#include "Protocol.h"
#include "SourceCode.h"
#include "support/Logger.h"
#include "support/Trace.h"
#include "clang/AST/ExprCXX.h"
#include "clang/AST/RecursiveASTVisitor.h"
#include "clang/Basic/SourceLocation.h"
#include "clang/Basic/SourceManager.h"
#include "clang/Lex/HeaderSearch.h"
#include "clang/Lex/Preprocessor.h"
#include "clang/Tooling/Syntax/Tokens.h"
#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/Path.h"
namespace clang {
namespace clangd {
/// File-local switch, off by default. mayConsiderUnused() consults it when
/// deciding whether an angle-bracket include is eligible for diagnosis.
static bool AnalyzeStdlib = false;

/// Stores \p B into the file-local AnalyzeStdlib switch.
void setIncludeCleanerAnalyzesStdlib(bool B) {
  AnalyzeStdlib = B;
}
namespace {
/// AST visitor that collects the declarations used (sometimes only
/// implicitly) by the traversed code and records them in \p Result:
/// declarations recognized by the stdlib recognizer end up in Result.Stdlib,
/// the locations of all other declarations end up in Result.User.
class ReferencedLocationCrawler
    : public RecursiveASTVisitor<ReferencedLocationCrawler> {
public:
  ReferencedLocationCrawler(ReferencedLocations &Result,
                            const SourceManager &SM)
      : Result(Result), SM(SM) {}

  /// Records both getDecl() and getFoundDecl() of the reference.
  bool VisitDeclRefExpr(DeclRefExpr *DRE) {
    add(DRE->getDecl());
    add(DRE->getFoundDecl());
    return true;
  }

  /// Records both the member declaration and the found declaration.
  bool VisitMemberExpr(MemberExpr *ME) {
    add(ME->getMemberDecl());
    add(ME->getFoundDecl().getDecl());
    return true;
  }

  bool VisitTagType(TagType *TT) {
    add(TT->getDecl());
    return true;
  }

  bool VisitFunctionDecl(FunctionDecl *FD) {
    // A function definition requires its redeclarations to be included;
    // declarations that are not definitions are ignored here.
    const bool IsDefinition = FD->isThisDeclarationADefinition();
    if (IsDefinition)
      add(FD);
    return true;
  }

  bool VisitCXXConstructExpr(CXXConstructExpr *CCE) {
    add(CCE->getConstructor());
    return true;
  }

  bool VisitTemplateSpecializationType(TemplateSpecializationType *TST) {
    // NOTE(review): TraverseType() registers type pointers in the same
    // Visited set, so for a type first reached through TraverseType() this
    // guard looks like it would already fail -- confirm this is intended.
    if (!isNew(TST))
      return true;
    add(TST->getTemplateName().getAsTemplateDecl()); // Primary template.
    add(TST->getAsCXXRecordDecl());                  // Specialization.
    return true;
  }

  bool VisitUsingType(UsingType *UT) {
    add(UT->getFoundDecl());
    return true;
  }

  bool VisitTypedefType(TypedefType *TT) {
    add(TT->getDecl());
    return true;
  }

  // The type of every visited expression is traversed as well, even when the
  // type is never spelled out. This helps with getFoo().bar(), where Foo has
  // to be complete.
  // FIXME(kirillbobyrev): Should we tweak this? It may not be desirable to
  // consider types "used" when they are not directly spelled in code.
  bool VisitExpr(Expr *E) {
    TraverseType(E->getType());
    return true;
  }

  /// Traverses \p T at most once; always reports success to the caller.
  bool TraverseType(QualType T) {
    // Qualifiers are irrelevant here: deduplicate on the type pointer only.
    const void *TypeKey = T.getTypePtrOrNull();
    if (isNew(TypeKey))
      Base::TraverseType(T);
    return true;
  }

  /// Records the target of every shadow declaration of the using-declaration.
  bool VisitUsingDecl(UsingDecl *D) {
    for (const auto *Shadow : D->shadows())
      add(Shadow->getTargetDecl());
    return true;
  }

  // An enum can be usefully forward-declared as a *complete* type by giving it
  // an underlying type. The definition should then see that declaration so the
  // two can be checked for compatibility.
  bool VisitEnumDecl(EnumDecl *D) {
    if (!D->isThisDeclarationADefinition())
      return true;
    if (D->getIntegerTypeSourceInfo())
      add(D);
    return true;
  }

  // While an overload is still unresolved, every candidate counts as used.
  bool VisitOverloadExpr(OverloadExpr *E) {
    for (const auto *Candidate : E->decls())
      add(Candidate);
    return true;
  }

private:
  using Base = RecursiveASTVisitor<ReferencedLocationCrawler>;

  /// Records \p D as used. Null declarations and declarations whose canonical
  /// declaration has been handled before are ignored.
  void add(const Decl *D) {
    if (!D)
      return;
    if (!isNew(D->getCanonicalDecl()))
      return;

    // Symbols the recognizer knows are tracked as symbols, not as locations.
    if (auto StdSymbol = StdRecognizer(D)) {
      Result.Stdlib.insert(*StdSymbol);
      return;
    }

    // RecordDecls get special treatment because they are commonly
    // forward-declared several times. The most common cases are:
    //  - The definition is available in the TU: only that one is marked as a
    //    usage, the remaining redeclarations are likely unnecessary. This
    //    might result in false positives when an internal definition is
    //    visible.
    //  - There is a forward declaration in the main file: no other
    //    redeclaration is needed.
    if (const auto *Record = llvm::dyn_cast<RecordDecl>(D)) {
      if (const auto *Def = Record->getDefinition()) {
        Result.User.insert(Def->getLocation());
        return;
      }
      if (SM.isInMainFile(Record->getMostRecentDecl()->getLocation()))
        return;
    }

    // Otherwise every redeclaration is recorded.
    for (const Decl *Redecl : D->redecls())
      Result.User.insert(Redecl->getLocation());
  }

  /// Returns true only the first time a given non-null pointer is seen.
  bool isNew(const void *P) {
    if (!P)
      return false;
    return Visited.insert(P).second;
  }

  ReferencedLocations &Result;
  // Pointers (declarations and types) that were already processed.
  llvm::DenseSet<const void *> Visited;
  const SourceManager &SM;
  stdlib::Recognizer StdRecognizer;
};
// Maps referenced SourceLocations onto real files. File locations contribute
// their FileID directly; macro locations are followed through their spelling
// and expansion locations until file locations are reached.
struct ReferencedFilesBuilder {
  ReferencedFilesBuilder(const SourceManager &SM) : SM(SM) {}

  // FileIDs of file locations that were added.
  llvm::DenseSet<FileID> Files;
  // Macro FileIDs that were already expanded by add().
  llvm::DenseSet<FileID> Macros;
  const SourceManager &SM;

  // Looks up the FileID of \p Loc and forwards to the two-argument overload.
  void add(SourceLocation Loc) { add(SM.getFileID(Loc), Loc); }

  // Records \p Loc, which must belong to \p FID. Invalid FileIDs are ignored.
  void add(FileID FID, SourceLocation Loc) {
    if (FID.isInvalid())
      return;
    assert(SM.isInFileID(Loc, FID));

    if (Loc.isFileID()) {
      Files.insert(FID);
      return;
    }

    // Each macro FID is processed at most once.
    const bool FirstVisit = Macros.insert(FID).second;
    if (!FirstVisit)
      return;

    const auto &Expansion = SM.getSLocEntry(FID).getExpansion();
    add(Expansion.getSpellingLoc());
    add(Expansion.getExpansionLocStart());
    add(Expansion.getExpansionLocEnd());
  }
};
// Computes the range that begins at \p HashOffset (the '#' of the directive)
// and extends to the first '\n' or '\r', or to the end of \p Code if there is
// none. Escaped newlines are not handled.
clangd::Range getDiagnosticRange(llvm::StringRef Code, unsigned HashOffset) {
  clangd::Range Result;
  Result.start = offsetToPosition(Code, HashOffset);
  Result.end = Result.start;

  // Everything from the '#' up to (not including) the line break.
  auto IsLineBreak = [](char C) { return C == '\n' || C == '\r'; };
  llvm::StringRef Directive =
      Code.drop_front(HashOffset).take_until(IsLineBreak);

  // Stretch the end of the range over the rest of the line.
  Result.end.character += lspLength(Directive);
  return Result;
}
// Adds to Result.User the definition location of every macro referenced by a
// spelled token of the main file. This includes references that were not yet
// expanded, e.g. `BAR` in `#define FOO BAR`.
void findReferencedMacros(ParsedAST &AST, ReferencedLocations &Result) {
  trace::Span Tracer("IncludeCleaner::findReferencedMacros");
  auto &SM = AST.getSourceManager();
  auto &PP = AST.getPreprocessor();
  // FIXME(kirillbobyrev): The macros from the main file are collected in
  // ParsedAST's MainFileMacros. However, we can't use it here because it
  // doesn't handle macro references that were not expanded, e.g. in macro
  // definitions or preprocessor-disabled sections.
  //
  // Extending MainFileMacros to collect missing references and switching to
  // this mechanism (as opposed to iterating through all tokens) will improve
  // the performance of findReferencedMacros and also improve other features
  // relying on MainFileMacros.
  const auto MainFileTokens = AST.getTokens().spelledTokens(SM.getMainFileID());
  for (const syntax::Token &Tok : MainFileTokens) {
    if (auto Macro = locateMacroAt(Tok, PP)) {
      auto DefinitionLoc = Macro->Info->getDefinitionLoc();
      if (DefinitionLoc.isValid())
        Result.User.insert(DefinitionLoc);
      // FIXME: support stdlib macros
    }
  }
}
static bool mayConsiderUnused(const Inclusion &Inc, ParsedAST &AST) {
if (Inc.BehindPragmaKeep)
return false;
// FIXME(kirillbobyrev): We currently do not support the umbrella headers.
// System headers are likely to be standard library headers.
// Until we have good support for umbrella headers, don't warn about them.
if (Inc.Written.front() == '<') {
if (AnalyzeStdlib && stdlib::Header::named(Inc.Written))
return true;
return false;
}
// Headers without include guards have side effects and are not
// self-contained, skip them.
assert(Inc.HeaderID);
auto FE = AST.getSourceManager().getFileManager().getFile(
AST.getIncludeStructure().getRealPath(
static_cast<IncludeStructure::HeaderID>(*Inc.HeaderID)));
assert(FE);
if (!AST.getPreprocessor().getHeaderSearchInfo().isFileMultipleIncludeGuarded(
*FE)) {
dlog("{0} doesn't have header guard and will not be considered unused",
(*FE)->getName());
return false;
}
return true;
}
// Symbols coming from a header that is not self-contained are attributed to
// the first self-contained file on its chain of includers: that is the header
// users can actually include, so it is responsible for bringing the symbols
// into scope. Returns the FileID of that file; the walk also stops at the main
// file and at any FileID that has no FileEntry.
FileID headerResponsible(FileID ID, const SourceManager &SM,
                         const IncludeStructure &Includes) {
  const FileID MainFile = SM.getMainFileID();
  // Climb the include chain until a file that can be included on its own is
  // found.
  while (ID != MainFile) {
    const FileEntry *FE = SM.getFileEntryForID(ID);
    // A FileID without a FileEntry is considered to be the responsible one.
    if (!FE)
      break;
    auto HID = Includes.getID(FE);
    assert(HID && "We're iterating over headers already existing in "
                  "IncludeStructure");
    if (Includes.isSelfContained(*HID))
      break;
    // Not self-contained: the includer becomes responsible for the symbols.
    ID = SM.getFileID(SM.getIncludeLoc(ID));
  }
  return ID;
}
} // namespace
/// Collects the locations referenced by \p AST: first everything the
/// ReferencedLocationCrawler finds while traversing the AST, then the macro
/// definitions reported by findReferencedMacros().
ReferencedLocations findReferencedLocations(ParsedAST &AST) {
  trace::Span Tracer("IncludeCleaner::findReferencedLocations");
  ReferencedLocations Locations;

  // Pass 1: declarations and types reachable from the AST.
  ReferencedLocationCrawler ASTCrawler(Locations, AST.getSourceManager());
  ASTCrawler.TraverseAST(AST.getASTContext());

  // Pass 2: macros referenced by main-file tokens.
  findReferencedMacros(AST, Locations);
  return Locations;
}
/// Translates the referenced locations in \p Locs into referenced files:
/// user locations become FileIDs (with non-self-contained headers replaced by
/// the file responsible for them), stdlib symbols become the stdlib headers
/// that provide them.
ReferencedFiles findReferencedFiles(const ReferencedLocations &Locs,
                                    const IncludeStructure &Includes,
                                    const SourceManager &SM) {
  // Sorting groups the locations by FileID.
  std::vector<SourceLocation> Sorted(Locs.User.begin(), Locs.User.end());
  llvm::sort(Sorted);

  ReferencedFilesBuilder Builder(SM);
  auto Cur = Sorted.begin();
  const auto End = Sorted.end();
  while (Cur != End) {
    const SourceLocation First = *Cur;
    const FileID FID = SM.getFileID(First);
    Builder.add(FID, First);
    // Cheaply skip the remaining locations of the same FileID; this avoids
    // lots of redundant Loc->File lookups for the same file.
    ++Cur;
    while (Cur != End && SM.isInFileID(*Cur, FID))
      ++Cur;
  }

  // The symbols of a header that is not self-contained are considered a
  // logical part of the including file, so the parents of all used
  // non-self-contained FileIDs are marked as used. This works on FileIDs
  // rather than HeaderIDs because each inclusion of a non-self-contained file
  // is distinct.
  llvm::DenseSet<FileID> UserFiles;
  for (FileID Used : Builder.Files)
    UserFiles.insert(headerResponsible(Used, SM, Includes));

  // Every header associated with a used stdlib symbol counts as used.
  llvm::DenseSet<stdlib::Header> StdlibFiles;
  for (const auto &Symbol : Locs.Stdlib) {
    for (const auto &Header : Symbol.headers())
      StdlibFiles.insert(Header);
  }

  return {std::move(UserFiles), std::move(StdlibFiles)};
}
/// Returns pointers to the main-file includes whose HeaderID is not contained
/// in \p ReferencedFiles and which mayConsiderUnused() accepts. Includes
/// without a HeaderID are skipped. The pointers refer to elements of the
/// MainFileIncludes of the AST's IncludeStructure.
std::vector<const Inclusion *>
getUnused(ParsedAST &AST,
          const llvm::DenseSet<IncludeStructure::HeaderID> &ReferencedFiles) {
  trace::Span Tracer("IncludeCleaner::getUnused");
  std::vector<const Inclusion *> Unused;
  const auto &MainIncludes = AST.getIncludeStructure().MainFileIncludes;
  for (const Inclusion &Inc : MainIncludes) {
    if (!Inc.HeaderID)
      continue;
    const auto HID = static_cast<IncludeStructure::HeaderID>(*Inc.HeaderID);
    const bool Used = ReferencedFiles.contains(HID);
    if (!Used) {
      // Unreferenced, but possibly not something we are allowed to report.
      if (!mayConsiderUnused(Inc, AST)) {
        dlog("{0} was not used, but is not eligible to be diagnosed as unused",
             Inc.Written);
        continue;
      }
      Unused.push_back(&Inc);
    }
    dlog("{0} is {1}", Inc.Written, Used ? "USED" : "UNUSED");
  }
  return Unused;
}
#ifndef NDEBUG
// Reports whether the name of the file behind \p FID begins with '<', as is
// the case for <built-in>, <scratch space> etc. Only compiled when NDEBUG is
// not defined.
static bool isSpecialBuffer(FileID FID, const SourceManager &SM) {
  const SrcMgr::FileInfo &Info = SM.getSLocEntry(FID).getFile();
  const auto BufferName = Info.getName();
  return BufferName.startswith("<");
}
#endif
/// Converts the referenced files in \p Files into HeaderIDs of \p Includes.
/// User FileIDs without a FileEntry are skipped; each used stdlib header
/// contributes every HeaderID that Includes.StdlibHeaders lists for it.
llvm::DenseSet<IncludeStructure::HeaderID>
translateToHeaderIDs(const ReferencedFiles &Files,
                     const IncludeStructure &Includes,
                     const SourceManager &SM) {
  trace::Span Tracer("IncludeCleaner::translateToHeaderIDs");
  llvm::DenseSet<IncludeStructure::HeaderID> TranslatedHeaderIDs;
  TranslatedHeaderIDs.reserve(Files.User.size());

  for (FileID FID : Files.User) {
    if (const FileEntry *FE = SM.getFileEntryForID(FID)) {
      const auto HID = Includes.getID(FE);
      assert(HID);
      TranslatedHeaderIDs.insert(*HID);
    } else {
      // Only special buffers are expected to lack a FileEntry.
      assert(isSpecialBuffer(FID, SM));
    }
  }

  for (stdlib::Header UsedHeader : Files.Stdlib) {
    for (auto HID : Includes.StdlibHeaders.lookup(UsedHeader))
      TranslatedHeaderIDs.insert(HID);
  }
  return TranslatedHeaderIDs;
}
/// Runs the whole analysis for \p AST: referenced locations -> referenced
/// files -> referenced HeaderIDs -> main-file includes reported by
/// getUnused().
std::vector<const Inclusion *> computeUnusedIncludes(ParsedAST &AST) {
  const auto &SM = AST.getSourceManager();
  const auto &Includes = AST.getIncludeStructure();
  auto UsedLocations = findReferencedLocations(AST);
  auto UsedFiles = findReferencedFiles(UsedLocations, Includes, SM);
  auto UsedHeaders = translateToHeaderIDs(UsedFiles, Includes, SM);
  return getUnused(AST, UsedHeaders);
}
/// Produces one warning, with a fix that removes the directive, for every
/// include returned by computeUnusedIncludes(). Returns nothing unless the
/// current config sets UnusedIncludes to Strict and neither suppresses all
/// diagnostics nor suppresses "unused-includes".
/// \p Code is the text used to compute each diagnostic's range.
std::vector<Diag> issueUnusedIncludesDiagnostics(ParsedAST &AST,
                                                 llvm::StringRef Code) {
  const Config &Cfg = Config::current();
  const bool Enabled =
      Cfg.Diagnostics.UnusedIncludes == Config::UnusedIncludesPolicy::Strict &&
      !Cfg.Diagnostics.SuppressAll &&
      !Cfg.Diagnostics.Suppress.contains("unused-includes");
  if (!Enabled)
    return {};

  trace::Span Tracer("IncludeCleaner::issueUnusedIncludesDiagnostics");
  std::vector<Diag> Result;
  const auto &SM = AST.getSourceManager();
  std::string FileName =
      SM.getFileEntryForID(SM.getMainFileID())->getName().str();

  for (const auto *Inc : computeUnusedIncludes(AST)) {
    // The spelling without its first and last character (the delimiters).
    const std::string Unquoted =
        Inc->Written.substr(1, Inc->Written.size() - 2);

    Diag D;
    D.Message = llvm::formatv(
        "included header {0} is not used",
        llvm::sys::path::filename(Unquoted, llvm::sys::path::Style::posix));
    D.Name = "unused-includes";
    D.Source = Diag::DiagSource::Clangd;
    D.File = FileName;
    D.Severity = DiagnosticsEngine::Warning;
    D.Tags.push_back(Unnecessary);
    D.Range = getDiagnosticRange(Code, Inc->HashOffset);
    D.InsideMainFile = true;

    // FIXME(kirillbobyrev): Removing inclusion might break the code if the
    // used headers are only reachable transitively through this one. Suggest
    // including them directly instead.
    // FIXME(kirillbobyrev): Add fix suggestion for adding IWYU pragmas
    // (keep/export) remove the warning once we support IWYU pragmas.
    D.Fixes.emplace_back();
    auto &RemoveFix = D.Fixes.back();
    RemoveFix.Message = "remove #include directive";
    // The edit runs from the directive's line to the following line; all
    // other fields of the edit keep their default values.
    RemoveFix.Edits.emplace_back();
    auto &Removal = RemoveFix.Edits.back();
    Removal.range.start.line = Inc->HashLine;
    Removal.range.end.line = Inc->HashLine + 1;

    Result.push_back(std::move(D));
  }
  return Result;
}
} // namespace clangd
} // namespace clang
|