1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221
|
//===--- FileDistance.cpp - File contents container -------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// The FileDistance structure allows calculating the minimum distance to paths
// in a single tree.
// We simply walk up the path's ancestors until we find a node whose cost is
// known, and add the cost of walking back down. Initialization ensures this
// gives the correct path to the roots.
// We cache the results, so that the runtime is O(|A|), where A is the set of
// all distinct ancestors of visited paths.
//
// Example after initialization with /=2, /bar=0, DownCost = 1:
// / = 2
// /bar = 0
//
// After querying /foo/bar and /bar/foo:
// / = 2
// /bar = 0
// /bar/foo = 1
// /foo = 3
// /foo/bar = 4
//
// URIDistance creates FileDistance lazily for each URI scheme encountered. In
// practice this is a small constant factor.
//
//===-------------------------------------------------------------------------//
#include "FileDistance.h"
#include "support/Logger.h"
#include "llvm/ADT/STLExtras.h"
#include <queue>
namespace clang {
namespace clangd {
// Convert a path into the canonical form.
// Canonical form is either "/", or "/segment" * N:
// C:\foo\bar --> /c:/foo/bar
// /foo/ --> /foo
// a/b/c --> /a/b/c
static llvm::SmallString<128> canonicalize(llvm::StringRef Path) {
llvm::SmallString<128> Result = Path.rtrim('/');
native(Result, llvm::sys::path::Style::posix);
if (Result.empty() || Result.front() != '/')
Result.insert(Result.begin(), '/');
return Result;
}
constexpr const unsigned FileDistance::Unreachable;
const llvm::hash_code FileDistance::RootHash =
llvm::hash_value(llvm::StringRef("/"));
FileDistance::FileDistance(llvm::StringMap<SourceParams> Sources,
const FileDistanceOptions &Opts)
: Opts(Opts) {
llvm::DenseMap<llvm::hash_code, llvm::SmallVector<llvm::hash_code>> DownEdges;
// Compute the best distance following only up edges.
// Keep track of down edges, in case we can use them to improve on this.
for (const auto &S : Sources) {
auto Canonical = canonicalize(S.getKey());
dlog("Source {0} = {1}, MaxUp = {2}", Canonical, S.second.Cost,
S.second.MaxUpTraversals);
// Walk up to ancestors of this source, assigning cost.
llvm::StringRef Rest = Canonical;
llvm::hash_code Hash = llvm::hash_value(Rest);
for (unsigned I = 0; !Rest.empty(); ++I) {
Rest = parent_path(Rest, llvm::sys::path::Style::posix);
auto NextHash = llvm::hash_value(Rest);
auto &Down = DownEdges[NextHash];
if (!llvm::is_contained(Down, Hash))
Down.push_back(Hash);
// We can't just break after MaxUpTraversals, must still set DownEdges.
if (I > S.getValue().MaxUpTraversals) {
if (Cache.find(Hash) != Cache.end())
break;
} else {
unsigned Cost = S.getValue().Cost + I * Opts.UpCost;
auto R = Cache.try_emplace(Hash, Cost);
if (!R.second) {
if (Cost < R.first->second) {
R.first->second = Cost;
} else {
// If we're not the best way to get to this path, stop assigning.
break;
}
}
}
Hash = NextHash;
}
}
// Now propagate scores parent -> child if that's an improvement.
// BFS ensures we propagate down chains (must visit parents before children).
std::queue<llvm::hash_code> Next;
for (auto Child : DownEdges.lookup(llvm::hash_value(llvm::StringRef(""))))
Next.push(Child);
while (!Next.empty()) {
auto Parent = Next.front();
Next.pop();
auto ParentCost = Cache.lookup(Parent);
for (auto Child : DownEdges.lookup(Parent)) {
if (Parent != RootHash || Opts.AllowDownTraversalFromRoot) {
auto &ChildCost =
Cache.try_emplace(Child, Unreachable).first->getSecond();
if (ParentCost + Opts.DownCost < ChildCost)
ChildCost = ParentCost + Opts.DownCost;
}
Next.push(Child);
}
}
}
unsigned FileDistance::distance(llvm::StringRef Path) {
auto Canonical = canonicalize(Path);
unsigned Cost = Unreachable;
llvm::SmallVector<llvm::hash_code> Ancestors;
// Walk up ancestors until we find a path we know the distance for.
for (llvm::StringRef Rest = Canonical; !Rest.empty();
Rest = parent_path(Rest, llvm::sys::path::Style::posix)) {
auto Hash = llvm::hash_value(Rest);
if (Hash == RootHash && !Ancestors.empty() &&
!Opts.AllowDownTraversalFromRoot) {
Cost = Unreachable;
break;
}
auto It = Cache.find(Hash);
if (It != Cache.end()) {
Cost = It->second;
break;
}
Ancestors.push_back(Hash);
}
// Now we know the costs for (known node, queried node].
// Fill these in, walking down the directory tree.
for (llvm::hash_code Hash : llvm::reverse(Ancestors)) {
if (Cost != Unreachable)
Cost += Opts.DownCost;
Cache.try_emplace(Hash, Cost);
}
dlog("distance({0} = {1})", Path, Cost);
return Cost;
}
unsigned URIDistance::distance(llvm::StringRef URI) {
auto R = Cache.try_emplace(llvm::hash_value(URI), FileDistance::Unreachable);
if (!R.second)
return R.first->getSecond();
if (auto U = clangd::URI::parse(URI)) {
dlog("distance({0} = {1})", URI, U->body());
R.first->second = forScheme(U->scheme()).distance(U->body());
} else {
log("URIDistance::distance() of unparseable {0}: {1}", URI, U.takeError());
}
return R.first->second;
}
FileDistance &URIDistance::forScheme(llvm::StringRef Scheme) {
auto &Delegate = ByScheme[Scheme];
if (!Delegate) {
llvm::StringMap<SourceParams> SchemeSources;
for (const auto &Source : Sources) {
if (auto U = clangd::URI::create(Source.getKey(), Scheme))
SchemeSources.try_emplace(U->body(), Source.getValue());
else
llvm::consumeError(U.takeError());
}
dlog("FileDistance for scheme {0}: {1}/{2} sources", Scheme,
SchemeSources.size(), Sources.size());
Delegate.reset(new FileDistance(std::move(SchemeSources), Opts));
}
return *Delegate;
}
static std::pair<std::string, int> scopeToPath(llvm::StringRef Scope) {
llvm::SmallVector<llvm::StringRef> Split;
Scope.split(Split, "::", /*MaxSplit=*/-1, /*KeepEmpty=*/false);
return {"/" + llvm::join(Split, "/"), Split.size()};
}
static FileDistance
createScopeFileDistance(llvm::ArrayRef<std::string> QueryScopes) {
FileDistanceOptions Opts;
Opts.UpCost = 2;
Opts.DownCost = 4;
Opts.AllowDownTraversalFromRoot = false;
llvm::StringMap<SourceParams> Sources;
llvm::StringRef Preferred =
QueryScopes.empty() ? "" : QueryScopes.front().c_str();
for (llvm::StringRef S : QueryScopes) {
SourceParams Param;
// Penalize the global scope even it's preferred, as all projects can define
// symbols in it, and there is pattern where using-namespace is used in
// place of enclosing namespaces (e.g. in implementation files).
if (S == Preferred)
Param.Cost = S == "" ? 4 : 0;
else if (Preferred.startswith(S) && !S.empty())
continue; // just rely on up-traversals.
else
Param.Cost = S == "" ? 6 : 2;
auto Path = scopeToPath(S);
// The global namespace is not 'near' its children.
Param.MaxUpTraversals = std::max(Path.second - 1, 0);
Sources[Path.first] = std::move(Param);
}
return FileDistance(std::move(Sources), Opts);
}
ScopeDistance::ScopeDistance(llvm::ArrayRef<std::string> QueryScopes)
: Distance(createScopeFileDistance(QueryScopes)) {}
unsigned ScopeDistance::distance(llvm::StringRef SymbolScope) {
return Distance.distance(scopeToPath(SymbolScope).first);
}
} // namespace clangd
} // namespace clang
|