1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226
|
//===--- Symbol.h -----------------------------------------------*- C++-*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===---------------------------------------------------------------------===//
#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_INDEX_H
#define LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_INDEX_H
#include "../Context.h"
#include "clang/Index/IndexSymbol.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/Hashing.h"
#include "llvm/ADT/StringExtras.h"
#include <array>
#include <string>
namespace clang {
namespace clangd {
struct SymbolLocation {
// The absolute path of the source file where a symbol occurs.
llvm::StringRef FilePath;
// The 0-based offset to the first character of the symbol from the beginning
// of the source file.
unsigned StartOffset;
// The 0-based offset to the last character of the symbol from the beginning
// of the source file.
unsigned EndOffset;
};
// The class identifies a particular C++ symbol (class, function, method, etc).
//
// As USRs (Unified Symbol Resolution) could be large, especially for functions
// with long type arguments, SymbolID is using 160-bits SHA1(USR) values to
// guarantee the uniqueness of symbols while using a relatively small amount of
// memory (vs storing USRs directly).
//
// SymbolID can be used as key in the symbol indexes to lookup the symbol.
class SymbolID {
public:
SymbolID() = default;
SymbolID(llvm::StringRef USR);
bool operator==(const SymbolID &Sym) const {
return HashValue == Sym.HashValue;
}
bool operator<(const SymbolID &Sym) const {
return HashValue < Sym.HashValue;
}
private:
friend llvm::hash_code hash_value(const SymbolID &ID) {
// We already have a good hash, just return the first bytes.
static_assert(sizeof(size_t) <= 20, "size_t longer than SHA1!");
return *reinterpret_cast<const size_t *>(ID.HashValue.data());
}
friend llvm::raw_ostream &operator<<(llvm::raw_ostream &OS,
const SymbolID &ID);
friend void operator>>(llvm::StringRef Str, SymbolID &ID);
std::array<uint8_t, 20> HashValue;
};
// Write SymbolID into the given stream. SymbolID is encoded as a 40-bytes
// hex string.
llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const SymbolID &ID);
// Construct SymbolID from a hex string.
// The HexStr is required to be a 40-bytes hex string, which is encoded from the
// "<<" operator.
void operator>>(llvm::StringRef HexStr, SymbolID &ID);
} // namespace clangd
} // namespace clang
namespace llvm {
// Support SymbolIDs as DenseMap keys.
template <> struct DenseMapInfo<clang::clangd::SymbolID> {
static inline clang::clangd::SymbolID getEmptyKey() {
static clang::clangd::SymbolID EmptyKey("EMPTYKEY");
return EmptyKey;
}
static inline clang::clangd::SymbolID getTombstoneKey() {
static clang::clangd::SymbolID TombstoneKey("TOMBSTONEKEY");
return TombstoneKey;
}
static unsigned getHashValue(const clang::clangd::SymbolID &Sym) {
return hash_value(Sym);
}
static bool isEqual(const clang::clangd::SymbolID &LHS,
const clang::clangd::SymbolID &RHS) {
return LHS == RHS;
}
};
} // namespace llvm
namespace clang {
namespace clangd {
// The class presents a C++ symbol, e.g. class, function.
//
// WARNING: Symbols do not own much of their underlying data - typically strings
// are owned by a SymbolSlab. They should be treated as non-owning references.
// Copies are shallow.
// When adding new unowned data fields to Symbol, remember to update
// SymbolSlab::Builder in Index.cpp to copy them to the slab's storage.
struct Symbol {
// The ID of the symbol.
SymbolID ID;
// The symbol information, like symbol kind.
index::SymbolInfo SymInfo;
// The unqualified name of the symbol, e.g. "bar" (for "n1::n2::bar").
llvm::StringRef Name;
// The scope (e.g. namespace) of the symbol, e.g. "n1::n2" (for
// "n1::n2::bar").
llvm::StringRef Scope;
// The location of the canonical declaration of the symbol.
//
// A C++ symbol could have multiple declarations and one definition (e.g.
// a function is declared in ".h" file, and is defined in ".cc" file).
// * For classes, the canonical declaration is usually definition.
// * For non-inline functions, the canonical declaration is a declaration
// (not a definition), which is usually declared in ".h" file.
SymbolLocation CanonicalDeclaration;
// FIXME: add definition location of the symbol.
// FIXME: add all occurrences support.
// FIXME: add extra fields for index scoring signals.
// FIXME: add code completion information.
};
// An immutable symbol container that stores a set of symbols.
// The container will maintain the lifetime of the symbols.
class SymbolSlab {
public:
using const_iterator = std::vector<Symbol>::const_iterator;
SymbolSlab() = default;
const_iterator begin() const { return Symbols.begin(); }
const_iterator end() const { return Symbols.end(); }
const_iterator find(const SymbolID &SymID) const;
size_t size() const { return Symbols.size(); }
// Estimates the total memory usage.
size_t bytes() const {
return sizeof(*this) + Arena.getTotalMemory() +
Symbols.capacity() * sizeof(Symbol);
}
// SymbolSlab::Builder is a mutable container that can 'freeze' to SymbolSlab.
// The frozen SymbolSlab will use less memory.
class Builder {
public:
// Adds a symbol, overwriting any existing one with the same ID.
// This is a deep copy: underlying strings will be owned by the slab.
void insert(const Symbol& S);
// Returns the symbol with an ID, if it exists. Valid until next insert().
const Symbol* find(const SymbolID &ID) {
auto I = SymbolIndex.find(ID);
return I == SymbolIndex.end() ? nullptr : &Symbols[I->second];
}
// Consumes the builder to finalize the slab.
SymbolSlab build() &&;
private:
llvm::BumpPtrAllocator Arena;
// Intern table for strings. Contents are on the arena.
llvm::DenseSet<llvm::StringRef> Strings;
std::vector<Symbol> Symbols;
// Values are indices into Symbols vector.
llvm::DenseMap<SymbolID, size_t> SymbolIndex;
};
private:
SymbolSlab(llvm::BumpPtrAllocator Arena, std::vector<Symbol> Symbols)
: Arena(std::move(Arena)), Symbols(std::move(Symbols)) {}
llvm::BumpPtrAllocator Arena; // Owns Symbol data that the Symbols do not.
std::vector<Symbol> Symbols; // Sorted by SymbolID to allow lookup.
};
struct FuzzyFindRequest {
/// \brief A query string for the fuzzy find. This is matched against symbols'
/// un-qualified identifiers and should not contain qualifiers like "::".
std::string Query;
/// \brief If this is non-empty, symbols must be in at least one of the scopes
/// (e.g. namespaces) excluding nested scopes. For example, if a scope "xyz"
/// is provided, the matched symbols must be defined in scope "xyz" but not
/// "xyz::abc".
///
/// A scope must be fully qualified without leading or trailing "::" e.g.
/// "n1::n2". "" is interpreted as the global namespace, and "::" is invalid.
std::vector<std::string> Scopes;
/// \brief The maxinum number of candidates to return.
size_t MaxCandidateCount = UINT_MAX;
};
/// \brief Interface for symbol indexes that can be used for searching or
/// matching symbols among a set of symbols based on names or unique IDs.
class SymbolIndex {
public:
virtual ~SymbolIndex() = default;
/// \brief Matches symbols in the index fuzzily and applies \p Callback on
/// each matched symbol before returning.
///
/// Returns true if the result list is complete, false if it was truncated due
/// to MaxCandidateCount
virtual bool
fuzzyFind(const Context &Ctx, const FuzzyFindRequest &Req,
llvm::function_ref<void(const Symbol &)> Callback) const = 0;
// FIXME: add interfaces for more index use cases:
// - Symbol getSymbolInfo(SymbolID);
// - getAllOccurrences(SymbolID);
};
} // namespace clangd
} // namespace clang
#endif // LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_INDEX_H
|