1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143
|
//===--- Dex.h - Dex Symbol Index Implementation ----------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This defines Dex - a symbol index implementation based on query iterators
/// over symbol tokens, such as fuzzy matching trigrams, scopes, types, etc.
/// While consuming more memory and having longer build stage due to
/// preprocessing, Dex will have substantially lower latency. It will also allow
/// efficient symbol searching which is crucial for operations like code
/// completion, and can be very important for a number of different code
/// transformations which will be eventually supported by Clangd.
///
//===----------------------------------------------------------------------===//
#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_DEX_DEX_H
#define LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_DEX_DEX_H
#include "Iterator.h"
#include "PostingList.h"
#include "Token.h"
#include "Trigram.h"
#include "index/Index.h"
#include "index/MemIndex.h"
#include "index/Relation.h"
#include "index/SymbolCollector.h"
namespace clang {
namespace clangd {
namespace dex {
/// In-memory Dex trigram-based index implementation.
class Dex : public SymbolIndex {
public:
// All data must outlive this index.
template <typename SymbolRange, typename RefsRange, typename RelationsRange>
Dex(SymbolRange &&Symbols, RefsRange &&Refs, RelationsRange &&Relations)
: Corpus(0) {
for (auto &&Sym : Symbols)
this->Symbols.push_back(&Sym);
for (auto &&Ref : Refs)
this->Refs.try_emplace(Ref.first, Ref.second);
for (auto &&Rel : Relations)
this->Relations[std::make_pair(Rel.Subject,
static_cast<uint8_t>(Rel.Predicate))]
.push_back(Rel.Object);
buildIndex();
}
// Symbols and Refs are owned by BackingData, Index takes ownership.
template <typename SymbolRange, typename RefsRange, typename RelationsRange,
typename Payload>
Dex(SymbolRange &&Symbols, RefsRange &&Refs, RelationsRange &&Relations,
Payload &&BackingData, size_t BackingDataSize)
: Dex(std::forward<SymbolRange>(Symbols), std::forward<RefsRange>(Refs),
std::forward<RelationsRange>(Relations)) {
KeepAlive = std::shared_ptr<void>(
std::make_shared<Payload>(std::move(BackingData)), nullptr);
this->BackingDataSize = BackingDataSize;
}
template <typename SymbolRange, typename RefsRange, typename RelationsRange,
typename FileRange, typename Payload>
Dex(SymbolRange &&Symbols, RefsRange &&Refs, RelationsRange &&Relations,
FileRange &&Files, IndexContents IdxContents, Payload &&BackingData,
size_t BackingDataSize)
: Dex(std::forward<SymbolRange>(Symbols), std::forward<RefsRange>(Refs),
std::forward<RelationsRange>(Relations),
std::forward<Payload>(BackingData), BackingDataSize) {
this->Files = std::forward<FileRange>(Files);
this->IdxContents = IdxContents;
}
/// Builds an index from slabs. The index takes ownership of the slab.
static std::unique_ptr<SymbolIndex> build(SymbolSlab, RefSlab, RelationSlab);
bool
fuzzyFind(const FuzzyFindRequest &Req,
llvm::function_ref<void(const Symbol &)> Callback) const override;
void lookup(const LookupRequest &Req,
llvm::function_ref<void(const Symbol &)> Callback) const override;
bool refs(const RefsRequest &Req,
llvm::function_ref<void(const Ref &)> Callback) const override;
void relations(const RelationsRequest &Req,
llvm::function_ref<void(const SymbolID &, const Symbol &)>
Callback) const override;
llvm::unique_function<IndexContents(llvm::StringRef) const>
indexedFiles() const override;
size_t estimateMemoryUsage() const override;
private:
void buildIndex();
std::unique_ptr<Iterator> iterator(const Token &Tok) const;
std::unique_ptr<Iterator>
createFileProximityIterator(llvm::ArrayRef<std::string> ProximityPaths) const;
std::unique_ptr<Iterator>
createTypeBoostingIterator(llvm::ArrayRef<std::string> Types) const;
/// Stores symbols sorted in the descending order of symbol quality..
std::vector<const Symbol *> Symbols;
/// SymbolQuality[I] is the quality of Symbols[I].
std::vector<float> SymbolQuality;
llvm::DenseMap<SymbolID, const Symbol *> LookupTable;
/// Inverted index is a mapping from the search token to the posting list,
/// which contains all items which can be characterized by such search token.
/// For example, if the search token is scope "std::", the corresponding
/// posting list would contain all indices of symbols defined in namespace
/// std. Inverted index is used to retrieve posting lists which are processed
/// during the fuzzyFind process.
llvm::DenseMap<Token, PostingList> InvertedIndex;
dex::Corpus Corpus;
llvm::DenseMap<SymbolID, llvm::ArrayRef<Ref>> Refs;
static_assert(sizeof(RelationKind) == sizeof(uint8_t),
"RelationKind should be of same size as a uint8_t");
llvm::DenseMap<std::pair<SymbolID, uint8_t>, std::vector<SymbolID>> Relations;
std::shared_ptr<void> KeepAlive; // poor man's move-only std::any
// Set of files which were used during this index build.
llvm::StringSet<> Files;
// Contents of the index (symbols, references, etc.)
IndexContents IdxContents;
// Size of memory retained by KeepAlive.
size_t BackingDataSize = 0;
};
/// Returns Search Token for a number of parent directories of given Path.
/// Should be used within the index build process.
///
/// This function is exposed for testing only.
std::vector<std::string> generateProximityURIs(llvm::StringRef URIPath);
} // namespace dex
} // namespace clangd
} // namespace clang
#endif // LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_DEX_DEX_H
|