File: Dex.h

package info (click to toggle)
llvm-toolchain-13 1%3A13.0.1-11
  • links: PTS, VCS
  • area: main
  • in suites: bookworm
  • size: 1,418,840 kB
  • sloc: cpp: 5,290,826; ansic: 996,570; asm: 544,593; python: 188,212; objc: 72,027; lisp: 30,291; f90: 25,395; sh: 24,898; javascript: 9,780; pascal: 9,398; perl: 7,484; ml: 5,432; awk: 3,523; makefile: 2,913; xml: 953; cs: 573; fortran: 539
file content (143 lines) | stat: -rw-r--r-- 6,057 bytes parent folder | download | duplicates (6)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
//===--- Dex.h - Dex Symbol Index Implementation ----------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This defines Dex - a symbol index implementation based on query iterators
/// over symbol tokens, such as fuzzy matching trigrams, scopes, types, etc.
/// While consuming more memory and having longer build stage due to
/// preprocessing, Dex will have substantially lower latency. It will also allow
/// efficient symbol searching which is crucial for operations like code
/// completion, and can be very important for a number of different code
/// transformations which will be eventually supported by Clangd.
///
//===----------------------------------------------------------------------===//

#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_DEX_DEX_H
#define LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_DEX_DEX_H

#include "Iterator.h"
#include "PostingList.h"
#include "Token.h"
#include "Trigram.h"
#include "index/Index.h"
#include "index/MemIndex.h"
#include "index/Relation.h"
#include "index/SymbolCollector.h"

namespace clang {
namespace clangd {
namespace dex {

/// In-memory Dex trigram-based index implementation.
class Dex : public SymbolIndex {
public:
  // All data must outlive this index.
  template <typename SymbolRange, typename RefsRange, typename RelationsRange>
  Dex(SymbolRange &&Symbols, RefsRange &&Refs, RelationsRange &&Relations)
      : Corpus(0) {
    for (auto &&Sym : Symbols)
      this->Symbols.push_back(&Sym);
    for (auto &&Ref : Refs)
      this->Refs.try_emplace(Ref.first, Ref.second);
    for (auto &&Rel : Relations)
      this->Relations[std::make_pair(Rel.Subject,
                                     static_cast<uint8_t>(Rel.Predicate))]
          .push_back(Rel.Object);
    buildIndex();
  }
  // Symbols and Refs are owned by BackingData, Index takes ownership.
  template <typename SymbolRange, typename RefsRange, typename RelationsRange,
            typename Payload>
  Dex(SymbolRange &&Symbols, RefsRange &&Refs, RelationsRange &&Relations,
      Payload &&BackingData, size_t BackingDataSize)
      : Dex(std::forward<SymbolRange>(Symbols), std::forward<RefsRange>(Refs),
            std::forward<RelationsRange>(Relations)) {
    KeepAlive = std::shared_ptr<void>(
        std::make_shared<Payload>(std::move(BackingData)), nullptr);
    this->BackingDataSize = BackingDataSize;
  }

  template <typename SymbolRange, typename RefsRange, typename RelationsRange,
            typename FileRange, typename Payload>
  Dex(SymbolRange &&Symbols, RefsRange &&Refs, RelationsRange &&Relations,
      FileRange &&Files, IndexContents IdxContents, Payload &&BackingData,
      size_t BackingDataSize)
      : Dex(std::forward<SymbolRange>(Symbols), std::forward<RefsRange>(Refs),
            std::forward<RelationsRange>(Relations),
            std::forward<Payload>(BackingData), BackingDataSize) {
    this->Files = std::forward<FileRange>(Files);
    this->IdxContents = IdxContents;
  }

  /// Builds an index from slabs. The index takes ownership of the slab.
  static std::unique_ptr<SymbolIndex> build(SymbolSlab, RefSlab, RelationSlab);

  bool
  fuzzyFind(const FuzzyFindRequest &Req,
            llvm::function_ref<void(const Symbol &)> Callback) const override;

  void lookup(const LookupRequest &Req,
              llvm::function_ref<void(const Symbol &)> Callback) const override;

  bool refs(const RefsRequest &Req,
            llvm::function_ref<void(const Ref &)> Callback) const override;

  void relations(const RelationsRequest &Req,
                 llvm::function_ref<void(const SymbolID &, const Symbol &)>
                     Callback) const override;

  llvm::unique_function<IndexContents(llvm::StringRef) const>
  indexedFiles() const override;

  size_t estimateMemoryUsage() const override;

private:
  void buildIndex();
  std::unique_ptr<Iterator> iterator(const Token &Tok) const;
  std::unique_ptr<Iterator>
  createFileProximityIterator(llvm::ArrayRef<std::string> ProximityPaths) const;
  std::unique_ptr<Iterator>
  createTypeBoostingIterator(llvm::ArrayRef<std::string> Types) const;

  /// Stores symbols sorted in the descending order of symbol quality..
  std::vector<const Symbol *> Symbols;
  /// SymbolQuality[I] is the quality of Symbols[I].
  std::vector<float> SymbolQuality;
  llvm::DenseMap<SymbolID, const Symbol *> LookupTable;
  /// Inverted index is a mapping from the search token to the posting list,
  /// which contains all items which can be characterized by such search token.
  /// For example, if the search token is scope "std::", the corresponding
  /// posting list would contain all indices of symbols defined in namespace
  /// std. Inverted index is used to retrieve posting lists which are processed
  /// during the fuzzyFind process.
  llvm::DenseMap<Token, PostingList> InvertedIndex;
  dex::Corpus Corpus;
  llvm::DenseMap<SymbolID, llvm::ArrayRef<Ref>> Refs;
  static_assert(sizeof(RelationKind) == sizeof(uint8_t),
                "RelationKind should be of same size as a uint8_t");
  llvm::DenseMap<std::pair<SymbolID, uint8_t>, std::vector<SymbolID>> Relations;
  std::shared_ptr<void> KeepAlive; // poor man's move-only std::any
  // Set of files which were used during this index build.
  llvm::StringSet<> Files;
  // Contents of the index (symbols, references, etc.)
  IndexContents IdxContents;
  // Size of memory retained by KeepAlive.
  size_t BackingDataSize = 0;
};

/// Returns Search Token for a number of parent directories of given Path.
/// Should be used within the index build process.
///
/// This function is exposed for testing only.
std::vector<std::string> generateProximityURIs(llvm::StringRef URIPath);

} // namespace dex
} // namespace clangd
} // namespace clang

#endif // LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_DEX_DEX_H