//===--- FuzzySymbolIndex.cpp - Lookup symbols for autocomplete -*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "FuzzySymbolIndex.h"
#include "llvm/Support/Regex.h"
using clang::find_all_symbols::SymbolAndSignals;
using llvm::StringRef;
namespace clang {
namespace include_fixer {
namespace {
class MemSymbolIndex : public FuzzySymbolIndex {
public:
MemSymbolIndex(std::vector<SymbolAndSignals> Symbols) {
for (auto &Symbol : Symbols) {
auto Tokens = tokenize(Symbol.Symbol.getName());
this->Symbols.emplace_back(
StringRef(llvm::join(Tokens.begin(), Tokens.end(), " ")),
std::move(Symbol));
}
}
std::vector<SymbolAndSignals> search(StringRef Query) override {
auto Tokens = tokenize(Query);
llvm::Regex Pattern("^" + queryRegexp(Tokens));
std::vector<SymbolAndSignals> Results;
for (const Entry &E : Symbols)
if (Pattern.match(E.first))
Results.push_back(E.second);
return Results;
}
private:
using Entry = std::pair<llvm::SmallString<32>, SymbolAndSignals>;
std::vector<Entry> Symbols;
};
// Helpers for the tokenize state machine.
//
// TokenizeState describes the characters that FuzzySymbolIndex::tokenize has
// read but not yet emitted as a token.
enum TokenizeState {
EMPTY, // No pending characters.
ONE_BIG, // Exactly one uppercase letter pending; may start WORD or Word.
BIG_WORD, // Reading an all-uppercase WORD (two or more capitals so far).
SMALL_WORD, // Reading a lowercase word, possibly with a leading capital.
NUMBER // Reading a run of digits.
};
// Character classes used by the tokenizer to decide where tokens start and end.
enum CharType { UPPER, LOWER, DIGIT, MISC };

// Classifies c as an uppercase letter, a lowercase letter, a digit, or
// anything else (MISC), using the <cctype> predicates.
//
// The <cctype> functions have undefined behavior when passed a negative value
// other than EOF, so c is converted to unsigned char first. This matters for
// bytes >= 0x80 on platforms where plain char is signed.
CharType classify(char c) {
  const unsigned char UC = static_cast<unsigned char>(c);
  if (isupper(UC))
    return UPPER;
  if (islower(UC))
    return LOWER;
  if (isdigit(UC))
    return DIGIT;
  return MISC;
}
} // namespace
// Splits Text into lowercase tokens at case, digit and punctuation boundaries.
//
// Runs of lowercase letters, runs of uppercase letters and runs of digits each
// form a token; a single capital followed by lowercase letters stays attached
// to them ("Word"). When a run of capitals is followed by a lowercase letter,
// the last capital starts the next token ("FOOBar" -> "foo", "bar"). Any other
// character ends the pending token and is dropped.
std::vector<std::string> FuzzySymbolIndex::tokenize(StringRef Text) {
  std::vector<std::string> Tokens;
  // Mode describes the not-yet-emitted characters in [Begin, Pos).
  TokenizeState Mode = EMPTY;
  size_t Begin = 0;

  // Emits the pending characters up to (not including) Stop, if there are any,
  // and restarts the pending range at Stop.
  auto Emit = [&](size_t Stop) {
    if (Mode != EMPTY)
      Tokens.push_back(Text.substr(Begin, Stop - Begin).lower());
    Mode = EMPTY;
    Begin = Stop;
  };

  for (size_t Pos = 0, Len = Text.size(); Pos != Len; ++Pos) {
    switch (classify(Text[Pos])) {
    case MISC:
      // Separators terminate whatever is pending and are skipped.
      Emit(Pos);
      break;

    case LOWER:
      if (Mode == SMALL_WORD)
        break; // Still inside the same word.
      if (Mode == BIG_WORD)
        Emit(Pos - 1); // FOOBar: first token is FOO, not FOOB.
      else if (Mode != ONE_BIG)
        Emit(Pos); // EMPTY or NUMBER: a fresh word starts here.
      // From ONE_BIG nothing is emitted: the capital joins this word.
      Mode = SMALL_WORD;
      break;

    case UPPER:
      if (Mode == BIG_WORD)
        break; // Still inside the same uppercase run.
      if (Mode == ONE_BIG) {
        Mode = BIG_WORD; // Second capital in a row.
        break;
      }
      Emit(Pos);
      Mode = ONE_BIG;
      break;

    case DIGIT:
      if (Mode != NUMBER) {
        Emit(Pos);
        Mode = NUMBER;
      }
      break;
    }
  }

  // Emit whatever is still pending at the end of the input.
  Emit(Text.size());
  return Tokens;
}
// Builds the body of a regex matching space-joined symbol tokens that the
// query Tokens fuzzily select.
//
// Between two consecutive query tokens the pattern consumes the rest of the
// current symbol token and the separating space. Between two consecutive
// characters of one query token it optionally does the same, so a query token
// may spell out the initial characters of several symbol tokens. Characters
// are inserted verbatim, without regex escaping.
std::string
FuzzySymbolIndex::queryRegexp(const std::vector<std::string> &Tokens) {
  std::string Pattern;
  bool AtFirstToken = true;
  for (const std::string &Token : Tokens) {
    // Every token but the first must begin a new symbol token.
    if (!AtFirstToken)
      Pattern += "[[:alnum:]]* ";
    AtFirstToken = false;

    bool AtFirstChar = true;
    for (const char Ch : Token) {
      // Within a token, jumping to the next symbol token is optional.
      if (!AtFirstChar)
        Pattern += "([[:alnum:]]* )?";
      AtFirstChar = false;
      Pattern += Ch;
    }
  }
  return Pattern;
}
// Reads the file at FilePath and builds an in-memory index from the symbol
// infos parsed out of its contents as YAML.
//
// Returns the error reported by MemoryBuffer::getFile if the file cannot be
// read.
llvm::Expected<std::unique_ptr<FuzzySymbolIndex>>
FuzzySymbolIndex::createFromYAML(StringRef FilePath) {
  llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> File =
      llvm::MemoryBuffer::getFile(FilePath);
  if (!File)
    return llvm::errorCodeToError(File.getError());

  StringRef Contents = (*File)->getBuffer();
  return std::make_unique<MemSymbolIndex>(
      find_all_symbols::ReadSymbolInfosFromYAML(Contents));
}
} // namespace include_fixer
} // namespace clang
|