//===--- FuzzySymbolIndex.cpp - Lookup symbols for autocomplete -*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#include "FuzzySymbolIndex.h"
#include "llvm/Support/Regex.h"
using clang::find_all_symbols::SymbolAndSignals;
using llvm::StringRef;
namespace clang {
namespace include_fixer {
namespace {
class MemSymbolIndex : public FuzzySymbolIndex {
public:
MemSymbolIndex(std::vector<SymbolAndSignals> Symbols) {
for (auto &Symbol : Symbols) {
auto Tokens = tokenize(Symbol.Symbol.getName());
this->Symbols.emplace_back(
StringRef(llvm::join(Tokens.begin(), Tokens.end(), " ")),
std::move(Symbol));
}
}
std::vector<SymbolAndSignals> search(StringRef Query) override {
auto Tokens = tokenize(Query);
llvm::Regex Pattern("^" + queryRegexp(Tokens));
std::vector<SymbolAndSignals> Results;
for (const Entry &E : Symbols)
if (Pattern.match(E.first))
Results.push_back(E.second);
return Results;
}
private:
using Entry = std::pair<llvm::SmallString<32>, SymbolAndSignals>;
std::vector<Entry> Symbols;
};
// Helpers for tokenize state machine.
//
// TokenizeState describes the pending token, i.e. the characters read since
// the last flush. tokenize() flushes the pending token and changes state when
// the next character does not continue it.
enum TokenizeState {
EMPTY, // No pending characters.
// A following lowercase letter turns ONE_BIG into SMALL_WORD; a following
// uppercase letter turns it into BIG_WORD.
ONE_BIG, // Read one uppercase letter, could be WORD or Word.
// When a lowercase letter follows, the last uppercase letter is split off to
// start the new word (FOOBar -> FOO, Bar).
BIG_WORD, // Reading an uppercase WORD.
SMALL_WORD, // Reading a lowercase word.
NUMBER // Reading a number.
};
/// Character classes distinguished by the tokenizer. MISC is everything that
/// is not an uppercase letter, lowercase letter or digit.
enum CharType { UPPER, LOWER, DIGIT, MISC };

/// Returns the character class of \p c.
///
/// Bytes that are not classified as a letter or digit by the <cctype>
/// predicates (punctuation, whitespace, and so on) are reported as MISC.
CharType classify(char c) {
  // The <cctype> predicates require an argument representable as unsigned
  // char (or EOF). Plain char may be signed, so a non-ASCII byte would be
  // passed as a negative value, which is undefined behaviour. Convert first.
  const unsigned char Byte = static_cast<unsigned char>(c);
  if (isupper(Byte))
    return UPPER;
  if (islower(Byte))
    return LOWER;
  if (isdigit(Byte))
    return DIGIT;
  return MISC;
}
} // namespace
/// Splits \p Text into lowercased word tokens.
///
/// Token boundaries are placed at every MISC character (which is dropped),
/// between a lowercase run and a following uppercase letter, between letters
/// and digits, and inside an uppercase run that is followed by a lowercase
/// letter (FOOBar -> "foo", "bar"). A single uppercase letter followed by
/// lowercase letters stays in one token (Word -> "word").
std::vector<std::string> FuzzySymbolIndex::tokenize(StringRef Text) {
  std::vector<std::string> Tokens;
  // Pending describes the characters in [TokenBegin, Pos) that have been read
  // but not yet emitted.
  TokenizeState Pending = EMPTY;
  size_t TokenBegin = 0;

  // Emits the pending token, if there is one, ending just before Boundary,
  // and restarts with an empty token beginning at Boundary.
  auto Emit = [&](size_t Boundary) {
    if (Pending != EMPTY)
      Tokens.push_back(
          Text.substr(TokenBegin, Boundary - TokenBegin).lower());
    Pending = EMPTY;
    TokenBegin = Boundary;
  };

  for (size_t Pos = 0, Len = Text.size(); Pos != Len; ++Pos) {
    switch (classify(Text[Pos])) {
    case MISC:
      // Separators end the current token and belong to none.
      Emit(Pos);
      break;

    case LOWER:
      if (Pending == SMALL_WORD)
        break; // Still inside the same lowercase word.
      if (Pending == BIG_WORD)
        Emit(Pos - 1); // FOOBar: first token is FOO, not FOOB.
      else if (Pending != ONE_BIG)
        Emit(Pos); // Coming from EMPTY or NUMBER: start afresh here.
      // For ONE_BIG the capital simply becomes the first letter of the word.
      Pending = SMALL_WORD;
      break;

    case UPPER:
      if (Pending == BIG_WORD)
        break; // Still inside the same uppercase run.
      if (Pending == ONE_BIG) {
        Pending = BIG_WORD; // Second capital in a row: this is a WORD.
        break;
      }
      Emit(Pos);
      Pending = ONE_BIG;
      break;

    case DIGIT:
      if (Pending != NUMBER) {
        Emit(Pos);
        Pending = NUMBER;
      }
      break;
    }
  }

  // Emit whatever is still pending at the end of the input.
  Emit(Text.size());
  return Tokens;
}
/// Builds the (unanchored) regex source used to match query \p Tokens against
/// a space-joined token string.
///
/// Consecutive query tokens are separated by "[[:alnum:]]* ", i.e. the rest
/// of the current word followed by a space. Between the characters of one
/// query token, "([[:alnum:]]* )?" optionally skips ahead to the start of the
/// next word. Token characters are inserted verbatim, without regex escaping.
std::string
FuzzySymbolIndex::queryRegexp(const std::vector<std::string> &Tokens) {
  std::string Pattern;
  bool AtFirstToken = true;
  for (const std::string &Token : Tokens) {
    // Every token after the first must begin at a later word.
    if (!AtFirstToken)
      Pattern += "[[:alnum:]]* ";
    AtFirstToken = false;

    bool AtFirstChar = true;
    for (char Ch : Token) {
      // Every character after the first may jump to the next word.
      if (!AtFirstChar)
        Pattern += "([[:alnum:]]* )?";
      AtFirstChar = false;
      Pattern += Ch;
    }
  }
  return Pattern;
}
/// Creates an in-memory index from the YAML symbol file at \p FilePath.
///
/// \returns the index on success, or the file-reading error converted to an
/// llvm::Error if the file could not be opened.
llvm::Expected<std::unique_ptr<FuzzySymbolIndex>>
FuzzySymbolIndex::createFromYAML(StringRef FilePath) {
  auto File = llvm::MemoryBuffer::getFile(FilePath);
  if (!File)
    return llvm::errorCodeToError(File.getError());

  // The buffer stays alive until this function returns, which covers the
  // parse below.
  StringRef Contents = (*File)->getBuffer();
  return llvm::make_unique<MemSymbolIndex>(
      find_all_symbols::ReadSymbolInfosFromYAML(Contents));
}
} // namespace include_fixer
} // namespace clang
|