1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139
|
//===--- MisleadingBidirectional.cpp - clang-tidy -------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "MisleadingBidirectional.h"
#include "clang/Frontend/CompilerInstance.h"
#include "clang/Lex/Preprocessor.h"
#include "llvm/Support/ConvertUTF.h"
using namespace clang;
using namespace clang::tidy::misc;
static bool containsMisleadingBidi(StringRef Buffer,
bool HonorLineBreaks = true) {
const char *CurPtr = Buffer.begin();
enum BidiChar {
PS = 0x2029,
RLO = 0x202E,
RLE = 0x202B,
LRO = 0x202D,
LRE = 0x202A,
PDF = 0x202C,
RLI = 0x2067,
LRI = 0x2066,
FSI = 0x2068,
PDI = 0x2069
};
SmallVector<BidiChar> BidiContexts;
// Scan each character while maintaining a stack of opened bidi context.
// RLO/RLE/LRO/LRE all are closed by PDF while RLI LRI and FSI are closed by
// PDI. New lines reset the context count. Extra PDF / PDI are ignored.
//
// Warn if we end up with an unclosed context.
while (CurPtr < Buffer.end()) {
unsigned char C = *CurPtr;
if (isASCII(C)) {
++CurPtr;
bool IsParagrapSep =
(C == 0xA || C == 0xD || (0x1C <= C && C <= 0x1E) || C == 0x85);
bool IsSegmentSep = (C == 0x9 || C == 0xB || C == 0x1F);
if (IsParagrapSep || IsSegmentSep)
BidiContexts.clear();
continue;
}
llvm::UTF32 CodePoint;
llvm::ConversionResult Result = llvm::convertUTF8Sequence(
(const llvm::UTF8 **)&CurPtr, (const llvm::UTF8 *)Buffer.end(),
&CodePoint, llvm::strictConversion);
// If conversion fails, utf-8 is designed so that we can just try next char.
if (Result != llvm::conversionOK) {
++CurPtr;
continue;
}
// Open a PDF context.
if (CodePoint == RLO || CodePoint == RLE || CodePoint == LRO ||
CodePoint == LRE)
BidiContexts.push_back(PDF);
// Close PDF Context.
else if (CodePoint == PDF) {
if (!BidiContexts.empty() && BidiContexts.back() == PDF)
BidiContexts.pop_back();
}
// Open a PDI Context.
else if (CodePoint == RLI || CodePoint == LRI || CodePoint == FSI)
BidiContexts.push_back(PDI);
// Close a PDI Context.
else if (CodePoint == PDI) {
auto R = std::find(BidiContexts.rbegin(), BidiContexts.rend(), PDI);
if (R != BidiContexts.rend())
BidiContexts.resize(BidiContexts.rend() - R - 1);
}
// Line break or equivalent
else if (CodePoint == PS)
BidiContexts.clear();
}
return !BidiContexts.empty();
}
class MisleadingBidirectionalCheck::MisleadingBidirectionalHandler
: public CommentHandler {
public:
MisleadingBidirectionalHandler(MisleadingBidirectionalCheck &Check,
llvm::Optional<std::string> User)
: Check(Check) {}
bool HandleComment(Preprocessor &PP, SourceRange Range) override {
// FIXME: check that we are in a /* */ comment
StringRef Text =
Lexer::getSourceText(CharSourceRange::getCharRange(Range),
PP.getSourceManager(), PP.getLangOpts());
if (containsMisleadingBidi(Text, true))
Check.diag(
Range.getBegin(),
"comment contains misleading bidirectional Unicode characters");
return false;
}
private:
MisleadingBidirectionalCheck &Check;
};
MisleadingBidirectionalCheck::MisleadingBidirectionalCheck(
StringRef Name, ClangTidyContext *Context)
: ClangTidyCheck(Name, Context),
Handler(std::make_unique<MisleadingBidirectionalHandler>(
*this, Context->getOptions().User)) {}
MisleadingBidirectionalCheck::~MisleadingBidirectionalCheck() = default;
void MisleadingBidirectionalCheck::registerPPCallbacks(
const SourceManager &SM, Preprocessor *PP, Preprocessor *ModuleExpanderPP) {
PP->addCommentHandler(Handler.get());
}
void MisleadingBidirectionalCheck::check(
const ast_matchers::MatchFinder::MatchResult &Result) {
if (const auto *SL = Result.Nodes.getNodeAs<StringLiteral>("strlit")) {
StringRef Literal = SL->getBytes();
if (containsMisleadingBidi(Literal, false))
diag(SL->getBeginLoc(), "string literal contains misleading "
"bidirectional Unicode characters");
}
}
void MisleadingBidirectionalCheck::registerMatchers(
ast_matchers::MatchFinder *Finder) {
Finder->addMatcher(ast_matchers::stringLiteral().bind("strlit"), this);
}
|