1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351
|
//===--- UnwrappedLineParser.h - Format C++ code ----------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file contains the declaration of the UnwrappedLineParser,
/// which turns a stream of tokens into UnwrappedLines.
///
//===----------------------------------------------------------------------===//
#ifndef LLVM_CLANG_LIB_FORMAT_UNWRAPPEDLINEPARSER_H
#define LLVM_CLANG_LIB_FORMAT_UNWRAPPEDLINEPARSER_H
#include "FormatToken.h"
#include "clang/Basic/IdentifierTable.h"
#include "clang/Format/Format.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/Support/Regex.h"
#include <list>
#include <stack>
#include <vector>
namespace clang {
namespace format {
struct UnwrappedLineNode;
/// An unwrapped line is a sequence of \c Token that we would like to
/// put on a single line if there were no column limit.
///
/// This is used as a main interface between the \c UnwrappedLineParser and the
/// \c UnwrappedLineFormatter. The key property is that changing the formatting
/// within an unwrapped line does not affect any other unwrapped lines.
struct UnwrappedLine {
/// Default constructor. Defined inline near the end of this header, where it
/// sets \c Level to 0 and both boolean flags to false.
UnwrappedLine();
/// The \c Tokens comprising this \c UnwrappedLine.
std::list<UnwrappedLineNode> Tokens;
/// The indent level of the \c UnwrappedLine.
unsigned Level;
/// Whether this \c UnwrappedLine is part of a preprocessor directive.
bool InPPDirective;
/// Whether this \c UnwrappedLine must be a declaration.
/// NOTE(review): presumably derived from the parser's
/// \c DeclarationScopeStack (i.e. whether the line sits inside a compound
/// statement) -- confirm against the parser implementation.
bool MustBeDeclaration;
/// If this \c UnwrappedLine closes a block in a sequence of lines,
/// \c MatchingOpeningBlockLineIndex stores the index of the corresponding
/// opening line. Otherwise, \c MatchingOpeningBlockLineIndex must be
/// \c kInvalidIndex.
size_t MatchingOpeningBlockLineIndex = kInvalidIndex;
/// If this \c UnwrappedLine opens a block, stores the index of the
/// line with the corresponding closing brace.
size_t MatchingClosingBlockLineIndex = kInvalidIndex;
/// Sentinel meaning "no matching line". Initializing a \c size_t with -1
/// yields the largest value representable in \c size_t.
static const size_t kInvalidIndex = -1;
/// Defaults to 0.
/// NOTE(review): presumably the column at which the first token of this line
/// starts when formatting a fragment that does not begin at column 0 (cf.
/// \c UnwrappedLineParser::FirstStartColumn) -- confirm.
unsigned FirstStartColumn = 0;
};
/// Abstract callback interface; the \c UnwrappedLineParser constructor takes a
/// reference to an implementation of it.
class UnwrappedLineConsumer {
public:
  /// Virtual so that implementations can be destroyed through this interface.
  virtual ~UnwrappedLineConsumer() = default;

  /// Receives a single \c UnwrappedLine.
  virtual void consumeUnwrappedLine(const UnwrappedLine &Line) = 0;

  /// NOTE(review): presumably invoked once all lines of a parsing run have
  /// been passed to \c consumeUnwrappedLine -- confirm against the caller.
  virtual void finishRun() = 0;
};
class FormatTokenSource;
/// Parser that turns a stream of tokens into \c UnwrappedLines, as described
/// in this header's file comment. It is constructed with a style, additional
/// keywords, the tokens and an \c UnwrappedLineConsumer callback; \c parse()
/// is its only other public member. Only declarations appear in this header.
class UnwrappedLineParser {
public:
/// Creates a parser over \p Tokens.
/// NOTE(review): \p FirstStartColumn presumably initializes the member of the
/// same name (zero for normal source code, possibly nonzero for a code
/// fragment) and \p Callback presumably receives the resulting lines --
/// confirm against the constructor definition.
UnwrappedLineParser(const FormatStyle &Style,
const AdditionalKeywords &Keywords,
unsigned FirstStartColumn, ArrayRef<FormatToken *> Tokens,
UnwrappedLineConsumer &Callback);
/// Public entry point of the parser; takes no arguments and returns nothing.
void parse();
private:
// Classifies an if statement by whether and how it is followed by else.
enum class IfStmtKind {
NotIf, // Not an if statement.
IfOnly, // An if statement without the else clause.
IfElse, // An if statement followed by else but not else if.
IfElseIf // An if statement followed by else if.
};
void reset();
void parseFile();
bool precededByCommentOrPPDirective() const;
bool parseLevel(const FormatToken *OpeningBrace = nullptr,
bool CanContainBracedList = true,
TokenType NextLBracesType = TT_Unknown,
IfStmtKind *IfKind = nullptr,
FormatToken **IfLeftBrace = nullptr);
bool mightFitOnOneLine(UnwrappedLine &Line,
const FormatToken *OpeningBrace = nullptr) const;
FormatToken *parseBlock(bool MustBeDeclaration = false,
unsigned AddLevels = 1u, bool MunchSemi = true,
bool KeepBraces = true, IfStmtKind *IfKind = nullptr,
bool UnindentWhitesmithsBraces = false,
bool CanContainBracedList = true,
TokenType NextLBracesType = TT_Unknown);
void parseChildBlock(bool CanContainBracedList = true,
TokenType NextLBracesType = TT_Unknown);
// Preprocessor directive handling (judging by the names, one handler per
// directive kind; the definitions are not part of this header).
void parsePPDirective();
void parsePPDefine();
void parsePPIf(bool IfDef);
void parsePPElIf();
void parsePPElse();
void parsePPEndIf();
void parsePPUnknown();
void readTokenWithJavaScriptASI();
void parseStructuralElement(bool IsTopLevel = false,
TokenType NextLBracesType = TT_Unknown,
IfStmtKind *IfKind = nullptr,
FormatToken **IfLeftBrace = nullptr,
bool *HasDoWhile = nullptr,
bool *HasLabel = nullptr);
bool tryToParseBracedList();
bool parseBracedList(bool ContinueOnSemicolons = false, bool IsEnum = false,
tok::TokenKind ClosingBraceKind = tok::r_brace);
void parseParens(TokenType AmpAmpTokenType = TT_Unknown);
void parseSquare(bool LambdaIntroducer = false);
void keepAncestorBraces();
void parseUnbracedBody(bool CheckEOF = false);
void handleAttributes();
bool handleCppAttributes();
FormatToken *parseIfThenElse(IfStmtKind *IfKind, bool KeepBraces = false);
void parseTryCatch();
void parseLoopBody(bool KeepBraces, bool WrapRightBrace);
void parseForOrWhileLoop();
void parseDoWhile();
void parseLabel(bool LeftAlignLabel = false);
void parseCaseLabel();
void parseSwitch();
void parseNamespace();
void parseModuleImport();
void parseNew();
void parseAccessSpecifier();
bool parseEnum();
bool parseStructLike();
void parseConcept();
bool parseRequires();
void parseRequiresClause(FormatToken *RequiresToken);
void parseRequiresExpression(FormatToken *RequiresToken);
void parseConstraintExpression();
void parseJavaEnumBody();
// Parses a record (aka class) as a top level element. If ParseAsExpr is true,
// parses the record as a child block, i.e. if the class declaration is an
// expression.
void parseRecord(bool ParseAsExpr = false);
// Objective-C constructs (judging by the names).
void parseObjCLightweightGenerics();
void parseObjCMethod();
void parseObjCProtocolList();
void parseObjCUntilAtEnd();
void parseObjCInterfaceOrImplementation();
bool parseObjCProtocol();
void parseJavaScriptEs6ImportExport();
void parseStatementMacro();
void parseCSharpAttribute();
// Parse a C# generic type constraint: `where T : IComparable<T>`.
// See:
// https://docs.microsoft.com/en-us/dotnet/csharp/language-reference/keywords/where-generic-type-constraint
void parseCSharpGenericTypeConstraint();
bool tryToParseLambda();
bool tryToParseChildBlock();
bool tryToParseLambdaIntroducer();
bool tryToParsePropertyAccessor();
void tryToParseJSFunction();
bool tryToParseSimpleAttribute();
// Used by addUnwrappedLine to denote whether to keep or remove a level
// when resetting the line state.
enum class LineLevel { Remove, Keep };
void addUnwrappedLine(LineLevel AdjustLevel = LineLevel::Remove);
bool eof() const;
// LevelDifference is the difference of levels after and before the current
// token. For example:
// - if the token is '{' and opens a block, LevelDifference is 1.
// - if the token is '}' and closes a block, LevelDifference is -1.
void nextToken(int LevelDifference = 0);
void readToken(int LevelDifference = 0);
// Decides which comment tokens should be added to the current line and which
// should be added as comments before the next token.
//
// Comments specifies the sequence of comment tokens to analyze. They get
// either pushed to the current line or added to the comments before the next
// token.
//
// NextTok specifies the next token. A null pointer NextTok is supported, and
// signifies either the absence of a next token, or that the next token
// shouldn't be taken into account for the analysis.
void distributeComments(const SmallVectorImpl<FormatToken *> &Comments,
const FormatToken *NextTok);
// Adds the comment preceding the next token to unwrapped lines.
void flushComments(bool NewlineBeforeNext);
void pushToken(FormatToken *Tok);
void calculateBraceTypes(bool ExpectClassBody = false);
// Marks a conditional compilation edge (for example, an '#if', '#ifdef',
// '#else' or merge conflict marker). If 'Unreachable' is true, assumes
// this branch either cannot be taken (for example '#if false'), or should
// not be taken in this round.
void conditionalCompilationCondition(bool Unreachable);
void conditionalCompilationStart(bool Unreachable);
void conditionalCompilationAlternative();
void conditionalCompilationEnd();
bool isOnNewLine(const FormatToken &FormatTok);
// Compute hash of the current preprocessor branch.
// This is used to identify the different branches, and thus track if block
// open and close in the same branch.
size_t computePPHash() const;
// FIXME: We are constantly running into bugs where Line.Level is incorrectly
// subtracted from beyond 0. Introduce a method to subtract from Line.Level
// and use that everywhere in the Parser.
//
// NOTE(review): presumably the unwrapped line currently being assembled --
// confirm against the parser implementation.
std::unique_ptr<UnwrappedLine> Line;
// Comments are sorted into unwrapped lines by whether they are in the same
// line as the previous token, or not. If not, they belong to the next token.
// Since the next token might already be in a new unwrapped line, we need to
// store the comments belonging to that token.
SmallVector<FormatToken *, 1> CommentsBeforeNextToken;
// NOTE(review): presumably the token currently being examined -- confirm.
FormatToken *FormatTok;
bool MustBreakBeforeNextToken;
// The parsed lines. Only added to through \c CurrentLines.
SmallVector<UnwrappedLine, 8> Lines;
// Preprocessor directives are parsed out-of-order from other unwrapped lines.
// Thus, we need to keep a list of preprocessor directives to be reported
// after an unwrapped line that has been started was finished.
SmallVector<UnwrappedLine, 4> PreprocessorDirectives;
// New unwrapped lines are added via CurrentLines.
// Usually points to \c &Lines. While parsing a preprocessor directive when
// there is an unfinished previous unwrapped line, will point to
// \c &PreprocessorDirectives.
SmallVectorImpl<UnwrappedLine> *CurrentLines;
// We store for each line whether it must be a declaration depending on
// whether we are in a compound statement or not.
llvm::BitVector DeclarationScopeStack;
// Held by reference: the objects bound here must outlive the parser.
const FormatStyle &Style;
const AdditionalKeywords &Keywords;
llvm::Regex CommentPragmasRegex;
// Non-owning as far as the type shows (raw pointer to a class that is only
// forward-declared in this header).
// NOTE(review): actual ownership is decided in the implementation -- confirm.
FormatTokenSource *Tokens;
UnwrappedLineConsumer &Callback;
// FIXME: This is a temporary measure until we have reworked the ownership
// of the format tokens. The goal is to have the actual tokens created and
// owned outside of and handed into the UnwrappedLineParser.
ArrayRef<FormatToken *> AllTokens;
// Keeps a stack of the states of nested control statements (true if the
// statement contains more than some predefined number of nested statements).
SmallVector<bool, 8> NestedTooDeep;
// Represents preprocessor branch type, so we can find matching
// #if/#else/#endif directives.
enum PPBranchKind {
PP_Conditional, // Any #if, #ifdef, #ifndef, #elif, block outside #if 0
PP_Unreachable // #if 0 or a conditional preprocessor block inside #if 0
};
// One entry of \c PPStack: a branch kind paired with a line value.
// NOTE(review): whether Line is an index into the parsed lines or a source
// line number is not visible in this header -- confirm.
struct PPBranch {
PPBranch(PPBranchKind Kind, size_t Line) : Kind(Kind), Line(Line) {}
PPBranchKind Kind;
size_t Line;
};
// Keeps a stack of currently active preprocessor branching directives.
SmallVector<PPBranch, 16> PPStack;
// The \c UnwrappedLineParser re-parses the code for each combination
// of preprocessor branches that can be taken.
// To that end, we take the same branch (#if, #else, or one of the #elif
// branches) for each nesting level of preprocessor branches.
// \c PPBranchLevel stores the current nesting level of preprocessor
// branches during one pass over the code.
int PPBranchLevel;
// Contains the current branch (#if, #else or one of the #elif branches)
// for each nesting level.
SmallVector<int, 8> PPLevelBranchIndex;
// Contains the maximum number of branches at each nesting level.
SmallVector<int, 8> PPLevelBranchCount;
// Contains the number of branches per nesting level we are currently
// in while parsing a preprocessor branch sequence.
// This is used to update PPLevelBranchCount at the end of a branch
// sequence.
std::stack<int> PPChainBranchIndex;
// Include guard search state. Used to fixup preprocessor indent levels
// so that include guards do not participate in indentation.
enum IncludeGuardState {
IG_Inited, // Search started, looking for #ifndef.
IG_IfNdefed, // #ifndef found, IncludeGuardToken points to condition.
IG_Defined, // Matching #define found, checking other requirements.
IG_Found, // All requirements met, need to fix indents.
IG_Rejected, // Search failed or never started.
};
// Current state of include guard search.
IncludeGuardState IncludeGuard;
// Points to the #ifndef condition for a potential include guard. Null unless
// IncludeGuardState == IG_IfNdefed.
FormatToken *IncludeGuardToken;
// Contains the first start column where the source begins. This is zero for
// normal source code and may be nonzero when formatting a code fragment that
// does not start at the beginning of the file.
unsigned FirstStartColumn;
// Helper classes granted access to the parser's private members; neither is
// defined in this header.
friend class ScopedLineState;
friend class CompoundStatementIndenter;
};
/// Element type of \c UnwrappedLine::Tokens: a token pointer together with a
/// list of child \c UnwrappedLines.
struct UnwrappedLineNode {
  /// Creates a node without a token; \c Tok is null and \c Children is empty.
  UnwrappedLineNode() = default;

  /// Creates a node for \p Tok with no children. Intentionally left implicit
  /// so that existing conversions from \c FormatToken* keep compiling.
  UnwrappedLineNode(FormatToken *Tok) : Tok(Tok) {}

  /// The token of this node; stored as a raw pointer, null when unset.
  FormatToken *Tok = nullptr;

  /// Child lines attached to this token.
  SmallVector<UnwrappedLine, 0> Children;
};
inline UnwrappedLine::UnwrappedLine()
: Level(0), InPPDirective(false), MustBeDeclaration(false),
MatchingOpeningBlockLineIndex(kInvalidIndex) {}
} // end namespace format
} // end namespace clang
#endif
|