//===--- DirectiveTree.h - Find and strip preprocessor directives *- C++-*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// The pseudoparser tries to match a token stream to the C++ grammar.
// Preprocessor #defines and other directives are not part of this grammar, and
// should be removed before the file can be parsed.
//
// Conditional blocks like #if...#else...#endif are particularly tricky, as
// simply stripping the directives may not produce a grammatical result:
//
// return
// #ifndef DEBUG
// 1
// #else
// 0
// #endif
// ;
//
// This header supports analyzing and removing the directives in a source file.
//
//===----------------------------------------------------------------------===//
#ifndef CLANG_PSEUDO_DIRECTIVETREE_H
#define CLANG_PSEUDO_DIRECTIVETREE_H
#include "clang-pseudo/Token.h"
#include "clang/Basic/TokenKinds.h"
#include <vector>
namespace clang {
class LangOptions;
namespace pseudo {
/// Describes the structure of a source file, as seen by the preprocessor.
///
/// The structure is a tree, whose leaves are plain source code and directives,
/// and whose internal nodes are #if...#endif sections.
///
/// (root)
/// |-+ Directive #include <stdio.h>
/// |-+ Code int main() {
/// | ` printf("hello, ");
/// |-+ Conditional -+ Directive #ifndef NDEBUG
/// | |-+ Code printf("debug\n");
/// | |-+ Directive #else
/// | |-+ Code printf("production\n");
/// | `-+ Directive #endif
/// |-+ Code return 0;
/// ` }
///
/// Unlike the clang preprocessor, we model the full tree explicitly.
/// This class does not recognize macro usage, only directives.
struct DirectiveTree {
/// A range of code (and possibly comments) containing no directives.
struct Code {
/// The tokens covered by this run of code.
Token::Range Tokens;
};
/// A preprocessor directive.
struct Directive {
/// Raw tokens making up the directive, starting with `#`.
Token::Range Tokens;
/// The directive's keyword kind; pp_not_keyword unless set otherwise.
clang::tok::PPKeywordKind Kind = clang::tok::pp_not_keyword;
};
/// A preprocessor conditional section.
///
/// This starts with an #if, #ifdef, #ifndef etc directive.
/// It covers all #else branches, and spans until the matching #endif.
struct Conditional {
/// The sequence of directives that introduce top-level alternative parses.
///
/// Each entry pairs the directive that opens a branch with a nested
/// DirectiveTree (presumably the contents of that branch).
/// The first branch will have an #if type directive.
/// Subsequent branches will have #else type directives.
std::vector<std::pair<Directive, DirectiveTree>> Branches;
/// The directive terminating the conditional, should be #endif.
Directive End;
/// The index of the conditional branch we chose as active
/// (presumably an index into Branches).
/// None indicates no branch was taken (e.g. #if 0 ... #endif).
/// The initial tree from `parse()` has no branches marked as taken.
/// See `chooseConditionalBranches()`.
llvm::Optional<unsigned> Taken;
};
/// Some piece of the file. {One of Code, Directive, Conditional}.
class Chunk; // Only forward-declared here; defined later as DirectiveTree::Chunk.
/// The sequence of chunks that make up this tree.
std::vector<Chunk> Chunks;
/// Extract preprocessor structure by examining the raw tokens.
///
/// No conditional branches are marked as taken in the returned tree; see
/// `chooseConditionalBranches()`.
static DirectiveTree parse(const TokenStream &);
/// Produce a parseable token stream by stripping all directive tokens.
///
/// Conditional sections are replaced by the taken branch, if any.
/// This tree must describe the provided token stream.
/// The tree itself is not modified (const member); the result is returned as
/// a new TokenStream.
TokenStream stripDirectives(const TokenStream &) const;
};
/// Stream-insertion overloads for a DirectiveTree and each of its node types
/// (Chunk, Code, Directive, Conditional).
///
/// Only declared here; the output format is determined by the implementation,
/// which is not part of this header. Each overload returns an
/// llvm::raw_ostream reference.
llvm::raw_ostream &operator<<(llvm::raw_ostream &, const DirectiveTree &);
llvm::raw_ostream &operator<<(llvm::raw_ostream &, const DirectiveTree::Chunk &);
llvm::raw_ostream &operator<<(llvm::raw_ostream &, const DirectiveTree::Code &);
llvm::raw_ostream &operator<<(llvm::raw_ostream &,
const DirectiveTree::Directive &);
llvm::raw_ostream &operator<<(llvm::raw_ostream &,
const DirectiveTree::Conditional &);
/// Selects a "taken" branch for each conditional directive in the file.
///
/// The choice is somewhat arbitrary, but aims to produce a useful parse:
/// - idioms like `#if 0` are respected
/// - we avoid paths that reach `#error`
/// - we try to maximize the amount of code seen
/// The choice may also be "no branch taken".
///
/// Choices are also made for conditionals nested inside not-taken branches:
/// #if 1 // taken!
/// #else // not taken
/// #if 1 // taken!
/// #endif
/// #endif
///
/// The tree is passed by non-const reference and updated in place: each
/// choice is stored in the `Taken` field of the corresponding Conditional.
/// `Code` is presumably the token stream the tree was parsed from -- confirm
/// against the implementation.
void chooseConditionalBranches(DirectiveTree &, const TokenStream &Code);
// FIXME: This approximates std::variant<Code, Directive, Conditional>.
// Switch once we can use C++17.
//
/// One piece of a DirectiveTree: wraps a Code, a Directive, or a Conditional.
///
/// A Chunk is move-only and has no default constructor; it is built by
/// implicit conversion from one of the three payload types.
class DirectiveTree::Chunk {
public:
  /// Tags for the payload a chunk may hold.
  enum Kind { K_Empty, K_Code, K_Directive, K_Conditional };

  /// Reports which payload is present, checking Code, then Directive, then
  /// Conditional. Yields K_Empty when none of the three is set.
  Kind kind() const {
    if (CodeVariant)
      return K_Code;
    if (DirectiveVariant)
      return K_Directive;
    if (ConditionalVariant)
      return K_Conditional;
    return K_Empty;
  }

  // Special members: no default construction, no copies, defaulted moves.
  Chunk() = delete;
  Chunk(const Chunk &) = delete;
  Chunk(Chunk &&) = default;
  Chunk &operator=(const Chunk &) = delete;
  Chunk &operator=(Chunk &&) = default;
  ~Chunk() = default;

  // Payload => Chunk: intentionally implicit, each fills exactly one slot.
  Chunk(Code C) : CodeVariant(std::move(C)) {}
  Chunk(Directive C) : DirectiveVariant(std::move(C)) {}
  Chunk(Conditional C) : ConditionalVariant(std::move(C)) {}

  // Chunk => payload reference, const and non-const flavours.
  // Each operator dereferences its slot unconditionally, so the caller must
  // first make sure kind() matches the requested type.
  explicit operator const Code &() const { return *CodeVariant; }
  explicit operator Code &() { return *CodeVariant; }
  explicit operator const Directive &() const { return *DirectiveVariant; }
  explicit operator Directive &() { return *DirectiveVariant; }
  explicit operator const Conditional &() const { return *ConditionalVariant; }
  explicit operator Conditional &() { return *ConditionalVariant; }

private:
  // Wasteful, a union variant would be better!
  // One optional slot per payload type; the constructors above set one each.
  llvm::Optional<Code> CodeVariant;
  llvm::Optional<Directive> DirectiveVariant;
  llvm::Optional<Conditional> ConditionalVariant;
};
} // namespace pseudo
} // namespace clang
#endif // CLANG_PSEUDO_DIRECTIVETREE_H