File: DirectiveTree.h

package info (click to toggle)
llvm-toolchain-15 1%3A15.0.6-4
  • links: PTS, VCS
  • area: main
  • in suites: bookworm
  • size: 1,554,644 kB
  • sloc: cpp: 5,922,452; ansic: 1,012,136; asm: 674,362; python: 191,568; objc: 73,855; f90: 42,327; lisp: 31,913; pascal: 11,973; javascript: 10,144; sh: 9,421; perl: 7,447; ml: 5,527; awk: 3,523; makefile: 2,520; xml: 885; cs: 573; fortran: 567
file content (172 lines) | stat: -rw-r--r-- 6,388 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
//===--- DirectiveTree.h - Find and strip preprocessor directives *- C++-*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// The pseudoparser tries to match a token stream to the C++ grammar.
// Preprocessor #defines and other directives are not part of this grammar, and
// should be removed before the file can be parsed.
//
// Conditional blocks like #if...#else...#endif are particularly tricky, as
// simply stripping the directives may not produce a grammatical result:
//
//   return
//     #ifndef DEBUG
//       1
//     #else
//       0
//     #endif
//       ;
//
// This header supports analyzing and removing the directives in a source file.
//
//===----------------------------------------------------------------------===//

#ifndef CLANG_PSEUDO_DIRECTIVETREE_H
#define CLANG_PSEUDO_DIRECTIVETREE_H

#include "clang-pseudo/Token.h"
#include "clang/Basic/TokenKinds.h"
#include <vector>

namespace clang {
class LangOptions;
namespace pseudo {

/// Describes the structure of a source file, as seen by the preprocessor.
///
/// The structure is a tree, whose leaves are plain source code and directives,
/// and whose internal nodes are #if...#endif sections.
///
/// (root)
/// |-+ Directive                    #include <stdio.h>
/// |-+ Code                         int main() {
/// | `                                printf("hello, ");
/// |-+ Conditional -+ Directive     #ifndef NDEBUG
/// | |-+ Code                         printf("debug\n");
/// | |-+ Directive                  #else
/// | |-+ Code                         printf("production\n");
/// | `-+ Directive                  #endif
/// |-+ Code                           return 0;
///   `                              }
///
/// Unlike the clang preprocessor, we model the full tree explicitly.
/// This struct does not recognize macro usage, only directives.
struct DirectiveTree {
  /// A range of code (and possibly comments) containing no directives.
  struct Code {
    /// The tokens covered by this run of code.
    Token::Range Tokens;
  };
  /// A preprocessor directive.
  struct Directive {
    /// Raw tokens making up the directive, starting with `#`.
    Token::Range Tokens;
    /// The directive keyword; stays `pp_not_keyword` unless set otherwise.
    clang::tok::PPKeywordKind Kind = clang::tok::pp_not_keyword;
  };
  /// A preprocessor conditional section.
  ///
  /// This starts with an #if, #ifdef, #ifndef etc directive.
  /// It covers all #else branches, and spans until the matching #endif.
  struct Conditional {
    /// The sequence of directives that introduce top-level alternative parses.
    ///
    /// The first branch will have an #if type directive.
    /// Subsequent branches will have #else type directives.
    /// Each entry pairs the introducing directive with a DirectiveTree,
    /// presumably describing the contents of that branch (TODO confirm).
    std::vector<std::pair<Directive, DirectiveTree>> Branches;
    /// The directive terminating the conditional, should be #endif.
    Directive End;
    /// The index of the conditional branch we chose as active.
    /// None indicates no branch was taken (e.g. #if 0 ... #endif).
    /// The initial tree from `parse()` has no branches marked as taken.
    /// See `chooseConditionalBranches()`.
    llvm::Optional<unsigned> Taken;
  };

  /// Some piece of the file. {One of Code, Directive, Conditional}.
  class Chunk; // Defined below.
  /// The pieces making up this level of the tree (presumably in source
  /// order -- TODO confirm against parse()).
  std::vector<Chunk> Chunks;

  /// Extract preprocessor structure by examining the raw tokens.
  static DirectiveTree parse(const TokenStream &);

  /// Produce a parseable token stream by stripping all directive tokens.
  ///
  /// Conditional sections are replaced by the taken branch, if any.
  /// This tree must describe the provided token stream.
  TokenStream stripDirectives(const TokenStream &) const;
};
/// Stream-output overloads for a whole DirectiveTree and for each kind of
/// node it can contain. They are only declared in this header; the output
/// format is determined by their definitions elsewhere.
llvm::raw_ostream &operator<<(llvm::raw_ostream &, const DirectiveTree &);
llvm::raw_ostream &operator<<(llvm::raw_ostream &, const DirectiveTree::Chunk &);
llvm::raw_ostream &operator<<(llvm::raw_ostream &, const DirectiveTree::Code &);
llvm::raw_ostream &operator<<(llvm::raw_ostream &,
                              const DirectiveTree::Directive &);
llvm::raw_ostream &operator<<(llvm::raw_ostream &,
                              const DirectiveTree::Conditional &);

/// Selects a "taken" branch for each conditional directive in the file.
///
/// The choice is somewhat arbitrary, but aims to produce a useful parse:
///  - idioms like `#if 0` are respected
///  - we avoid paths that reach `#error`
///  - we try to maximize the amount of code seen
/// The choice may also be "no branch taken".
///
/// Choices are also made for conditionals nested inside not-taken branches:
///   #if 1 // taken!
///   #else // not taken
///      #if 1 // taken!
///      #endif
///   #endif
///
/// Each choice is stored in the `Taken` field of the corresponding
/// `DirectiveTree::Conditional`, so the tree passed in is modified in place.
/// `Code` is presumably the token stream the tree was parsed from
/// (TODO confirm).
void chooseConditionalBranches(DirectiveTree &, const TokenStream &Code);

// FIXME: This approximates std::variant<Code, Directive, Conditional>.
//         Switch once we can use C++17.
//
// A Chunk is built from exactly one of Code, Directive or Conditional and
// keeps it in the matching optional slot; kind() reports which slot is in use.
class DirectiveTree::Chunk {
public:
  enum Kind { K_Empty, K_Code, K_Directive, K_Conditional };

  /// Reports which alternative this chunk holds.
  ///
  /// The slots are probed in the order Code, Directive, Conditional; if none
  /// of them holds a value the result is K_Empty.
  Kind kind() const {
    if (CodeVariant)
      return K_Code;
    if (DirectiveVariant)
      return K_Directive;
    if (ConditionalVariant)
      return K_Conditional;
    return K_Empty;
  }

  // A Chunk cannot be default-constructed or copied, only moved.
  Chunk() = delete;
  Chunk(const Chunk &) = delete;
  Chunk(Chunk &&) = default;
  Chunk &operator=(const Chunk &) = delete;
  Chunk &operator=(Chunk &&) = default;
  ~Chunk() = default;

  // Implicit construction from each alternative (T => Chunk).
  Chunk(Code C) : CodeVariant(std::move(C)) {}
  Chunk(Directive C) : DirectiveVariant(std::move(C)) {}
  Chunk(Conditional C) : ConditionalVariant(std::move(C)) {}

  // Explicit conversions back to the held alternative (Chunk => T& and
  // const T&). Nothing here verifies that the requested alternative is the
  // one actually held, so callers should consult kind() first.
  explicit operator const Code &() const { return *CodeVariant; }
  explicit operator Code &() { return *CodeVariant; }
  explicit operator const Directive &() const { return *DirectiveVariant; }
  explicit operator Directive &() { return *DirectiveVariant; }
  explicit operator const Conditional &() const { return *ConditionalVariant; }
  explicit operator Conditional &() { return *ConditionalVariant; }

private:
  // One optional slot per alternative. This wastes space compared with a
  // real union-based variant, which would be the better representation.
  llvm::Optional<Code> CodeVariant;
  llvm::Optional<Directive> DirectiveVariant;
  llvm::Optional<Conditional> ConditionalVariant;
};

} // namespace pseudo
} // namespace clang

#endif // CLANG_PSEUDO_DIRECTIVETREE_H