File: DirectiveTree.h

package info (click to toggle)
swiftlang 6.0.3-2
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 2,519,992 kB
  • sloc: cpp: 9,107,863; ansic: 2,040,022; asm: 1,135,751; python: 296,500; objc: 82,456; f90: 60,502; lisp: 34,951; pascal: 19,946; sh: 18,133; perl: 7,482; ml: 4,937; javascript: 4,117; makefile: 3,840; awk: 3,535; xml: 914; fortran: 619; cs: 573; ruby: 573
file content (130 lines) | stat: -rw-r--r-- 4,991 bytes parent folder | download | duplicates (6)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
//===--- DirectiveTree.h - Find and strip preprocessor directives *- C++-*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// The pseudoparser tries to match a token stream to the C++ grammar.
// Preprocessor #defines and other directives are not part of this grammar, and
// should be removed before the file can be parsed.
//
// Conditional blocks like #if...#else...#endif are particularly tricky, as
// simply stripping the directives may not produce a grammatical result:
//
//   return
//     #ifndef DEBUG
//       1
//     #else
//       0
//     #endif
//       ;
//
// This header supports analyzing and removing the directives in a source file.
//
//===----------------------------------------------------------------------===//

#ifndef CLANG_PSEUDO_DIRECTIVETREE_H
#define CLANG_PSEUDO_DIRECTIVETREE_H

#include "clang-pseudo/Token.h"
#include "clang/Basic/TokenKinds.h"
#include <optional>
#include <variant>
#include <vector>

namespace clang {
namespace pseudo {

/// Models the structure of a source file from the preprocessor's viewpoint.
///
/// The model is a tree. Its leaves are runs of plain source code and single
/// directives; its internal nodes are #if...#endif sections.
///
/// (root)
/// |-+ Directive                    #include <stdio.h>
/// |-+ Code                         int main() {
/// | `                                printf("hello, ");
/// |-+ Conditional -+ Directive     #ifndef NDEBUG
/// | |-+ Code                         printf("debug\n");
/// | |-+ Directive                  #else
/// | |-+ Code                         printf("production\n");
/// | `-+ Directive                  #endif
/// |-+ Code                           return 0;
///   `                              }
///
/// In contrast to the clang preprocessor, the complete tree is modeled
/// explicitly. Only directives are recognized here; macro usage is not.
struct DirectiveTree {
  /// A stretch of code containing no directives (comments may be included).
  struct Code {
    Token::Range Tokens;
  };
  /// One preprocessor directive.
  struct Directive {
    /// The directive's raw tokens; the first of them is `#`.
    Token::Range Tokens;
    /// The directive keyword; pp_not_keyword unless set otherwise.
    clang::tok::PPKeywordKind Kind = clang::tok::pp_not_keyword;
  };
  /// A conditional section of the file.
  ///
  /// It opens with an #if-style directive (#if, #ifdef, #ifndef etc),
  /// includes every #else branch, and extends to the matching #endif.
  struct Conditional {
    /// The directives that introduce the top-level alternative parses, each
    /// stored alongside a DirectiveTree for that branch.
    ///
    /// Branch 0 carries an #if-type directive; every later branch carries
    /// an #else-type directive.
    std::vector<std::pair<Directive, DirectiveTree>> Branches;
    /// The directive that closes the conditional; expected to be #endif.
    Directive End;
    /// Index of the branch that was selected as active.
    /// std::nullopt means no branch is taken (e.g. #if 0 ... #endif).
    /// A tree fresh from `parse()` has no branches marked as taken;
    /// see `chooseConditionalBranches()`.
    std::optional<unsigned> Taken;
  };

  /// One piece of the file: exactly one of Code, Directive or Conditional.
  using Chunk = std::variant<Code, Directive, Conditional>;
  std::vector<Chunk> Chunks;

  /// Builds the preprocessor structure by inspecting the raw tokens.
  static DirectiveTree parse(const TokenStream &Tokens);

  /// Returns a parseable token stream with all directive tokens removed.
  ///
  /// Each conditional section is replaced by its taken branch, if any.
  /// \p Tokens must be the token stream that this tree describes.
  TokenStream stripDirectives(const TokenStream &Tokens) const;
};
/// Stream-output operators for a whole DirectiveTree and for each kind of
/// chunk it can hold. The textual format is decided by the implementation,
/// which is not part of this header.
llvm::raw_ostream &operator<<(llvm::raw_ostream &OS,
                              const DirectiveTree &Tree);
llvm::raw_ostream &operator<<(llvm::raw_ostream &OS,
                              const DirectiveTree::Code &C);
llvm::raw_ostream &operator<<(llvm::raw_ostream &OS,
                              const DirectiveTree::Directive &D);
llvm::raw_ostream &operator<<(llvm::raw_ostream &OS,
                              const DirectiveTree::Conditional &Cond);

/// Selects a "taken" branch for each conditional directive in the file.
///
/// The choice is somewhat arbitrary, but aims to produce a useful parse:
///  - idioms like `#if 0` are respected
///  - we avoid paths that reach `#error`
///  - we try to maximize the amount of code seen
/// The choice may also be "no branch taken".
///
/// Choices are also made for conditionals themselves inside not-taken branches:
///   #if 1 // taken!
///   #else // not taken
///      #if 1 // taken!
///      #endif
///   #endif
///
/// The choices are stored in Conditional::Taken nodes.
void chooseConditionalBranches(DirectiveTree &, const TokenStream &Code);

} // namespace pseudo
} // namespace clang

#endif // CLANG_PSEUDO_DIRECTIVETREE_H