File: Bracket.cpp

package info (click to toggle)
llvm-toolchain-15 1%3A15.0.6-4
  • links: PTS, VCS
  • area: main
  • in suites: bookworm
  • size: 1,554,644 kB
  • sloc: cpp: 5,922,452; ansic: 1,012,136; asm: 674,362; python: 191,568; objc: 73,855; f90: 42,327; lisp: 31,913; pascal: 11,973; javascript: 10,144; sh: 9,421; perl: 7,447; ml: 5,527; awk: 3,523; makefile: 2,520; xml: 885; cs: 573; fortran: 567
file content (155 lines) | stat: -rw-r--r-- 5,279 bytes parent folder | download | duplicates (9)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
//===--- Bracket.cpp - Analyze bracket structure --------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// The basic phases of our bracket matching are:
//
// 1) A simple "greedy" match looks for well-nested subsequences.
//
//    We can't fully trust the results of this, consider:
//      while (1) {   // A
//        if (true) { // B
//          break;
//      }             // C
//    Greedy matching will match B=C, when we should at least consider A=C.
//    However for the correct parts of the file, the greedy match gives the
//    right answer. It produces useful candidates for phase 2.
//
//    simplePairBrackets handles this step.
//
// 2) Try to identify places where formatting indicates that the greedy match
//    was correct. This is similar to how a human would scan a large file.
//
//    For example:
//      int foo() {      // X
//        // indented
//        while (1) {
//          // valid code
//        }
//        return bar(42);
//      }                // Y
//    We can "verify" that X..Y looks like a braced block, and the greedy match
//    tells us that substring is perfectly nested.
//    We trust the pairings of those brackets and don't examine them further.
//    However in the first example above, we do not trust B=C because the brace
//    indentation is suspect.
//
//    FIXME: implement this step.
//
// 3) Run full best-match optimization on remaining brackets.
//
//    Conceptually, this considers all possible matchings and optimizes cost:
//      - there is a cost for failing to match a bracket
//      - there is a variable cost for matching two brackets.
//        (For example if brace indentation doesn't match).
//
//    In the first example we have three alternatives, and they are ranked:
//      1) A=C, skip B
//      2) B=C, skip A
//      3) skip A, skip B, skip C
//    The cost for skipping a bracket is high, so option 3 is worst.
//    B=C costs more than A=C, because the indentation doesn't match.
//
//    It would be correct to run this step alone, but it would be too slow.
//    The implementation is dynamic programming in N^3 space and N^2 time.
//    Having earlier steps filter out most brackets is key to performance.
//
//    FIXME: implement this step.
//
//===----------------------------------------------------------------------===//

#include "clang-pseudo/Bracket.h"

namespace clang {
namespace pseudo {
namespace {

struct Bracket {
  using Index = unsigned;
  constexpr static Index None = -1;

  enum BracketKind : char { Paren, Brace, Square } Kind;
  enum Direction : bool { Open, Close } Dir;
  unsigned Line;
  unsigned Indent;
  Token::Index Tok;
  Bracket::Index Pair = None;
};

// Find brackets in the stream and convert to Bracket struct.
std::vector<Bracket> findBrackets(const TokenStream &Stream) {
  std::vector<Bracket> Brackets;
  auto Add = [&](const pseudo::Token &Tok, Bracket::BracketKind K,
                 Bracket::Direction D) {
    Brackets.push_back(
        {K, D, Tok.Line, Tok.Indent, Stream.index(Tok), Bracket::None});
  };
  for (const auto &Tok : Stream.tokens()) {
    switch (Tok.Kind) {
    case clang::tok::l_paren:
      Add(Tok, Bracket::Paren, Bracket::Open);
      break;
    case clang::tok::r_paren:
      Add(Tok, Bracket::Paren, Bracket::Close);
      break;
    case clang::tok::l_brace:
      Add(Tok, Bracket::Brace, Bracket::Open);
      break;
    case clang::tok::r_brace:
      Add(Tok, Bracket::Brace, Bracket::Close);
      break;
    case clang::tok::l_square:
      Add(Tok, Bracket::Square, Bracket::Open);
      break;
    case clang::tok::r_square:
      Add(Tok, Bracket::Square, Bracket::Close);
      break;
    default:
      break;
    }
  }
  return Brackets;
}

// Write the bracket pairings from Brackets back to Tokens.
void applyPairings(ArrayRef<Bracket> Brackets, TokenStream &Tokens) {
  for (const auto &B : Brackets)
    Tokens.tokens()[B.Tok].Pair =
        (B.Pair == Bracket::None) ? 0 : (int32_t)Brackets[B.Pair].Tok - B.Tok;
}

// Find perfect pairings (ignoring whitespace) via greedy algorithm.
// This means two brackets are paired if they match and the brackets between
// them nest perfectly, with no skipped or crossed brackets.
void simplePairBrackets(MutableArrayRef<Bracket> Brackets) {
  std::vector<unsigned> Stack;
  for (unsigned I = 0; I < Brackets.size(); ++I) {
    if (Brackets[I].Dir == Bracket::Open) {
      Stack.push_back(I);
    } else if (!Stack.empty() &&
               Brackets[Stack.back()].Kind == Brackets[I].Kind) {
      Brackets[Stack.back()].Pair = I;
      Brackets[I].Pair = Stack.back();
      Stack.pop_back();
    } else {
      // Unpaired closer, no brackets on stack are part of a perfect sequence.
      Stack.clear();
    }
  }
  // Any remaining brackets on the stack stay unpaired.
}

} // namespace

void pairBrackets(TokenStream &Stream) {
  auto Brackets = findBrackets(Stream);
  simplePairBrackets(Brackets);
  applyPairings(Brackets, Stream);
}

} // namespace pseudo
} // namespace clang