1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194
|
//===--- Forest.cpp - Parse forest ------------------------------*- C++-*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "clang-pseudo/Forest.h"
#include "clang-pseudo/Token.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Support/FormatVariadic.h"
namespace clang {
namespace pseudo {
void ForestNode::RecursiveIterator::operator++() {
auto C = Cur->children();
// Try to find a child of the current node to descend into.
for (unsigned I = 0; I < C.size(); ++I) {
if (Seen.insert(C[I]).second) {
Stack.push_back({Cur, I});
Cur = C[I];
return;
}
}
// Try to find a sibling af an ancestor to advance to.
for (; !Stack.empty(); Stack.pop_back()) {
C = Stack.back().Parent->children();
unsigned &Index = Stack.back().ChildIndex;
while (++Index < C.size()) {
if (Seen.insert(C[Index]).second) {
Cur = C[Index];
return;
}
}
}
Cur = nullptr;
}
llvm::iterator_range<ForestNode::RecursiveIterator>
ForestNode::descendants() const {
return {RecursiveIterator(this), RecursiveIterator()};
}
std::string ForestNode::dump(const Grammar &G) const {
switch (kind()) {
case Ambiguous:
return llvm::formatv("{0} := <ambiguous>", G.symbolName(symbol()));
case Terminal:
return llvm::formatv("{0} := tok[{1}]", G.symbolName(symbol()),
startTokenIndex());
case Sequence:
return G.dumpRule(rule());
case Opaque:
return llvm::formatv("{0} := <opaque>", G.symbolName(symbol()));
}
llvm_unreachable("Unhandled node kind!");
}
std::string ForestNode::dumpRecursive(const Grammar &G,
bool Abbreviated) const {
using llvm::formatv;
Token::Index MaxToken = 0;
// Count visits of nodes so we can mark those seen multiple times.
llvm::DenseMap<const ForestNode *, /*VisitCount*/ unsigned> VisitCounts;
std::function<void(const ForestNode *)> CountVisits =
[&](const ForestNode *P) {
MaxToken = std::max(MaxToken, P->startTokenIndex());
if (VisitCounts[P]++ > 0)
return; // Don't count children as multiply visited.
if (P->kind() == Ambiguous)
llvm::for_each(P->alternatives(), CountVisits);
else if (P->kind() == Sequence)
llvm::for_each(P->elements(), CountVisits);
};
CountVisits(this);
unsigned IndexWidth = std::max(3, (int)std::to_string(MaxToken).size());
// e.g. "[{0,4}, {1,4})" if MaxToken is 5742.
std::string RangeFormat = formatv("[{{0,{0}}, {{1,{0}}) ", IndexWidth);
// The box-drawing characters that should be added as a child is rendered.
struct LineDecoration {
std::string Prefix; // Prepended to every line.
llvm::StringRef First; // added to the child's line.
llvm::StringRef Subsequent; // added to descendants' lines.
};
// We print a "#<id>" for nonterminal forest nodes that are being dumped
// multiple times.
llvm::DenseMap<const ForestNode *, size_t> ReferenceIds;
std::string Result;
constexpr Token::Index KEnd = std::numeric_limits<Token::Index>::max();
std::function<void(const ForestNode *, Token::Index, llvm::Optional<SymbolID>,
LineDecoration &LineDec)>
Dump = [&](const ForestNode *P, Token::Index End,
llvm::Optional<SymbolID> ElidedParent,
LineDecoration LineDec) {
bool SharedNode = VisitCounts.find(P)->getSecond() > 1;
llvm::ArrayRef<const ForestNode *> Children;
auto EndOfElement = [&](size_t ChildIndex) {
return ChildIndex + 1 == Children.size()
? End
: Children[ChildIndex + 1]->startTokenIndex();
};
if (P->kind() == Ambiguous) {
Children = P->alternatives();
} else if (P->kind() == Sequence) {
Children = P->elements();
if (Abbreviated) {
// Abbreviate chains of trivial sequence nodes.
// A := B, B := C, C := D, D := X Y Z
// becomes
// A~D := X Y Z
//
// We can't hide nodes that appear multiple times in the tree,
// because we need to call out their identity with IDs.
if (Children.size() == 1 && !SharedNode) {
assert(Children[0]->startTokenIndex() == P->startTokenIndex() &&
EndOfElement(0) == End);
return Dump(Children[0], End,
/*ElidedParent=*/ElidedParent.value_or(P->symbol()),
LineDec);
}
}
}
if (End == KEnd)
Result += formatv(RangeFormat.c_str(), P->startTokenIndex(), "end");
else
Result += formatv(RangeFormat.c_str(), P->startTokenIndex(), End);
Result += LineDec.Prefix;
Result += LineDec.First;
if (ElidedParent) {
Result += G.symbolName(*ElidedParent);
Result += "~";
}
if (SharedNode && P->kind() != ForestNode::Terminal) {
auto It = ReferenceIds.try_emplace(P, ReferenceIds.size() + 1);
bool First = It.second;
unsigned ID = It.first->second;
// The first time, print as #1. Later, =#1.
if (First) {
Result += formatv("{0} #{1}", P->dump(G), ID);
} else {
Result += formatv("{0} =#{1}", G.symbolName(P->symbol()), ID);
Children = {}; // Don't walk the children again.
}
} else {
Result.append(P->dump(G));
}
Result.push_back('\n');
auto OldPrefixSize = LineDec.Prefix.size();
LineDec.Prefix += LineDec.Subsequent;
for (size_t I = 0; I < Children.size(); ++I) {
if (I == Children.size() - 1) {
LineDec.First = "└─";
LineDec.Subsequent = " ";
} else {
LineDec.First = "├─";
LineDec.Subsequent = "│ ";
}
Dump(Children[I], P->kind() == Sequence ? EndOfElement(I) : End,
llvm::None, LineDec);
}
LineDec.Prefix.resize(OldPrefixSize);
};
LineDecoration LineDec;
Dump(this, KEnd, llvm::None, LineDec);
return Result;
}
llvm::ArrayRef<ForestNode>
ForestArena::createTerminals(const TokenStream &Code) {
ForestNode *Terminals = Arena.Allocate<ForestNode>(Code.tokens().size());
size_t Index = 0;
for (const auto &T : Code.tokens()) {
new (&Terminals[Index])
ForestNode(ForestNode::Terminal, tokenSymbol(T.Kind),
/*Start=*/Index, /*TerminalData*/ 0);
++Index;
}
NodeCount = Index;
return llvm::makeArrayRef(Terminals, Index);
}
} // namespace pseudo
} // namespace clang
|