1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188
|
//===-- HTMLForest.cpp - browser-based parse forest explorer --------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// The plain text forest node dump (clang-pseudo -print-forest) is useful but
// hard to reconcile with the code being examined, especially when it is large.
//
// HTMLForest produces a self-contained HTML file containing both the code and
// the forest representation, linking them interactively with javascript.
// At any given time, a single parse tree is shown (ambiguities resolved).
// The user can switch between ambiguous alternatives.
//
//   +-------+---------------+
//   |       |        +-----+|
//   | #tree |  #code |#info||
//   |       |        +-----+|
//   |       |               |
//   +-------+---------------+
//
// #tree is a hierarchical view of the nodes (nested <ul>s), like -print-forest.
// (It is a simple tree, not a DAG, because ambiguities have been resolved).
// Like -print-forest, trivial sequences are collapsed (expression~IDENTIFIER).
//
// #code is the source code, annotated with <span>s marking the node ranges.
// These spans are usually invisible (exception: ambiguities are marked), but
// they are used to show and change the selection.
//
// #info is a floating box that shows details of the currently selected node:
// - rule (for sequence nodes). Abbreviated rules are also shown.
// - alternatives (for ambiguous nodes). The user can choose an alternative.
// - ancestors. The parent nodes show how this node fits in translation-unit.
//
// There are two types of 'active' node:
// - *highlight* is what the cursor is over, and is colored blue.
// Near ancestors are shaded faintly (onion-skin) to show local structure.
// - *selection* is set by clicking.
// The #info box shows the selection, and selected nodes have a dashed ring.
//
//===----------------------------------------------------------------------===//
#include "clang-pseudo/Forest.h"
#include "clang-pseudo/grammar/Grammar.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Support/JSON.h"
#include "llvm/Support/raw_ostream.h"
namespace clang {
namespace pseudo {
namespace {
// Defines const char HTMLForest_css[] = "...contents of HTMLForest.css..."; etc
#include "HTMLForestResources.inc"
struct Writer {
llvm::raw_ostream &Out;
const Grammar &G;
const ForestNode &Root;
const TokenStream &Stream;
void write() {
Out << "<!doctype html>\n";
tag("html", [&] {
tag("head", [&] {
tag("title", [&] { Out << "HTMLForest"; });
tag("script", [&] { Out << HTMLForest_js; });
tag("style", [&] { Out << HTMLForest_css; });
tag("script", [&] {
Out << "var forest=";
writeForestJSON();
Out << ";";
});
tag("pre id='hidden-code' hidden", [&] { writeCode(); });
});
tag("body", [&] { Out << HTMLForest_html; });
});
}
void writeCode();
void writeForestJSON();
void tag(llvm::StringRef Opener, llvm::function_ref<void()> Body) {
Out << "<" << Opener << ">";
Body();
Out << "</" << Opener.split(' ').first << ">\n";
}
};
void Writer::writeCode() {
// This loop (whitespace logic) is cribbed from TokenStream::Print.
bool FirstToken = true;
unsigned LastLine = -1;
StringRef LastText;
for (const auto &T : Stream.tokens()) {
StringRef Text = T.text();
if (FirstToken) {
FirstToken = false;
} else if (T.Line == LastLine) {
if (LastText.data() + LastText.size() != Text.data())
Out << ' ';
} else {
Out << " \n"; // Extra space aids selection.
Out.indent(T.Indent);
}
Out << "<span class='token' id='t" << Stream.index(T) << "'>";
llvm::printHTMLEscaped(Text, Out);
Out << "</span>";
LastLine = T.Line;
LastText = Text;
}
if (!FirstToken)
Out << '\n';
}
// Writes a JSON array of forest nodes. Items are e.g.:
//   {kind:'sequence', symbol:'compound-stmt', children:[5,8,33],
//    rule:'compound-stmt := ...'}
//   {kind:'terminal', symbol:'VOID', token:'t52'}
//   {kind:'ambiguous', symbol:'type-specifier', children:[3,100], selected:3}
//   {kind:'opaque', symbol:'statement-seq', firstToken:'t5', lastToken:'t6'}
//
// An item's position in the array is its node ID; "children" and "selected"
// refer to other items by ID. The root is always item 0. Token references
// ('tN') match the span ids produced by writeCode().
void Writer::writeForestJSON() {
  // This is the flat array of nodes: the index into this array is the node ID.
  // Each node is stored with its end (one past its last token). Only a node's
  // start token is queried from ForestNode here, so the end is supplied by
  // whoever first references the node.
  std::vector<std::pair<const ForestNode *, /*End*/ Token::Index>> Sequence;
  llvm::DenseMap<const ForestNode *, unsigned> Index;
  // Returns the ID of N, appending it to Sequence the first time it is seen.
  // End is recorded only on that first sighting; later calls ignore it.
  auto AssignID = [&](const ForestNode *N, Token::Index End) -> unsigned {
    auto R = Index.try_emplace(N, Sequence.size());
    if (R.second)
      Sequence.push_back({N, End});
    return R.first->second;
  };
  // The root covers the whole stream.
  AssignID(&Root, Stream.tokens().size());
  auto TokenID = [](Token::Index I) { return ("t" + llvm::Twine(I)).str(); };

  llvm::json::OStream Out(this->Out, 2);
  Out.array([&] {
    // Sequence grows while we iterate (AssignID appends newly discovered
    // nodes), so iterate by index and copy the entry out before any AssignID
    // call can reallocate the vector.
    for (unsigned I = 0; I < Sequence.size(); ++I) {
      const ForestNode *N = Sequence[I].first;
      Token::Index End = Sequence[I].second;
      Out.object([&] {
        Out.attribute("symbol", G.symbolName(N->symbol()));
        switch (N->kind()) {
        case ForestNode::Terminal:
          Out.attribute("kind", "terminal");
          Out.attribute("token", TokenID(N->startTokenIndex()));
          break;
        case ForestNode::Sequence:
          Out.attribute("kind", "sequence");
          Out.attribute("rule", G.dumpRule(N->rule()));
          break;
        case ForestNode::Ambiguous:
          Out.attribute("kind", "ambiguous");
          // The first alternative is the one initially shown.
          Out.attribute("selected", AssignID(N->children().front(), End));
          break;
        case ForestNode::Opaque:
          Out.attribute("kind", "opaque");
          Out.attribute("firstToken", TokenID(N->startTokenIndex()));
          // [firstToken, lastToken] is a closed range.
          // If empty, lastToken is omitted.
          if (N->startTokenIndex() != End)
            Out.attribute("lastToken", TokenID(End - 1));
          break;
        }
        // The children of an ambiguous node are alternatives, each ending
        // where the node itself ends (as the "selected" attribute above
        // already assumes for the first one). Only the elements of a sequence
        // are laid out one after another, so only there does a child end
        // where its next sibling starts.
        const bool ChildrenAreAlternatives =
            N->kind() == ForestNode::Ambiguous;
        auto Children = N->children();
        if (!Children.empty())
          Out.attributeArray("children", [&] {
            for (unsigned C = 0; C < Children.size(); ++C) {
              const bool EndsWithParent =
                  ChildrenAreAlternatives || C + 1 == Children.size();
              Out.value(AssignID(Children[C],
                                 EndsWithParent
                                     ? End
                                     : Children[C + 1]->startTokenIndex()));
            }
          });
      });
    }
  });
}
} // namespace
// Writes an HTML page exploring the forest rooted at Root to OS.
// We only accept the derived stream here.
// FIXME: allow the original stream instead?
void writeHTMLForest(llvm::raw_ostream &OS, const Grammar &G,
                     const ForestNode &Root, const TokenStream &Stream) {
  Writer W{OS, G, Root, Stream};
  W.write();
}
} // namespace pseudo
} // namespace clang
|