1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172
|
//===--- Main.cpp - Compile BNF grammar -----------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This tool compiles a BNF grammar. It is used by the build system to generate
// the data needed to statically construct the core pieces (Grammar, LRTable,
// etc.) of the LR parser.
//
//===----------------------------------------------------------------------===//
#include "clang-pseudo/grammar/Grammar.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/ToolOutputFile.h"
#include <algorithm>
using llvm::cl::desc;
using llvm::cl::init;
using llvm::cl::opt;
using llvm::cl::Required;
using llvm::cl::value_desc;
using llvm::cl::values;
namespace {
// The kind of output the tool is asked to produce.
enum EmitType {
  // Nonterminals, rules and extensions, written as macro invocations.
  EmitSymbolList = 0,
  // The grammar text itself, written as a sequence of quoted string literals.
  EmitGrammarContent = 1,
};
// -grammar: path of the BNF grammar file to compile. The option is marked
// Required; main() passes its value to readOrDie() to load the grammar text.
opt<std::string> Grammar("grammar", desc("Parse a BNF grammar file."),
Required);
// Output mode selector, consumed by the switch in main().
//
// The option itself is unnamed; the two modes are registered as the literal
// values `emit-symbol-list` and `emit-grammar-content`.
//
// The default is spelled out with init() so that the "(default)" promise in
// the help text does not silently depend on EmitSymbolList being the first
// enumerator of EmitType.
opt<EmitType>
    Emit(desc("which information to emit:"), init(EmitSymbolList),
         values(clEnumValN(EmitSymbolList, "emit-symbol-list",
                           "Print nonterminal symbols (default)"),
                clEnumValN(EmitGrammarContent, "emit-grammar-content",
                           "Print the BNF grammar content as a string")));
// -o <file>: where the generated output is written. main() hands this value
// to llvm::ToolOutputFile. Defaults to "-".
// NOTE(review): "-" presumably selects stdout, as is conventional for LLVM
// tools -- confirm against ToolOutputFile.
opt<std::string> OutputFilename("o", init("-"), desc("Output"),
value_desc("file"));
// Returns the entire contents of the file at Path as a string.
//
// If the file cannot be read, prints an error message (including the system
// error text) to stderr and terminates the process with exit code 1.
std::string readOrDie(llvm::StringRef Path) {
  auto BufferOrErr = llvm::MemoryBuffer::getFile(Path);
  if (!BufferOrErr) {
    llvm::errs() << "Error: can't read grammar file '" << Path
                 << "': " << BufferOrErr.getError().message() << "\n";
    ::exit(1);
  }
  return (*BufferOrErr)->getBuffer().str();
}
} // namespace
namespace clang {
namespace pseudo {
namespace {
// Converts a grammar symbol into a name that is usable as an identifier.
//
// The result stays close to the spelling used in the grammar:
//   nonterminal: `ptr-declarator` becomes `ptr_declarator`;
//   punctuator:  `,` becomes `COMMA`;
//   keyword:     `INT` becomes `INT`;
//   terminal:    `IDENTIFIER` becomes `IDENTIFIER`;
std::string mangleSymbol(SymbolID SID, const Grammar &G) {
  // Upper-cased token names generated from TokenKinds.def, indexed by the
  // value symbolToToken() returns. Built on first use; the vector is
  // heap-allocated and never freed.
  static const std::vector<std::string> &UpperTokenNames =
      *new std::vector<std::string>{
#define TOK(X) llvm::StringRef(#X).upper(),
#define KEYWORD(Keyword, Condition) llvm::StringRef(#Keyword).upper(),
#include "clang/Basic/TokenKinds.def"
      };

  if (!isToken(SID)) {
    // Nonterminal: reuse the grammar name, turning dashes into underscores,
    // e.g. translation-unit -> translation_unit.
    std::string Identifier = G.symbolName(SID).str();
    for (char &C : Identifier) {
      if (C == '-')
        C = '_';
    }
    return Identifier;
  }
  return UpperTokenNames[symbolToToken(SID)];
}
// Builds an identifier for a rule from the symbols on its right-hand side.
//
// The mangled symbol names are concatenated with `__` between them, so the
// result is only unique among rules sharing the same LHS.
// e.g. for the grammar rule `ptr-declarator := ptr-operator ptr-declarator`,
// the result is `ptr_operator__ptr_declarator`.
std::string mangleRule(RuleID RID, const Grammar &G) {
  const auto &TheRule = G.lookupRule(RID);
  const auto Symbols = TheRule.seq();

  // The first symbol goes in bare; each later one is prefixed by "__".
  std::string Result = mangleSymbol(Symbols.front(), G);
  for (SymbolID Sym : Symbols.drop_front()) {
    Result += "__";
    Result += mangleSymbol(Sym, G);
  }
  return Result;
}
} // namespace
} // namespace pseudo
} // namespace clang
int main(int argc, char *argv[]) {
llvm::cl::ParseCommandLineOptions(argc, argv, "");
std::string GrammarText = readOrDie(Grammar);
std::vector<std::string> Diags;
auto G = clang::pseudo::Grammar::parseBNF(GrammarText, Diags);
if (!Diags.empty()) {
llvm::errs() << llvm::join(Diags, "\n");
return 1;
}
std::error_code EC;
llvm::ToolOutputFile Out{OutputFilename, EC, llvm::sys::fs::OF_None};
if (EC) {
llvm::errs() << EC.message() << '\n';
return 1;
}
switch (Emit) {
case EmitSymbolList:
Out.os() << R"cpp(
#ifndef NONTERMINAL
#define NONTERMINAL(NAME, ID)
#endif
#ifndef RULE
#define RULE(LHS, RHS, ID)
#endif
#ifndef EXTENSION
#define EXTENSION(NAME, ID)
#endif
)cpp";
for (clang::pseudo::SymbolID ID = 0; ID < G.table().Nonterminals.size();
++ID) {
Out.os() << llvm::formatv("NONTERMINAL({0}, {1})\n",
clang::pseudo::mangleSymbol(ID, G), ID);
for (const clang::pseudo::Rule &R : G.rulesFor(ID)) {
clang::pseudo::RuleID RID = &R - G.table().Rules.data();
Out.os() << llvm::formatv("RULE({0}, {1}, {2})\n",
clang::pseudo::mangleSymbol(R.Target, G),
clang::pseudo::mangleRule(RID, G), RID);
}
}
for (clang::pseudo::ExtensionID EID = 1 /*skip the sentinel 0 value*/;
EID < G.table().AttributeValues.size(); ++EID) {
llvm::StringRef Name = G.table().AttributeValues[EID];
assert(!Name.empty());
Out.os() << llvm::formatv("EXTENSION({0}, {1})\n", Name, EID);
}
Out.os() << R"cpp(
#undef NONTERMINAL
#undef RULE
#undef EXTENSION
)cpp";
break;
case EmitGrammarContent:
for (llvm::StringRef Line : llvm::split(GrammarText, '\n')) {
Out.os() << '"';
Out.os().write_escaped((Line + "\n").str());
Out.os() << "\"\n";
}
break;
}
Out.keep();
return 0;
}
|