//===- lib/DebugInfo/Symbolize/Markup.cpp ------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file defines the log symbolizer markup data model and parser.
///
//===----------------------------------------------------------------------===//
#include "llvm/DebugInfo/Symbolize/Markup.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringExtras.h"
namespace llvm {
namespace symbolize {
// Regular expression source for the SGR control codes that are split out of
// plain text. It accepts exactly these sequences:
//   "\033[0m"
//   "\033[1m"
//   "\033[30m" through "\033[37m"
static constexpr char SGRSyntaxStr[] = "\033\\[([0-1]|3[0-7])m";
// Constructs a parser. MultilineTags is taken by value and moved into the
// member of the same name; parseMultiLineBegin() consults that set to decide
// whether an unterminated element may continue onto later lines. SGRSyntax is
// initialized from the SGRSyntaxStr pattern.
MarkupParser::MarkupParser(StringSet<> MultilineTags)
: MultilineTags(std::move(MultilineTags)), SGRSyntax(SGRSyntaxStr) {}
// Returns the leading part of Str that stops just before Pos. Pos is expected
// to be an iterator into Str.
static StringRef takeTo(StringRef Str, StringRef::iterator Pos) {
  const size_t PrefixLen = Pos - Str.begin();
  return Str.take_front(PrefixLen);
}
// Shrinks Str in place so that it starts at Pos. Pos is expected to be an
// iterator into Str.
static void advanceTo(StringRef &Str, StringRef::iterator Pos) {
  const size_t Consumed = Pos - Str.begin();
  Str = Str.drop_front(Consumed);
}
// Begins parsing a new line. Nodes still buffered from the previous line and
// the text of any previously finished multi-line element are discarded, and
// Line becomes the text that nextNode() consumes. An in-progress multi-line
// element is intentionally left untouched so it can continue on this line.
void MarkupParser::parseLine(StringRef Line) {
  this->Line = Line;
  FinishedMultiline.clear();
  Buffer.clear();
  NextIdx = 0;
}
// Returns the next node parsed from the current line, or std::nullopt once the
// line is exhausted. A line that lies entirely inside a still-open multi-line
// element is absorbed into that element and also yields std::nullopt.
//
// Nodes are produced in batches: each parsing step fills Buffer and then
// re-enters this function, which hands the buffered nodes out one at a time.
std::optional<MarkupNode> MarkupParser::nextNode() {
  // Pull something out of the buffer if possible.
  if (!Buffer.empty()) {
    if (NextIdx < Buffer.size())
      return std::move(Buffer[NextIdx++]);
    NextIdx = 0;
    Buffer.clear();
  }

  // The buffer is empty, so parse the next bit of the line.
  if (Line.empty())
    return std::nullopt;

  if (!InProgressMultiline.empty()) {
    if (std::optional<StringRef> MultilineEnd = parseMultiLineEnd(Line)) {
      llvm::append_range(InProgressMultiline, *MultilineEnd);
      assert(FinishedMultiline.empty() &&
             "At most one multi-line element can be finished at a time.");
      FinishedMultiline.swap(InProgressMultiline);
      // Parse the multi-line element as if it were contiguous.
      advanceTo(Line, MultilineEnd->end());
      if (std::optional<MarkupNode> Element = parseElement(FinishedMultiline))
        return Element;
      // The accumulated text did not form a valid element (for example, its
      // tag is empty). Emit it as plain text, as flush() does for elements
      // that never terminate, rather than dereferencing an empty optional.
      parseTextOutsideMarkup(FinishedMultiline);
      return nextNode();
    }

    // The whole line is part of the multi-line element.
    llvm::append_range(InProgressMultiline, Line);
    Line = Line.drop_front(Line.size());
    return std::nullopt;
  }

  // Find the first valid markup element, if any.
  if (std::optional<MarkupNode> Element = parseElement(Line)) {
    // Record the element's extent before the node is moved into the buffer so
    // that no moved-from object is read afterwards.
    const StringRef ElementText = Element->Text;
    // Emit any text before the element, then the element itself.
    parseTextOutsideMarkup(takeTo(Line, ElementText.begin()));
    Buffer.push_back(std::move(*Element));
    advanceTo(Line, ElementText.end());
    return nextNode();
  }

  // Since there were no valid elements remaining, see if the line opens a
  // multi-line element.
  if (std::optional<StringRef> MultilineBegin = parseMultiLineBegin(Line)) {
    // Emit any text before the element.
    parseTextOutsideMarkup(takeTo(Line, MultilineBegin->begin()));
    // Begin recording the multi-line element.
    llvm::append_range(InProgressMultiline, *MultilineBegin);
    Line = Line.drop_front(Line.size());
    return nextNode();
  }

  // The line doesn't contain any more markup elements, so emit it as text.
  parseTextOutsideMarkup(Line);
  Line = Line.drop_front(Line.size());
  return nextNode();
}
// Finishes deferred processing when no further input will follow. Buffered
// nodes and the remainder of the current line are dropped. If a multi-line
// element is still open, its accumulated text is re-parsed as text outside
// markup so that subsequent nextNode() calls return it.
void MarkupParser::flush() {
  Buffer.clear();
  NextIdx = 0;
  Line = {};
  if (InProgressMultiline.empty())
    return;
  // Empty FinishedMultiline before swapping: it may still hold an element that
  // was completed earlier on the current line, and swapping that text into
  // InProgressMultiline would leave the parser looking as though a multi-line
  // element were still open.
  FinishedMultiline.clear();
  FinishedMultiline.swap(InProgressMultiline);
  parseTextOutsideMarkup(FinishedMultiline);
}
// Scans Line for the first well-formed markup element, i.e. a "{{{" ... "}}}"
// span whose tag is non-empty. Spans with an empty tag are skipped and the
// search resumes after them. Returns std::nullopt when no such element exists.
std::optional<MarkupNode> MarkupParser::parseElement(StringRef Line) {
  for (;;) {
    // Locate the opening and closing markers of the next candidate.
    const size_t Open = Line.find("{{{");
    if (Open == StringRef::npos)
      break;
    const size_t Close = Line.find("}}}", Open + 3);
    if (Close == StringRef::npos)
      break;
    const size_t PastClose = Close + 3;

    MarkupNode Node;
    Node.Text = Line.slice(Open, PastClose);
    // Whatever happens next, later iterations continue after this candidate.
    Line = Line.substr(PastClose);

    // Split the text between the markers into the tag and the raw fields.
    const StringRef Body = Node.Text.drop_front(3).drop_back(3);
    const auto [Tag, RawFields] = Body.split(':');
    if (Tag.empty())
      continue;
    Node.Tag = Tag;

    if (!RawFields.empty()) {
      RawFields.split(Node.Fields, ":");
    } else if (Body.back() == ':') {
      // A trailing ':' with nothing after it denotes a single empty field.
      Node.Fields.push_back(RawFields);
    }
    return Node;
  }
  return std::nullopt;
}
// Builds a node that carries only Text; every other member keeps its
// default-constructed value.
static MarkupNode textNode(StringRef Text) {
  MarkupNode Result;
  Result.Text = Text;
  return Result;
}
// Splits Text, which is known to lie outside every markup element, into text
// nodes appended to Buffer. SGR control codes may still occur in such text, so
// each code becomes its own node and the stretches between codes become
// separate nodes.
void MarkupParser::parseTextOutsideMarkup(StringRef Text) {
  if (Text.empty())
    return;

  SmallVector<StringRef> Matches;
  while (SGRSyntax.match(Text, &Matches)) {
    // The first match entry is the whole control code.
    const StringRef Code = Matches.front();
    // Emit any text before the SGR element.
    if (Code.begin() != Text.begin())
      Buffer.push_back(textNode(takeTo(Text, Code.begin())));
    Buffer.push_back(textNode(Code));
    advanceTo(Text, Code.end());
  }

  // Whatever follows the last control code is ordinary text.
  if (!Text.empty())
    Buffer.push_back(textNode(Text));
}
// For a line known to contain no complete markup element, checks whether it
// ends with the opening of a multi-line element. On success returns the part
// of Line from the opening marker onwards; otherwise returns std::nullopt.
std::optional<StringRef> MarkupParser::parseMultiLineBegin(StringRef Line) {
  // Only the last begin marker on the line can open a multi-line element.
  const size_t Open = Line.rfind("{{{");
  if (Open == StringRef::npos)
    return std::nullopt;
  const size_t TagStart = Open + 3;

  // An end marker after the begin marker rules out a multi-line element.
  if (Line.find("}}}", TagStart) != StringRef::npos)
    return std::nullopt;

  // The tag runs up to the first ':' and must be registered as multi-line.
  const size_t TagEnd = Line.find(':', TagStart);
  if (TagEnd == StringRef::npos)
    return std::nullopt;
  if (MultilineTags.contains(Line.slice(TagStart, TagEnd)))
    return Line.substr(Open);
  return std::nullopt;
}
// Looks for the end marker of an in-progress multi-line element in Line. When
// found, returns the prefix of Line up to and including that marker; otherwise
// returns std::nullopt.
std::optional<StringRef> MarkupParser::parseMultiLineEnd(StringRef Line) {
  const size_t Close = Line.find("}}}");
  if (Close != StringRef::npos)
    return Line.take_front(Close + 3);
  return std::nullopt;
}
} // end namespace symbolize
} // end namespace llvm