1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164
|
use parser;
/**
 * Exception used to report problems in Markdown text.
 *
 * Stores a position in the text together with a description. Both are written out by `message`.
 */
class MarkdownError extends Exception {
    // Position in the text that the error refers to.
    private Str:Iter pos;

    // Description of the problem.
    private Str msg;

    // Create an error that refers to `pos` and is described by `msg`.
    init(Str:Iter pos, Str msg) {
        init {
            pos = pos;
            msg = msg;
        }
    }

    // Write the full error message to `to`: a fixed header, the description, and finally the
    // position.
    void message(StrBuf to) : override {
        to << "Failed to parse markdown text:\n" << msg << "\n";
        to << "The error is near the |> in the text below:\n";
        to << pos;
    }
}
// Parse a string containing Markdown text. Returns a symbolic representation of the document.
// Throws a `MarkdownError` if the parser reports an error or if it could not consume the entire
// text. Throws an `InternalError` if the parser reports neither a value nor an error.
Document parse(Str str) {
    // We require that the string ends in a newline. Extra newlines do not matter, so just add one
    // always. The padded string is kept in a variable since it is the string that is actually
    // parsed, and thereby the one that the end position below has to be compared against.
    Str input = str + "\n";
    var result = parser(input);
    if (error = result.error)
        throw MarkdownError(error.pos, error.message);
    unless (v = result.value)
        throw InternalError("Should have got an error!");
    // Compare with the end of the parsed string (`input`), not the end of the original `str`,
    // which is a different string.
    if (result.end != input.end)
        throw MarkdownError(result.end, "Not all data could be parsed.");
    return v;
}
// The parser itself. It is declared as a backtracking recursive descent parser and starts
// matching at the rule SDocument. Most rules receive the current indentation level as the
// parameter `indent` and pass it on to the rules they use.
// NOTE(review): the numbers in brackets after a rule name (e.g. `SElement[20]`) look like
// production priorities. Confirm how the parser library orders alternatives before relying on it.
private parser : parser(backtracking recursive descent) {
start = SDocument;
// A document is a sequence of elements at indentation level 0.
Document SDocument();
SDocument => Document(content) : SElementSeq(0) content;
// Zero or more elements at the given indentation. The created ElementSeq is passed to SElement
// (as `me`), which is what adds the elements to it.
ElementSeq SElementSeq(Nat indent);
SElementSeq => ElementSeq() : (SElement(me, indent))*;
// Match an entire element.
// The first part matches the indentation and delegates to SElementBaseline.
// The matched element is added to `to` with `add`. The second alternative matches a line that
// only contains spaces and tabs, and adds nothing.
void SElement(ElementSeq to, Nat indent);
SElement[20] => to : parser.special.ExactIndent(indent) - SElementBaseline(indent) -> add;
SElement[10] : "[ \t]*\n"; // Empty lines may have too little indentation.
// Match the part after the expected indentation level.
// Any further indentation is matched by SExtraIndent, and the resulting level is passed on.
Element SElementBaseline(Nat indent);
SElementBaseline[10] => x : SExtraIndent(indent) newIndent - SElementNoIndent(newIndent) x;
// This is where we would detect things like code blocks, etc.
// Match the part after all (possibly additional) indentation has been consumed.
// Alternatives: hash heading, unordered list, ordered list, code block, and text.
Element SElementNoIndent(Nat indent);
SElementNoIndent[90] => x : SHashHeading(indent) x;
SElementNoIndent[80] => x : SUList(indent) x;
SElementNoIndent[70] => x : SOList(indent) x;
SElementNoIndent[60] => x : SCode(indent) x;
SElementNoIndent[10] => x : SText(indent) x;
// Match a code block.
// The block starts with three backticks, followed by a (possibly empty) language name that
// extends to the first space or newline. The remaining lines are matched by SCodeContent.
CodeBlock SCode(Nat indent);
SCode => CodeBlock(language) : "```" - "[^\n ]*" language - " *\n" - SCodeContent(indent, me);
// Match the lines inside a code block, one line per step:
// - [90]: the closing fence at the indentation of the block, which ends the recursion.
// - [50]: a line at the indentation of the block. Its contents are added with `addLine`.
// - [10]: a line with only spaces and tabs. It is added as an empty line with `addLine`.
void SCodeContent(Nat indent, CodeBlock to);
SCodeContent[90] : parser.special.ExactIndent(indent) - "``` *\n";
SCodeContent[50] => to : parser.special.ExactIndent(indent) - "[^\n]*" -> addLine - "\n" - SCodeContent(indent, to);
SCodeContent[10] => to : "[ \t]*\n" - "" -> addLine - SCodeContent(indent, to);
// Match a list. Note: We don't have to care about empty lines with "too little" indentation
// here since the body matches SElement, which consumes them for us.
// The first list element is matched directly after the indentation that was already consumed.
// Each following element must be preceded by the indentation of the list.
// NOTE(review): the Bool passed to `List` presumably selects ordered (true) or unordered (false)
// lists. Confirm against the constructor of `List`.
List SUList(Nat indent);
SUList => List(false) : SUListElem(indent, me) - (parser.special.ExactIndent(indent) - SUListElem(indent, me))*;
// A single unordered list element: a marker followed by a body at the indentation level that
// the marker produces. The body is added to `list` with `add`.
void SUListElem(Nat indent, List list);
SUListElem => list : SUListMarker(indent) newIndent - SListElemBody(newIndent) -> add;
// Ordered lists have the same structure as unordered ones, but use SOListMarker.
List SOList(Nat indent);
SOList => List(true) : SOListElem(indent, me) - (parser.special.ExactIndent(indent) - SOListElem(indent, me))*;
void SOListElem(Nat indent, List list);
SOListElem => list : SOListMarker(indent) newIndent - SListElemBody(newIndent) -> add;
// The body of a list element. The first element starts right after the marker, so it is
// matched with SElementBaseline. Any remaining elements are matched with SElement.
ElementSeq SListElemBody(Nat indent);
SListElemBody => ElementSeq() : SElementBaseline(indent) -> add - (SElement(me, indent))*;
// Match a list marker. Returns extra indentation level.
// The new level is computed by `addIndentation` from `indent` and the matched marker text
// (including the spaces after the marker).
Nat SUListMarker(Nat indent);
SUListMarker => addIndentation(indent, extra) : "[+\-*] +" extra;
Nat SOListMarker(Nat indent);
SOListMarker => addIndentation(indent, extra) : "[0-9]+\. +" extra;
// Match a part of text, after indentation. Then figure out what it is.
// - [90]: a line of text followed by an underline on the next line, which gives a heading.
// - [80]: a paragraph.
Element SText(Nat indent);
SText[90] => heading : SFmtText(indent) text - "\n" - parser.special.MinIndent(indent) - SUnderline(text) heading;
SText[80] => Paragraph(text) : SParText(indent) text;
// Match a paragraph.
// The first line is matched from the current position with SFmtText and added with `add`. Each
// following line is matched with SFmtTextLine and added with `addLine`.
FormattedText SParText(Nat indent);
SParText => FormattedText() : SFmtText(indent) -> add - "\n" - (SFmtTextLine(indent) -> addLine)*;
// Match the underlined part of a heading.
// A line of `=` creates `Heading(1, text)`, and a line of `-` creates `Heading(2, text)`.
Heading SUnderline(FormattedText text);
SUnderline => Heading(1, text) : "=+\n";
SUnderline => Heading(2, text) : "-+\n";
// Match a hash heading (# heading)
// The first argument to `Heading` is computed by `count` from the matched run of `#` characters.
Element SHashHeading(Nat indent);
SHashHeading => Heading(depth, text) : SHashDepth depth - " *" - SFmtText(indent) text - "\n";
Nat SHashDepth();
SHashDepth => count(text) : "#+" text;
// Match text, ending in a newline. Assumes that this is an entire line, and does not match new
// lists etc.
// The first span is matched by SFmtSpanFirst, which is what rejects lines that start with a
// list character.
FormattedText SFmtTextLine(Nat indent);
SFmtTextLine => FormattedText() : parser.special.MinIndent(indent) - SFmtSpanFirst(indent) -> add - (SFmtSpan(indent) -> add)* - "\n";
// Match text, ending in a newline. Assumes this is in the middle of the line somewhere, and
// does not account for special formatting etc.
// Consists of one or more spans. The newline itself is not consumed by this rule.
FormattedText SFmtText(Nat indent);
SFmtText => FormattedText() : (SFmtSpan(indent) -> add)+;
// The first span on a line: either a formatted span or plain text. The first character of the
// plain text has a more restrictive character class than the remaining ones.
TextSpan SFmtSpanFirst(Nat indent);
SFmtSpanFirst[10] => x : SFmtSpanCommon(indent) x;
SFmtSpanFirst[20] => PlainTextSpan(text) : "[^*`\n\[\]\-+\\][^\n*\[\]`\\]*" text; // May not start with list characters.
// Any other span: either a formatted span or a non-empty run of plain text that contains no
// newline and none of the characters `*`, `[`, `]`, backtick or backslash.
TextSpan SFmtSpan(Nat indent);
SFmtSpan[10] => x : SFmtSpanCommon(indent) x;
SFmtSpan[20] => PlainTextSpan(text) : "[^\n*\[\]`\\]+" text;
// Formatted spans that may appear anywhere in text: bold (`**...**`), italic (`*...*`), inline
// code (backticks), links (`[text](target)`), and custom text (`[text]`).
TextSpan SFmtSpanCommon(Nat indent);
SFmtSpanCommon[90] => BoldText(text) : "\*\*" - SNestedText(indent) text - "\*\*";
SFmtSpanCommon[80] => ItalicText(text) : "\*" - SNestedText(indent) text - "\*";
SFmtSpanCommon[70] => InlineCode(text) : "`" - SInlineCode(indent) text - "`";
SFmtSpanCommon[60] => Link(text, target) : "\[" - SNestedText(indent) text - "\] *(" - "[^\n)]*" target - ")";
SFmtSpanCommon[50] => CustomText(text) : "\[" - "[^\n\]]+" text - "\]";
// Escape special characters:
// A backslash followed by any single character produces that character as plain text.
SFmtSpanCommon[40] => PlainTextSpan(text) : "\\" - "." text;
// Nested text that may contain a newline.
// - [10]: text on a single line.
// - [50]: text on one line, a newline, indentation, and then more nested text (recursively).
//   The remainder is added with `addLine`.
FormattedText SNestedText(Nat indent);
SNestedText[10] => FormattedText() : SFmtText(indent) -> add;
SNestedText[50] => FormattedText() : SFmtText(indent) -> add - "\n" - parser.special.MinIndent(indent) - SNestedText(indent) -> addLine;
// Nested inline code that may contain a newline, but no other spans. Also required to contain
// at least one character, otherwise it will be mis-interpreted as a code block.
// Newlines are added to the result, but the indentation that follows them is not.
StrBuf SInlineCode(Nat indent);
SInlineCode => StrBuf() : "[^`\n]+" -> add - ("\n" -> add - parser.special.MinIndent(indent) - "[^`\n]*" -> add)*;
// Extra indentation.
// Returns `original` plus the value captured from `parser.special.Indent()`.
Nat SExtraIndent(Nat original);
SExtraIndent => +(original, extra) : parser.special.Indent() extra;
}
// Compute a new indentation level: `original` plus the size of `extra`, as reported by `count`.
package Nat addIndentation(Nat original, Str extra) {
    var added = extra.count();
    return original + added;
}
|