1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167
|
# a markdown translator
#
# The differences between this and the original Markdown:
# - Markdown inside inline tags is not processed
# - every line break in non-tag parts is translated into <br/>
# - nested list elements are not supported
require "rsec"
# Translates a small Markdown dialect to HTML using Rsec parser combinators.
#
# The source is consumed as a stream of pseudo-XML tags and single characters.
# Characters are buffered and later translated as Markdown; a tag that follows
# a line break flushes the buffer and is appended to the output as parsed,
# while any other tag is treated as inline and appended to the buffer.
class LittleMarkdown
  include Rsec::Helper

  # Leading whitespace that marks a line as belonging to a code block.
  # Must agree with the `\ {4}` alternative in #translate_markdown.
  CODE_INDENT = (' ' * 4).freeze

  # Builds the two grammars once; they are reused by every #translate call.
  def initialize
    @markdown_line_translator = make_markdown_line_translator
    @parser = (make_xml_tag_parser | make_char_parser).star.eof
  end

  # Translates +src+ to HTML.
  #
  # Per-call state (@stack, @charsbuf, @out) lives in instance variables and
  # is reset here, so one instance can be reused for successive calls.
  #
  # @param src [String] Markdown source, possibly containing XML-like tags
  # @return [String] the generated HTML
  # NOTE(review): `parse!` presumably raises when the grammar cannot consume
  # the whole input -- confirm against the Rsec documentation.
  def translate(src)
    @stack = []
    @charsbuf = ''
    @out = ''
    @parser.parse!(src)
    flush_chars
    @out
  end

  # Translates the buffered characters as Markdown, appends the result to the
  # output and starts a fresh buffer.
  def flush_chars
    @out << translate_markdown(@charsbuf)
    @charsbuf = ''
  end

  # Parser for one character (newline handled separately) that appends the
  # character to the buffer.
  def make_char_parser
    # care stringscanner's bug, see issues
    (/./.r | /\n/).fail('char') { |c| @charsbuf << c }
  end

  # make a single-line markdown parser
  #
  # The returned parser must consume a whole line (`eof`) and yields the
  # joined HTML for that line.
  def make_markdown_line_translator
    # `line` is assigned at the bottom of this method; `lazy` defers the
    # lookup so that emphasis can contain other inline markup.
    line_text = lazy { line }.map { |tokens|
      tokens.empty? ? Rsec::INVALID : tokens.join # filter out empty
    }

    # optional quoted title, rendered as an HTML attribute (quotes included)
    title = /"[^"]*"|'[^']*'/.r._? { |(s)|
      s ? "title=#{s}" : ''
    }
    img = seq('!['.r >> /[^\]]+/ << '](', /[^\)"']+/, title, ')') { |(txt, path, title_attr)|
      "<img src='#{path}' #{title_attr}>#{txt}</img>"
    }
    link = seq(('['.r >> /[^\]]+/ << ']('), /[^\)"']+/, title, ')') { |(txt, path, title_attr)|
      "<a href='#{path}' #{title_attr}>#{txt}</a>"
    }

    # NOTE strong should be left of em
    strong = ('**'.r >> line_text << '**').map { |s| "<strong>#{s}</strong>" }
    em = ('*'.r >> line_text << '*').map { |s| "<em>#{s}</em>" }
    code = ('`'.r >> /[^`]+/ << '`').map { |s| "<code>#{s}</code>" }

    # NOTE(review): the first two alternatives emit '<' and '&' unchanged, so
    # only the backslash form actually rewrites anything. They look like they
    # were meant to produce HTML entities, but inline tags reach this parser
    # verbatim through the character buffer, so escaping '<' here would alter
    # a tag that starts a token -- confirm the intent before changing them.
    escape = '<'.r { '<' } | '&'.r { '&' } | /\\[\!\`\*\[\]]/.r { |s| s[1] }
    text = /[^\!\`\*\[\]]+/
    id = seq_(('['.r >> /[^\]]+/ << ']:'), text) { |(anchor, body)|
      "<span id='#{anchor}'>#{body}</span>"
    }

    line = (img | link | strong | em | code | escape | id | text).star
    line.eof.map(&:join)
  end

  # pseudo xml tag parser, except <br> and <hr> and <script>
  #
  # On a successful tag match the block at the end decides whether the tag is
  # block-level (buffer ends with a line break) or inline.
  def make_xml_tag_parser
    name = /[\w-]+/ # greedy, no need to worry space between first attr
    value = /"[^"]*"|'[^']*'/
    attr = seq_(name, seq_('=', value)._?)
    attrs = /\s*/.r.join(attr)

    # use a stack to ensure tag matching
    tag_start = seq('<', name, attrs) { |res|
      @stack.push(res[1].downcase)
      res
    }
    tag_empty_end = '/>'.r { |res|
      @stack.pop
      res
    }
    # res[3] is the closing tag name; it must equal the most recently opened one
    tag_non_empty_end = seq('>', lazy { content }, '</', name, /\s*\>/) { |res|
      @stack.pop == res[3].downcase ? res : Rsec::INVALID
    }
    special_tag = /\<[bh]r\s*\>/i.r | seq_('<script', attrs, /\>.*?\<\/script\>/)
    tag = special_tag | seq(tag_start, (tag_empty_end | tag_non_empty_end))

    # xml content
    comment = /<!--([^-]|-[^-])*-->/
    cdata = /<!\[CDATA\[.*?\]\]>/x
    entity = /&(nbsp|lt|gt|amp|cent|pound|yen|euro|sect|copy|reg|trade|#[a-f0-9]{2,4});/i
    text = /[^<&]+/
    content = (cdata.r | comment | entity | tag | text).star

    tag.fail('tag') { |res|
      if @charsbuf.end_with?("\n")
        flush_chars
        @out << res.join
      else
        @charsbuf << res.join # inline tags
      end
    }
  end

  # translate markdown
  #
  # Splits +str+ into lines, groups consecutive lines that share the same
  # block prefix (four-space indent, "#".."###### ", "+ ", "- ", "> " or none)
  # and renders each group. Groups are joined with <br/>, and one <br/> is
  # appended per trailing line break of +str+.
  #
  # @param str [String] buffered non-tag text
  # @return [String] HTML for that text
  def translate_markdown(str)
    inline = ->(text) { @markdown_line_translator.parse!(text) }

    grouped = str.split("\n").chunk { |row|
      row[/^(\ {4}|\#{1,6}\ |[\+\-\>]\ |)/]
    }

    rendered = grouped.map { |(leading, rows)|
      case leading
      when CODE_INDENT # code
        # Compare against the full four-space prefix produced by the regex
        # above, and drop that indent from the emitted code.
        code_rows = rows.map { |row| row[CODE_INDENT.size..-1] }
        "<pre><code>#{code_rows.join("\n")}</code></pre>"
      when /\#{1,6}/ # headings
        hn = "h#{leading.strip.size}"
        rows.map { |row| "<#{hn}>#{inline.call(row.sub(/\#{1,6}/, ''))}</#{hn}>" }.join
      when '> ' # block quote
        # TODO nested
        quoted = rows.map { |row| inline.call(row[2..-1]) }
        "<blockquote>#{quoted.join('<br/>')}</blockquote>"
      when '+ ' # numbered list
        # TODO nested
        items = rows.map { |row| "<li>#{inline.call(row[2..-1])}</li>" }
        "<ol>#{items.join}</ol>"
      when '- ' # unordered list
        # TODO nested
        items = rows.map { |row| "<li>#{inline.call(row[2..-1])}</li>" }
        "<ul>#{items.join}</ul>"
      else
        rows.map { |row| inline.call(row) }.join('<br/>')
      end
    }

    # add trailing '\n' s
    rendered.join('<br/>') << ('<br/>' * str[/\n*\Z/].size)
  end
end
# Demo: when this file is executed directly, translate a small sample
# document and print the resulting HTML.
if $PROGRAM_NAME == __FILE__
  sample = <<-MD
## *a *
<pre a="3">123afd</pre>
** b **
  MD
  puts LittleMarkdown.new.translate(sample)
end
|