File: little_markdown.rb

package info (click to toggle)
ruby-rsec 0.4.2-2
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 272 kB
  • sloc: ruby: 2,130; lisp: 13; makefile: 3
file content (167 lines) | stat: -rw-r--r-- 5,038 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
# A small Markdown-to-HTML translator.
#
# Differences between this and the original Markdown:
# - Markdown inside inline tags is not processed
# - every line break in non-tag parts is translated into <br/>
# - nested list elements are not supported

require "rsec"

# Translates a small Markdown dialect, optionally mixed with XML/HTML tags,
# into an HTML string.
#
# The source is scanned by an Rsec parser that alternates between a pseudo
# XML tag parser and a one-character fallback. Plain characters accumulate in
# @charsbuf and are translated as Markdown when the buffer is flushed. A tag
# that follows a line break is copied to the output untouched; any other tag
# is appended to the character buffer as inline content.
#
# Example:
#   LittleMarkdown.new.translate("## title\n")
class LittleMarkdown
  include Rsec::Helper

  # Builds the two parsers once so they can be reused by every #translate call.
  def initialize
    @markdown_line_translator = make_markdown_line_translator
    @parser = (make_xml_tag_parser | make_char_parser).star.eof
  end

  # Translates +src+ (a String of Markdown mixed with tags) and returns the
  # resulting HTML String. Per-call state (@stack, @charsbuf, @out) is reset
  # on entry.
  # NOTE(review): parse! presumably raises when the input cannot be parsed --
  # confirm against the Rsec documentation.
  def translate src
    @stack = []
    # ''.dup keeps the buffers mutable even when string literals are frozen
    # or chilled by the interpreter configuration.
    @charsbuf = ''.dup
    @out = ''.dup
    @parser.parse! src
    flush_chars
    @out
  end

  # Translates the buffered plain characters as Markdown, appends the result
  # to @out and starts a fresh buffer.
  def flush_chars
    @out << translate_markdown(@charsbuf)
    @charsbuf = ''.dup
  end

  # Fallback parser: consumes exactly one character and appends it to
  # @charsbuf.
  def make_char_parser
    # /./ does not match "\n", hence the explicit second alternative.
    # Original note: beware of a StringScanner bug, see the project's issues.
    (/./.r | /\n/).fail('char'){|c| @charsbuf << c}
  end

  # Builds the parser that turns a single line of Markdown into HTML.
  # The returned parser must consume the whole line (eof) and yields the
  # joined HTML fragments.
  def make_markdown_line_translator
    # `line` is assigned at the bottom of this method; lazy{} defers the
    # lookup until parse time. Keep the local's name in sync with this block.
    line_text = lazy{line}.map{|tokens|
      tokens.empty? ? Rsec::INVALID : tokens.join # filter out empty
    }

    # optional quoted title of an image or link; the quotes are kept as the
    # attribute delimiters
    title = /"[^"]*"|'[^']*'/.r._?{|(s)|
      s ? "title=#{s}" : ''
    }
    # NOTE(review): <img> is a void element in HTML; the text would normally
    # go into an alt attribute. Output shape kept as-is for compatibility.
    img = seq('!['.r >> /[^\]]+/ << '](', /[^\)"']+/, title, ')'){|(txt, path, ttl)|
      "<img src='#{path}' #{ttl}>#{txt}</img>"
    }
    link = seq(('['.r >> /[^\]]+/ << ']('), /[^\)"']+/, title, ')'){|(txt, path, ttl)|
      "<a href='#{path}' #{ttl}>#{txt}</a>"
    }
    # NOTE strong should be left of em, otherwise '**' is taken as two '*'
    strong = ('**'.r >> line_text << '**').map{|s|
      "<strong>#{s}</strong>"
    }
    em = ('*'.r >> line_text << '*').map{|s|
      "<em>#{s}</em>"
    }
    code = ('`'.r >> /[^`]+/ << '`').map{|s|
      "<code>#{s}</code>"
    }
    # '<' and '&' become entities; a backslash before a markup character
    # yields that character literally
    escape = '<'.r{'&lt;'} | '&'.r{'&amp;'} | /\\[\!\`\*\[\]]/.r{|s|s[1]}
    text = /[^\!\`\*\[\]]+/
    # "[name]: text" becomes an anchor-like span
    id = seq_(('['.r >> /[^\]]+/ << ']:'), text){|(anchor, body)|
      "<span id='#{anchor}'>#{body}</span>"
    }
    line = (img | link | strong | em | code | escape | id | text).star
    line.eof.map(&:join)
  end

  # Builds the pseudo XML tag parser. <br>, <hr> and <script> are handled as
  # special cases; every other tag must be either self-closing or closed by a
  # matching end tag (tracked with @stack).
  def make_xml_tag_parser
    name  = /[\w-]+/ # greedy, no need to worry space between first attr
    value = /"[^"]*"|'[^']*'/
    attr  = seq_(name, seq_('=', value)._?)
    attrs = /\s*/.r.join(attr)

    # use a stack to ensure tag matching
    tag_start = seq('<', name, attrs){|res|
      @stack.push res[1].downcase
      res
    }
    tag_empty_end = '/>'.r{|res|
      @stack.pop
      res
    }
    # `content` is assigned further down; lazy{} defers the lookup.
    tag_non_empty_end = seq('>', lazy{content}, '</', name, /\s*\>/){|res|
      # the closing name must match the most recently opened tag
      if @stack.pop == res[3].downcase
        res
      else
        Rsec::INVALID
      end
    }
    # /m lets the script body span several lines ('.' matches "\n")
    special_tag = /\<[bh]r\s*\>/i.r | seq_('<script', attrs, /\>.*?\<\/script\>/m)
    tag = special_tag | seq(tag_start, (tag_empty_end | tag_non_empty_end))

    # xml content
    comment = /<!--([^-]|-[^-])*-->/
    # /m (not the no-op /x) so that a CDATA section may contain line breaks
    cdata   = /<!\[CDATA\[.*?\]\]>/m
    entity  = /&(nbsp|lt|gt|amp|cent|pound|yen|euro|sect|copy|reg|trade|#[a-f0-9]{2,4});/i
    text    = /[^<&]+/
    content = (cdata.r | comment | entity | tag | text).star
    tag.fail('tag'){|res|
      if @charsbuf.end_with? "\n"
        # tag follows a line break: flush pending Markdown, emit the tag raw
        flush_chars
        @out << res.join
      else
        @charsbuf << res.join # inline tags
      end
    }
  end

  # Translates a block of Markdown text (+str+) into HTML and returns it.
  # Consecutive lines sharing the same leading marker (four spaces, '#'..'######'
  # plus space, '+ ', '- ', '> ' or none) are grouped and rendered together;
  # groups and plain lines are separated by <br/>.
  def translate_markdown str
    lines = str.split("\n").chunk{|line|
      # the trailing empty alternative makes the key '' for plain lines
      line[/^(\ {4}|\#{1,6}\ |[\+\-\>]\ |)/]
    }.map{|(leading, lines)|
      case leading
      when '    '                                              # code
        "<pre><code>#{lines.join "\n"}</code></pre>"
      when /\#{1,6}/                                           # headings
        hn = "h#{leading.strip.size}"
        lines.map! do |line|
          # drop the whole marker (hashes and the following space), as the
          # block quote and list branches do with theirs
          line = line.sub(/\A\#{1,6}\ /, '')
          "<#{hn}>#{@markdown_line_translator.parse! line}</#{hn}>"
        end
        lines.join
      when '> '                                                # block quote
        # TODO nested
        lines.map! do |line|
          @markdown_line_translator.parse! line[2..-1]
        end
        "<blockquote>#{lines.join '<br/>'}</blockquote>"
      when '+ '                                                # numbered list
        # TODO nested
        lines.map! do |line|
          "<li>#{@markdown_line_translator.parse! line[2..-1]}</li>"
        end
        "<ol>#{lines.join}</ol>"
      when '- '                                                # unordered list
        # TODO nested
        lines.map! do |line|
          "<li>#{@markdown_line_translator.parse! line[2..-1]}</li>"
        end
        "<ul>#{lines.join}</ul>"
      else
        lines.map! do |line|
          @markdown_line_translator.parse! line
        end
        lines.join "<br/>"
      end
    }
    # split drops trailing empty lines, so re-add one <br/> per trailing "\n"
    lines.join('<br/>') << ('<br/>' * str[/\n*\Z/].size)
  end

end

# Demo: when this file is executed directly, translate a small sample
# document and print the resulting HTML.
if $PROGRAM_NAME == __FILE__
  sample = <<-MD
## *a *
<pre a="3">123afd</pre>
  ** b **
  MD
  puts LittleMarkdown.new.translate(sample)
end