File: regression_spec.rb

package info (click to toggle)
ruby-parslet 1.6.1-1
  • links: PTS, VCS
  • area: main
  • in suites: jessie, jessie-kfreebsd
  • size: 908 kB
  • ctags: 473
  • sloc: ruby: 5,220; makefile: 2
file content (314 lines) | stat: -rw-r--r-- 9,744 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
# Encoding: UTF-8

require 'spec_helper'

require 'parslet'

describe "Regressions from real examples" do
  # This parser piece produces on the left a subtree that is keyed (a hash)
  # and on the right a subtree that is a repetition of such subtrees. For
  # now I've decided that these merge into the repetition, so that the
  # return value is an array. This avoids possibly losing keys/values in a
  # hash merge.
  #
  # Grammar for a comma-separated list of double-quoted strings. Parslet is
  # mixed into a plain class here, so #parse is written by hand and simply
  # delegates to the :argument_list rule.
  class ArgumentListParser
    include Parslet

    # One argument followed by any number of comma-prefixed arguments; every
    # argument is captured under :argument.
    rule(:argument_list) do
      first = expression.as(:argument)
      rest = (comma >> expression.as(:argument)).repeat

      first >> rest
    end

    # The only kind of expression is a string.
    rule(:expression) { string }

    # A double-quoted string whose contents are captured as :string. Inside
    # the quotes a backslash takes the following character with it; otherwise
    # any character other than a double quote is accepted.
    rule(:string) do
      escaped_char = str('\\') >> any
      ordinary_char = str('"').absent? >> any
      contents = (escaped_char | ordinary_char).repeat.as(:string)

      str('"') >> contents >> str('"') >> space?
    end

    # A comma with optional trailing whitespace.
    rule(:comma) { str(',') >> space? }

    # Optional whitespace.
    rule(:space?) { space.maybe }

    # One or more spaces or tabs.
    rule(:space) { match("[ \t]").repeat(1) }

    # Parses +str+ starting at the :argument_list rule.
    def parse(str)
      argument_list.parse(str)
    end
  end
  # Specs for ArgumentListParser: the parse result must be an array holding
  # one hash per argument, each keyed by :argument.
  describe ArgumentListParser do
    let(:instance) { ArgumentListParser.new }

    it "should have method expression" do
      instance.should respond_to(:expression)
    end

    it 'should parse "arg1", "arg2"' do
      result = instance.parse('"arg1", "arg2"')

      result.should have(2).elements
      # The loop used to read r[:argument] without asserting anything, so the
      # example could not fail on a missing key; assert the key explicitly.
      result.each do |r|
        r.should have_key(:argument)
      end
    end

    it 'should parse "arg1", "arg2", "arg3"' do
      result = instance.parse('"arg1", "arg2", "arg3"')

      result.should have(3).elements
      # Same explicit check as in the two-argument example.
      result.each do |r|
        r.should have_key(:argument)
      end
    end
  end

  # Recursive grammar for balanced parentheses: an opening paren captured as
  # :l, an optional nested balanced group captured as :m, and a closing paren
  # captured as :r.
  class ParensParser < Parslet::Parser
    rule :balanced do
      opening = str('(').as(:l)
      nested = balanced.maybe.as(:m)
      closing = str(')').as(:r)

      opening >> nested >> closing
    end

    root :balanced
  end
  # Specs for ParensParser: reusing one parser instance for several inputs,
  # and the shape of the tree produced for '(())'.
  describe ParensParser do
    let(:instance) do
      ParensParser.new
    end

    context "statefulness: trying several expressions in sequence" do
      it "should not be stateful" do
        # NOTE (kept from the original author): this example is described as
        # knowingly broken on the tuning branch, which rewrites a lot of
        # code. It is left in place as a reminder of what remains to be
        # done, and as a warning not to trust this code yet.
        instance.parse('(())')

        further_parses = lambda do
          instance.parse('((()))')
          instance.parse('(((())))')
        end
        further_parses.should_not raise_error
      end
    end

    context "expression '(())'" do
      let(:result) do
        instance.parse('(())')
      end

      it "should yield a doubly nested hash" do
        result.should be_a(Hash)
        result.should have_key(:m)
        # The nested group used to come back as an array.
        result[:m].should be_a(Hash)
      end

      context "inner hash" do
        let(:inner) do
          result[:m]
        end

        it "should have nil as :m" do
          inner[:m].should be_nil
        end
      end
    end
  end

  # Toy language made of 'a' tokens separated by spaces, // line comments
  # and /* */ block comments. Used below to check line counting in errors.
  class ALanguage < Parslet::Parser
    root :expressions

    # Either one or more newline-terminated lines, or a single line.
    rule :expressions do
      terminated_lines = (line >> eol).repeat(1)
      terminated_lines | line
    end

    # Optional leading space followed by any number of :exp captures.
    rule :line do
      captured = an_expression.as(:exp).repeat
      space? >> captured
    end

    # A single 'a' (captured as :a) with optional trailing space.
    rule :an_expression do
      str('a').as(:a) >> space?
    end

    # One or more newline characters, with optional space on either side.
    rule :eol do
      space? >> match["\n\r"].repeat(1) >> space?
    end

    # Zero or more space elements.
    rule :space? do
      space.repeat
    end

    # A block comment (:multi), a line comment (:line) or a literal space.
    rule :space do
      multiline_comment.as(:multi) |
        line_comment.as(:line) |
        str(' ')
    end

    # '//' followed by everything up to the next newline character.
    rule :line_comment do
      non_newline = match["\n\r"].absent? >> any
      str('//') >> non_newline.repeat
    end

    # '/*', then everything up to and including the closing '*/'.
    rule :multiline_comment do
      comment_char = str('*/').absent? >> any
      str('/*') >> comment_char.repeat >> str('*/')
    end
  end
  describe ALanguage do
    # Converts +s+ to a String, strips the surrounding whitespace of every
    # line and rejoins the lines with "\n", so that indentation does not
    # influence comparisons.
    def remove_indent(s)
      stripped_lines = s.to_s.each_line.map { |line| line.strip }
      stripped_lines.join("\n")
    end
    
    # Feeds ALanguage a multi-line input that contains a // comment, a /* */
    # comment and finally a 'b', then compares the reported error tree with
    # the expected text (both sides normalised by remove_indent). The
    # expected tree places the deepest failures at line 7 char 11, which is
    # where the 'b' sits, so the whitespace inside the input literal matters.
    it "should count lines correctly" do
      # NOTE(review): catch_failed_parse is not defined in this file
      # (presumably it comes from spec_helper); it is used here as if it
      # returns the cause of the failed parse - confirm.
      cause = catch_failed_parse {
        subject.parse('a
          a a a 
          aaa // ff
          /* 
          a
          */
          b
        ')
      }

      # The expected literal is stripped as a whole before normalisation.
      remove_indent(cause.ascii_tree).should == remove_indent(%q(
      Expected one of [(LINE EOL){1, }, LINE] at line 1 char 1.
      |- Extra input after last repetition at line 7 char 11.
      |  `- Failed to match sequence (LINE EOL) at line 7 char 11.
      |     `- Failed to match sequence (SPACE? [\n\r]{1, } SPACE?) at line 7 char 11.
      |        `- Expected at least 1 of [\n\r] at line 7 char 11.
      |           `- Failed to match [\n\r] at line 7 char 11.
      `- Don't know what to do with "\n         " at line 1 char 2.).strip)
    end 
  end

  # Grammar for two consecutive 'b' characters, captured as :one and :two.
  class BLanguage < Parslet::Parser
    root(:expression)

    rule :expression do
      first_b = b.as(:one)
      second_b = b.as(:two)

      first_b >> second_b
    end

    rule :b do
      str('b')
    end
  end
  # Specs for BLanguage: the plain parse tree, and a transform rule whose
  # two patterns bind the same name.
  describe BLanguage do
    it "should parse 'bb'" do
      expected_tree = { :one => 'b', :two => 'b' }

      subject.should parse('bb').as(expected_tree)
    end

    it "should transform with binding constraint" do
      transform = Parslet::Transform.new { |t|
        # Both keys are bound to the same name, :b.
        same_binding = { :one => simple(:b), :two => simple(:b) }
        t.rule(same_binding) { :ok }
      }
      parsed = subject.parse('bb')

      transform.apply(parsed).should == :ok
    end
  end

  # Grammar that consumes any number of arbitrary characters.
  class UnicodeLanguage < Parslet::Parser
    rule :gobble do
      any.repeat
    end

    root(:gobble)
  end
  # Specs for UnicodeLanguage: multi-byte input must parse and come back
  # unchanged.
  describe UnicodeLanguage do
    it "should parse UTF-8 strings" do
      samples = [
        'éèäöü',
        'RubyKaigi2009のテーマは、「変わる/変える」です。 前回の'
      ]

      samples.each do |text|
        subject.should parse(text).as(text)
      end
    end
  end
  
  # Grammar for a run of sentences, each being one or more characters other
  # than "。" followed by "。", captured as :sentence.
  class UnicodeSentenceLanguage < Parslet::Parser
    root :sentences

    rule :sentence do
      words = match('[^。]').repeat(1)
      full_stop = str("。")

      (words >> full_stop).as(:sentence)
    end

    rule :sentences do
      sentence.repeat
    end
  end
  # Specs for UnicodeSentenceLanguage: a longer Japanese text made of
  # several "。"-terminated sentences must parse.
  describe UnicodeSentenceLanguage do
    let(:string) do
      # The fragments are concatenated without any separator.
      [
        "RubyKaigi2009のテーマは、「変わる/変える」です。 前回の",
        "RubyKaigi2008のテーマであった「多様性」の言葉の通り、 ",
        "2008年はRubyそのものに関しても、またRubyの活躍する舞台に関しても、 ",
        "ますます多様化が進みつつあります。RubyKaigi2008は、そのような ",
        "Rubyの生態系をあらためて認識する場となりました。 しかし、",
        "こうした多様化が進む中、異なる者同士が単純に距離を 置いたままでは、",
        "その違いを認識したところであまり意味がありません。 異なる実装、",
        "異なる思想、異なる背景といった、様々な多様性を理解しつつ、 ",
        "すり合わせるべきものをすり合わせ、変えていくべきところを ",
        "変えていくことが、豊かな未来へとつながる道に違いありません。"
      ].join
    end

    it "should parse sentences" do
      subject.should parse(string)
    end
  end

  # Grammar for exactly two characters: any character followed by '2'.
  class TwoCharLanguage < Parslet::Parser
    rule :twochar do
      any >> str('2')
    end

    root(:twochar)
  end
  describe TwoCharLanguage do
    # Normalises multi-line text for comparison: trims the text as a whole,
    # strips every line and rejoins the lines with "\n".
    #
    # s - any object. It is converted with #to_s *before* trimming; the
    #     previous order (strip, then to_s) raised NoMethodError for input
    #     that does not respond to #strip, such as nil or a Symbol.
    #
    # Returns a String.
    def di(s)
      s.to_s.strip.lines.map(&:strip).join("\n")
    end

    # '123' is one character too long for the grammar; the reported tree
    # must point at the leftover "3".
    it "should raise an error" do
      expected_tree = %q(
        Failed to match sequence (. '2') at line 1 char 2.
        `- Don't know what to do with "3" at line 1 char 3.
      )
      error = catch_failed_parse do
        subject.parse('123')
      end

      di(error.ascii_tree).should == di(expected_tree)
    end
  end

  # Issue #68: Extra input reporting, written by jmettraux
  #
  # Grammar for nested "begin <letter> ... end" blocks whose bodies contain
  # "line" statements or further blocks, separated by whitespace.
  class RepetitionParser < Parslet::Parser
    root :blocks

    # One or more whitespace characters.
    rule :nl do
      match('[\s]').repeat(1)
    end

    # Optional whitespace.
    rule :nl? do
      nl.maybe
    end

    # One or more spaces.
    rule :sp do
      str(' ').repeat(1)
    end

    # Zero or more spaces.
    rule :sp? do
      str(' ').repeat(0)
    end

    # An indented 'line' statement.
    rule :line do
      sp >> str('line')
    end

    # Zero or more statements (a line or a nested block), each followed by
    # whitespace.
    rule :body do
      statement = line | block
      (statement >> nl).repeat(0)
    end

    # 'begin', a lowercase letter, a body and the closing 'end'.
    rule :block do
      sp? >> str('begin') >> sp >> match('[a-z]') >> nl >>
        body >>
        sp? >> str('end')
    end

    # One block followed by any number of whitespace-separated blocks, with
    # optional whitespace around the whole input.
    rule :blocks do
      further_blocks = (nl >> block).repeat(0)
      nl? >> block >> further_blocks >> nl?
    end
  end
  describe RepetitionParser do
    # Normalises multi-line text for comparison: trims the text as a whole,
    # strips every line and rejoins the lines with "\n".
    #
    # s - any object. It is converted with #to_s *before* trimming; the
    #     previous order (strip, then to_s) raised NoMethodError for input
    #     that does not respond to #strip, such as nil or a Symbol.
    #
    # Returns a String.
    def di(s)
      s.to_s.strip.lines.map(&:strip).join("\n")
    end

    # A single begin/end pair, surrounded by newlines and indentation, must
    # parse without raising.
    it 'parses a block' do
      source = %q{
        begin a
        end
      }
      subject.parse(source)
    end
    # A block containing another block must parse without raising.
    it 'parses nested blocks' do
      source = %q{
        begin a
          begin b
          end
        end
      }
      subject.parse(source)
    end
    # Two blocks one after the other must parse without raising.
    it 'parses successive blocks' do
      source = %q{
        begin a
        end
        begin b
        end
      }
      subject.parse(source)
    end
    # The input opens two blocks but contains only one 'end'. The expected
    # tree reports a premature end of input at line 5 char 9, i.e. at the
    # end of the literal, so the whitespace inside the input literal is
    # significant for the asserted positions.
    it 'fails gracefully on a missing end' do
      # NOTE(review): catch_failed_parse is not defined in this file
      # (presumably it comes from spec_helper) - confirm what it returns.
      error = catch_failed_parse {
        subject.parse(%q{
          begin a
            begin b
          end
        }) }
      
      # Both sides are normalised by di, so only the text of each line counts.
      di(error.ascii_tree).should == di(%q(
        Failed to match sequence (NL? BLOCK (NL BLOCK){0, } NL?) at line 2 char 11.
        `- Failed to match sequence (SP? 'begin' SP [a-z] NL BODY SP? 'end') at line 5 char 9.
           `- Premature end of input at line 5 char 9.
        ))
    end
    # A complete block followed by a second block that opens a nested block
    # but contains only one 'end'. The expected tree reports the unconsumed
    # input starting at "begin b" on line 4 char 11; the whitespace inside
    # the input literal is significant for the asserted positions.
    it 'fails gracefully on a missing end (2)' do
      # NOTE(review): catch_failed_parse is not defined in this file
      # (presumably it comes from spec_helper) - confirm what it returns.
      error = catch_failed_parse {
        subject.parse(%q{
          begin a
          end
          begin b
            begin c
          end
        }) }

      # Both sides are normalised by di, so only the text of each line counts.
      di(error.ascii_tree).should == di(%q(
        Failed to match sequence (NL? BLOCK (NL BLOCK){0, } NL?) at line 3 char 14.
        `- Don't know what to do with "begin b\n  " at line 4 char 11.
        ))
    end
    # Same kind of check, but the parse is given a
    # Parslet::ErrorReporter::Deepest instance through the :reporter option.
    # The expected tree names the "li" token at line 6 char 17.
    # NOTE(review): despite the example title, every 'begin' in this input
    # has a matching 'end'; the text that cannot be parsed is "li".
    it 'fails gracefully on a missing end (deepest reporter)' do
      # NOTE(review): catch_failed_parse is not defined in this file
      # (presumably it comes from spec_helper) - confirm what it returns.
      error = catch_failed_parse {
        subject.parse(%q{
            begin a
            end
            begin b
              begin c
                li
              end
            end
          },
          :reporter => Parslet::ErrorReporter::Deepest.new) }

      # Both sides are normalised by di, so only the text of each line counts.
      di(error.ascii_tree).should == di(%q(
        Failed to match sequence (NL? BLOCK (NL BLOCK){0, } NL?) at line 3 char 16.
        `- Expected "end", but got "li\n" at line 6 char 17.
        ))
    end
  end
end