File: test_tokenizer.rb

package info (click to toggle)
libfeedtools-ruby 0.2.29%2Bdfsg1-4
  • links: PTS, VCS
  • area: main
  • in suites: squeeze
  • size: 2,004 kB
  • ctags: 1,385
  • sloc: ruby: 18,815; sql: 39; makefile: 6
file content (94 lines) | stat: -rw-r--r-- 2,674 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
require File.join(File.dirname(__FILE__), 'preamble')

require 'html5/tokenizer'

require 'tokenizer_test_parser'

class Html5TokenizerTestCase < Test::Unit::TestCase

  # Asserts that the tokenizer's output matches the fixture's expected
  # token stream.
  #
  # When +ignore_error_order+ is falsy the two streams must match exactly,
  # including the positions of "ParseError" markers.  When truthy, parse
  # errors are compared as a group separate from the regular tokens, so
  # only their count matters, not where they occurred.
  def assert_tokens_match(expected_tokens, received_tokens, ignore_error_order, message)
    if ignore_error_order
      # Split each stream into [regular tokens, parse errors]; "ParseError"
      # entries are plain strings, all other tokens are arrays.
      expected = expected_tokens.partition { |token| token != "ParseError" }
      received = received_tokens.partition { |token| token != "ParseError" }
      assert_equal expected, received, message
    else
      # BUG FIX: this branch previously returned the boolean result of
      # `expected_tokens == received_tokens` instead of asserting, so a
      # mismatch here could never fail the test.
      assert_equal expected_tokens, received_tokens, message
    end
  end

  # True when +token+ is a real token (not a "ParseError" marker) whose
  # type tag — the first element of the token array — equals +token_name+.
  def type_of?(token_name, token)
    token != 'ParseError' and token_name == token.first
  end

  # Fixture StartTag tokens carry attributes as an array of [name, value]
  # pairs, while the tokenizer emits a Hash; convert the arrays so both
  # sides compare directly.  The +reverse+ makes the FIRST occurrence of a
  # duplicated attribute name win, since later entries in the flattened
  # list overwrite earlier ones when the Hash is built.
  def convert_attribute_arrays_to_hashes(tokens)
    tokens.each do |token|
      token[2] = Hash[*token[2].reverse.flatten] if type_of?('StartTag', token)
    end
  end

  # Merges runs of consecutive Character tokens into a single Character
  # token, matching how the fixtures express contiguous character data.
  def concatenate_consecutive_characters(tokens)
    tokens.inject([]) do |merged, token|
      if type_of?('Character', token) and merged.any? and type_of?('Character', merged.last)
        # Append this token's text onto the previous Character token.
        merged.last[1] = merged.last[1] + token[1]
      else
        merged << token
      end
      merged
    end
  end

  # Runs a single fixture entry: tokenizes the input once per requested
  # content model flag and checks the resulting token stream.
  def tokenizer_test(data)
    (data['contentModelFlags'] || [:PCDATA]).each do |content_model_flag|
      # Human-readable context shown when an assertion fails.
      message = [
        '', 'Description:', data['description'],
        '', 'Input:', data['input'],
        '', 'Content Model Flag:', content_model_flag,
        '' ] * "\n"

      assert_nothing_raised message do
        tokenizer = HTML5::HTMLTokenizer.new(data['input'])

        tokenizer.content_model_flag = content_model_flag.to_sym

        # Some fixtures simulate tokenizing inside an already-open element
        # (e.g. CDATA/RCDATA states), which requires a pending start tag.
        if data.has_key?('lastStartTag')
          tokenizer.current_token = {:type => :startTag, :name => data['lastStartTag']}
        end

        tokens = TokenizerTestParser.new(tokenizer).parse

        actual = concatenate_consecutive_characters(convert_attribute_arrays_to_hashes(tokens))

        expected = concatenate_consecutive_characters(data['output'])

        assert_tokens_match expected, actual, data["ignoreErrorOrder"], message
      end
    end
  end

  # Generate one test_* method per fixture entry in every tokenizer
  # test file, so each fixture reports pass/fail individually.
  html5_test_files('tokenizer').each do |test_file|
    test_name = File.basename(test_file).sub('.test', '')

    tests = JSON.parse(File.read(test_file))['tests']

    tests.each_with_index do |data, index|
      define_method('test_%s_%d' % [test_name, index + 1]) { tokenizer_test data }
    end
  end

end