require 'html/htmltokenizer'

# Unit tests for HTMLTokenizer: the reported version, tag names that contain
# dashes, and the attribute syntaxes the tokenizer is expected to understand
# (double-quoted, single-quoted, unquoted, dashed/colon names, valueless).
class HtmlTokenizerTest < Test::Unit::TestCase
  # The tokenizer is expected to report version 1.0.
  def test_right_version
    expected_version = 1.0
    assert_equal expected_version, HTMLTokenizer.version
  end

  # Attribute names containing a dash (http-equiv) are kept as hash keys.
  def test_parses_attributes_with_dash
    meta = first_token('<meta http-equiv="content-type" value="text/html">')

    assert_equal HTMLTag, meta.class
    assert_equal 2, meta.attr_hash.size
    %w[value http-equiv].each do |attr_name|
      assert_equal true, meta.attr_hash.has_key?(attr_name)
    end
  end

  # A dashed tag name is returned whole for both the opening and the closing
  # tag, with the text between them yielded as its own token.
  def test_parses_tags_with_dash
    stream = HTMLTokenizer.new('<a-value>abc</a-value>')

    opening = stream.getNextToken
    assert_equal 'a-value', opening.tag_name

    content = stream.getNextToken
    assert_equal 'abc', content.text

    closing = stream.getNextToken
    assert_equal '/a-value', closing.tag_name
  end

  # Trailing whitespace before '>' must not produce an extra attribute.
  def test_gets_attributes_from_tags_with_dash_with_space
    tag = first_token('<a-value n="2" >abc</a-value>')

    assert_equal 1, tag.attr_hash.size, "attributes found: #{tag.attr_hash.inspect}"
    assert_equal '2', tag.attr_hash['n']
  end

  # Single-quoted value directly followed by '>' on a dashed tag.
  def test_gets_attributes_from_tags_with_dash_sans_space
    tag = first_token("<a-value k='3'>abc</a-value>")

    assert_equal 1, tag.attr_hash.size, "attributes found: #{tag.attr_hash.inspect}"
    assert_equal '3', tag.attr_hash['k']
  end

  # The mixed-case tag name is expected back in lower case, and the attribute
  # name containing both ':' and '-' is expected to be kept intact.
  def test_gets_dashed_attributes_from_tags_with_dash
    tag = first_token('<S-Value p:n-d="2">abc</a-value>')

    assert_equal 's-value', tag.tag_name
    assert_equal 1, tag.attr_hash.size
    assert_equal '2', tag.attr_hash['p:n-d']
  end

  # An unquoted URL value is read up to the closing '>'.
  def test_reads_attributes_without_quotes
    link = first_token('<a href=http://www.test.com/blank.html>value</a>')

    assert_equal 'a', link.tag_name
    assert_equal 'http://www.test.com/blank.html', link.attr_hash['href']
  end

  # A one-character unquoted value is still picked up.
  def test_reads_short_attributes_without_quotes
    anchor = first_token('<a name=a>value</a>')

    assert_equal 'a', anchor.tag_name
    assert_equal 'a', anchor.attr_hash['name']
  end

  # Several one-character unquoted values in a row are each read separately.
  def test_reads_multiple_short_attributes_without_quotes
    anchor = first_token('<a name=n target=m href=k>value</a>')

    assert_equal 'a', anchor.tag_name
    { 'name' => 'n', 'target' => 'm', 'href' => 'k' }.each do |attr_name, expected|
      assert_equal expected, anchor.attr_hash[attr_name]
    end
  end

  # A valueless attribute is expected to get its own name as its value.
  def test_makes_boolean_attribute_values_themselves
    input = first_token('<input type=checked checked>')

    assert_equal 'input', input.tag_name
    assert_equal 'checked', input.attr_hash['checked']
  end

  private

  # Tokenizes +markup+ with a fresh HTMLTokenizer and returns its first token.
  def first_token(markup)
    HTMLTokenizer.new(markup).getNextToken
  end
end
