File: test_convert_xpath.rb

package info (click to toggle)
ruby-nokogiri 1.11.1%2Bdfsg-2
  • links: PTS, VCS
  • area: main
  • in suites: bullseye
  • size: 5,576 kB
  • sloc: xml: 28,086; ruby: 18,456; java: 13,067; ansic: 5,138; yacc: 265; sh: 208; makefile: 27
file content (135 lines) | stat: -rw-r--r-- 5,058 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
require "helper"

# Verifies that a set of XPath expressions select the expected content from
# the HTML fixture. Every assertion pairs a selector string (kept only as a
# label) with the XPath expression that is actually evaluated.
class TestConvertXPath < Nokogiri::TestCase

  # Parses the HTML fixture before each test; all assertions search this document.
  def setup
    super
    @N = Nokogiri(File.read(HTML_FILE))
  end

  # Evaluates +xpath+ against the parsed fixture and asserts on the result.
  #
  # hpath - selector string. It is NOT evaluated by this helper; it only
  #         labels the call site (presumably the selector form that +xpath+
  #         was derived from -- confirm against project history).
  # xpath - XPath expression passed to @N.search.
  # match - expected value.
  # blk   - optional block that receives the search result and returns the
  #         value compared with +match+. Defaults to the first item of the
  #         search result.
  def assert_syntactical_equivalence(hpath, xpath, match, &blk)
    blk ||= lambda {|j| j.first}
    assert_equal match, blk.call(@N.search(xpath)), "xpath result did not match"
  end

  def test_child_tag
    assert_syntactical_equivalence("h1[a]", ".//h1[child::a]", "Tender Lovemaking") do |j|
      j.inner_text
    end
  end

  def test_child_tag_equals
    assert_syntactical_equivalence("h1[a='Tender Lovemaking']", ".//h1[child::a = 'Tender Lovemaking']", "Tender Lovemaking") do |j|
      j.inner_text
    end
  end

  def test_filter_contains
    # The expected text keeps the two trailing spaces present in the fixture's title.
    assert_syntactical_equivalence("title:contains('Tender')", ".//title[contains(., 'Tender')]",
                                   "Tender Lovemaking  ") do |j|
      j.inner_text
    end
  end

  def test_filter_comment
    assert_syntactical_equivalence("div comment()[2]", ".//div//comment()[position() = 2]", "<!-- end of header -->") do |j|
      j.first.to_s
    end
  end

  def test_filter_text
    assert_syntactical_equivalence("a[text()]", ".//a[normalize-space(child::text())]", "<a href=\"http://tenderlovemaking.com\">Tender Lovemaking</a>") do |j|
      j.first.to_s
    end
    assert_syntactical_equivalence("a[text()='Tender Lovemaking']", ".//a[normalize-space(child::text()) = 'Tender Lovemaking']", "<a href=\"http://tenderlovemaking.com\">Tender Lovemaking</a>") do |j|
      j.first.to_s
    end
    assert_syntactical_equivalence("a/text()", ".//a/child::text()", "Tender Lovemaking") do |j|
      j.first.to_s
    end
    assert_syntactical_equivalence("h2//a[text()!='Back Home!']", ".//h2//a[normalize-space(child::text()) != 'Back Home!']", "Meow meow meow meow meow") do |j|
      j.first.inner_text
    end
  end

  def test_filter_by_attr
    assert_syntactical_equivalence("a[@href='http://blog.geminigeek.com/wordpress-theme']",
                                   ".//a[@href = 'http://blog.geminigeek.com/wordpress-theme']",
                                   "http://blog.geminigeek.com/wordpress-theme") do |j|
      j.first["href"]
    end
  end

  def test_css_id
    assert_syntactical_equivalence("#linkcat-7", ".//*[@id = 'linkcat-7']", "linkcat-7") do |j|
      j.first["id"]
    end
    assert_syntactical_equivalence("li#linkcat-7", ".//li[@id = 'linkcat-7']", "linkcat-7") do |j|
      j.first["id"]
    end
  end

  def test_css_class
    assert_syntactical_equivalence(".cat-item-15", ".//*[contains(concat(' ',@class,' '),' cat-item-15 ')]",
                                   "cat-item cat-item-15") do |j|
      j.first["class"]
    end
    assert_syntactical_equivalence("li.cat-item-15", ".//li[contains(concat(' ',@class,' '),' cat-item-15 ')]",
                                   "cat-item cat-item-15") do |j|
      j.first["class"]
    end
  end

  def test_css_tags
    assert_syntactical_equivalence("div li a", ".//div//li//a", "http://brobinius.org/") do |j|
      j.first.inner_text
    end
    assert_syntactical_equivalence("div li > a", ".//div//li/a", "http://brobinius.org/") do |j|
      j.first.inner_text
    end
    # This assertion used to appear twice, verbatim; the redundant copy was
    # removed because it exercised exactly the same query and expectation.
    assert_syntactical_equivalence("h1 ~ small", ".//small[preceding-sibling::h1]", "The act of making love, tenderly.") do |j|
      j.first.inner_text
    end
  end

  def test_positional
    # Line breaks are stripped from both the expected and the actual text so
    # the comparison does not depend on the fixture's newline characters.
    assert_syntactical_equivalence("div/div:first()", ".//div/div[position() = 1]", "\r\nTender Lovemaking\r\nThe act of making love, tenderly.\r\n".gsub(/[\r\n]/, '')) do |j|
      j.first.inner_text.gsub(/[\r\n]/, '')
    end
    assert_syntactical_equivalence("div/div:first", ".//div/div[position() = 1]", "\r\nTender Lovemaking\r\nThe act of making love, tenderly.\r\n".gsub(/[\r\n]/, '')) do |j|
      j.first.inner_text.gsub(/[\r\n]/, '')
    end
    assert_syntactical_equivalence("div//a:last()", ".//div//a[position() = last()]", "Wordpress") do |j|
      j.last.inner_text
    end
    assert_syntactical_equivalence("div//a:last", ".//div//a[position() = last()]", "Wordpress") do |j|
      j.last.inner_text
    end
  end

  def test_multiple_filters
    assert_syntactical_equivalence("a[@rel='bookmark'][1]", ".//a[@rel = 'bookmark' and position() = 1]", "Back Home!") do |j|
      j.first.inner_text
    end
  end

# TODO:
#       doc/'title ~ link' -> links that are siblings of title
#       doc/'p[@class~="final"]' -> class includes string (whitespacy)
#       doc/'p[text()*="final"]' -> class includes string (index) (broken: always returns true?)
#       doc/'p[text()$="final"]' -> /final$/
#       doc/'p[text()|="final"]' -> /^final$/
#       doc/'p[text()^="final"]' -> string starts with 'final'
#       nth_first
#       nth_last
#       even
#       odd
#       first-child, nth-child, last-child, nth-last-child, nth-last-of-type
#       only-of-type, only-child
#       parent
#       empty
#       root
end