File: tc_feed_parse.rb

package info (click to toggle)
ruby-feedparser 0.9.4-1
  • links: PTS, VCS
  • area: main
  • in suites: bullseye, buster, sid, stretch
  • size: 248 kB
  • ctags: 318
  • sloc: ruby: 3,009; sh: 24; makefile: 4
file content (168 lines) | stat: -rwxr-xr-x 5,571 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
# encoding: UTF-8

# Prepend the lib/ directory that sits next to this file's directory to the
# load path, so it is searched first by the requires that follow.
$:.unshift File.join(File.dirname(__FILE__), '..', 'lib')

require 'test/unit'
require 'feedparser'

# Basic tests of the parser: feed parsing, enclosures, string recoding and
# link/origin handling. More detailed parser tests live in tc_parser.rb.
class FeedParserTest < Test::Unit::TestCase
  # A minimal RSS 0.91 channel without items: title, link and description
  # are read and the item list is empty.
  # Fixture from http://my.netscape.com/publish/formats/rss-spec-0.91.html
  def test_parse_rss091_1
    feed = FeedParser::Feed.new(<<-XML)
<?xml version="1.0"?>
<!DOCTYPE rss SYSTEM "http://my.netscape.com/publish/formats/rss-0.91.dtd">
<rss version="0.91">
  <channel>
    <language>en</language>
    <description>News and commentary from the cross-platform scripting community.</description>
    <link>http://www.scripting.com/</link>
    <title>Scripting News</title>
    <image>
      <link>http://www.scripting.com/</link>
      <title>Scripting News</title>
      <url>http://www.scripting.com/gifs/tinyScriptingNews.gif</url>
    </image>
  </channel>
</rss>
    XML

    assert_equal('Scripting News', feed.title)
    assert_equal('http://www.scripting.com/', feed.link)
    assert_equal('News and commentary from the cross-platform scripting community.', feed.description)
    assert_equal([], feed.items)
  end

  # An RSS 0.91 channel using many optional elements and two items: checks
  # the channel fields, then link, content and title of each item.
  def test_parse_rss091_complete
    feed = FeedParser::Feed.new(<<-XML)
<?xml version="1.0"?>
<!DOCTYPE rss SYSTEM "http://my.netscape.com/publish/formats/rss-0.91.dtd">
<rss version="0.91">
<channel>
<copyright>Copyright 1997-1999 UserLand Software, Inc.</copyright>
<pubDate>Thu, 08 Jul 1999 07:00:00 GMT</pubDate>
<lastBuildDate>Thu, 08 Jul 1999 16:20:26 GMT</lastBuildDate>
<docs>http://my.userland.com/stories/storyReader$11</docs>
<description>News and commentary from the cross-platform scripting community.</description>
<link>http://www.scripting.com/</link>
<title>Scripting News</title>
<image>
  <link>http://www.scripting.com/</link>
  <title>Scripting News</title>
  <url>http://www.scripting.com/gifs/tinyScriptingNews.gif</url>
  <height>40</height>
  <width>78</width>
  <description>What is this used for?</description>
</image>
<managingEditor>dave@userland.com (Dave Winer)</managingEditor>
<webMaster>dave@userland.com (Dave Winer)</webMaster>
<language>en-us</language>
<skipHours>
  <hour>6</hour><hour>7</hour><hour>8</hour><hour>9</hour><hour>10</hour><hour>11</hour>
</skipHours>
<skipDays>
  <day>Sunday</day>
</skipDays>
<rating>(PICS-1.1 "http://www.rsac.org/ratingsv01.html" l gen true comment "RSACi North America Server" for "http://www.rsac.org" on "1996.04.16T08:15-0500" r (n 0 s 0 v 0 l 0))</rating>
<item>
  <title>stuff</title>
  <link>http://bar</link>
  <description>This is an article about some stuff</description>
</item>
<item>
  <title>second item's title</title>
  <link>http://link2</link>
  <description>aa bb cc
  dd ee ff</description>
</item>
<textinput>
  <title>Search Now!</title>
  <description>Enter your search &lt;terms&gt;</description>
  <name>find</name>
  <link>http://my.site.com/search.cgi</link>
  </textinput>
</channel>
</rss>
    XML

    # Channel-level fields.
    assert_equal('Scripting News', feed.title)
    assert_equal('http://www.scripting.com/', feed.link)
    assert_equal('News and commentary from the cross-platform scripting community.', feed.description)
    assert_equal(2, feed.items.length)

    # First item: the plain-text description is expected wrapped in <p>.
    assert_equal('http://bar', feed.items[0].link)
    assert_equal('<p>This is an article about some stuff</p>', feed.items[0].content)
    assert_equal('stuff', feed.items[0].title)

    # Second item: the line break inside the description is kept.
    assert_equal('http://link2', feed.items[1].link)
    assert_equal("<p>aa bb cc\n  dd ee ff</p>", feed.items[1].content)
    assert_equal("second item's title", feed.items[1].title)
  end

  # Enclosures are expected as [url, length, type] triples, with nil standing
  # in for a missing length or type attribute.
  def test_enclosures
    feed = FeedParser::Feed.new(<<-XML)
<?xml version="1.0"?>
<!DOCTYPE rss SYSTEM "http://my.netscape.com/publish/formats/rss-0.91.dtd">
<rss version="0.91">
<channel>
<item>
  <enclosure url="url1" length="1" type="type1"/> 
  <enclosure url="url2" type="type2"/> 
  <enclosure length="3" type="type3"/> 
  <enclosure url="url1" length="1"/> 
</item>
</channel>
</rss>
    XML

    # The third enclosure has no url, which makes it useless, so it must not
    # show up in the result.
    expected = [
      ["url1", "1", "type1"],
      ["url2", nil, "type2"],
      ["url1", "1", nil]
    ]
    assert_equal(expected, feed.items[0].enclosures)
  end

  # UTF-8 input comes back tagged as UTF-8.
  def test_recode_utf8
    recoded = FeedParser.recode("áéíóú")
    assert_equal('UTF-8', recoded.encoding.name)
  end

  # An empty string also comes back tagged as UTF-8.
  def test_recode_blank
    recoded = FeedParser.recode('')
    assert_equal('UTF-8', recoded.encoding.name)
  end

  # ISO-8859-1 encoded input comes back tagged as UTF-8.
  def test_recode_iso88519
    latin1 = "áéíóú".encode('iso-8859-1')
    assert_equal('UTF-8', FeedParser.recode(latin1).encoding.name)
  end

  # A stray \x8D byte in otherwise UTF-8 text: the result is UTF-8 and the
  # stray byte is gone.
  def test_recode_utf8_mixed_with_ASCIIBIT
    result = FeedParser.recode("áé\x8Díóú")
    assert_equal('UTF-8', result.encoding.name)
    assert_equal('áéíóú', result)
  end

  # A non-ASCII character (the multiplication sign) is left untouched.
  def test_recode_unicode_char
    result = FeedParser.recode("1280×1024")
    assert_equal("1280×1024", result)
  end

  # Escaped high bytes (\xE7, \xE3, \xE1) plus a \x96 byte: the accented
  # letters are expected to come out as proper characters.
  def test_almost_valid_iso88591
    raw = "Codifica\xE7\xE3o \x96 quase v\xE1lida"
    assert_equal("Codificação  quase válida", FeedParser.recode(raw))
  end

  # The origin of a feed built with 'http://foo.com/feed' as second
  # constructor argument is that URI without its path.
  def test_feed_origin
    parent = FeedParser::Feed.new(nil, 'http://foo.com/feed')
    assert_equal("http://foo.com", parent.origin)
  end

  # A path-only link assigned to an item is read back prefixed with the
  # origin of the item's feed.
  def test_item_origin
    parent = FeedParser::Feed.new(nil, 'http://foo.com/feed')
    entry = FeedParser::FeedItem.new(nil, parent)
    entry.link = '/foo/bar'
    assert_equal('http://foo.com/foo/bar', entry.link)
  end

  # An item with neither feed nor link reports a nil link.
  def test_item_origin_no_link
    entry = FeedParser::FeedItem.new(nil, nil)
    assert_nil(entry.link)
  end

  # Without a feed, a path-only link is returned exactly as assigned.
  def test_item_no_feed
    entry = FeedParser::FeedItem.new(nil, nil)
    entry.link = '/foo/bar'
    assert_equal('/foo/bar', entry.link)
  end

end