File: test_scrub.py

package info (click to toggle)
planet-venus 0~git9de2109-4.2
  • links: PTS, VCS
  • area: main
  • in suites: buster
  • size: 3,884 kB
  • sloc: python: 4,393; xml: 871; makefile: 39; sed: 3; sh: 2
file content (124 lines) | stat: -rw-r--r-- 4,464 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
#!/usr/bin/env python

import unittest, StringIO, time
from copy import deepcopy
from planet.scrub import scrub
from planet import feedparser, config

# Sample Atom feed shared by all tests.  The "&amp;ouml;" entities decode to
# the literal text "F&ouml;o", which the type-scrubbing test expects to be
# converted to the UTF-8 byte string 'F\xc3\xb6o'.  The %d slot is filled
# with next year, so the entry's <updated> date is always in the future
# (exercised by test_scrub_future).
feed = '''
<feed xmlns='http://www.w3.org/2005/Atom' xml:base="http://example.com/">
  <author><name>F&amp;ouml;o</name></author>
  <entry xml:lang="en">
    <id>ignoreme</id>
    <author><name>F&amp;ouml;o</name></author>
    <updated>%d-12-31T23:59:59Z</updated>
    <title>F&amp;ouml;o</title>
    <summary>F&amp;ouml;o</summary>
    <content>F&amp;ouml;o</content>
    <link href="http://example.com/entry/1/"/>
    <source>
      <link href="http://example.com/feed/"/>
      <author><name>F&amp;ouml;o</name></author>
    </source>
  </entry>
</feed>
''' % (time.gmtime()[0] + 1)

# Baseline [testfeed] configuration.  ignore_in_feed and future_dates start
# empty (individual tests override them via config.parser.set); the *_type
# options declare the textual fields as html, which the tests expect scrub
# to convert to plain text.
configData = '''
[testfeed]
ignore_in_feed = 
future_dates = 

name_type = html
title_type = html
summary_type = html
content_type = html
'''

class ScrubTest(unittest.TestCase):
    """Tests for planet.scrub: per-feed element removal, html-to-text
    conversion, future-date handling, and xml:base rewriting, all driven
    by [testfeed] options layered on top of configData."""

    # NOTE: dict.has_key() is deprecated (and removed in Python 3); the
    # membership tests below use assertIn/assertNotIn, which also produce
    # clearer failure messages.

    def test_scrub_ignore(self):
        """ignore_in_feed drops the named elements (plus their *_detail /
        *_parsed companions) and xml:lang from each entry."""
        base = feedparser.parse(feed)

        # Sanity check: the unscrubbed entry carries everything we remove.
        self.assertIn('author', base.entries[0])
        self.assertIn('author_detail', base.entries[0])
        self.assertIn('id', base.entries[0])
        self.assertIn('updated', base.entries[0])
        self.assertIn('updated_parsed', base.entries[0])
        self.assertIn('language', base.entries[0].summary_detail)

        config.parser.readfp(StringIO.StringIO(configData))
        config.parser.set('testfeed', 'ignore_in_feed',
          'author id updated xml:lang')
        data = deepcopy(base)
        scrub('testfeed', data)

        self.assertNotIn('author', data.entries[0])
        self.assertNotIn('author_detail', data.entries[0])
        self.assertNotIn('id', data.entries[0])
        self.assertNotIn('updated', data.entries[0])
        self.assertNotIn('updated_parsed', data.entries[0])
        self.assertNotIn('language', data.entries[0].summary_detail)

    def test_scrub_type(self):
        """Fields declared html in the config are converted to plain
        UTF-8 text: the entity reference becomes a literal character."""
        base = feedparser.parse(feed)

        # Before scrubbing the html entity is still escaped markup.
        self.assertEqual('F&ouml;o', base.feed.author_detail.name)

        config.parser.readfp(StringIO.StringIO(configData))
        data = deepcopy(base)
        scrub('testfeed', data)

        # After scrubbing: UTF-8 bytes for o-umlaut, in the feed author,
        # the entry author, and the entry's source author.
        self.assertEqual('F\xc3\xb6o', data.feed.author_detail.name)
        self.assertEqual('F\xc3\xb6o', data.entries[0].author_detail.name)
        self.assertEqual('F\xc3\xb6o', data.entries[0].source.author_detail.name)

        self.assertEqual('text/html', data.entries[0].title_detail.type)
        self.assertEqual('text/html', data.entries[0].summary_detail.type)
        self.assertEqual('text/html', data.entries[0].content[0].type)

    def test_scrub_future(self):
        """future_dates=ignore_date strips the updated element of a
        future-dated entry; ignore_entry drops the entry entirely."""
        base = feedparser.parse(feed)
        self.assertEqual(1, len(base.entries))
        self.assertIn('updated', base.entries[0])

        config.parser.readfp(StringIO.StringIO(configData))
        config.parser.set('testfeed', 'future_dates', 'ignore_date')
        data = deepcopy(base)
        scrub('testfeed', data)
        self.assertNotIn('updated', data.entries[0])

        config.parser.set('testfeed', 'future_dates', 'ignore_entry')
        data = deepcopy(base)
        scrub('testfeed', data)
        self.assertEqual(0, len(data.entries))

    def test_scrub_xmlbase(self):
        """xml_base rewrites each entry's base URI: to the feed or entry
        alternate link, or resolved against / replaced by a literal URI."""
        base = feedparser.parse(feed)
        # The raw feed inherits the feed-level xml:base.
        self.assertEqual('http://example.com/',
             base.entries[0].title_detail.base)

        config.parser.readfp(StringIO.StringIO(configData))
        config.parser.set('testfeed', 'xml_base', 'feed_alternate')
        data = deepcopy(base)
        scrub('testfeed', data)
        self.assertEqual('http://example.com/feed/',
             data.entries[0].title_detail.base)

        config.parser.set('testfeed', 'xml_base', 'entry_alternate')
        data = deepcopy(base)
        scrub('testfeed', data)
        self.assertEqual('http://example.com/entry/1/',
             data.entries[0].title_detail.base)

        # A relative value is resolved against the existing base ...
        config.parser.set('testfeed', 'xml_base', 'base/')
        data = deepcopy(base)
        scrub('testfeed', data)
        self.assertEqual('http://example.com/base/',
             data.entries[0].title_detail.base)

        # ... while an absolute URI replaces it outright.
        config.parser.set('testfeed', 'xml_base', 'http://example.org/data/')
        data = deepcopy(base)
        scrub('testfeed', data)
        self.assertEqual('http://example.org/data/',
             data.entries[0].title_detail.base)