File: __init__.py

package info (click to toggle)
python-scrapy 0.24.2-1
  • links: PTS, VCS
  • area: main
  • in suites: jessie, jessie-kfreebsd
  • size: 3,240 kB
  • ctags: 4,259
  • sloc: python: 21,170; xml: 199; makefile: 67; sh: 44
file content (71 lines) | stat: -rw-r--r-- 2,747 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
import sys
import os
import shutil

from zope.interface.verify import verifyObject
from twisted.trial import unittest


# ugly hack to avoid cyclic imports of scrapy.spider when running this test
# alone
from scrapy.interfaces import ISpiderManager
from scrapy.spidermanager import SpiderManager
from scrapy.http import Request

module_dir = os.path.dirname(os.path.abspath(__file__))

class SpiderManagerTest(unittest.TestCase):

    def setUp(self):
        orig_spiders_dir = os.path.join(module_dir, 'test_spiders')
        self.tmpdir = self.mktemp()
        os.mkdir(self.tmpdir)
        self.spiders_dir = os.path.join(self.tmpdir, 'test_spiders_xxx')
        shutil.copytree(orig_spiders_dir, self.spiders_dir)
        sys.path.append(self.tmpdir)
        self.spiderman = SpiderManager(['test_spiders_xxx'])

    def tearDown(self):
        del self.spiderman
        del sys.modules['test_spiders_xxx']
        sys.path.remove(self.tmpdir)

    def test_interface(self):
        verifyObject(ISpiderManager, self.spiderman)

    def test_list(self):
        self.assertEqual(set(self.spiderman.list()),
            set(['spider1', 'spider2', 'spider3', 'spider4']))

    def test_create(self):
        spider1 = self.spiderman.create("spider1")
        self.assertEqual(spider1.__class__.__name__, 'Spider1')
        spider2 = self.spiderman.create("spider2", foo="bar")
        self.assertEqual(spider2.__class__.__name__, 'Spider2')
        self.assertEqual(spider2.foo, 'bar')

    def test_find_by_request(self):
        self.assertEqual(self.spiderman.find_by_request(Request('http://scrapy1.org/test')),
            ['spider1'])
        self.assertEqual(self.spiderman.find_by_request(Request('http://scrapy2.org/test')),
            ['spider2'])
        self.assertEqual(set(self.spiderman.find_by_request(Request('http://scrapy3.org/test'))),
            set(['spider1', 'spider2']))
        self.assertEqual(self.spiderman.find_by_request(Request('http://scrapy999.org/test')),
            [])
        self.assertEqual(self.spiderman.find_by_request(Request('http://spider3.com')),
            [])
        self.assertEqual(self.spiderman.find_by_request(Request('http://spider3.com/onlythis')),
            ['spider3'])

    def test_load_spider_module(self):
        self.spiderman = SpiderManager(['scrapy.tests.test_spidermanager.test_spiders.spider1'])
        assert len(self.spiderman._spiders) == 1

    def test_load_base_spider(self):
        self.spiderman = SpiderManager(['scrapy.tests.test_spidermanager.test_spiders.spider0'])
        assert len(self.spiderman._spiders) == 0

    def test_load_from_crawler(self):
        spider = self.spiderman.create('spider4', a='OK')
        self.assertEqual(spider.a, 'OK')