File: test_spidermiddleware.py

package info (click to toggle)
python-scrapy 2.4.1-2%2Bdeb11u1
  • links: PTS, VCS
  • area: main
  • in suites: bullseye
  • size: 4,748 kB
  • sloc: python: 32,888; xml: 199; makefile: 90; sh: 7
file content (103 lines) | stat: -rw-r--r-- 3,937 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
from unittest import mock

from twisted.trial.unittest import TestCase
from twisted.python.failure import Failure

from scrapy.spiders import Spider
from scrapy.http import Request, Response
from scrapy.exceptions import _InvalidOutput
from scrapy.utils.test import get_crawler
from scrapy.core.spidermw import SpiderMiddlewareManager


class SpiderMiddlewareTestCase(TestCase):
    """Common fixture for the spider middleware manager tests in this module.

    ``setUp`` builds a request/response pair, a crawler with its spider and a
    ``SpiderMiddlewareManager`` created from that crawler; subclasses register
    their own middlewares on ``self.mwman`` and call ``_scrape_response``.
    """

    def setUp(self):
        # Request/response pair that is pushed through the middleware chain.
        request = Request('http://example.com/index.html')
        self.request = request
        self.response = Response(request.url, request=request)

        # Crawler, spider and the manager under test.
        crawler = get_crawler(Spider)
        self.crawler = crawler
        self.spider = crawler._create_spider('foo')
        self.mwman = SpiderMiddlewareManager.from_crawler(crawler)

    def _scrape_response(self):
        """Run the manager's ``scrape_response`` and return what the deferred fired with.

        The outcome is collected with ``addBoth``, so it is handed back to the
        caller whether it arrived through the callback or the errback chain;
        the tests in this module inspect it with ``assertIsInstance``.
        """
        outcomes = []
        deferred = self.mwman.scrape_response(
            mock.MagicMock(),  # stand-in for the scrape function
            self.response,
            self.request,
            self.spider,
        )
        # Capture the outcome regardless of which chain delivers it.
        deferred.addBoth(outcomes.append)
        self._wait(deferred)
        return outcomes[0]


class ProcessSpiderInputInvalidOutput(SpiderMiddlewareTestCase):
    """A process_spider_input method that returns an invalid value"""

    def test_invalid_process_spider_input(self):

        class InvalidProcessSpiderInputMiddleware:
            def process_spider_input(self, response, spider):
                # An int is the invalid return value under test.
                return 1

        middleware = InvalidProcessSpiderInputMiddleware()
        self.mwman._add_middleware(middleware)

        outcome = self._scrape_response()

        # The scrape is expected to fail with an _InvalidOutput error.
        self.assertIsInstance(outcome, Failure)
        self.assertIsInstance(outcome.value, _InvalidOutput)


class ProcessSpiderOutputInvalidOutput(SpiderMiddlewareTestCase):
    """A process_spider_output method that returns an invalid value"""

    def test_invalid_process_spider_output(self):

        class InvalidProcessSpiderOutputMiddleware:
            def process_spider_output(self, response, result, spider):
                # An int is the invalid return value under test.
                return 1

        middleware = InvalidProcessSpiderOutputMiddleware()
        self.mwman._add_middleware(middleware)

        outcome = self._scrape_response()

        # The scrape is expected to fail with an _InvalidOutput error.
        self.assertIsInstance(outcome, Failure)
        self.assertIsInstance(outcome.value, _InvalidOutput)


class ProcessSpiderExceptionInvalidOutput(SpiderMiddlewareTestCase):
    """A process_spider_exception method that returns an invalid value"""

    def test_invalid_process_spider_exception(self):

        class InvalidProcessSpiderOutputExceptionMiddleware:
            def process_spider_exception(self, response, exception, spider):
                # An int is the invalid return value under test.
                return 1

        class RaiseExceptionProcessSpiderOutputMiddleware:
            def process_spider_output(self, response, result, spider):
                # Provides the exception that the handler above receives.
                raise Exception()

        # Registration order is kept: exception handler first, raiser second.
        middlewares = (
            InvalidProcessSpiderOutputExceptionMiddleware(),
            RaiseExceptionProcessSpiderOutputMiddleware(),
        )
        for middleware in middlewares:
            self.mwman._add_middleware(middleware)

        outcome = self._scrape_response()

        # The scrape is expected to fail with an _InvalidOutput error.
        self.assertIsInstance(outcome, Failure)
        self.assertIsInstance(outcome.value, _InvalidOutput)


class ProcessSpiderExceptionReRaise(SpiderMiddlewareTestCase):
    """Returning None from process_spider_exception lets the exception through"""

    def test_process_spider_exception_return_none(self):

        class ProcessSpiderExceptionReturnNoneMiddleware:
            def process_spider_exception(self, response, exception, spider):
                # Explicit None: this handler does not deal with the exception.
                return None

        class RaiseExceptionProcessSpiderOutputMiddleware:
            def process_spider_output(self, response, result, spider):
                # Deliberately raises ZeroDivisionError.
                1 / 0

        # Registration order is kept: exception handler first, raiser second.
        middlewares = (
            ProcessSpiderExceptionReturnNoneMiddleware(),
            RaiseExceptionProcessSpiderOutputMiddleware(),
        )
        for middleware in middlewares:
            self.mwman._add_middleware(middleware)

        outcome = self._scrape_response()

        # The original ZeroDivisionError is expected to come back as the failure.
        self.assertIsInstance(outcome, Failure)
        self.assertIsInstance(outcome.value, ZeroDivisionError)