File: test_downloadermiddleware_ajaxcrawlable.py

package info (click to toggle)
python-scrapy 2.13.3-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 5,664 kB
  • sloc: python: 52,028; xml: 199; makefile: 25; sh: 7
file content (62 lines) | stat: -rw-r--r-- 2,502 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
import pytest

from scrapy.downloadermiddlewares.ajaxcrawl import AjaxCrawlMiddleware
from scrapy.http import HtmlResponse, Request, Response
from scrapy.spiders import Spider
from scrapy.utils.test import get_crawler


@pytest.mark.filterwarnings("ignore::scrapy.exceptions.ScrapyDeprecationWarning")
class TestAjaxCrawlMiddleware:
    """Exercise ``AjaxCrawlMiddleware.process_response`` on several inputs.

    ``ScrapyDeprecationWarning`` is silenced for every test in the class
    (presumably because the middleware under test emits it -- confirm against
    the middleware source).
    """

    def setup_method(self):
        """Create a fresh spider and middleware before each test."""
        # The middleware is built through from_crawler with the setting
        # switched on explicitly.
        crawler = get_crawler(Spider, {"AJAXCRAWL_ENABLED": True})
        self.spider = crawler._create_spider("foo")
        self.mw = AjaxCrawlMiddleware.from_crawler(crawler)

    def _ajaxcrawlable_body(self):
        """Return an HTML body carrying the ``<meta name="fragment">`` tag."""
        return b'<html><head><meta name="fragment" content="!"/></head><body></body></html>'

    def _req_resp(self, url, req_kwargs=None, resp_kwargs=None):
        """Build a ``Request`` and an ``HtmlResponse`` bound to it for *url*.

        ``req_kwargs`` and ``resp_kwargs`` are optional dicts of extra keyword
        arguments forwarded to the respective constructors.
        """
        req = Request(url, **(req_kwargs or {}))
        resp = HtmlResponse(url, request=req, **(resp_kwargs or {}))
        return req, resp

    def test_non_get(self):
        """A response to a non-GET (HEAD) request is returned as-is."""
        req, resp = self._req_resp("http://example.com/", {"method": "HEAD"})
        resp2 = self.mw.process_response(req, resp, self.spider)
        # Identity, not equality: the same object must be passed through,
        # consistent with the other pass-through tests in this class.
        assert resp is resp2

    def test_binary_response(self):
        """A plain (non-HTML) ``Response`` with a binary body is returned as-is."""
        req = Request("http://example.com/")
        resp = Response("http://example.com/", body=b"foobar\x00\x01\x02", request=req)
        resp2 = self.mw.process_response(req, resp, self.spider)
        assert resp is resp2

    def test_ajaxcrawl(self):
        """An AJAX-crawlable page yields a new request for the escaped-fragment URL."""
        req, resp = self._req_resp(
            "http://example.com/",
            {"meta": {"foo": "bar"}},
            {"body": self._ajaxcrawlable_body()},
        )
        req2 = self.mw.process_response(req, resp, self.spider)
        # Fail with a readable message if a response came back instead.
        assert isinstance(req2, Request), (req2.__class__, req2)
        assert req2.url == "http://example.com/?_escaped_fragment_="
        # Meta set on the original request must survive on the new one.
        assert req2.meta["foo"] == "bar"

    def test_ajaxcrawl_loop(self):
        """The response to an already rewritten request is not rewritten again."""
        req, resp = self._req_resp(
            "http://example.com/", {}, {"body": self._ajaxcrawlable_body()}
        )
        req2 = self.mw.process_response(req, resp, self.spider)
        # Simulate the escaped-fragment URL serving the same crawlable body.
        resp2 = HtmlResponse(req2.url, body=resp.body, request=req2)
        resp3 = self.mw.process_response(req2, resp2, self.spider)

        assert isinstance(resp3, HtmlResponse), (resp3.__class__, resp3)
        assert resp3.request.url == "http://example.com/?_escaped_fragment_="
        assert resp3 is resp2

    def test_noncrawlable_body(self):
        """An HTML page without the fragment meta tag is returned as-is."""
        req, resp = self._req_resp(
            "http://example.com/", {}, {"body": b"<html></html>"}
        )
        resp2 = self.mw.process_response(req, resp, self.spider)
        assert resp is resp2