1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124
|
from scrapy.http import (
Headers,
HtmlResponse,
JsonResponse,
Response,
TextResponse,
XmlResponse,
)
from scrapy.responsetypes import responsetypes
class TestResponseTypes:
    """Check which response class ``responsetypes`` selects for various hints."""

    @staticmethod
    def _assert_resolves(resolve, cases):
        """Assert ``resolve(hint) is expected`` for each ``(hint, expected)`` pair.

        Not collected by pytest (the name does not start with ``test``); it only
        factors out the loop every test in this class shares. Checking stops at
        the first mismatch, and the failure message shows the hint, the class
        actually returned and the class that was expected.
        """
        for hint, expected in cases:
            actual = resolve(hint)
            assert actual is expected, f"{hint} ==> {actual} != {expected}"

    def test_from_filename(self):
        """File names are mapped to a response class via ``from_filename``."""
        cases = [
            ("data.bin", Response),
            ("file.txt", TextResponse),
            ("file.xml.gz", Response),
            ("file.xml", XmlResponse),
            ("file.html", HtmlResponse),
            ("file.unknownext", Response),
        ]
        self._assert_resolves(responsetypes.from_filename, cases)

    def test_from_content_disposition(self):
        """Content-Disposition values are mapped via ``from_content_disposition``.

        The inputs are bytes; several are produced with non-default codecs
        (latin-1, gbk, cp720, iso2022_jp).
        """
        cases = [
            (b'attachment; filename="data.xml"', XmlResponse),
            (b"attachment; filename=data.xml", XmlResponse),
            ("attachment;filename=data£.tar.gz".encode(), Response),
            ("attachment;filename=dataµ.tar.gz".encode("latin-1"), Response),
            ("attachment;filename=data高.doc".encode("gbk"), Response),
            ("attachment;filename=دورهdata.html".encode("cp720"), HtmlResponse),
            (
                "attachment;filename=日本語版Wikipedia.xml".encode("iso2022_jp"),
                XmlResponse,
            ),
        ]
        self._assert_resolves(responsetypes.from_content_disposition, cases)

    def test_from_content_type(self):
        """Content-Type values (str or bytes) are mapped via ``from_content_type``."""
        cases = [
            ("text/html; charset=UTF-8", HtmlResponse),
            ("text/xml; charset=UTF-8", XmlResponse),
            ("application/xhtml+xml; charset=UTF-8", HtmlResponse),
            ("application/vnd.wap.xhtml+xml; charset=utf-8", HtmlResponse),
            ("application/xml; charset=UTF-8", XmlResponse),
            ("application/octet-stream", Response),
            ("application/json; encoding=UTF8;charset=UTF-8", JsonResponse),
            ("application/x-json; encoding=UTF8;charset=UTF-8", JsonResponse),
            ("application/json-amazonui-streaming;charset=UTF-8", JsonResponse),
            (b"application/x-download; filename=\x80dummy.txt", Response),
        ]
        self._assert_resolves(responsetypes.from_content_type, cases)

    def test_from_body(self):
        """Raw body bytes are mapped to a response class via ``from_body``."""
        cases = [
            (b"\x03\x02\xdf\xdd\x23", Response),
            (b"Some plain text\ndata with tabs\t and null bytes\0", TextResponse),
            (b"<html><head><title>Hello</title></head>", HtmlResponse),
            # https://codersblock.com/blog/the-smallest-valid-html5-page/
            (b"<!DOCTYPE html>\n<title>.</title>", HtmlResponse),
            (b'<?xml version="1.0" encoding="utf-8"', XmlResponse),
        ]
        self._assert_resolves(responsetypes.from_body, cases)

    def test_from_headers(self):
        """Header dicts, wrapped in ``Headers``, are mapped via ``from_headers``."""
        raw_cases = [
            ({"Content-Type": ["text/html; charset=utf-8"]}, HtmlResponse),
            (
                {
                    "Content-Type": ["text/html; charset=utf-8"],
                    "Content-Encoding": ["gzip"],
                },
                Response,
            ),
            (
                {
                    "Content-Type": ["application/octet-stream"],
                    "Content-Disposition": ["attachment; filename=data.txt"],
                },
                TextResponse,
            ),
        ]
        # Wrap lazily so each ``Headers`` object is built right before it is
        # checked; the ``Headers`` object (not the raw dict) is what appears in
        # the failure message.
        wrapped_cases = ((Headers(raw), expected) for raw, expected in raw_cases)
        self._assert_resolves(responsetypes.from_headers, wrapped_cases)

    def test_from_args(self):
        """Keyword-argument combinations are mapped via ``from_args``."""
        # TODO: add more tests that check precedence between the different arguments
        cases = [
            ({"url": "http://www.example.com/data.csv"}, TextResponse),
            # headers takes precedence over url
            (
                {
                    "headers": Headers({"Content-Type": ["text/html; charset=utf-8"]}),
                    "url": "http://www.example.com/item/",
                },
                HtmlResponse,
            ),
            (
                {
                    "headers": Headers(
                        {"Content-Disposition": ['attachment; filename="data.xml.gz"']}
                    ),
                    "url": "http://www.example.com/page/",
                },
                Response,
            ),
        ]

        def resolve(kwargs):
            # Each case is a dict of keyword arguments for ``from_args``.
            return responsetypes.from_args(**kwargs)

        self._assert_resolves(resolve, cases)

    def test_custom_mime_types_loaded(self):
        """The custom ``x.scrapytest`` extension resolves to ``x-scrapy/test``."""
        # check that mime.types files shipped with scrapy are loaded
        guess = responsetypes.mimetypes.guess_type("x.scrapytest")
        assert guess[0] == "x-scrapy/test"
|