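"""Tests for scrapy.logformatter.LogFormatter and a custom subclass."""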
import unittest
import six
from scrapy.spiders import Spider
from scrapy.http import Request, Response
from scrapy.item import Item, Field
from scrapy.logformatter import LogFormatter


class CustomItem(Item):
    name = Field()

    def __str__(self):
        return "name: %s" % self['name']


class LoggingContribTest(unittest.TestCase):

    def setUp(self):
        self.formatter = LogFormatter()
        self.spider = Spider('default')

    def test_crawled(self):
        req = Request("http://www.example.com")
        res = Response("http://www.example.com")
        logkws = self.formatter.crawled(req, res, self.spider)
        logline = logkws['msg'] % logkws['args']
        self.assertEqual(logline,
                         "Crawled (200) <GET http://www.example.com> (referer: None)")

        req = Request("http://www.example.com", headers={'referer': 'http://example.com'})
        res = Response("http://www.example.com", flags=['cached'])
        logkws = self.formatter.crawled(req, res, self.spider)
        logline = logkws['msg'] % logkws['args']
        self.assertEqual(logline,
                         "Crawled (200) <GET http://www.example.com> (referer: http://example.com) ['cached']")

    def test_flags_in_request(self):
        req = Request("http://www.example.com", flags=['test', 'flag'])
        res = Response("http://www.example.com")
        logkws = self.formatter.crawled(req, res, self.spider)
        logline = logkws['msg'] % logkws['args']
        self.assertEqual(logline,
                         "Crawled (200) <GET http://www.example.com> ['test', 'flag'] (referer: None)")

    def test_dropped(self):
        item = {}
        exception = Exception(u"\u2018")
        response = Response("http://www.example.com")
        logkws = self.formatter.dropped(item, exception, response, self.spider)
        logline = logkws['msg'] % logkws['args']
        lines = logline.splitlines()
        assert all(isinstance(x, six.text_type) for x in lines)
        self.assertEqual(lines, [u"Dropped: \u2018", '{}'])

    def test_scraped(self):
        item = CustomItem()
        item['name'] = u'\xa3'
        response = Response("http://www.example.com")
        logkws = self.formatter.scraped(item, response, self.spider)
        logline = logkws['msg'] % logkws['args']
        lines = logline.splitlines()
        assert all(isinstance(x, six.text_type) for x in lines)
        self.assertEqual(lines, [u"Scraped from <200 http://www.example.com>", u'name: \xa3'])


class LogFormatterSubclass(LogFormatter):
    # Override the crawled message so that request flags are printed after
    # the referer instead of before it.
    def crawled(self, request, response, spider):
        kwargs = super(LogFormatterSubclass, self).crawled(
            request, response, spider)
        CRAWLEDMSG = (
            u"Crawled (%(status)s) %(request)s (referer: "
            u"%(referer)s)%(flags)s"
        )
        return {
            'level': kwargs['level'],
            'msg': CRAWLEDMSG,
            'args': kwargs['args'],
        }


class LogformatterSubclassTest(LoggingContribTest):

    def setUp(self):
        self.formatter = LogFormatterSubclass()
        self.spider = Spider('default')

    def test_flags_in_request(self):
        # Flag placement differs in LogFormatterSubclass, so skip the inherited check.
        pass


if __name__ == "__main__":
    unittest.main()