File: corestats.py

package info (click to toggle)
python-scrapy 0.14.4-1
  • links: PTS, VCS
  • area: main
  • in suites: wheezy
  • size: 3,064 kB
  • sloc: python: 19,468; xml: 199; sh: 134; makefile: 67
file content (32 lines) | stat: -rw-r--r-- 1,259 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
"""
Extension for collecting core stats like items scraped and start/finish times
"""
import datetime

from scrapy.xlib.pydispatch import dispatcher

from scrapy import signals
from scrapy.stats import stats

class CoreStats(object):
    """Collects core scraping stats: start/finish times and item counts.

    On instantiation, wires its handler methods to the corresponding
    Scrapy signals through the pydispatch dispatcher; each handler then
    records into the global ``stats`` collector, keyed per spider.
    """

    def __init__(self):
        # Pair each handler with the signal that should trigger it,
        # then register every pair with the dispatcher in one pass.
        wiring = (
            (self.stats_spider_opened, signals.stats_spider_opened),
            (self.stats_spider_closing, signals.stats_spider_closing),
            (self.item_scraped, signals.item_scraped),
            (self.item_dropped, signals.item_dropped),
        )
        for receiver, sig in wiring:
            dispatcher.connect(receiver, signal=sig)

    def stats_spider_opened(self, spider):
        """Record the UTC timestamp at which stats collection began."""
        stats.set_value('start_time', datetime.datetime.utcnow(), spider=spider)

    def stats_spider_closing(self, spider, reason):
        """Record the UTC finish time and why the spider is closing."""
        stats.set_value('finish_time', datetime.datetime.utcnow(), spider=spider)
        stats.set_value('finish_reason', reason, spider=spider)

    def item_scraped(self, item, spider):
        """Bump the scraped-item counter for this spider."""
        stats.inc_value('item_scraped_count', spider=spider)

    def item_dropped(self, item, spider, exception):
        """Bump dropped-item counters: one overall, one per exception type."""
        reason = exception.__class__.__name__
        stats.inc_value('item_dropped_count', spider=spider)
        stats.inc_value('item_dropped_reasons_count/%s' % reason, spider=spider)