File: engine.py

package info (click to toggle)
python-scrapy 0.14.4-1
  • links: PTS, VCS
  • area: main
  • in suites: wheezy
  • size: 3,064 kB
  • sloc: python: 19,468; xml: 199; sh: 134; makefile: 67
file content (59 lines) | stat: -rw-r--r-- 2,056 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
"""Some debugging functions for working with the Scrapy engine"""

from time import time # used in global tests code

def _run_tests(tests, namespace):
    """Evaluate each expression string in *tests* against *namespace*.

    Returns a list of ``(expression, result)`` tuples in the given order.
    If evaluating an expression raises, its result is the string
    ``"<ExceptionName> (exception)"`` so that a single failing probe does
    not abort the whole report.
    """
    results = []
    for test in tests:
        try:
            # Only the hard-coded expressions from get_engine_status are
            # passed in here, never external input, so eval() is acceptable.
            value = eval(test, namespace)
        except Exception as exc:
            value = "%s (exception)" % type(exc).__name__
        results.append((test, value))
    return results


def get_engine_status(engine):
    """Return a report of the current engine status

    *engine* is probed through the expressions listed below, so it is
    expected to expose ``start_time``, ``has_capacity()``, ``downloader``,
    ``scraper``, ``slots`` and ``spider_is_idle()``.

    The returned dict has two keys:

    * ``'global'``: list of ``(expression, result)`` tuples, one per
      engine-wide expression.
    * ``'spiders'``: dict mapping every key of ``engine.slots`` to a list of
      ``(expression, result)`` tuples, one per spider expression.

    An expression that raises is reported as ``"<ExceptionName> (exception)"``
    rather than propagating. Accessing ``engine.slots`` itself is not
    guarded and may raise.
    """
    global_tests = [
        "time()-engine.start_time",
        "engine.has_capacity()",
        "engine.downloader.is_idle()",
        "len(engine.downloader.slots)",
        "len(engine.downloader.active)",
        "engine.scraper.is_idle()",
        "len(engine.scraper.slots)",
    ]
    spider_tests = [
        "engine.spider_is_idle(spider)",
        "engine.slots[spider].closing",
        "len(engine.slots[spider].inprogress)",
        "len(engine.slots[spider].scheduler.dqs or [])",
        "len(engine.slots[spider].scheduler.mqs)",
        "len(engine.scraper.slots[spider].queue)",
        "len(engine.scraper.slots[spider].active)",
        "engine.scraper.slots[spider].active_size",
        "engine.scraper.slots[spider].itemproc_size",
        "engine.scraper.slots[spider].needs_backout()",
    ]

    # The expressions only refer to `engine`, `spider` and `time`, so those
    # are the only names handed to eval() (builtins are added automatically).
    status = {
        'global': _run_tests(global_tests, {'engine': engine, 'time': time}),
        'spiders': {},
    }
    # Snapshot the keys so iteration does not depend on the live mapping.
    for spider in list(engine.slots.keys()):
        status['spiders'][spider] = _run_tests(
            spider_tests, {'engine': engine, 'spider': spider, 'time': time})
    return status

def format_engine_status(engine=None):
    """Return the engine status report as a human-readable string.

    *engine* is forwarded unchanged to get_engine_status(). The text starts
    with a title line, followed by one aligned ``expression : result`` line
    per global entry, a blank line, and then a ``Spider: ...`` heading with
    indented ``expression : result`` lines for each spider in the report.
    """
    report = get_engine_status(engine)
    chunks = ["Execution engine status\n\n"]
    chunks.extend("%-47s : %s\n" % (expr, value)
                  for expr, value in report['global'])
    chunks.append("\n")
    for spider, results in report['spiders'].items():
        chunks.append("Spider: %s\n" % spider)
        # Per-spider lines are indented two spaces under their heading.
        chunks.extend("  %-50s : %s\n" % (expr, value)
                      for expr, value in results)
    return "".join(chunks)

def print_engine_status(engine):
    """Write the formatted status report for *engine* to standard output.

    The text comes from format_engine_status(); nothing is returned.
    """
    # A single parenthesised argument prints identically whether `print` is
    # the Python 2 statement or the Python 3 function.
    print(format_engine_status(engine))