File: spiderscheduler.py

package info (click to toggle)
python-scrapy 0.8-3
  • links: PTS, VCS
  • area: main
  • in suites: squeeze
  • size: 2,904 kB
  • ctags: 2,981
  • sloc: python: 15,349; xml: 199; makefile: 68; sql: 64; sh: 34
file content (37 lines) | stat: -rw-r--r-- 1,027 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
"""
A spider scheduler keeps track of the next spiders to scrape. Every spider
scheduler must implement the following methods:

* next_spider()
  return next spider to scrape and remove it from pending queue

* add_spider(spider)
  add spider to pending queue

* remove_pending_spider(spider)
  remove all occurrences of spider from the pending queue; do nothing if it
  is not pending

* has_pending_spider(spider)
  Return ``True`` if the spider is pending to scrape, ``False`` otherwise

"""

class FifoSpiderScheduler(object):
    """Spider scheduler that hands spiders out in first-in, first-out order.

    Pending spiders are kept in a plain list: new spiders are appended at the
    tail and the next spider to scrape is taken from the head.
    """

    def __init__(self):
        # Head of the list is the next spider to scrape.
        self._pending_spiders = []

    def next_spider(self):
        """Pop and return the oldest pending spider.

        Returns ``None`` when no spider is pending.
        """
        if not self._pending_spiders:
            return None
        return self._pending_spiders.pop(0)

    def add_spider(self, spider):
        """Queue ``spider`` behind every spider that is already pending."""
        self._pending_spiders.append(spider)

    def remove_pending_spider(self, spider):
        """Drop every queued entry equal to ``spider``.

        Does nothing if ``spider`` is not pending.
        """
        # The queue is rebuilt as a new list rather than mutated in place.
        still_pending = [
            queued for queued in self._pending_spiders if queued != spider
        ]
        self._pending_spiders = still_pending

    def has_pending_spider(self, spider):
        """Tell whether ``spider`` is currently in the pending queue.

        Returns ``True`` if it is pending, ``False`` otherwise.
        """
        queue = self._pending_spiders
        return spider in queue