File: useragent.py

package info (click to toggle)
python-scrapy 0.14.4-1
  • links: PTS, VCS
  • area: main
  • in suites: wheezy
  • size: 3,064 kB
  • sloc: python: 19,468; xml: 199; sh: 134; makefile: 67
file content (20 lines) | stat: -rw-r--r-- 617 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
"""Set User-Agent header per spider or use a default value from settings"""

from scrapy.utils.python import WeakKeyCache


class UserAgentMiddleware(object):
    """Downloader middleware that supplies the User-Agent request header.

    The value is taken from the spider's ``user_agent`` attribute when the
    spider defines one; otherwise the ``USER_AGENT`` entry of the spider's
    settings is used.
    """

    def __init__(self):
        # Look-ups go through a WeakKeyCache built around _user_agent.
        # NOTE(review): presumably this memoises the result per spider and
        # holds spiders weakly (going by the name) -- confirm against
        # scrapy.utils.python.
        self.cache = WeakKeyCache(self._user_agent)

    def _user_agent(self, spider):
        """Return the User-Agent value configured for ``spider``.

        A ``user_agent`` attribute on the spider takes precedence over the
        ``USER_AGENT`` setting, even when that attribute is falsy.
        """
        if not hasattr(spider, 'user_agent'):
            return spider.settings['USER_AGENT']
        return spider.user_agent

    def process_request(self, request, spider):
        """Apply the spider's User-Agent to ``request`` and return None.

        Nothing is touched when the resolved value is falsy (empty or None).
        """
        agent = self.cache[spider]
        if not agent:
            return
        # setdefault rather than plain assignment: a User-Agent already set
        # on the request is meant to win over the spider-wide value.
        request.headers.setdefault('User-Agent', agent)