"""
Scrapy Web Console extension

See docs/topics/webconsole.rst
"""
import re
import socket
from time import time
from twisted.internet import reactor
from twisted.web import server, resource
from scrapy.xlib.pydispatch import dispatcher
from scrapy.core.exceptions import NotConfigured
from scrapy.core.engine import scrapyengine
from scrapy.conf import settings
# web management signals
# Sent through the dispatcher by WebConsoleResource to discover console
# modules. Each object returned by a receiver is used as a module and must
# provide `webconsole_id`, `webconsole_name` and `webconsole_render(request)`.
webconsole_discover_module = object()
# Captures the first path segment of a request path such as "/<module>/...";
# the trailing slash after the segment is required for a match.
urlpath_re = re.compile(r"^/(\w+)/")
def error404(module):
    """Return the HTML body of the "module not found" page.

    The module name is HTML-escaped before it is placed in the page, so a
    name containing markup characters is displayed literally instead of
    being interpreted by the browser.

    :param module: name of the web console module that was requested; any
        object is accepted and rendered with ``%s``
    :return: the complete HTML page as a string
    """
    # Imported locally to keep the block self-contained; this helper exists
    # in both Python 2 and Python 3.
    from xml.sax.saxutils import escape

    # Format through a one-element tuple so that a tuple argument is shown
    # as text rather than being unpacked as several format arguments.
    name = escape("%s" % (module,))
    return """
<html>
<head><title>404 Not Found</title></head>
<body>
<h1>Not found</h1>
<p>Web console module not found: <b>%s</b></p>
<p><a href="/">Back to main menu</a></p>
</body>
</html>
""" % (name,)
def banner(module=None):
    """Return the opening HTML shared by every web console page.

    The fragment opens ``<html>`` and ``<body>`` (the caller is expected to
    close them) and shows the bot name, the host name and the engine uptime
    in seconds.

    :param module: optional web console module; when given (and truthy) a
        heading linking to ``/<webconsole_id>/`` and labelled with its
        ``webconsole_name`` is appended
    :return: the HTML fragment as a string
    """
    # Seconds elapsed since the engine recorded its start time.
    elapsed = time() - scrapyengine.start_time
    fragments = [
        "<html>\n",
        "<head><title>Scrapy</title></head>\n",
        "<body>\n",
        "<h1><a href='/'>Scrapy web console</a></h1>\n",
        "<p>Bot: <b>%s</b> | Host: <b>%s</b> | Uptime: <b>%ds</b></p>\n"
        % (settings['BOT_NAME'], socket.gethostname(), elapsed),
    ]
    if module:
        fragments.append(
            "<h2><a href='/%s/'>%s</a></h2>\n"
            % (module.webconsole_id, module.webconsole_name)
        )
    return "".join(fragments)
class WebConsoleResource(resource.Resource):
    """Twisted web resource that serves the web console.

    Requests whose path starts with ``/<module>/`` are delegated to the
    matching console module; any other path gets the list of available
    modules.
    """

    # Twisted flag marking this resource as a leaf: no child resources are
    # looked up, and render_GET does its own dispatching on the request path.
    isLeaf = True

    @property
    def modules(self):
        """Return the discovered console modules, keyed by ``webconsole_id``.

        Discovery runs once, on first access, by sending the
        ``webconsole_discover_module`` signal through the dispatcher; every
        object returned by a receiver is registered as a module.
        """
        if not hasattr(self, '_modules'):
            # Collect into a local dict and publish it only after discovery
            # has finished: if a receiver raises, nothing is cached and the
            # next access retries instead of serving a partial module list.
            discovered = {}
            responses = dispatcher.send(signal=webconsole_discover_module,
                                        sender=self.__class__)
            for _, obj in responses:
                discovered[obj.webconsole_id] = obj
            self._modules = discovered
        return self._modules

    def render_GET(self, request):
        """Render the page for ``request``.

        :param request: the incoming request; its ``path`` selects the module
        :return: the module's own rendering, the 404 page (with the response
            code set to 404) when the module is unknown, or the module list
            when the path does not name a module
        """
        match = urlpath_re.search(request.path)
        if not match:
            return self.module_list()
        name = match.group(1)
        module = self.modules.get(name)
        if module is None:
            request.setResponseCode(404)
            return error404(name)
        return module.webconsole_render(request)

    # POST requests are handled exactly like GET requests.
    render_POST = render_GET

    def module_list(self):
        """Return the main menu page linking to every available module.

        Modules are listed sorted by their id so the page is stable across
        requests and runs.
        """
        modules = self.modules
        parts = [banner(), "<p>Available modules:</p>\n", "<ul>\n"]
        for name in sorted(modules):
            parts.append("<li><a href='/%s/'>%s</a></li>\n"
                         % (name, modules[name].webconsole_name))
        parts.append("</ul>\n")
        parts.append("</body>\n")
        parts.append("</html>\n")
        return "".join(parts)
class WebConsole(server.Site):
    """Web site that exposes the web console over TCP.

    Instantiating it registers the console with the reactor; it is only
    available when the ``WEBCONSOLE_ENABLED`` setting is true.
    """

    def __init__(self):
        """Set up the site and schedule it to start listening.

        :raises NotConfigured: when ``WEBCONSOLE_ENABLED`` is false
        """
        enabled = settings.getbool('WEBCONSOLE_ENABLED')
        if not enabled:
            raise NotConfigured

        log_path = settings['WEBCONSOLE_LOGFILE']
        root = WebConsoleResource()
        server.Site.__init__(self, root, logPath=log_path)
        # NOTE(review): presumably silences Twisted's factory start/stop
        # log messages -- confirm against the Twisted version in use.
        self.noisy = False

        listen_port = settings.getint('WEBCONSOLE_PORT')
        # Defer opening the port until the reactor is actually running.
        reactor.callWhenRunning(reactor.listenTCP, listen_port, self)