1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122
|
from __future__ import print_function
import re
import os
import string
from importlib import import_module
from os.path import join, exists, abspath
from shutil import ignore_patterns, move, copy2, copystat
import scrapy
from scrapy.commands import ScrapyCommand
from scrapy.utils.template import render_templatefile, string_camelcase
from scrapy.exceptions import UsageError
TEMPLATES_TO_RENDER = (
('scrapy.cfg',),
('${project_name}', 'settings.py.tmpl'),
('${project_name}', 'items.py.tmpl'),
('${project_name}', 'pipelines.py.tmpl'),
('${project_name}', 'middlewares.py.tmpl'),
)
IGNORE = ignore_patterns('*.pyc', '.svn')
class Command(ScrapyCommand):
requires_project = False
default_settings = {'LOG_ENABLED': False,
'SPIDER_LOADER_WARN_ONLY': True}
def syntax(self):
return "<project_name> [project_dir]"
def short_desc(self):
return "Create new project"
def _is_valid_name(self, project_name):
def _module_exists(module_name):
try:
import_module(module_name)
return True
except ImportError:
return False
if not re.search(r'^[_a-zA-Z]\w*$', project_name):
print('Error: Project names must begin with a letter and contain'\
' only\nletters, numbers and underscores')
elif _module_exists(project_name):
print('Error: Module %r already exists' % project_name)
else:
return True
return False
def _copytree(self, src, dst):
"""
Since the original function always creates the directory, to resolve
the issue a new function had to be created. It's a simple copy and
was reduced for this case.
More info at:
https://github.com/scrapy/scrapy/pull/2005
"""
ignore = IGNORE
names = os.listdir(src)
ignored_names = ignore(src, names)
if not os.path.exists(dst):
os.makedirs(dst)
for name in names:
if name in ignored_names:
continue
srcname = os.path.join(src, name)
dstname = os.path.join(dst, name)
if os.path.isdir(srcname):
self._copytree(srcname, dstname)
else:
copy2(srcname, dstname)
copystat(src, dst)
def run(self, args, opts):
if len(args) not in (1, 2):
raise UsageError()
project_name = args[0]
project_dir = args[0]
if len(args) == 2:
project_dir = args[1]
if exists(join(project_dir, 'scrapy.cfg')):
self.exitcode = 1
print('Error: scrapy.cfg already exists in %s' % abspath(project_dir))
return
if not self._is_valid_name(project_name):
self.exitcode = 1
return
self._copytree(self.templates_dir, abspath(project_dir))
move(join(project_dir, 'module'), join(project_dir, project_name))
for paths in TEMPLATES_TO_RENDER:
path = join(*paths)
tplfile = join(project_dir,
string.Template(path).substitute(project_name=project_name))
render_templatefile(tplfile, project_name=project_name,
ProjectName=string_camelcase(project_name))
print("New Scrapy project %r, using template directory %r, created in:" % \
(project_name, self.templates_dir))
print(" %s\n" % abspath(project_dir))
print("You can start your first spider with:")
print(" cd %s" % project_dir)
print(" scrapy genspider example example.com")
@property
def templates_dir(self):
_templates_base_dir = self.settings['TEMPLATES_DIR'] or \
join(scrapy.__path__[0], 'templates')
return join(_templates_base_dir, 'project')
|