1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140
|
from __future__ import annotations
import re
import string
from importlib.util import find_spec
from pathlib import Path
from shutil import copy2, copystat, ignore_patterns, move
from stat import S_IWUSR as OWNER_WRITE_PERMISSION
from typing import TYPE_CHECKING
import scrapy
from scrapy.commands import ScrapyCommand
from scrapy.exceptions import UsageError
from scrapy.utils.template import render_templatefile, string_camelcase
if TYPE_CHECKING:
import argparse
# Template files that Command.run renders after the template tree is copied.
# Each entry is a sequence of path components relative to the new project
# directory; "${project_name}" is a string.Template placeholder that run()
# substitutes with the actual project name before building the path.
TEMPLATES_TO_RENDER: tuple[tuple[str, ...], ...] = (
("scrapy.cfg",),
("${project_name}", "settings.py.tmpl"),
("${project_name}", "items.py.tmpl"),
("${project_name}", "pipelines.py.tmpl"),
("${project_name}", "middlewares.py.tmpl"),
)
# Filter built by shutil.ignore_patterns; Command._copytree calls it with
# (directory, names) and skips every name it returns.
IGNORE = ignore_patterns("*.pyc", "__pycache__", ".svn")
def _make_writable(path: Path) -> None:
    """Add the owner-write permission bit to *path*, keeping its other mode bits."""
    path.chmod(path.stat().st_mode | OWNER_WRITE_PERMISSION)
class Command(ScrapyCommand):
    """Create a new Scrapy project by copying and rendering the project templates."""

    # Class-level configuration consumed by the ScrapyCommand base class
    # (defined outside this file).
    requires_project = False
    default_settings = {"LOG_ENABLED": False, "SPIDER_LOADER_WARN_ONLY": True}

    def syntax(self) -> str:
        """Return the positional-argument synopsis of the command."""
        return "<project_name> [project_dir]"

    def short_desc(self) -> str:
        """Return the one-line description of the command."""
        return "Create new project"

    def _is_valid_name(self, project_name: str) -> bool:
        """Check that *project_name* can be used as the new project's module name.

        The name must consist solely of an ASCII letter or underscore followed
        by word characters, and must not collide with a module that can
        already be found. On failure an error message is printed.

        Returns:
            True if the name is acceptable, False otherwise.
        """

        def _module_exists(module_name: str) -> bool:
            try:
                spec = find_spec(module_name)
            except ValueError:
                # find_spec raises ValueError when the name is already in
                # sys.modules but its __spec__ is None or unset (for example
                # "__main__"). Such a module is loaded, so the name is taken.
                return True
            return spec is not None and spec.loader is not None

        # fullmatch (rather than search with ^...$) so that a name ending in a
        # newline is rejected: "$" also matches just before a final "\n".
        if not re.fullmatch(r"[_a-zA-Z]\w*", project_name):
            print(
                "Error: Project names must begin with a letter and contain"
                " only\nletters, numbers and underscores"
            )
        elif _module_exists(project_name):
            print(f"Error: Module {project_name!r} already exists")
        else:
            return True
        return False

    def _copytree(self, src: Path, dst: Path) -> None:
        """Recursively copy the directory *src* into *dst*.

        This is a reduced tree copy that accepts an already existing *dst*;
        the function it was derived from always creates the destination
        directory. Names matched by ``IGNORE`` are skipped, and every copied
        file and directory is made owner-writable afterwards.

        More info at:
        https://github.com/scrapy/scrapy/pull/2005
        """
        names = [entry.name for entry in src.iterdir()]
        ignored_names = IGNORE(src, names)

        # exist_ok avoids a separate exists() check before creating dst.
        dst.mkdir(parents=True, exist_ok=True)

        for name in names:
            if name in ignored_names:
                continue

            srcname = src / name
            dstname = dst / name
            if srcname.is_dir():
                self._copytree(srcname, dstname)
            else:
                copy2(srcname, dstname)
                # copy2 also copies the source's permission bits; make sure
                # the copy can be modified.
                _make_writable(dstname)

        copystat(src, dst)
        _make_writable(dst)

    def run(self, args: list[str], opts: argparse.Namespace) -> None:
        """Create the project described by *args*.

        Args:
            args: ``[project_name]`` or ``[project_name, project_dir]``. When
                only the name is given it is also used as the directory.
            opts: Parsed command-line options (not used here).

        Raises:
            UsageError: If *args* does not contain one or two items.

        On a refused request (``scrapy.cfg`` already present in the target
        directory, or an invalid project name) ``self.exitcode`` is set to 1
        and nothing is created.
        """
        if len(args) not in (1, 2):
            raise UsageError

        project_name = args[0]
        # The last argument is the directory if given, else the name itself.
        project_dir = Path(args[-1])

        if (project_dir / "scrapy.cfg").exists():
            self.exitcode = 1
            print(f"Error: scrapy.cfg already exists in {project_dir.resolve()}")
            return

        if not self._is_valid_name(project_name):
            self.exitcode = 1
            return

        self._copytree(Path(self.templates_dir), project_dir.resolve())
        # The copied tree contains a directory literally named "module";
        # rename it to the project name.
        move(project_dir / "module", project_dir / project_name)

        for paths in TEMPLATES_TO_RENDER:
            tplfile = Path(
                project_dir,
                *(
                    string.Template(s).substitute(project_name=project_name)
                    for s in paths
                ),
            )
            render_templatefile(
                tplfile,
                project_name=project_name,
                ProjectName=string_camelcase(project_name),
            )

        print(
            f"New Scrapy project '{project_name}', using template directory "
            f"'{self.templates_dir}', created in:"
        )
        print(f"    {project_dir.resolve()}\n")
        print("You can start your first spider with:")
        print(f"    cd {project_dir}")
        print("    scrapy genspider example example.com")

    @property
    def templates_dir(self) -> str:
        """Path of the ``project`` template directory, as a string.

        Uses the ``TEMPLATES_DIR`` setting when it is truthy, otherwise the
        ``templates`` directory inside the installed ``scrapy`` package.
        """
        base_dir = self.settings["TEMPLATES_DIR"] or Path(
            scrapy.__path__[0], "templates"
        )
        return str(Path(base_dir, "project"))
|