1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90
|
from __future__ import annotations
import os
import warnings
from importlib import import_module
from pathlib import Path
from scrapy.exceptions import NotConfigured
from scrapy.settings import Settings
from scrapy.utils.conf import closest_scrapy_cfg, get_config, init_env
# Name of the environment variable that holds the import path of the
# project's settings module.
ENVVAR = "SCRAPY_SETTINGS_MODULE"
# Section of the Scrapy config file in which per-project data directories
# are looked up (the option name is the project name).
DATADIR_CFG_SECTION = "datadir"
def inside_project() -> bool:
    """Tell whether the current process is running inside a Scrapy project.

    The answer is ``True`` when the settings module named by the ``ENVVAR``
    environment variable can be imported. If that variable is unset/empty,
    or the import fails (a warning is emitted in that case), the result
    depends on whether a ``scrapy.cfg`` file can be located.
    """
    settings_module = os.environ.get(ENVVAR)
    importable = False
    if settings_module:
        try:
            import_module(settings_module)
        except ImportError as exc:
            # Not fatal: report the problem and fall back to the
            # scrapy.cfg lookup below.
            warnings.warn(
                f"Cannot import scrapy settings module {settings_module}: {exc}"
            )
        else:
            importable = True
    if importable:
        return True
    return bool(closest_scrapy_cfg())
def project_data_dir(project: str = "default") -> str:
    """Return the current project data dir, creating it if it doesn't exist.

    The directory is read from the ``project`` option of the
    ``DATADIR_CFG_SECTION`` section of the Scrapy config when that option
    is present; otherwise it is the ``.scrapy`` directory located next to
    the closest ``scrapy.cfg`` file.

    :param project: option name looked up in the data-dir config section.
    :return: the data directory path, as a string.
    :raises NotConfigured: if not inside a project, or if no ``scrapy.cfg``
        file can be found to infer the data directory from.
    """
    if not inside_project():
        raise NotConfigured("Not inside a project")
    cfg = get_config()
    if cfg.has_option(DATADIR_CFG_SECTION, project):
        d = Path(cfg.get(DATADIR_CFG_SECTION, project))
    else:
        scrapy_cfg = closest_scrapy_cfg()
        if not scrapy_cfg:
            raise NotConfigured(
                "Unable to find scrapy.cfg file to infer project data dir"
            )
        d = (Path(scrapy_cfg).parent / ".scrapy").resolve()
    if not d.exists():
        # exist_ok=True closes the check-then-create race: another process
        # may create the directory between exists() and mkdir().
        d.mkdir(parents=True, exist_ok=True)
    return str(d)
def data_path(path: str | os.PathLike[str], createdir: bool = False) -> str:
    """
    Return the given path joined with the .scrapy data directory.
    If given an absolute path, return it unmodified.

    A relative path is joined with the project data directory when running
    inside a project, and with a relative ``.scrapy`` directory otherwise.

    :param path: absolute path, or path relative to the data directory.
    :param createdir: when true, create the resulting path as a directory
        (including missing parents) if it does not exist yet.
    :return: the resulting path, as a string.
    """
    path_obj = Path(path)
    if not path_obj.is_absolute():
        if inside_project():
            path_obj = Path(project_data_dir(), path)
        else:
            path_obj = Path(".scrapy", path)
    if createdir and not path_obj.exists():
        # exist_ok=True closes the check-then-create race: another process
        # may create the directory between exists() and mkdir().
        path_obj.mkdir(parents=True, exist_ok=True)
    return str(path_obj)
def get_project_settings() -> Settings:
    """Build the settings object for the current project.

    If the ``ENVVAR`` environment variable is not set, the environment is
    first initialised via ``init_env`` for the project named by
    ``SCRAPY_PROJECT`` (``"default"`` when unset). The settings module named
    by ``ENVVAR``, if any, is then loaded with ``"project"`` priority, and
    finally a fixed whitelist of ``SCRAPY_``-prefixed environment variables
    is applied (prefix removed) with ``"project"`` priority.

    :return: the populated ``Settings`` instance.
    """
    if ENVVAR not in os.environ:
        project = os.environ.get("SCRAPY_PROJECT", "default")
        init_env(project)

    settings = Settings()
    settings_module_path = os.environ.get(ENVVAR)
    if settings_module_path:
        settings.setmodule(settings_module_path, priority="project")

    valid_envvars = {
        "CHECK",
        "PROJECT",
        "PYTHON_SHELL",
        "SETTINGS_MODULE",
    }
    prefix = "SCRAPY_"
    # Strip only the leading prefix, for both the whitelist check and the
    # resulting key. Using str.replace() for the check would also remove
    # inner occurrences, letting e.g. SCRAPY_SCRAPY_CHECK through under the
    # non-whitelisted key "SCRAPY_CHECK".
    scrapy_envvars = {
        key[len(prefix):]: value
        for key, value in os.environ.items()
        if key.startswith(prefix) and key[len(prefix):] in valid_envvars
    }
    settings.setdict(scrapy_envvars, priority="project")
    return settings
|