1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300
|
#!/usr/bin/env python3
"""
Check pandas required and optional dependencies are synced across:
ci/deps/actions-.*-minimum_versions.yaml
pandas/compat/_optional.py
pyproject.toml
TODO: doc/source/getting_started/install.rst
This is meant to be run as a pre-commit hook - to run it manually, you can do:
pre-commit run validate-min-versions-in-sync --all-files
"""
from __future__ import annotations
import pathlib
import sys
import yaml
if sys.version_info >= (3, 11):
import tomllib
else:
import tomli as tomllib
from typing import Any
from scripts.generate_pip_deps_from_conda import CONDA_TO_PIP
# All paths below are written relative to the current working directory.

# Absolute path of the install guide. It is not referenced again in the code
# visible here.
DOC_PATH = pathlib.Path("doc/source/getting_started/install.rst").resolve()
# First file under ci/deps whose name matches the minimum-versions pattern.
# next() raises StopIteration when the glob yields nothing.
CI_PATH = next(
pathlib.Path("ci/deps").absolute().glob("actions-*-minimum_versions.yaml")
)
# Absolute path of the module that holds the in-code minimum versions; used in
# the mismatch report written by main().
CODE_PATH = pathlib.Path("pandas/compat/_optional.py").resolve()
# Absolute path of the TOML file the optional-dependency minimums are read from.
SETUP_PATH = pathlib.Path("pyproject.toml").resolve()
# Directory whose entries are all treated as dependency YAML files when pinning.
YAML_PATH = pathlib.Path("ci/deps")
# Extra YAML file pinned together with the entries of YAML_PATH.
ENV_PATH = pathlib.Path("environment.yml")
# Package names dropped from the code/CI/TOML version comparison.
EXCLUDE_DEPS = {"tzdata", "blosc", "pandas-gbq", "pyqt", "pyqt5"}
# Exact YAML dependency strings that the pinning step skips.
EXCLUSION_LIST = frozenset(["python=3.8[build=*_pypy]"])
# pandas package is not available
# in pre-commit environment
sys.path.append("pandas/compat")
sys.path.append("pandas/util")
# With the two directories on sys.path, these resolve as top-level modules.
import _exceptions
import version
# Register the modules under their package-qualified names before importing
# _optional -- presumably so _optional's own imports of these names succeed
# without the pandas package being importable (confirm against _optional.py).
sys.modules["pandas.util.version"] = version
sys.modules["pandas.util._exceptions"] = _exceptions
import _optional
def pin_min_versions_to_ci_deps() -> int:
    """
    Pin minimum versions to CI dependencies.

    Pip dependencies are not pinned.

    Every entry of ``YAML_PATH`` plus ``ENV_PATH`` is read, its
    ``dependencies`` list is compared with the ``all`` extra declared in
    ``SETUP_PATH``, and the file is rewritten in place when the pinned text
    differs from the text that was read.

    Returns
    -------
    int
        1 if at least one file was rewritten, otherwise 0.
    """
    all_yaml_files = list(YAML_PATH.iterdir())
    all_yaml_files.append(ENV_PATH)

    with open(SETUP_PATH, "rb") as toml_f:
        toml_dependencies = tomllib.load(toml_f)
    # The TOML minimums do not depend on the YAML file being processed, so the
    # map is built once instead of once per file.
    toml_map = get_toml_map_from(toml_dependencies)

    ret = 0
    for curr_file in all_yaml_files:
        with open(curr_file, encoding="utf-8") as yaml_f:
            yaml_start_data = yaml_f.read()
        yaml_file = yaml.safe_load(yaml_start_data)
        yaml_map = get_yaml_map_from(yaml_file["dependencies"])
        # Pinning works on the raw text so the file's comments and layout
        # survive the rewrite.
        yaml_result_data = pin_min_versions_to_yaml_file(
            yaml_map, toml_map, yaml_start_data
        )
        if yaml_result_data != yaml_start_data:
            with open(curr_file, "w", encoding="utf-8") as f:
                f.write(yaml_result_data)
            ret |= 1
    return ret
def get_toml_map_from(toml_dic: dict[str, Any]) -> dict[str, str]:
    """
    Map each package of the ``all`` extra to its minimum version.

    Parameters
    ----------
    toml_dic : dict
        Parsed TOML document; ``project.optional-dependencies.all`` must be an
        iterable of ``"<package>>=<version>"`` strings.

    Returns
    -------
    dict
        Package name mapped to the version text that follows ``>=``.

    Raises
    ------
    ValueError
        If an entry does not split into exactly two parts on ``>=``.
    """
    requirements = set(toml_dic["project"]["optional-dependencies"]["all"])
    # Surrounding whitespace is removed from the whole entry before splitting.
    split_requirements = (entry.strip().split(">=") for entry in requirements)
    return {package: minimum for package, minimum in split_requirements}
def get_operator_from(dependency: str) -> str | None:
    """
    Return the comparison operator found in a dependency string.

    Candidates are tried in a fixed order and the first one contained in
    ``dependency`` is returned, so two-character operators are recognised
    before the single characters they contain. ``None`` is returned when the
    string holds no operator.
    """
    # Order matters: "<=" and ">=" must be tested before "=", ">" and "<".
    for candidate in ("<=", ">=", "=", ">", "<"):
        if candidate in dependency:
            return candidate
    return None
def get_yaml_map_from(
    yaml_dic: list[str | dict[str, list[str]]]
) -> dict[str, list[str] | None]:
    """
    Map each YAML dependency to the version constraints written next to it.

    Dict entries, entries listed in ``EXCLUSION_LIST`` and entries whose full
    text is already a key of the result are skipped.

    Parameters
    ----------
    yaml_dic : list
        The ``dependencies`` list of a parsed YAML file.

    Returns
    -------
    dict
        Package name mapped to a list of constraint strings, or to ``None``
        when the entry carries no operator. For an entry containing a comma
        the list holds the text after the comma first, followed by the
        operator and version taken from the text before the comma.
    """
    constraints: dict[str, list[str] | None] = {}
    for entry in yaml_dic:
        if isinstance(entry, dict):
            continue
        if entry in EXCLUSION_LIST or entry in constraints:
            continue

        text = str(entry)
        found_operator = get_operator_from(text)

        if "," in entry:
            leading_part, trailing_part = text.split(",")
            # The operator is looked up again on the leading part alone.
            found_operator = get_operator_from(leading_part)
            assert found_operator is not None
            name, bound = leading_part.split(found_operator)
            constraints[name] = [trailing_part, found_operator + bound]
        elif "[build=*_pypy]" in entry:
            without_build = text.replace("[build=*_pypy]", "")
            name, bound = without_build.split(found_operator)
            constraints[name] = [found_operator + bound]
        elif found_operator is not None:
            name, bound = text.split(found_operator)
            constraints[name] = [found_operator + bound]
        else:
            constraints[text.strip()] = None
    return constraints
def clean_version_list(
    yaml_versions: list[str], toml_version: version.Version
) -> list[str]:
    """
    Flag the constraints that the TOML minimum makes redundant.

    Each entry has its operator prefix removed and the remainder is parsed with
    ``version.parse``. An entry is overwritten with ``"-" + <parsed version>``
    when the parsed version is below ``toml_version``, or when it is not below
    it and the operator contains ``>``. Other entries are left untouched.

    Parameters
    ----------
    yaml_versions : list of str
        Constraint strings, each starting with its operator. Modified in place.
    toml_version : version.Version
        Minimum version to compare against.

    Returns
    -------
    list of str
        The same list object that was passed in.
    """
    for position, constraint in enumerate(yaml_versions):
        found_operator = get_operator_from(constraint)
        assert found_operator is not None
        # Two-character operators take two leading characters, others one.
        prefix_length = 2 if found_operator in ("<=", ">=") else 1
        parsed = version.parse(constraint[prefix_length:])
        if parsed < toml_version or (
            parsed >= toml_version and ">" in found_operator
        ):
            # The leading "-" is the marker callers filter on.
            yaml_versions[position] = "-" + str(parsed)
    return yaml_versions
def pin_min_versions_to_yaml_file(
    yaml_map: dict[str, list[str] | None], toml_map: dict[str, str], yaml_file_data: str
) -> str:
    """
    Rewrite the text of a YAML file so its packages carry the TOML minimums.

    Parameters
    ----------
    yaml_map : dict
        Package name mapped to its current constraint strings, or ``None``
        when the package is unconstrained.
    toml_map : dict
        Package name mapped to its minimum version text.
    yaml_file_data : str
        Raw text of the YAML file.

    Returns
    -------
    str
        The text with each matching dependency replaced. Packages listed in
        ``EXCLUSION_LIST`` or without an entry in ``toml_map`` are left as is.
    """
    text = yaml_file_data
    for package, constraints in yaml_map.items():
        if package in EXCLUSION_LIST:
            continue

        # Text to search for: the name directly followed by its constraints.
        current = package
        if constraints is not None:
            current = current + ", ".join(constraints)

        # Prefer the pip spelling of the name, then fall back to the name as
        # written in the YAML file.
        pip_name = CONDA_TO_PIP.get(package, package)
        if pip_name in toml_map:
            minimum = toml_map[pip_name]
        elif package in toml_map:
            minimum = toml_map[package]
        else:
            continue

        if constraints is None:
            # Unconstrained entry: only its first occurrence is pinned.
            text = text.replace(current, current + ">=" + minimum, 1)
            continue

        floor = version.parse(minimum)
        surviving = [
            item for item in clean_version_list(constraints, floor) if "-" not in item
        ]
        replacement = package + "".join(item + ", " for item in surviving)
        if get_operator_from(replacement) != "=":
            replacement += ">=" + minimum
        else:
            # Drop the trailing ", " separator.
            replacement = replacement[:-2]
        text = text.replace(current, replacement)
    return text
def get_versions_from_code() -> dict[str, str]:
    """
    Collect the minimum versions declared in ``_optional.VERSIONS``.

    Entries for the packages in ``EXCLUDE_DEPS`` are popped from the mapping
    bound to ``_optional.VERSIONS`` (no copy is made). The remaining keys are
    translated through ``_optional.INSTALL_MAPPING`` and case-folded.
    """
    name_map = _optional.INSTALL_MAPPING
    reverse_name_map = {installed: imported for imported, installed in name_map.items()}
    minimums = _optional.VERSIONS
    for excluded in EXCLUDE_DEPS:
        # Exclusions may be spelled by install name; translate back first.
        minimums.pop(reverse_name_map.get(excluded, excluded), None)
    return {
        name_map.get(imported, imported).casefold(): minimum
        for imported, minimum in minimums.items()
    }
def get_versions_from_ci(content: list[str]) -> tuple[dict[str, str], dict[str, str]]:
    """
    Read the required and optional minimum versions from CI file lines.

    Parameters
    ----------
    content : list of str
        Lines of the CI dependency file.

    Returns
    -------
    tuple of dict
        ``(required, optional)``, each mapping a case-folded package name to
        its version text. Packages in ``EXCLUDE_DEPS`` are left out; the
        ``pytest`` line of the test section is stored among the optional ones.
    """
    # Don't parse with pyyaml because it ignores comments we're looking for
    required: dict[str, str] = {}
    optional: dict[str, str] = {}
    in_test = in_required = in_optional = False

    for raw_line in content:
        if "# test dependencies" in raw_line:
            in_test = True
            continue
        if in_test and "- pytest>=" in raw_line:
            # Only grab pytest
            name, minimum = raw_line.strip().split(">=")
            # Drop the leading "- " of the list item.
            optional[name[2:].casefold()] = minimum
            continue
        if "# required dependencies" in raw_line:
            in_required = True
            continue
        if "# optional dependencies" in raw_line:
            in_optional = True
            continue
        if "- pip:" in raw_line:
            continue

        stripped = raw_line.strip()
        if not (in_required and stripped):
            continue

        separator = "==" if "==" in raw_line else "="
        spec, minimum = stripped.split(separator, maxsplit=1)
        # Keep only the last whitespace-separated token, i.e. the name.
        name = spec.split()[-1]
        if name in EXCLUDE_DEPS:
            continue
        target = optional if in_optional else required
        target[name.casefold()] = minimum
    return required, optional
def get_versions_from_toml() -> dict[str, str]:
    """
    Collect the minimum versions of the ``all`` extra in ``SETUP_PATH``.

    Entries of the ``test`` extra that start with ``pytest-`` are removed from
    the ``all`` set first. Package names are translated through
    ``_optional.INSTALL_MAPPING`` and case-folded, and the names in
    ``EXCLUDE_DEPS`` are dropped from the result.
    """
    name_map = _optional.INSTALL_MAPPING
    with open(SETUP_PATH, "rb") as pyproject_f:
        extras = tomllib.load(pyproject_f)["project"]["optional-dependencies"]

    declared = set(extras["all"])
    # remove pytest plugin dependencies
    plugin_specs = {spec for spec in extras["test"] if spec.startswith("pytest-")}
    wanted = declared - plugin_specs

    split_specs = (spec.strip().split(">=") for spec in wanted)
    minimums = {
        name_map.get(package, package).casefold(): minimum
        for package, minimum in split_specs
    }
    for excluded in EXCLUDE_DEPS:
        minimums.pop(excluded, None)
    return minimums
def main() -> int:
    """
    Pin the CI dependency files, then check the optional minimums agree.

    The optional minimum versions read from ``CI_PATH``, from the code and
    from ``SETUP_PATH`` are compared as ``(package, version)`` pairs. Any pair
    not shared by all three sources is reported on standard output.

    Returns
    -------
    int
        0 when no file was rewritten and all sources agree, otherwise 1.
    """
    ret = 0
    ret |= pin_min_versions_to_ci_deps()
    with open(CI_PATH, encoding="utf-8") as f:
        _, ci_optional = get_versions_from_ci(f.readlines())
    code_optional = get_versions_from_code()
    setup_optional = get_versions_from_toml()

    # Union minus intersection: the pairs missing from at least one source.
    diff = (ci_optional.items() | code_optional.items() | setup_optional.items()) - (
        ci_optional.items() & code_optional.items() & setup_optional.items()
    )

    if diff:
        # Sorted so the report lists packages in the same order on every run.
        packages = sorted({package for package, _ in diff})
        out = sys.stdout
        out.write(
            f"The following minimum version differences were found between "
            f"{CI_PATH}, {CODE_PATH} AND {SETUP_PATH}. "
            f"Please ensure these are aligned: \n\n"
        )

        for package in packages:
            out.write(
                f"{package}\n"
                f"{CI_PATH}: {ci_optional.get(package, 'Not specified')}\n"
                f"{CODE_PATH}: {code_optional.get(package, 'Not specified')}\n"
                f"{SETUP_PATH}: {setup_optional.get(package, 'Not specified')}\n\n"
            )
        ret |= 1
    return ret
if __name__ == "__main__":
sys.exit(main())
|