File: use_io_common_urlopen.py

package info (click to toggle)
pandas 2.2.3%2Bdfsg-9
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 66,784 kB
  • sloc: python: 422,228; ansic: 9,190; sh: 270; xml: 102; makefile: 83
file content (67 lines) | stat: -rw-r--r-- 1,743 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
"""
Check that pandas.io.common.urlopen is used instead of urllib.request.urlopen.

Any ``from urllib.request import urlopen`` statement is reported as an error.

This is meant to be run as a pre-commit hook - to run it manually, you can do:

    pre-commit run use-io-common-urlopen --all-files

"""

from __future__ import annotations

import argparse
import ast
import sys
from typing import TYPE_CHECKING

if TYPE_CHECKING:
    from collections.abc import Sequence


# Template for the report written when a forbidden import is found; it is
# filled in with the offending file's path and the import's line and column.
ERROR_MESSAGE = (
    "{path}:{lineno}:{col_offset}: Don't use urllib.request.urlopen, "
    "use pandas.io.common.urlopen instead\n"
)


class Visitor(ast.NodeVisitor):
    """
    AST visitor that rejects ``from urllib.request import urlopen``.

    Parameters
    ----------
    path : str
        Path of the file being checked; only used in the error message.
    """

    def __init__(self, path: str) -> None:
        self.path = path

    def visit_ImportFrom(self, node: ast.ImportFrom) -> None:
        """
        Inspect a ``from ... import ...`` statement.

        If the statement imports ``urlopen`` from a module whose name starts
        with ``urllib.request``, the formatted error message is written to
        stdout and the process exits with status 1. Otherwise the node's
        children are visited as usual.
        """
        module_name = node.module
        # ``node.module`` is None for statements such as ``from . import x``.
        from_urllib_request = module_name is not None and module_name.startswith(
            "urllib.request"
        )
        if from_urllib_request and "urlopen" in [alias.name for alias in node.names]:
            report = ERROR_MESSAGE.format(
                path=self.path,
                lineno=node.lineno,
                col_offset=node.col_offset,
            )
            sys.stdout.write(report)
            # Stop at the first offending import.
            sys.exit(1)
        super().generic_visit(node)


def use_io_common_urlopen(content: str, path: str) -> None:
    """
    Parse ``content`` as Python source and run ``Visitor`` over its AST.

    Parameters
    ----------
    content : str
        Source code of the file to check.
    path : str
        Path of the file, passed to ``Visitor`` for use in its error message.
    """
    module_ast = ast.parse(content)
    Visitor(path).visit(module_ast)


def main(argv: Sequence[str] | None = None) -> None:
    parser = argparse.ArgumentParser()
    parser.add_argument("paths", nargs="*")
    args = parser.parse_args(argv)

    for path in args.paths:
        with open(path, encoding="utf-8") as fd:
            content = fd.read()
        use_io_common_urlopen(content, path)


# Run the check only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()