File: utilpathremove.py

package info (click to toggle)
python-beartype 0.22.9-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 9,504 kB
  • sloc: python: 85,502; sh: 328; makefile: 30; javascript: 18
file content (186 lines) | stat: -rw-r--r-- 8,335 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
#!/usr/bin/env python3
# --------------------( LICENSE                            )--------------------
# Copyright (c) 2014-2025 Beartype authors.
# See "LICENSE" for further details.

'''
Project-wide **path removers** (i.e., low-level callables permanently removing
on-disk files and directories in various reasonably safe and portable ways).

This private submodule is *not* intended for importation by downstream callers.
'''

# ....................{ IMPORTS                            }....................
# from beartype.roar._roarexc import _BeartypeUtilPathException
from beartype._data.typing.datatyping import (
    PathnameLike,
    PathnameLikeTuple,
)
from importlib.machinery import BYTECODE_SUFFIXES
from pathlib import Path

# ....................{ REMOVERS                           }....................
#FIXME: Unit test us up, please.
def remove_package_bytecode_files(package_dirname: PathnameLike) -> None:
    '''
    Permanently, silently, and recursively remove all **bytecode files** (i.e.,
    pure-Python bytecode compiled to platform-dependent temporary files residing
    in temporary ``__pycache__/`` subdirectories) of both the passed package and
    all subpackages of that package regardless of nesting depth.

    Usage
    -----
    This function is typically intended for usage in our test suite. Unit tests
    exercising :mod:`beartype` functionality that dynamically modifies the
    contents of bytecode files guarantee idempotency (i.e., reproducibility) by
    calling this function *before* exercising that functionality. Examples
    include :mod:`beartype.claw` import hooks that dynamically transform the
    abstract syntax trees (ASTs) of sample modules embedded in our test suite
    *before* permanently serializing (i.e., saving, writing) those changes back
    to disk within those bytecode files. Preventing desynchronization between
    the frequently changing implementations of those import hooks and those
    bytecode files requires calling this function beforehand.

    Caveats
    -------
    **This function is subject to subtle race conditions if multiple threads
    and/or processes concurrently attempt to mutate this package on the local
    filesystem.** Since *all* filesystem-centric logic suffers similar issues,
    we leave this issue as an exercise for the caller.

    Parameters
    ----------
    package_dirname : PathnameLike
        Absolute dirname of the package to remove all previously compiled
        bytecode files from.
    '''
    assert isinstance(package_dirname, PathnameLikeTuple), (
        f'{repr(package_dirname)} neither string nor "Path" object.')

    # Avoid circular import dependencies.
    from beartype._util.path.utilpathtest import die_unless_dir

    # High-level "Path" object encapsulating this dirname.
    package_dir = Path(package_dirname)

    # If this directory does *NOT* exist, raise an exception.
    die_unless_dir(package_dir)
    # Else, this directory exists.

    # For the "."-prefixed filetype of each type of platform-dependent bytecode
    # file generated by the current platform...
    #
    # Note that Python-specific glob syntax does *not* support disjunction
    # (i.e., alternation). In particular, POSIX-compliant glob disjunction
    # syntax "{match1,...,matchN}" is unsupported. If supported, that syntax
    # would enable this inefficient O(n) iteration to be trivially optimized
    # into a single O(1) call to the remove_paths_globbed() function.
    for BYTECODE_SUFFIX in BYTECODE_SUFFIXES:
        # Permanently and silently remove *ALL* bytecode files previously
        # compiled by Python into this "__pycache__/" subdirectory.
        remove_paths_globbed(
            dirname=package_dir,
            # Note that this filetype is already prefixed by ".". *sigh*
            glob=f'**/__pycache__/*{BYTECODE_SUFFIX}',
        )


#FIXME: Unit test us up, please.
def remove_paths_globbed(dirname: PathnameLike, glob: str) -> None:
    '''
    Permanently, silently, and possibly recursively remove *all* target files
    and empty directories from the source directory with the passed dirname
    matching the passed Python-specific glob expression.

    Note that Python-specific glob syntax is exactly that supported by the
    standard :mod:`fnmatch` module *plus* the recursive glob syntax ``"**/"``.
    Specifically, Python-specific glob syntax supports *only* the following
    small subset of POSIX-compliant glob syntax:

    * ``"*"`` matches everything.
    * ``"?"`` matches any single character.
    * ``"[seq]"`` matches any character in the substring ``"seq"``.
    * ``"[!seq]"`` matches any character not in the substring ``"seq"``.
    * ``"**/"`` matches *all* subdirectories recursively regardless of depth
      (e.g., ``"**/*.jpg"``, recursively removing all JPEG-formatted images from
      both this directory and all subdirectories of this directory).

    Caveats
    -------
    **This function silently ignores all non-empty directories matched by this
    glob expression.** Consider an alternate approach leveraging recursive
    directory tree traversal if requiring non-empty directory removal.

    **This function is subject to subtle race conditions if multiple threads
    and/or processes concurrently attempt to mutate this source directory.**
    Since *all* filesystem-centric logic suffers similar issues, we leave this
    issue as an exercise for the caller.

    **This function is currently inefficiently implemented in a single-threaded
    manner for simplicity.** This approach is appropriate when removing a small
    number of files but inappropriate when removing a large number of files. In
    the latter case, consider an alternate approach leveraging either
    multithreading or multiprocessing. See also this `popular article`_.

    .. _popular article:
        https://superfastpython.com/multithreaded-file-deletion

    Parameters
    ----------
    dirname : PathnameLike
        Dirname of the directory to remove *all* files and empty directories
        matching this glob from, specified as a **pathname-like** (i.e., either
        a low-level string possibly signifying a pathname *or* a high-level
        :class:`Path` instance definitely encapsulating a pathname).
    glob : str
        Python-specific glob expression matching *all* files and empty
        directories to be removed from this directory (e.g., ``"*.jpg"``).

    Raises
    ------
    _BeartypeUtilPathException
        If either:

        * This directory does *not* exist.
        * This directory exists but is *not* actually a directory.

    See Also
    ----------
    https://stackoverflow.com/a/38189275/2809027
        StackOverflow answer strongly inspiring this implementation.
    '''
    assert isinstance(dirname, PathnameLikeTuple), (
        f'{repr(dirname)} neither string nor "Path" object.')
    assert isinstance(glob, str), f'{repr(glob)} not string.'

    # Avoid circular import dependencies.
    from beartype._util.path.utilpathtest import die_unless_dir

    # High-level "Path" object encapsulating this dirname.
    dirname_path = Path(dirname)

    # If this directory does *NOT* exist, raise an exception.
    die_unless_dir(dirname_path)
    # Else, this directory exists.

    # For each matching pathname globbed from this dirname as a "Path" object...
    for pathname_globbed in dirname_path.glob(glob):
        # print(f'Removing globbed path "{pathname_globbed}"...')

        # If this pathname refers to a file...
        if pathname_globbed.is_file():
            #FIXME: Pass "missing_ok=True" *AFTER* dropping Python 3.7, as doing
            #so will improve the robustness of this logic against race
            #conditions.

            # Silently remove this file if feasible *OR* raise an exception.
            pathname_globbed.unlink()
        # Else, this pathname does *NOT* refer to a file.
        #
        # If this pathname refers to a (hopefully empty) subdirectory...
        elif pathname_globbed.is_dir():
            # Silently remove this empty subdirectory if feasible *OR* raise an
            # exception.
            pathname_globbed.rmdir()
        # Else, this pathname refers to neither a file *NOR* subdirectory. In
        # this case, silently ignore this pathname.