File: fs.py

package info (click to toggle)
python-prance 25.4.8.0%2Bds1-2
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 6,140 kB
  • sloc: python: 3,381; makefile: 205
file content (327 lines) | stat: -rw-r--r-- 11,700 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
"""This submodule contains file system utilities for Prance."""

__author__ = "Jens Finkhaeuser"
__copyright__ = "Copyright (c) 2016-2019 Jens Finkhaeuser"
__license__ = "MIT"
# With an empty ``__all__``, a star-import of this module binds no names.
__all__ = ()


# Re-define an error for backwards compatibility
# (the rebinding makes ``FileNotFoundError`` an attribute of this module).
FileNotFoundError = FileNotFoundError  # pragma: no cover


# The following constant and function are taken from
# https://stackoverflow.com/questions/9532499/check-whether-a-path-is-valid-in-python-without-creating-a-file-at-the-paths-ta

# Sadly, Python fails to provide the following magic number for us.
# It is compared against ``OSError.winerror`` in ``is_pathname_valid``.
_ERROR_INVALID_NAME = 123
"""
Windows-specific error code indicating an invalid pathname.

See Also
----------
https://msdn.microsoft.com/en-us/library/windows/desktop/ms681382%28v=vs.85%29.aspx
    Official listing of all such codes.
"""


# Following Microsoft documentation, set the default read size for detecting
# a file encoding to a multiple of 4k that seems to work well on various OSes
# and volume sizes.
# https://support.microsoft.com/en-us/help/140365/default-cluster-size-for-ntfs-fat-and-exfat
# The value is in bytes (64 KiB) and caps how much ``detect_encoding`` reads
# unless it is asked to read the whole file.
_READ_CHUNK_SIZE = 64 * 1024
"""
Default read size for detecting file encoding.
"""


def is_pathname_valid(pathname):
    """
    Report whether a path name is acceptable to the current OS.

    The path does not need to exist or be readable: every component is merely
    probed with ``os.lstat`` below a directory that is known to exist, and
    only errors that signal a malformed or over-long name count as failures.

    :param pathname: The candidate path name. Anything that is not a
        non-empty ``str`` is rejected.
    :return: True if the passed pathname is valid on the current OS, False
        otherwise.
    :rtype: bool
    """
    import errno
    import os
    import sys

    try:
        # Only a non-empty string can possibly be a path name.
        if not (isinstance(pathname, str) and pathname):
            return False

        # Drop a Windows drive specifier (e.g. `C:`), if any. Windows forbids
        # `:` inside path components, so leaving the prefix in place would
        # make every absolute Windows path look invalid.
        remainder = os.path.splitdrive(pathname)[1]

        # Pick a directory that is guaranteed to exist: the system drive on
        # Windows (the "%SYSTEMDRIVE%" environment variable, typically `C:`),
        # the root directory everywhere else.
        if sys.platform == "win32":
            anchor = os.environ.get("SYSTEMDRIVE", "C:")
        else:
            anchor = os.path.sep
        assert os.path.isdir(anchor)  # ...Murphy and her ironclad Law

        # Make sure the anchor ends in exactly one path separator.
        anchor = anchor.rstrip(os.path.sep) + os.path.sep

        # Probe every component on its own. Missing or unreadable components
        # are fine; only "this name is malformed" style errors are not.
        for component in remainder.split(os.path.sep):
            try:
                os.lstat(anchor + component)
            except OSError as err:
                # Which error codes mean "invalid name" depends on the OS:
                #
                # * On Windows the exception carries a "winerror" attribute,
                #   which is more fine-grained than "errno" (a too-long path
                #   yields errno ENOENT there, not ENAMETOOLONG), so only
                #   "_ERROR_INVALID_NAME" is treated as invalid.
                # * Elsewhere the generic "errno" is inspected: most
                #   POSIX-compatible systems report "ENAMETOOLONG", a few
                #   (e.g., SunOS, *BSD) report "ERANGE".
                #
                # Every other error is an ignorable kernel or filesystem
                # complaint (e.g., path not found or inaccessible).
                if hasattr(err, "winerror"):  # pragma: nocover
                    malformed = err.winerror == _ERROR_INVALID_NAME
                else:
                    malformed = err.errno in (errno.ENAMETOOLONG, errno.ERANGE)
                if malformed:
                    return False

        # Every component survived the probe, so the whole path is valid.
        return True
    # A "TypeError" almost certainly carries the message "embedded NUL
    # character", which indicates an invalid pathname.
    except TypeError:  # pragma: nocover
        return False
    # Null-bytes may also surface as a "ValueError"; they are invalid too.
    except ValueError:
        return False
    # Any other exception is an unrelated fatal issue (e.g., a bug) and is
    # allowed to unwind the call stack.


def from_posix(fname):
    """
    Convert a path from posix-like, to the platform format.

    On Windows a single leading ``/`` is dropped and every remaining ``/`` is
    turned into a backslash. On every other platform the name is returned
    unchanged.

    :param str fname: The filename in posix-like format. An empty string is
        accepted and returned as-is.
    :return: The filename in the format of the platform.
    :rtype: str
    """
    import sys

    if sys.platform == "win32":  # pragma: nocover
        # startswith() instead of fname[0]: indexing an empty string raises
        # IndexError, whereas an empty name should simply pass through.
        if fname.startswith("/"):
            fname = fname[1:]
        fname = fname.replace("/", "\\")
    return fname


def to_posix(fname):
    """
    Translate a platform path into its posix-like spelling.

    On Windows an absolute path gains a leading ``/`` and all backslashes
    become forward slashes. On every other platform the name is returned
    unchanged.

    :param str fname: The filename to convert to posix format.
    :return: The filename in posix-like format.
    :rtype: str
    """
    import sys

    if sys.platform == "win32":  # pragma: nocover
        from os.path import isabs

        # Decide on the prefix before touching the separators, because the
        # absoluteness test has to see the native spelling.
        leading = "/" if isabs(fname) else ""
        fname = (leading + fname).replace("\\", "/")
    return fname


def abspath(filename, relative_to=None):
    """
    Resolve a file name to an absolute, posix-like path.

    A relative `filename` is first anchored at `relative_to`: at the reference
    itself when it is an existing directory, otherwise at the directory
    containing it. Without a reference (or for an already absolute name) the
    name is handed straight to `canonical_filename`.

    :param str filename: The filename to make absolute.
    :param str relative_to: [optional] the reference file name.
    :return: The absolute path
    :rtype: str
    """
    import os.path

    local_name = from_posix(filename)

    # Only relative names are anchored at the reference.
    if relative_to and not os.path.isabs(local_name):
        reference = from_posix(relative_to)
        if os.path.isdir(reference):
            base_dir = reference
        else:
            base_dir = os.path.dirname(reference)
        local_name = os.path.join(base_dir, local_name)

    # Canonicalise, then hand the result back in posix-like form.
    return to_posix(canonical_filename(local_name))


def canonical_filename(filename):
    """
    Return the canonical version of a file name.

    The canonical version is defined as the absolute path, with the chain of
    symbolic links at that path followed until a non-link (or an unreadable
    entry) is reached. Only the link at the full path is read on each step.

    A cycle of symbolic links (for example ``a -> b -> a``) does not hang the
    function: as soon as a path is encountered a second time, that path is
    returned as-is.

    :param str filename: The filename to make canonical.
    :return: The canonical filename.
    :rtype: str
    """
    import os.path

    path = from_posix(filename)
    # Absolute paths already visited; used to break out of symlink cycles.
    visited = set()
    while True:
        path = os.path.abspath(path)
        if path in visited:
            # Seeing the same path again means the links form a loop.
            return path
        visited.add(path)

        try:
            target = os.readlink(path)
        # os.readlink doesn't exist in windows python2.7
        except AttributeError:  # pragma: no cover
            return path
        # Not a link (or not readable): the path is as canonical as it gets.
        except OSError:
            return path

        # A relative link target is relative to the directory of the link;
        # an absolute target replaces the path entirely via os.path.join.
        path = os.path.join(os.path.dirname(path), target)


def detect_encoding(filename, default_to_utf8=True, **kwargs):
    """
    Detect the named file's character encoding.

    If the first parts of the file appear to be ASCII, this function returns
    'UTF-8', as that's a safe superset of ASCII. This can be switched off by
    changing the `default_to_utf8` parameter.

    A file starting with the UTF-8 byte order mark is always reported as
    'utf-8-sig'. Otherwise detection is delegated to ICU if it can be
    imported, and to chardet if not.

    :param str filename: The name of the file to detect the encoding of.
    :param bool default_to_utf8: Defaults to True. Set to False to disable
        treating ASCII files as UTF-8.
    :param bool read_all: Keyword argument; if True, reads the entire file
        for encoding detection.
    :return: The file encoding.
    :rtype: str
    """
    import os.path

    filename = from_posix(filename)
    file_len = os.path.getsize(filename)

    # By default only a leading chunk of the file is inspected...
    read_len = min(_READ_CHUNK_SIZE, file_len)

    # ... unless the caller explicitly asks for the whole file.
    if kwargs.get("read_all", False):
        read_len = file_len

    # Read the first read_len bytes raw, so we can detect the encoding
    with open(filename, "rb") as raw_handle:
        raw = raw_handle.read(read_len)

    # Detect the encoding the file specifies, if any.
    import codecs

    if raw.startswith(codecs.BOM_UTF8):
        encoding = "utf-8-sig"
    else:
        # Detect encoding using the best detector available
        try:
            # First try ICU. ICU will report ASCII in the first 32 Bytes as
            # ISO-8859-1, which isn't exactly wrong, but maybe optimistic.
            import icu

            encoding = icu.CharsetDetector(raw).detect().getName().lower()
        except ImportError:  # pragma: nocover
            # If that doesn't work, try chardet - it's not got native components,
            # which is a bonus in some environments, but it's not as precise.
            import chardet

            # chardet reports an encoding of None when it has no guess (for
            # instance for empty input). Treat that like plain ASCII instead
            # of failing on None.lower().
            encoding = (chardet.detect(raw)["encoding"] or "ascii").lower()

            # Chardet is more brutal in that it reports ASCII if none of the first
            # Bytes contain high bits. To emulate ICU, we just bump up the detected
            # encoding.
            if encoding == "ascii":
                encoding = "iso-8859-1"

        # Both chardet and ICU may report a single-byte encoding for data
        # that is in fact valid UTF-8. When one of these encodings is
        # reported, try decoding as UTF-8 first and prefer that if it works.
        if default_to_utf8 and encoding in ("ascii", "iso-8859-1", "windows-1252"):
            # Try decoding as utf-8
            try:
                raw.decode("utf-8")
                # If this worked... well there's no guarantee it's utf-8, to be
                # honest.
                encoding = "utf-8"
            except UnicodeDecodeError:
                # Decoding as utf-8 failed, so we can't default to it.
                pass

    return encoding


def read_file(filename, encoding=None):
    """
    Return the decoded text of a file, honouring byte order marks.

    :param str filename: The name of the file to read.
    :param str encoding: The encoding to use. When omitted (or empty),
        detect_encoding is asked to determine it.
    :return: The file contents.
    :rtype: str
    """
    local_name = from_posix(filename)

    # Fall back to auto-detection only when no encoding was supplied.
    codec = encoding or detect_encoding(local_name)

    with open(local_name, encoding=codec) as source:
        text = source.read()
    return text


def write_file(filename, contents, encoding=None):
    """
    Write text to a file using the given encoding.

    The default encoding is 'utf-8'. It's recommended not to change that for
    JSON or YAML output.

    :param str filename: The name of the file to write.
    :param str contents: The file contents to write.
    :param str encoding: The encoding to use. When omitted (or empty),
        'utf-8' is used.
    """
    target = from_posix(filename)
    with open(target, mode="w", encoding=encoding or "utf-8") as sink:
        sink.write(contents)