File: app_rewrite.py

package info (click to toggle)

pypy3 7.3.19%2Bdfsg-2

links: PTS, VCS
area: main
in suites: trixie
size: 212,236 kB
sloc: python: 2,098,316; ansic: 540,565; sh: 21,462; asm: 14,419; cpp: 4,451; makefile: 4,209; objc: 761; xml: 530; exp: 499; javascript: 314; pascal: 244; lisp: 45; csh: 12; awk: 4

file content (39 lines) | stat: -rw-r--r-- 1,703 bytes

parent folder | download | duplicates (5)

import re

ASCII_IS_DEFAULT_ENCODING = False

cookie_re = re.compile(r"^[ \t\f]*#.*coding[:=][ \t]*[-\w.]+")
BOM_UTF8 = '\xef\xbb\xbf'

def _prepare_source(fn):
    """Read the source code for re-writing."""
    try:
        stat = fn.stat()
        source = fn.read("rb")
    except EnvironmentError:
        return None, None
    if ASCII_IS_DEFAULT_ENCODING:
        # ASCII is the default encoding in Python 2. Without a coding
        # declaration, Python 2 will complain about any bytes in the file
        # outside the ASCII range. Sadly, this behavior does not extend to
        # compile() or ast.parse(), which prefer to interpret the bytes as
        # latin-1. (At least they properly handle explicit coding cookies.) To
        # preserve this error behavior, we could force ast.parse() to use ASCII
        # as the encoding by inserting a coding cookie. Unfortunately, that
        # messes up line numbers. Thus, we have to check ourselves if anything
        # is outside the ASCII range in the case no encoding is explicitly
        # declared. For more context, see issue #269. Yay for Python 3 which
        # gets this right.
        end1 = source.find("\n")
        end2 = source.find("\n", end1 + 1)
        if (not source.startswith(BOM_UTF8) and
            cookie_re.match(source[0:end1]) is None and
            cookie_re.match(source[end1 + 1:end2]) is None):
            try:
                source.decode("ascii")
            except UnicodeDecodeError:
                # Let it fail in real import.
                return None, None
    # On Python versions which are not 2.7 and less than or equal to 3.1, the
    # parser expects *nix newlines.
    return stat, source