File: source_encoding.py

package info (click to toggle)

python-weberror 0.13.1%2Bdfsg-1

links: PTS, VCS
area: main
in suites: buster, stretch
size: 408 kB
ctags: 441
sloc: python: 2,774; makefile: 18

file content (52 lines) | stat: -rw-r--r-- 1,646 bytes

parent folder | download | duplicates (5)

"""Parse a Python source code encoding string"""
import codecs
import re

# Regexp to match python magic encoding line.  The lazy ``.*?`` (first
# ``coding`` on the line wins) and the ASCII-only name class follow the
# pattern given in PEP 263.
PYTHON_MAGIC_COMMENT_re = re.compile(
    r'[ \t\f]* \# .*? coding[=:][ \t]*([-_.a-zA-Z0-9]+)', re.VERBOSE)


def _split_bom(line):
    """Return ``(has_bom, text)`` for one source line.

    Byte strings are decoded as latin-1, which maps every byte to a code
    point and therefore never fails; the magic comment itself is pure ASCII.
    Returns ``None`` when ``line`` is neither a byte string nor text.
    """
    if isinstance(line, (bytes, bytearray)):
        has_bom = line.startswith(codecs.BOM_UTF8)
        if has_bom:
            line = line[len(codecs.BOM_UTF8):]
        return has_bom, line.decode('latin-1')
    if isinstance(line, type(u'')):
        # Already-decoded text carries the BOM as a single U+FEFF character.
        has_bom = line.startswith(u'\ufeff')
        if has_bom:
            line = line[1:]
        return has_bom, line
    return None


def _is_complete_statement(text):
    """Tell whether ``text`` parses on its own as Python source.

    When it does not, either the source is not valid Python or the next
    line is a continuation of this one; in both cases the next line must
    not be scanned for a magic comment.
    """
    # ``ast`` replaces the ``parser`` module, which was removed in
    # Python 3.10.
    import ast
    try:
        ast.parse(text)
    except (SyntaxError, ValueError, MemoryError, RuntimeError):
        # ValueError: embedded NUL bytes on some versions.
        # MemoryError / RuntimeError: pathologically nested input.
        return False
    return True


def _is_utf8_name(name):
    """Tell whether ``name`` spells UTF-8 (e.g. ``UTF_8``, ``utf-8-unix``)."""
    normalized = name.lower().replace('_', '-')
    return normalized == 'utf-8' or normalized.startswith('utf-8-')


def parse_encoding(lines):
    """Deduce the encoding of a source file from magic comment.

    It does this in the same way as the `Python interpreter`__

    .. __: https://docs.python.org/3/reference/lexical_analysis.html#encoding-declarations

    The ``lines`` argument should be a list of the first 2 lines of the
    source code.  Lines may be byte strings or text; a shorter list is
    accepted.

    Returns ``'utf_8'`` when the first line starts with a UTF-8
    byte-order-mark, otherwise the encoding named by the magic comment,
    otherwise ``None``.  ``None`` is also returned when ``lines`` is
    empty or unusable, and when a byte-order-mark is combined with a magic
    comment naming a different encoding (Python refuses to compile such a
    file).  This function never raises for malformed input.

    (From Jeff Dairiki)
    """
    try:
        first = lines[0]
    except (IndexError, KeyError, TypeError):
        return None
    split = _split_bom(first)
    if split is None:
        return None
    has_bom, line1 = split

    m = PYTHON_MAGIC_COMMENT_re.match(line1)
    if not m and _is_complete_statement(line1):
        # Line 1 stands on its own, so line 2 (if any) may hold the comment.
        try:
            second = lines[1]
        except (IndexError, KeyError, TypeError):
            second = None
        split = _split_bom(second)
        if split is not None:
            m = PYTHON_MAGIC_COMMENT_re.match(split[1])

    if not m:
        if has_bom:
            return 'utf_8'
        return None

    # The name is ASCII by construction; ``str`` yields the native string
    # type whatever the type of the input lines.
    name = str(m.group(1))
    if has_bom:
        if _is_utf8_name(name):
            return 'utf_8'
        # Conflicting byte-order-mark and magic comment: report "unknown"
        # rather than raising, as callers have never had to handle an
        # exception from this function.
        return None
    return name