1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161
|
#!/usr/bin/env python
"""Test that untokenize always generates the expected output.
That is, the output of
untokenize.untokenize(tokenize.generate_tokens(file_input.readline))
is exactly equal to the input file.
"""
from __future__ import print_function
from __future__ import unicode_literals
import io
import os
import sys
import tokenize
import untokenize
# Python 2/3 compatibility: Python 3 removed the 'unicode' builtin.
# Referencing it raises NameError there, in which case we alias it to
# str so later calls like unicode(name) work on both major versions.
try:
    unicode
except NameError:
    unicode = str
def open_with_encoding(filename, encoding, mode='r'):
    """Open *filename* with the given *encoding* and return the file object.

    newline='' disables universal-newline translation so the exact line
    endings of the file survive the round trip.
    """
    return io.open(filename, mode=mode, encoding=encoding, newline='')
def detect_encoding(filename):
    """Return the text encoding of *filename*.

    The PEP 263 coding cookie / BOM is detected with the standard
    library's tokenize.detect_encoding() (the original code pulled in the
    third-party fissix tokenizer for the same job).  The file is then
    decoded once as a sanity check; if detection or decoding fails, fall
    back to 'latin-1', which can decode any byte sequence.
    """
    try:
        with open(filename, 'rb') as input_file:
            # detect_encoding() raises SyntaxError on a malformed cookie
            # and LookupError on an unknown codec name.
            encoding = tokenize.detect_encoding(input_file.readline)[0]

        # Check for correctness of encoding by decoding the whole file.
        with io.open(filename, mode='r', encoding=encoding,
                     newline='') as input_file:
            input_file.read()

        return encoding
    except (SyntaxError, LookupError, UnicodeDecodeError):
        return 'latin-1'
def diff(before, after):
    """Return a unified diff of the two source strings."""
    import difflib

    before_lines = before.splitlines(True)
    after_lines = after.splitlines(True)
    delta = difflib.unified_diff(before_lines, after_lines, lineterm='\n')
    return ''.join(delta)
def run(filename):
    """Check untokenize with file.

    Return True on success, False on a round-trip mismatch (the original
    fell off the end and returned None on failure, contradicting its own
    docstring; the falsy value happened to work but was implicit).
    """
    with open_with_encoding(filename,
                            encoding=detect_encoding(filename)) as input_file:
        source_code = input_file.read()

    # Handle files with trailing whitespace, but no final newline.
    # tokenize.generate_tokens() will not report the trailing whitespace in
    # such a case.
    if source_code.endswith((' ', '\t')):
        source_code = source_code.rstrip()

    string_io = io.StringIO(source_code)
    generated = untokenize.untokenize(
        tokenize.generate_tokens(string_io.readline))

    if source_code == generated:
        return True

    print('untokenize failed on ' + filename,
          file=sys.stderr)
    print(diff(source_code, generated),
          file=sys.stderr)
    return False
def process_args():
    """Build the argument parser and return the parsed namespace."""
    import argparse

    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument('files', nargs='*', help='files to format')
    parsed = arg_parser.parse_args()
    return parsed
def check(args):
    """Run test recursively on directory of files.

    Tests the files/directories in args.files, or every directory on
    sys.path when none are given.  Return False as soon as any file
    fails the round-trip test; True otherwise.
    """
    if args.files:
        dir_paths = args.files
    else:
        dir_paths = [path for path in sys.path
                     if os.path.isdir(path)]

    # Work queue of paths still to process; directories expand into it.
    filenames = dir_paths
    completed_filenames = set()

    while filenames:
        try:
            name = os.path.realpath(filenames.pop(0))
            if not os.path.exists(name):
                # Invalid symlink.
                continue

            if name in completed_filenames:
                print('---> Skipping previously tested ' + name,
                      file=sys.stderr)
                continue
            else:
                # BUG FIX: this was completed_filenames.update(name), which
                # iterates the string and adds each *character* of the path,
                # so previously tested files were never actually skipped.
                completed_filenames.add(name)

            if os.path.isdir(name):
                for root, directories, children in os.walk(unicode(name)):
                    filenames += [os.path.join(root, f) for f in children
                                  if f.endswith('.py') and
                                  not f.startswith('.')]

                    # Prune hidden directories from the walk in place.
                    directories[:] = [d for d in directories
                                      if not d.startswith('.')]
            else:
                print('---> Testing with ' + name,
                      file=sys.stderr)

                # os.path.join(name) with one argument was a no-op.
                if not run(name):
                    return False
        except (IndentationError,
                tokenize.TokenError,
                UnicodeDecodeError,
                UnicodeEncodeError) as exception:
            print('---> Skipping bad file {0} ({1})'.format(name, exception),
                  file=sys.stderr)
            continue

    return True
def main():
    """Run the check and return a process exit status (0 = success)."""
    args = process_args()
    if check(args):
        return 0
    return 1
if __name__ == '__main__':
    try:
        sys.exit(main())
    except KeyboardInterrupt:
        # Exit with a failure status on Ctrl-C instead of dumping a traceback.
        sys.exit(1)
|