1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214
|
#! /usr/bin/env python
# -*- coding: utf-8 -*-
# SPDX-License-Identifier: BSD-2-Clause
# Copyright 2022 the gpsd project contributors
"""Scans files for tabs, non-ASCII runes, and trailing whitespace on lines.
.. moduleauthor:: James Browning <JamesB.fe80@GMail.com>
for a file containing a line:
edited. Symbols “$”, “*” “,” and “.” are not allowed
we get the image of:
=== filescan.py ===
10: edited. Symbols "$", "*" "," and "." are not allowed
EAT : " !!! !!! !!! !!! !!! !!! !!! !!! 44644
====================================================================================
This file seems to have some issues deliberately left in,
as indicated by the result block. The result block
shown is sanitized to avoid Hall Of Mirrors type effects.
"""
import sys
# Codec used both to decode scanned lines and to encode console output.
encoding = "utf-8"
# Defaults for interpreters where str *is* bytes (Python 2): the built-in
# str is enough as a converter, and files are opened with mode "rUb".
pstr = str
fopt = "rUb"
if str is not bytes:
    # str and bytes are distinct types (Python 3): replace the defaults
    # above with versions that decode explicitly.
    import io

    fopt = "rb"

    def make_std_wrapper(h):
        """Wrap std{err,out} to permit unicode output.

        h must expose a ``buffer`` attribute; a new line-buffered
        io.TextIOWrapper using the module-level encoding is returned.
        """
        return io.TextIOWrapper(
            h.buffer,
            encoding=encoding,
            newline="\n",
            line_buffering=True,
        )

    def pstr(s):
        """Turn (byte(array)) s into a string.

        bytes and bytearray are decoded with the module-level encoding;
        anything else is passed through str().
        """
        if not isinstance(s, (bytes, bytearray)):
            return str(s)
        return str(s, encoding=encoding)

    # Module-level side effect: from here on everything written to
    # stdout/stderr is encoded with the module-level encoding.
    sys.stdout = make_std_wrapper(sys.stdout)
    sys.stderr = make_std_wrapper(sys.stderr)
def pchr(c):
    """Map an integer code point to its one-character string.

    Values that are not integers are handed back untouched.
    """
    return chr(c) if isinstance(c, int) else c
def pord(c):
    """Map a one-character string to its integer code point.

    Values that are not strings are handed back untouched.
    """
    if not isinstance(c, str):
        return c
    return ord(c)
def mangled(strung, mods):
    """Return a byte-array copy of strung with selected octets shifted.

    #. strung:: the input byte-array (or bytes); it is never modified
    #. mods:: a nested tuple-like with the mods
        #. top level is the group of mods
        #. level below that is the increment/decrement
           value and the list of elements to modify by index

    The work is best-effort: when a mod cannot be applied (index out of
    range, octet leaving 0..255, malformed mods, input that cannot be
    copied) the exception and the working state are printed and whatever
    has been built so far is returned instead of raising.
    """
    # Fallback return value should even the copy below fail.
    result = strung
    chain = point = None
    try:
        # Work on a private copy so the caller's buffer stays intact;
        # this also allows immutable bytes as input.
        result = bytearray(strung)
        for chain in mods:
            for point in chain[1]:
                result[point] += chain[0]
    except Exception as e:
        # Deliberate catch-all: a marker row that cannot be built must
        # not abort the scan, so dump the state and carry on.
        dicky = {"e": e, "mods": mods, "strung": strung, "result": result}
        if chain is not None:
            dicky["chain"] = chain
        if point is not None:
            dicky["point"] = point
        print(repr(dicky))
        print(repr(e))
    return result
def hunt_in_file(fname):
    """Try to open and scan a file for violations given a relative file path.

    #. fname:: open()able argument to find the file, a filename.

    The first part collects the violating lines:

    . Open the file (closed again when the scan ends, even on error).
    . For each line record the columns holding characters above 0x7E
      and the columns holding horizontal tabs.
    . Keep the line if it has either, or if it has trailing whitespace.

    The display part runs only if something was collected:

    . Print the banner with the filename and short horizontal rules.
    . Build the marker row: blanks under the text, "4" under the
      trailing whitespace, then let mangled() raise the tab columns
      by 2 and the non-ASCII columns by 1.
    . Print the line number right-aligned in seven columns, ':', and
      the line; a newline is supplied if the line has none.
    . Print the offense summary (E/A/T) and the marker row.
    . Print a long horizontal rule, eight columns wider than the
      longest reported line, to separate anything that comes next.

    Nothing is returned; all output goes to sys.stdout.
    """
    dumper = []
    long_line = 0
    tab_code = pord("\t")
    # The context manager closes the handle even if decoding a line raises.
    with open(fname, fopt) as handle:
        for lnum, line in enumerate(handle, start=1):
            a_line = pstr(line)
            # Length without any trailing whitespace versus length
            # without just the line terminator.
            stripped = len(a_line.rstrip())
            content = len(a_line.rstrip("\n\r"))
            non_ascii = []
            tabs = []
            for col, octet in enumerate(a_line):
                code = pord(octet)
                if code > 0x7E:
                    non_ascii.append(col)
                elif code == tab_code:
                    tabs.append(col)
            if non_ascii or tabs or content != stripped:
                dumper.append(
                    (lnum, a_line, stripped, content, non_ascii, tabs)
                )
                long_line = max(content, long_line)
    if not dumper:
        return
    sys.stdout.write("=== %s ===\n" % fname)
    for lnum, text, stripped, content, non_ascii, tabs in dumper:
        # Blanks under the text, "4" under the trailing whitespace and
        # the terminator position, plus one spare blank.
        strung = bytearray(" " * stripped, encoding=encoding)
        strung += (
            bytearray("4" * (1 + content - stripped), encoding=encoding)
            + b" "
        )
        vec = "E" * (content > stripped)
        vec += "A" * bool(non_ascii)
        vec += "T" * bool(tabs)
        strung = mangled(strung, [[2, tabs], [1, non_ascii]])
        # A final line without a terminator would otherwise run into
        # the marker row.
        if not text.endswith("\n"):
            text += "\n"
        sys.stdout.write("%7d:%s" % (lnum, text))
        sys.stdout.write("%-7s:%s\n" % (vec, pstr(strung)))
    sys.stdout.write("%s\n\n" % ("=" * (long_line + 8)))
if __name__ == "__main__":
    # Command-line entry point.
    # . --version / -V and --help / -h are handled by argparse and exit.
    # . File names are taken from the command-line arguments and then,
    #   if stdin is not a TTY (probably the bottom of a pipe), one per
    #   line from stdin.
    # . With no file names and a TTY on stdin the help text is shown.
    import argparse

    parser = argparse.ArgumentParser(
        prog="filescan",
        description="Scans files for tabs, non-ASCII runes,"
        + " and trailing whitespace on lines.",
        epilog="""* The name of the offending file, as given, with short horizontal rules before and after.
* A non-compliant line number, followed by a colon, and the text of the line.
* An offense summary string for the previous line, and offense pointers
* There are three recognized offenses at this point,
E - Trailing whitespace
A - Inclusion of non-ASCII octets
T - Inclusion of (horizontal) tabs
* There are 3 offense location indicators, that indicate the offset of can be below the offending portion of the line.
" - non-trailing horizontal tabs.
! - a non-ASCII octet.
4 - trailing spaces (0x20).
6 - trailing horizontal tabs.
Offense location indicators seem misplaced because horizontal tabs expand to the following multiple of 8, and non-ASCII octets contract.
* A longer horizontal rule to separate any additional files.
* Takes a list from stdin if piped in as well.""",
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    # parser.add_argument(
    #     "--stdin",
    #     "-S",
    #     action="store_true",
    #     default=False,
    #     help="Grab list of files from stdin",
    # )
    parser.add_argument(
        "--version", "-V", action="version", version="%(prog)s 0.5"
    )
    parser.add_argument(
        "file_list", nargs="*", help="List of files to scan"
    )
    args = parser.parse_args()
    # Each ring is an iterable of file names.  file_list is always set
    # (an empty list when no names are given), so only a non-empty list
    # counts as a source; otherwise the "nothing to do" check below
    # could never fire.
    rings = []
    if args.file_list:
        rings.append(args.file_list)
    if not sys.stdin.isatty():
        rings.append(sys.stdin)
    if not rings:
        parser.print_help()
    for ring in rings:
        for key in ring:
            name = key.strip()
            # Blank entries (e.g. an empty line on stdin) name no file.
            if name:
                hunt_in_file(name)
|