File: nudatus.py

package info (click to toggle)
python-nudatus 0.0.5-2
  • links: PTS, VCS
  • area: main
  • in suites: bookworm
  • size: 160 kB
  • sloc: python: 289; makefile: 50; sh: 12
file content (157 lines) | stat: -rw-r--r-- 4,634 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
# -*- coding: utf-8 -*-
"""
nudatus strips comments from Python scripts
https://pypi.org/project/nudatus/

© 2018-2021 Zander Brown

See the LICENSE file for more information, or visit:
https://opensource.org/licenses/MIT
"""

import argparse
import sys
import token
import tokenize
from io import BytesIO
from tokenize import tokenize as tokenizer
from typing import List, Optional

# Components of the version number, joined with dots by get_version()
_VERSION = (0, 0, 5)


def get_version() -> str:
    """
    Returns the version number as a dotted string
    """
    return ".".join(map(str, _VERSION))


# Characters opening / closing a bracketed expression. tokenize reports all
# of them with the generic OP token type, so they are told apart by text
_OPEN_BRACKETS = frozenset("([{")
_CLOSE_BRACKETS = frozenset(")]}")

# Token types that end a line, a line finished by one of these is not
# carried over to the next one by a trailing '\'
_END_OF_LINE = frozenset((token.NEWLINE, tokenize.NL))

# Token types after which, outside of any brackets, a string token stands
# at the start of a statement
_BEFORE_STATEMENT = frozenset(
    (token.NEWLINE, tokenize.NL, token.DEDENT, tokenize.ENCODING)
)


def mangle(text: str) -> str:
    """
    Takes a script and mangles it

    Comments are dropped, and so are strings that start a statement
    (docstrings); a removed string is replaced by the newlines it
    contained. Strings that start a line inside (), [] or {} are part of
    an expression and are kept

    Returns the mangled script as a string

    TokenError is thrown when encountering bad syntax

    NOTE: a block made up of nothing but a docstring is left without a
    body, the result is then not a valid script
    """

    # Wrap the input script as a byte stream for the tokenizer
    buff = BytesIO(text.encode("utf-8"))
    # Pieces of the mangled script, joined once at the end
    pieces = []

    last_tok = token.INDENT
    last_line = -1
    last_col = 0
    last_line_text = ""
    # How many (, [ and { are currently open
    depth = 0

    # Build tokens from the script
    tokens = tokenizer(buff.readline)
    for t, tok_text, (line_s, col_s), (line_e, col_e), line in tokens:
        # If this is a new line (except the very first)
        if line_s > last_line and last_line != -1:
            # Reset the column
            last_col = 0
            # If the last line ended in a '\' (continuation) recreate it.
            # When the last token already ended its line the '\' can only
            # be the tail of a (now removed) comment, not a continuation
            if last_tok not in _END_OF_LINE and last_line_text.rstrip(
            ).endswith("\\"):
                pieces.append(" \\\n")

        # Keep track of how deep inside brackets we are
        if t == token.OP:
            if tok_text in _OPEN_BRACKETS:
                depth += 1
            elif tok_text in _CLOSE_BRACKETS:
                depth -= 1

        # Remove docstrings
        # Docstrings are strings not used in an expression,
        # unfortunately it isn't as simple as "t is string and t
        # not in expression"
        if t == token.STRING and (
            last_tok == token.INDENT
            or (last_tok in _BEFORE_STATEMENT and depth == 0)
        ):
            # Output the number of lines corresponding to those in
            # the docstring
            pieces.append("\n" * tok_text.count("\n"))
        # Or is it a standard comment
        elif t == tokenize.COMMENT:
            # Plain comment, just don't write it
            pass
        else:
            # Recreate indentation, ideally we should use tabs
            if col_s > last_col:
                pieces.append(" " * (col_s - last_col))
            # The first token specifies the encoding but we already
            # know it's utf-8 and writing it just gives us an
            # invalid script
            if t != tokenize.ENCODING:
                pieces.append(tok_text)

        # Store the previous state
        last_tok = t
        last_col = col_e
        last_line = line_e
        last_line_text = line

    # Return a string
    return "".join(pieces)


def main(argv: Optional[List[str]] = None) -> None:
    """
    Command line entry point

    Mangles the script named by the first positional argument and writes
    the result to the file named by the second one, or to stdout when no
    output is given

    argv is the list of command line arguments without the program name,
    when it is None the arguments are taken from sys.argv

    Exits with status 1 when no input is given or when reading, mangling
    or writing fails
    """
    # Only fall back to the process arguments when none were passed at
    # all, an explicitly empty list must not pick up sys.argv
    if argv is None:
        argv = sys.argv[1:]

    parser = argparse.ArgumentParser(
        description="Strip comments from a Python script.",
        epilog="nūdātus “strip, make naked”",
    )
    parser.add_argument("input", nargs="?", default=None)
    parser.add_argument("output", nargs="?", default=None)
    parser.add_argument(
        "--version", action="version", version="%(prog)s " + get_version()
    )
    args = parser.parse_args(argv)

    if not args.input:
        print("No file specified", file=sys.stderr)
        sys.exit(1)

    try:
        # mangle treats the script as UTF-8, so read and write it as such
        # rather than with whatever the locale encoding happens to be
        with open(args.input, "r", encoding="utf-8") as f:
            res = mangle(f.read())
        # The input is closed by now, nothing is held open while writing
        if not args.output:
            print(res, end="")
        else:
            with open(args.output, "w", encoding="utf-8") as o:
                o.write(res)
    except Exception as ex:
        # Top level boundary of the command line tool: report and exit
        print("Error mangling {}: {!s}".format(args.input, ex), file=sys.stderr)
        sys.exit(1)


if __name__ == "__main__":  # pragma: no cover
    # Called without arguments main() reads them from sys.argv itself
    main()