File: docstring_linter.py

From the Debian package pytorch-cuda 2.6.0+dfsg-7 (file: 168 lines, 5,534 bytes).
from __future__ import annotations

import sys
import token
from functools import cached_property
from pathlib import Path
from typing import Iterator, Sequence, TYPE_CHECKING


_PARENT = Path(__file__).parent.absolute()
_PATH = [Path(p).absolute() for p in sys.path]

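# If this file's directory is on sys.path we are being run as a standalone script,
# so import _linter directly; otherwise use the package-relative import.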
if TYPE_CHECKING or _PARENT not in _PATH:
    from . import _linter
else:
    import _linter

if TYPE_CHECKING:
    from tokenize import TokenInfo


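# Line-count limits: definitions longer than this must carry a docstring.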
MAX_LINES = {"class": 100, "def": 80}

MIN_DOCSTRING = 16  # docstrings shorter than this are ignored
IGNORE_PROTECTED = True  # If True, ignore classes and functions whose names start with _.

ERROR_FMT = "Every {type} with more than {length} lines needs a docstring"

DESCRIPTION = """`docstring_linter` reports on long functions, methods or classes
without docstrings"""

# How many top violations to report?
REPORT_TOP_RESULTS = 3


def _is_def(t: TokenInfo) -> bool:
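    """Return True if the token opens a `class` or `def` statement."""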
    return t.type == token.NAME and t.string in ("class", "def")


class DocstringLinter(_linter.FileLinter):
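    """Flags long classes, functions, and methods that lack an adequate docstring."""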
    linter_name = "docstring_linter"
    description = DESCRIPTION
    is_fixer = False
    results: dict[str, list[tuple[int, Path, str]]]

    def __init__(self, argv: list[str] | None = None) -> None:
        super().__init__(argv)
        self.results = {}

        help = "Maximum number of lines for an undocumented class"
        self.parser.add_argument(
            "--max-class", "-c", default=MAX_LINES["class"], type=int, help=help
        )

        help = "Maximum number of lines for an undocumented function"
        self.parser.add_argument(
            "--max-def", "-d", default=MAX_LINES["def"], type=int, help=help
        )

        help = "Minimum number of characters for a docstring"
        self.parser.add_argument(
            "--min-docstring", "-m", default=MIN_DOCSTRING, type=int, help=help
        )

        help = "Lint functions, methods and classes that start with _"
        self.parser.add_argument(
            "--lint-protected", "-p", action="store_true", help=help
        )

    @cached_property
    def max_lines(self) -> dict[str, int]:
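        """Per-keyword line limits, after any command-line overrides."""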
        return {"class": self.args.max_class, "def": self.args.max_def}

    def lint_all(self) -> bool:
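        """Lint every file, then print a summary of the worst offenders (unless under lintrunner)."""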
        success = super().lint_all()
        if not self.args.lintrunner and self.results:
            self._report_results()
        return success

    def _lint(self, pf: _linter.PythonFile) -> Iterator[_linter.LintResult]:
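        """Yield a LintResult for each definition in `pf` that is too long yet under-documented."""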
        tokens = pf.tokens
        indents = indent_to_dedent(tokens)
        defs = [i for i, t in enumerate(tokens) if _is_def(t)]

        def next_token(start: int, token_type: int, error: str) -> int:  # type: ignore[return]
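            """Return the index of the first `token_type` token at or after `start`."""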
            for i in range(start, len(tokens)):
                if (t := tokens[i]).type == token_type:
                    return i
            _linter.ParseError.check(False, tokens[-1], error)

        for i in defs:
            name = next_token(i + 1, token.NAME, "Definition with no name")
            if not self.args.lint_protected and tokens[name].string.startswith("_"):
                continue

            indent = next_token(name + 1, token.INDENT, "Definition with no indent")
            dedent = indents[indent]

            lines = tokens[dedent].start[0] - tokens[indent].start[0]
            max_lines = self.max_lines[tokens[i].string]
            if lines <= max_lines:
                continue

            # Now search for a docstring
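            # (the docstring, if any, is the first non-trivial token after the INDENT)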
            docstring_len = -1
            for k in range(indent + 1, len(tokens)):
                tk = tokens[k]
                if tk.type == token.STRING:
                    docstring_len = len(tk.string)
                    break
                if tk.type not in _linter.EMPTY_TOKENS:
                    break

            if docstring_len >= self.args.min_docstring:
                continue

            # Now check if it's omitted
            if pf.omitted(pf.tokens[i:indent]):
                continue

            t = tokens[i]
            def_name = "function" if t.string == "def" else t.string
            tname = tokens[name].string
            msg = f"docstring found for {def_name} '{tname}' ({lines} lines)"
            if docstring_len < 0:
                msg = "No " + msg
            else:
                msg = msg + f" was too short ({docstring_len} characters)"
            yield _linter.LintResult(msg, *t.start)
            if pf.path is not None:
                self.results.setdefault(def_name, []).append((lines, pf.path, tname))

    def _report_results(self) -> None:
        print()
        for i, (k, v) in enumerate(sorted(self.results.items())):
            if i:
                print()
            top = sorted(v, reverse=True)[:REPORT_TOP_RESULTS]
            if len(top) == 1:
                s = ""
                t = ""
            else:
                s = "es" if k.endswith("s") else "s"
                t = f"{len(top)} "
            print(f"Top {t}undocumented {k}{s}:")
            for lines, path, tname in top:
                print(f"    {lines} lines: {path}:{tname}")


def indent_to_dedent(tokens: Sequence[TokenInfo]) -> dict[int, int]:
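    """Map each INDENT token's index to the index of its matching DEDENT token."""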
    indent_to_dedent: dict[int, int] = {}
    stack: list[int] = []

    for i, t in enumerate(tokens):
        if t.type == token.INDENT:
            stack.append(i)
        elif t.type == token.DEDENT:
            assert stack
            indent_to_dedent[stack.pop()] = i

    # An unmatched INDENT can't happen: tokenization would already have failed on bad indentation.
    assert not stack

    return indent_to_dedent


if __name__ == "__main__":
    DocstringLinter.run()
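
# Example invocations (a sketch: the flags are defined in __init__ above; the
# positional path arguments are assumed to be handled by the _linter.FileLinter
# base class, and the paths themselves are purely illustrative):
#
#   python docstring_linter.py my_module.py
#   python docstring_linter.py --max-def 50 --lint-protected my_package/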