File: docstring_detection.py

package info (click to toggle)
flake8-quotes 3.4.0-4
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 300 kB
  • sloc: python: 938; sh: 28; makefile: 2
file content (80 lines) | stat: -rw-r--r-- 3,776 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
import tokenize

# I don't think this is a minimized state machine, but it's clearer this
# way. Namely, the class vs. function states can be merged

# At the start of the module, where a string would be the module docstring
STATE_EXPECT_MODULE_DOCSTRING = 0
# After seeing the class keyword, we're waiting for the block colon (and do bracket counting)
STATE_EXPECT_CLASS_COLON = 1
# After seeing the colon in a class definition we're expecting possibly a docstring
STATE_EXPECT_CLASS_DOCSTRING = 2
# Same as EXPECT_CLASS_COLON, but for function definitions
STATE_EXPECT_FUNCTION_COLON = 3
# Same as EXPECT_CLASS_DOCSTRING, but for function definitions
STATE_EXPECT_FUNCTION_DOCSTRING = 4
# Just skipping tokens until we observe a class or a def.
STATE_OTHER = 5

# The states in which the next significant token is a docstring if it is a string
_DOCSTRING_STATES = frozenset([
    STATE_EXPECT_MODULE_DOCSTRING,
    STATE_EXPECT_CLASS_DOCSTRING,
    STATE_EXPECT_FUNCTION_DOCSTRING,
])

# These tokens don't matter here - they don't get in the way of docstrings
TOKENS_TO_IGNORE = [
    # tokenize.tokenize() starts every stream with an ENCODING token; if it were treated as
    # a regular token the module docstring would never be detected for such streams
    tokenize.ENCODING,
    tokenize.NEWLINE,
    tokenize.INDENT,
    tokenize.DEDENT,
    tokenize.NL,
    tokenize.COMMENT,
]

_OPENING_BRACKETS = ('(', '[', '{')
_CLOSING_BRACKETS = (')', ']', '}')


def get_docstring_tokens(tokens):
    """Return the set of string tokens that sit in a docstring position.

    The token stream is run through a small state machine that looks for a
    string token as the first significant token of the module, or as the
    first significant token after the block colon of a ``class`` or ``def``
    statement. Tokens listed in ``TOKENS_TO_IGNORE`` are skipped entirely.

    Only the first string token in such a position is collected; any further
    string token that directly follows it is not.

    :param tokens: iterable of tokens exposing ``type`` and ``string``
        attributes, such as the ones produced by ``tokenize.generate_tokens``
        or ``tokenize.tokenize``. Tokens must be hashable.
    :return: a set holding the tokens identified as docstrings.
    """
    state = STATE_EXPECT_MODULE_DOCSTRING
    # The number of currently open parentheses, square brackets, etc.
    # This doesn't check if they're properly balanced, i.e. there isn't ([)], but we shouldn't
    # need to - if they aren't, it shouldn't parse at all, so we ignore the bracket type
    bracket_count = 0
    docstring_tokens = set()

    for token in tokens:
        if token.type in TOKENS_TO_IGNORE:
            continue

        # Evaluated before the state is touched, so every branch below sees the state the
        # machine was in when this token arrived
        expecting_docstring = state in _DOCSTRING_STATES

        if token.type == tokenize.STRING:
            if expecting_docstring:
                docstring_tokens.add(token)
                state = STATE_OTHER
        # A class means we'll expect a colon after that
        elif token.type == tokenize.NAME and token.string == 'class':
            state = STATE_EXPECT_CLASS_COLON
            # Just in case - they should be balanced normally
            bracket_count = 0
        # A def means we'll expect a colon after that
        elif token.type == tokenize.NAME and token.string == 'def':
            state = STATE_EXPECT_FUNCTION_COLON
            # Just in case - they should be balanced normally
            bracket_count = 0
        # If we get a colon and we're expecting it, move to the next state
        elif token.type == tokenize.OP and token.string == ':':
            # If there are still left brackets open, it must be something other than the
            # block start (a slice, a dict literal, a lambda in a default value, ...)
            if bracket_count == 0:
                if state == STATE_EXPECT_CLASS_COLON:
                    state = STATE_EXPECT_CLASS_DOCSTRING
                elif state == STATE_EXPECT_FUNCTION_COLON:
                    state = STATE_EXPECT_FUNCTION_DOCSTRING
        # Count opening and closing brackets in bracket_count. A bracket where a docstring
        # was expected means there is no docstring
        elif token.type == tokenize.OP and token.string in _OPENING_BRACKETS:
            bracket_count += 1
            if expecting_docstring:
                state = STATE_OTHER
        elif token.type == tokenize.OP and token.string in _CLOSING_BRACKETS:
            bracket_count -= 1
            if expecting_docstring:
                state = STATE_OTHER
        # The token is not one of the recognized types. If we're expecting a colon, then all good,
        # but if we're expecting a docstring, it would no longer be a docstring
        elif expecting_docstring:
            state = STATE_OTHER

    return docstring_tokens