File: google.py

package info (click to toggle)
python-docstring-to-markdown 0.15-1
  • links: PTS, VCS
  • area: main
  • in suites: sid, trixie
  • size: 228 kB
  • sloc: python: 1,761; makefile: 2
file content (171 lines) | stat: -rw-r--r-- 4,635 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
import re
from textwrap import dedent
from typing import List

# All section names recognised in Google style docstrings.
# is_section() and looks_like_google() search for each name followed by ":".
SECTION_HEADERS: List[str] = [
    "Args",
    "Returns",
    "Raises",
    "Yields",
    "Example",
    "Examples",
    "Attributes",
    "Note",
    "Todo",
]

# These sections will not be parsed as a list of arguments/return values/etc;
# Section._parse() only dedents their content and keeps it as plain text.
PLAIN_TEXT_SECTIONS: List[str] = [
    "Examples",
    "Example",
    "Note",
    "Todo",
]

# Maps a regex pattern to its re.sub() replacement; google_to_markdown()
# applies every rule to the raw docstring before parsing it.
ESCAPE_RULES = {
    # Avoid Markdown in magic methods or filenames like __init__.py
    r"__(?P<text>\S+)__": r"\_\_\g<text>\_\_",
}


class Section:
    """One named section of a Google style docstring, rendered as Markdown.

    ``name`` is the header text handed to the constructor and ``content`` is
    the Markdown body produced from the raw section text.
    """

    def __init__(self, name: str, content: str) -> None:
        """Store the section name and convert ``content`` to Markdown.

        Args:
            name: Section header text, e.g. ``"Args"``.
            content: Raw lines that followed the header in the docstring.
        """
        self.name = name
        self.content = ""

        self._parse(content)

    def _parse(self, content: str) -> None:
        """Populate ``self.content`` from the raw section text.

        Sections listed in ``PLAIN_TEXT_SECTIONS`` are only dedented. Every
        other section is rendered as a Markdown bullet list with one bullet
        per entry; an entry of the form ``name: description`` gets its name
        wrapped in backticks.

        Args:
            content: Raw section text; trailing newlines are ignored.
        """
        content = content.rstrip("\n")

        if self.name in PLAIN_TEXT_SECTIONS:
            self.content = dedent(content)
            return

        # Group the lines into entries: a head line plus its continuations.
        parts: List[List[str]] = []

        for raw_line in content.split("\n"):
            if not raw_line.strip():
                # Blank lines carry no entry; rendering them would produce
                # empty "- " bullets.
                continue

            # Drop one level of section indentation. Only a *leading* run of
            # four spaces is removed so spacing inside the text is untouched.
            line = raw_line[4:] if raw_line.startswith("    ") else raw_line

            if parts and line.startswith(" "):
                # Continuation from a multiline description
                parts[-1].append(line)
            else:
                # Entering new description part
                parts.append([line])

        # Format section
        rendered: List[str] = []

        for head, *continuation in parts:
            arg, colon, description = head.partition(":")

            if colon:
                description = description.lstrip()
                # Width of "- `<arg>`: " so continuation lines line up with
                # the start of the description.
                indentation = (len(arg) + 6) * " "

                if not description and continuation:
                    # The description starts on the following line; move it
                    # up next to the argument name.
                    description = continuation.pop(0).lstrip()

                rendered.append("- `{}`: {}".format(arg, description))
            else:
                indentation = ""
                rendered.append("- {}".format(head))

            rendered.extend(
                "{}{}".format(indentation, line.lstrip()) for line in continuation
            )

        # Joining guarantees every entry sits on its own line, including an
        # entry that has neither a description nor continuation lines.
        self.content = "\n".join(rendered)

    def as_markdown(self) -> str:
        """Return the section as a level-4 Markdown heading followed by its body."""
        return "#### {}\n\n{}\n\n".format(self.name, self.content)


class GoogleDocstring:
    """A Google style docstring split into a description and its sections.

    ``description`` holds the text that precedes the first section header and
    ``sections`` holds one ``Section`` per header, in order of appearance.
    """

    def __init__(self, docstring: str) -> None:
        """Parse ``docstring`` immediately.

        Args:
            docstring: Full docstring text.
        """
        self.sections: List[Section] = []
        self.description: str = ""

        self._parse(docstring)

    def _parse(self, docstring: str) -> None:
        """Split ``docstring`` into ``self.description`` and ``self.sections``.

        A line for which ``is_section()`` is true starts a new section; all
        lines up to the next such line form that section's content. Lines
        before the first header make up the description.

        Args:
            docstring: Full docstring text.
        """
        self.sections = []
        self.description = ""

        description_lines: List[str] = []
        section_lines: List[str] = []
        cur_section = ""

        for line in docstring.split("\n"):
            if is_section(line):
                # Entering new section
                if cur_section:
                    # Leaving previous section, save it and reset buffer
                    self.sections.append(
                        Section(cur_section, self._join_lines(section_lines))
                    )
                    section_lines = []

                # Remember currently parsed section. Surrounding whitespace
                # is removed so that e.g. "  Example:" or "Args: " still
                # yields the bare header name.
                cur_section = line.strip().rstrip(":")
                continue

            # Parse section content
            if cur_section:
                section_lines.append(line)
            else:
                # Before the first header we're parsing the description
                description_lines.append(line)

        self.description = self._join_lines(description_lines)

        # Last section; nothing to add when the docstring had no header at
        # all, otherwise an empty heading would be rendered.
        if cur_section:
            self.sections.append(
                Section(cur_section, self._join_lines(section_lines))
            )

    @staticmethod
    def _join_lines(lines: List[str]) -> str:
        """Concatenate ``lines``, terminating each one with a newline."""
        return "".join("{}\n".format(line) for line in lines)

    def as_markdown(self) -> str:
        """Return the description followed by every section as Markdown.

        Returns:
            The Markdown text, ending in exactly one newline.
        """
        text = self.description + "".join(
            section.as_markdown() for section in self.sections
        )

        return text.rstrip("\n") + "\n"  # Only keep one last newline


def is_section(line: str) -> bool:
    """Tell whether ``line`` contains any ``SECTION_HEADERS`` name followed by ":".

    The match is a search, so the header may appear anywhere in the line.
    """
    return any(
        re.search(r"{}:".format(header), line) for header in SECTION_HEADERS
    )


def looks_like_google(value: str) -> bool:
    """Tell whether ``value`` has a ``SECTION_HEADERS`` name followed by ":" and a newline."""
    return any(
        re.search(r"{}:\n".format(header), value) for header in SECTION_HEADERS
    )


def google_to_markdown(text: str, extract_signature: bool = True) -> str:
    """Convert a Google style docstring to Markdown.

    Args:
        text: Docstring to convert.
        extract_signature: Accepted but not used by this function.

    Returns:
        The Markdown produced by ``GoogleDocstring.as_markdown()``.
    """
    # Escape parts we don't want to render
    escaped = text
    for pattern in ESCAPE_RULES:
        escaped = re.sub(pattern, ESCAPE_RULES[pattern], escaped)

    return GoogleDocstring(escaped).as_markdown()