File: numpydoc.py

package info (click to toggle)
docstring-parser 0.16-1
links: PTS, VCS
area: main
in suites: forky, sid, trixie
size: 316 kB
sloc: python: 3,386; makefile: 5
file content (532 lines) | stat: -rw-r--r-- 15,983 bytes
"""Numpydoc-style docstring parsing.

:see: https://numpydoc.readthedocs.io/en/latest/format.html
"""

import inspect
import itertools
import re
import typing as T
from textwrap import dedent

from .common import (
    Docstring,
    DocstringDeprecated,
    DocstringExample,
    DocstringMeta,
    DocstringParam,
    DocstringRaises,
    DocstringReturns,
    DocstringStyle,
    RenderingStyle,
)


def _pairwise(iterable: T.Iterable, end=None) -> T.Iterable:
    left, right = itertools.tee(iterable)
    next(right, None)
    return itertools.zip_longest(left, right, fillvalue=end)


def _clean_str(string: str) -> T.Optional[str]:
    string = string.strip()
    if len(string) > 0:
        return string
    return None


# Matches each line that starts with a non-whitespace character. With
# ``re.M`` the anchors apply per line, so every match is one unindented line
# (used as the "key" line of a key/value entry).
KV_REGEX = re.compile(r"^[^\s].*$", flags=re.M)
# Splits a key line into ``name`` and an optional ``type`` that follows a
# colon; ``type`` is ``None`` when the line contains no colon.
PARAM_KEY_REGEX = re.compile(r"^(?P<name>.*?)(?:\s*:\s*(?P<type>.*?))?$")
# Matches a type string ending in ", optional" or "(optional)"; the ``type``
# group holds whatever precedes that suffix.
PARAM_OPTIONAL_REGEX = re.compile(r"(?P<type>.*?)(?:, optional|\(optional\))$")

# numpydoc format has no formal grammar for this,
# but we can make some educated guesses...
# The pattern looks for "default"/"Default" that is not preceded by a
# non-whitespace character, optionally followed by " is ", " = ", ": " or
# "s to ", and captures the token after it (word characters, hyphens and
# dots, ending in a word character) as ``value``.
PARAM_DEFAULT_REGEX = re.compile(
    r"(?<!\S)[Dd]efault(?: is | = |: |s to |)\s*(?P<value>[\w\-\.]*\w)"
)

# Splits a key line into an optional ``name`` (the part before a colon) and
# a ``type`` (the remainder of the line).
RETURN_KEY_REGEX = re.compile(r"^(?:(?P<name>.*?)\s*:\s*)?(?P<type>.*?)$")


class Section:
    """Numpydoc section parser.

    :param title: section title. For most sections, this is a heading like
                  "Parameters" which appears on its own line, underlined by
                  hyphens ('-') on the following line.
    :param key: meta key string. In the parsed ``DocstringMeta`` instance this
                will be the first element of the ``args`` attribute list.
    """

    def __init__(self, title: str, key: str) -> None:
        self.title = title
        self.key = key

    @property
    def title_pattern(self) -> str:
        """Regular expression pattern matching this section's header.

        The header is the literal ``title`` on its own line followed by a
        line of exactly ``len(title)`` hyphens. The title is captured in an
        unnamed group so the matched text can be mapped back to the section.
        """
        dashes = "-" * len(self.title)
        # The title is matched literally: escaping keeps characters such as
        # "+", "(" or "." from being interpreted as regex syntax.
        return rf"^({re.escape(self.title)})\s*?\n{dashes}\s*$"

    def parse(self, text: str) -> T.Iterable["DocstringMeta"]:
        """Parse ``DocstringMeta`` objects from the body of this section.

        The base implementation yields a single meta object whose
        description is the whole (whitespace-stripped) section body.

        :param text: section body text. Should be cleaned with
                     ``inspect.cleandoc`` before parsing.
        """
        yield DocstringMeta([self.key], description=_clean_str(text))


class _KVSection(Section):
    """Base parser for numpydoc sections with key-value syntax.

    E.g. sections that look like this:
        key
            value
        key2 : type
            values can also span...
            ... multiple lines
    """

    def _parse_item(self, key: str, value: str) -> DocstringMeta:
        """Build the meta object for a single key/value entry.

        This is a hook that concrete subclasses must override.

        :param key: the unindented key line of the entry.
        :param value: the entry body, already cleaned with
                      ``inspect.cleandoc``.
        :raises NotImplementedError: always, in this base class.
        """
        # Previously this fell through and returned None, which made
        # ``parse`` silently yield None instead of meta objects.
        raise NotImplementedError(
            f"{type(self).__name__} must implement _parse_item()"
        )

    def parse(self, text: str) -> T.Iterable[DocstringMeta]:
        """Split the section body into entries and parse each of them.

        Every unindented line starts a new entry; the text between that line
        and the next unindented line (or the end of the body) is its value.

        :param text: section body text.
        """
        for match, next_match in _pairwise(KV_REGEX.finditer(text)):
            # The value spans from the end of this key line up to the start
            # of the next key line, or to the end of the text for the last.
            start = match.end()
            end = next_match.start() if next_match is not None else None
            value = text[start:end]
            yield self._parse_item(
                key=match.group(), value=inspect.cleandoc(value)
            )


class _SphinxSection(Section):
    """Base parser for numpydoc sections with sphinx-style syntax.

    E.g. sections that look like this:
        .. title:: something
            possibly over multiple lines
    """

    @property
    def title_pattern(self) -> str:
        """Regular expression pattern matching a ``.. title::`` directive.

        The title is matched literally and captured in an unnamed group.
        """
        # Escape the title so regex metacharacters in it are taken literally.
        return rf"^\.\.\s*({re.escape(self.title)})\s*::"


class ParamSection(_KVSection):
    """Parser for numpydoc parameter-like sections.

    Handles any section whose entries look like this:
        arg_name
            arg_description
        arg_2 : type, optional
            descriptions can also span...
            ... multiple lines
    """

    def _parse_item(self, key: str, value: str) -> DocstringParam:
        """Turn one ``name : type`` entry into a ``DocstringParam``.

        :param key: the entry's key line (name and optional type).
        :param value: the entry's description text.
        :returns: the parsed parameter; ``is_optional`` stays ``None`` when
                  the key carries no type information.
        """
        arg_name = None
        type_name = None
        is_optional = None

        key_match = PARAM_KEY_REGEX.match(key)
        if key_match is not None:
            arg_name, type_name = key_match.group("name", "type")
            if type_name is not None:
                # Peel off an "optional" marker from the end of the type.
                marker_match = PARAM_OPTIONAL_REGEX.match(type_name)
                is_optional = marker_match is not None
                if is_optional:
                    type_name = marker_match.group("type")

        # Only a non-empty description can mention a default value.
        default_match = PARAM_DEFAULT_REGEX.search(value) if value else None
        if default_match is None:
            default = None
        else:
            default = default_match.group("value")

        return DocstringParam(
            args=[self.key, arg_name],
            description=_clean_str(value),
            arg_name=arg_name,
            type_name=type_name,
            is_optional=is_optional,
            default=default,
        )


class RaisesSection(_KVSection):
    """Parser for numpydoc raises-like sections.

    Handles any section whose entries look like this:
        ValueError
            A description of what might raise ValueError
    """

    def _parse_item(self, key: str, value: str) -> DocstringRaises:
        """Turn one entry into a ``DocstringRaises``.

        :param key: the entry's key line, used as the exception type name.
        :param value: the entry's description text.
        """
        # An empty key line means no type name is known.
        exception_type = key or None
        return DocstringRaises(
            args=[self.key, key],
            description=_clean_str(value),
            type_name=exception_type,
        )


class ReturnsSection(_KVSection):
    """Parser for numpydoc returns sections.

    Handles any section whose entries look like this:
        return_name : type
            A description of this returned value
        another_type
            Return names are optional, types are required
    """

    # Forwarded as ``is_generator`` on every DocstringReturns built here.
    is_generator = False

    def _parse_item(self, key: str, value: str) -> DocstringReturns:
        """Turn one ``[name : ]type`` entry into a ``DocstringReturns``.

        :param key: the entry's key line (optional name, then type).
        :param value: the entry's description text.
        """
        return_name = type_name = None
        key_match = RETURN_KEY_REGEX.match(key)
        if key_match is not None:
            return_name, type_name = key_match.group("name", "type")

        return DocstringReturns(
            args=[self.key],
            description=_clean_str(value),
            type_name=type_name,
            is_generator=self.is_generator,
            return_name=return_name,
        )


class YieldsSection(ReturnsSection):
    """Parser for numpydoc "yields" sections of generator functions.

    Entries are parsed exactly like those of :class:`ReturnsSection`; the
    only difference is that the resulting objects are flagged as generator
    output.
    """

    # Overrides the inherited flag used when building DocstringReturns.
    is_generator = True


class DeprecationSection(_SphinxSection):
    """Parser for numpydoc "deprecation warning" sections."""

    def parse(self, text: str) -> T.Iterable[DocstringDeprecated]:
        """Parse the body of a deprecation directive.

        The remainder of the directive's first line is the version; all
        following lines, if any, form the description.

        :param text: the text following the directive's ``::`` marker.
        """
        version, newline, remainder = text.partition("\n")

        desc = None
        if newline:
            # Only text after the first line break can be a description.
            desc = _clean_str(inspect.cleandoc(remainder))

        yield DocstringDeprecated(
            args=[self.key], description=desc, version=_clean_str(version)
        )


class ExamplesSection(Section):
    """Parser for numpydoc examples sections.

    Handles any section that looks like this:
        >>> import numpy.matlib
        >>> np.matlib.empty((2, 2))    # filled with random data
        matrix([[  6.76425276e-320,   9.79033856e-307], # random
                [  7.39337286e-309,   3.22135945e-309]])
        >>> np.matlib.empty((2, 2), dtype=int)
        matrix([[ 6600475,        0], # random
                [ 6586976, 22740995]])
    """

    def parse(self, text: str) -> T.Iterable[DocstringMeta]:
        """Parse ``DocstringExample`` objects from the body of this section.

        The body is cut into consecutive chunks, each made of a run of
        lines starting with ``>>>`` (the snippet) followed by a run of
        lines that do not (the description). Either run may be empty.

        :param text: section body text. Should be cleaned with
                     ``inspect.cleandoc`` before parsing.
        """
        lines = dedent(text).strip().splitlines()
        total = len(lines)
        cursor = 0
        while cursor < total:
            # Run of prompt lines.
            snippet_start = cursor
            while cursor < total and lines[cursor].startswith(">>>"):
                cursor += 1
            # Run of non-prompt lines that follows it.
            description_start = cursor
            while cursor < total and not lines[cursor].startswith(">>>"):
                cursor += 1

            snippet_lines = lines[snippet_start:description_start]
            description_lines = lines[description_start:cursor]
            if snippet_lines:
                snippet = "\n".join(snippet_lines)
            else:
                snippet = None
            yield DocstringExample(
                [self.key],
                snippet=snippet,
                description="\n".join(description_lines),
            )


# Sections used by ``NumpydocParser`` when no custom sections are supplied.
# Each entry pairs a header title with the meta key stored as the first
# element of ``args`` on the parsed objects; several alternative titles map
# to the same key.
DEFAULT_SECTIONS = [
    # Parameter-like sections (parsed by ParamSection).
    ParamSection("Parameters", "param"),
    ParamSection("Params", "param"),
    ParamSection("Arguments", "param"),
    ParamSection("Args", "param"),
    ParamSection("Other Parameters", "other_param"),
    ParamSection("Other Params", "other_param"),
    ParamSection("Other Arguments", "other_param"),
    ParamSection("Other Args", "other_param"),
    ParamSection("Receives", "receives"),
    ParamSection("Receive", "receives"),
    # Exception and warning sections (parsed by RaisesSection).
    RaisesSection("Raises", "raises"),
    RaisesSection("Raise", "raises"),
    RaisesSection("Warns", "warns"),
    RaisesSection("Warn", "warns"),
    # Attributes use the same entry syntax as parameters.
    ParamSection("Attributes", "attribute"),
    ParamSection("Attribute", "attribute"),
    # Return values and generator output.
    ReturnsSection("Returns", "returns"),
    ReturnsSection("Return", "returns"),
    YieldsSection("Yields", "yields"),
    YieldsSection("Yield", "yields"),
    # Examples are split into snippet/description pairs.
    ExamplesSection("Examples", "examples"),
    ExamplesSection("Example", "examples"),
    # Free-text sections: the whole body becomes one description.
    Section("Warnings", "warnings"),
    Section("Warning", "warnings"),
    Section("See Also", "see_also"),
    Section("Related", "see_also"),
    Section("Notes", "notes"),
    Section("Note", "notes"),
    Section("References", "references"),
    Section("Reference", "references"),
    # Sphinx-style ``.. deprecated::`` directive.
    DeprecationSection("deprecated", "deprecation"),
]


class NumpydocParser:
    """Parser for numpydoc-style docstrings."""

    def __init__(
        self,
        sections: T.Optional[
            T.Union[T.Iterable[Section], T.Dict[str, Section]]
        ] = None,
    ):
        """Setup sections.

        :param sections: Recognized sections, given either as an iterable of
            ``Section`` objects or as a dict whose values are ``Section``
            objects. ``None`` (or an empty collection) selects
            ``DEFAULT_SECTIONS``.
        """
        sections = sections or DEFAULT_SECTIONS
        if isinstance(sections, dict):
            # Iterating a dict yields its keys (plain strings), which have
            # no ``title``; use the Section values instead. They are re-keyed
            # by title below because ``parse`` looks sections up by the
            # header text it matched.
            sections = sections.values()
        self.sections = {s.title: s for s in sections}
        self._setup()

    def _setup(self):
        """(Re)build the regex that recognizes any known section header."""
        self.titles_re = re.compile(
            r"|".join(s.title_pattern for s in self.sections.values()),
            flags=re.M,
        )

    def add_section(self, section: Section):
        """Add or replace a section.

        :param section: The new section.
        """

        self.sections[section.title] = section
        self._setup()

    def parse(self, text: str) -> Docstring:
        """Parse the numpy-style docstring into its components.

        :param text: raw docstring text; falsy input gives an empty result.
        :returns: parsed docstring
        """
        ret = Docstring(style=DocstringStyle.NUMPYDOC)
        if not text:
            return ret

        # Clean according to PEP-0257
        text = inspect.cleandoc(text)

        # Find first title and split on its position
        match = self.titles_re.search(text)
        if match:
            desc_chunk = text[: match.start()]
            meta_chunk = text[match.start() :]
        else:
            desc_chunk = text
            meta_chunk = ""

        # Break description into short and long parts
        parts = desc_chunk.split("\n", 1)
        ret.short_description = parts[0] or None
        if len(parts) > 1:
            long_desc_chunk = parts[1] or ""
            ret.blank_after_short_description = long_desc_chunk.startswith(
                "\n"
            )
            ret.blank_after_long_description = long_desc_chunk.endswith("\n\n")
            ret.long_description = long_desc_chunk.strip() or None

        for match, nextmatch in _pairwise(self.titles_re.finditer(meta_chunk)):
            # Each alternative of titles_re has one capturing group; the one
            # that is not None holds the matched section title.
            title = next(g for g in match.groups() if g is not None)
            factory = self.sections[title]

            # section chunk starts after the header,
            # ends at the start of the next header
            start = match.end()
            end = nextmatch.start() if nextmatch is not None else None
            ret.meta.extend(factory.parse(meta_chunk[start:end]))

        return ret


def parse(text: str) -> Docstring:
    """Parse a numpy-style docstring using the default sections.

    :param text: docstring text to parse
    :returns: parsed docstring
    """
    parser = NumpydocParser()
    return parser.parse(text)


def compose(
    # pylint: disable=W0613
    docstring: Docstring,
    rendering_style: RenderingStyle = RenderingStyle.COMPACT,
    indent: str = "    ",
) -> str:
    """Render a parsed docstring into docstring text.

    :param docstring: parsed docstring representation
    :param rendering_style: the style to render docstrings (accepted but not
        used by this function)
    :param indent: the characters used as indentation in the docstring string
    :returns: docstring text
    """

    def process_one(
        one: T.Union[DocstringParam, DocstringReturns, DocstringRaises]
    ):
        """Append one ``name : type`` entry and its description to parts."""
        if isinstance(one, DocstringParam):
            head = one.arg_name
        elif isinstance(one, DocstringReturns):
            head = one.return_name
        else:
            head = None

        if one.type_name and head:
            head += f" : {one.type_name}"
        elif one.type_name:
            head = one.type_name
        elif not head:
            head = ""

        if isinstance(one, DocstringParam) and one.is_optional:
            head += ", optional"

        if one.description:
            # Description lines are indented below the entry's head line.
            body = f"\n{indent}".join([head] + one.description.splitlines())
            parts.append(body)
        else:
            parts.append(head)

    def process_sect(name: str, args: T.List[T.Any]):
        """Append an underlined section with its entries; skip if empty."""
        if args:
            parts.append("")
            parts.append(name)
            parts.append("-" * len(parts[-1]))
            for arg in args:
                process_one(arg)

    parts: T.List[str] = []
    if docstring.short_description:
        parts.append(docstring.short_description)
    if docstring.blank_after_short_description:
        parts.append("")

    if docstring.deprecation:
        first = ".. deprecated::"
        if docstring.deprecation.version:
            first += f" {docstring.deprecation.version}"
        if docstring.deprecation.description:
            rest = docstring.deprecation.description.splitlines()
        else:
            rest = []
        sep = f"\n{indent}"
        parts.append(sep.join([first] + rest))

    if docstring.long_description:
        parts.append(docstring.long_description)
    if docstring.blank_after_long_description:
        parts.append("")

    process_sect(
        "Parameters",
        [item for item in docstring.params or [] if item.args[0] == "param"],
    )

    process_sect(
        "Attributes",
        [
            item
            for item in docstring.params or []
            if item.args[0] == "attribute"
        ],
    )

    process_sect(
        "Returns",
        [
            item
            for item in docstring.many_returns or []
            if not item.is_generator
        ],
    )

    process_sect(
        "Yields",
        [item for item in docstring.many_returns or [] if item.is_generator],
    )

    if docstring.returns and not docstring.many_returns:
        ret = docstring.returns
        # Choose the header from the generator flag; ``ret`` itself is always
        # truthy inside this branch, so testing it would always say "Yields".
        parts.append("Yields" if ret.is_generator else "Returns")
        parts.append("-" * len(parts[-1]))
        process_one(ret)

    process_sect(
        "Receives",
        [
            item
            for item in docstring.params or []
            if item.args[0] == "receives"
        ],
    )

    process_sect(
        "Other Parameters",
        [
            item
            for item in docstring.params or []
            if item.args[0] == "other_param"
        ],
    )

    process_sect(
        "Raises",
        [item for item in docstring.raises or [] if item.args[0] == "raises"],
    )

    process_sect(
        "Warns",
        [item for item in docstring.raises or [] if item.args[0] == "warns"],
    )

    for meta in docstring.meta:
        if isinstance(
            meta,
            (
                DocstringDeprecated,
                DocstringParam,
                DocstringReturns,
                DocstringRaises,
            ),
        ):
            continue  # Already handled

        # Turn a meta key such as "see_also" into the header "See Also".
        # The underscore becomes a space (not nothing) so that the header
        # and its underline have the same length.
        heading = meta.args[0].replace("_", " ").title()
        parts.append("")
        parts.append(heading)
        parts.append("-" * len(heading))

        # NOTE(review): only ``description`` is rendered for these generic
        # entries; any other fields the meta object carries are not emitted.
        if meta.description:
            parts.append(meta.description)

    return "\n".join(parts)