File: markup.py

package info (click to toggle)
zabbix-cli 3.6.2-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 2,980 kB
  • sloc: python: 19,920; makefile: 5
file content (154 lines) | stat: -rw-r--r-- 4,775 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
from __future__ import annotations

import itertools
from dataclasses import dataclass
from functools import cmp_to_key

from rich.text import Text
from zabbix_cli.output.style import CodeBlockStyle
from zabbix_cli.output.style import CodeStyle

# Members of the style enums, materialised once so that the style names found
# on a span can be checked against them with a plain ``in`` test.
CODEBLOCK_STYLES = [*CodeBlockStyle]
CODE_STYLES = [*CodeStyle]
# Style name -> language tag written after the opening code fence.
CODEBLOCK_LANGS = dict(python="py")


@dataclass
class MarkdownSpan:
    """A styled region of text, described by its start and end offsets.

    The boolean flags record which Markdown formatting applies to the region;
    ``language`` is the tag to put on the opening fence of a code block.
    """

    # Offsets into the plain text the span was taken from.
    start: int
    end: int
    # Formatting that applies to the region.
    italic: bool = False
    bold: bool = False
    code: bool = False
    codeblock: bool = False
    # Language tag for fenced code blocks ("" means no tag).
    language: str = ""

    def to_symbols(self) -> tuple[MarkdownSymbol, MarkdownSymbol]:
        """Return the (opening, closing) symbol pair that delimits this span."""
        return (
            MarkdownSymbol.from_span(self, end=False),
            MarkdownSymbol.from_span(self, end=True),
        )


@dataclass
class MarkdownSymbol:
    """A Markdown formatting marker together with the offset it belongs at.

    A symbol is either the opening (``end=False``) or the closing
    (``end=True``) marker of a styled region.
    """

    # Offset in the plain text where the marker is to be inserted.
    position: int
    italic: bool = False
    bold: bool = False
    code: bool = False
    codeblock: bool = False
    # True for a closing marker, False for an opening one.
    end: bool = False
    # Language tag for code fences; only written on the opening fence.
    language: str = ""

    @property
    def symbol(self) -> str:
        """The Markdown text for this marker.

        A code block renders as a fence and ignores the inline flags. The
        inline markers are concatenated in the order italic, bold, code.
        Closing markers are the mirror image of the opening ones, so that
        nested markers close in the opposite order to how they opened.
        """
        if self.codeblock:
            # The language tag belongs on the opening fence only.
            fence_lang = "" if self.end else self.language
            rendered = f"```{fence_lang}\n"
        else:
            markers = (
                (self.italic, "*"),
                (self.bold, "**"),
                (self.code, "`"),
            )
            rendered = "".join(mark for enabled, mark in markers if enabled)
        return rendered[::-1] if self.end else rendered

    @classmethod
    def from_span(cls, span: MarkdownSpan, *, end: bool = False) -> MarkdownSymbol:
        """Build the opening or closing symbol for ``span``.

        Args:
            span: The span whose formatting flags and offsets are copied.
            end: When true, build the closing symbol located at ``span.end``;
                otherwise build the opening symbol located at ``span.start``.
        """
        where = span.end if end else span.start
        return cls(
            position=where,
            italic=span.italic,
            bold=span.bold,
            code=span.code,
            codeblock=span.codeblock,
            end=end,
            language=span.language,
        )


# Easier than implementing rich comparison methods on MarkdownSymbol
def mdsymbol_cmp(a: MarkdownSymbol, b: MarkdownSymbol) -> int:
    """Three-way comparison of two symbols, for use with ``cmp_to_key``.

    Symbols are ordered by position. When two symbols share a position and
    exactly one of them is an inline-code marker, the code marker is placed
    so that no other marker ends up inside the code span:

    * an opening code marker sorts after the other symbol (it opens last);
    * a closing code marker sorts before the other symbol (it closes first).

    All other ties compare equal, so a stable sort keeps their input order.

    Args:
        a: Left-hand symbol.
        b: Right-hand symbol.

    Returns:
        -1 if ``a`` sorts before ``b``, 1 if it sorts after, 0 if they tie.
    """
    if a.position != b.position:
        return -1 if a.position < b.position else 1

    # Same position: only a code/non-code pair needs an explicit order.
    if bool(a.code) == bool(b.code):
        return 0

    # Code spans cannot contain other tags, so the code marker must be the
    # innermost one: last when opening, first when closing.
    if a.code:
        return -1 if a.end else 1
    return 1 if b.end else -1


# TODO: make the converter names consistent. Either rename
#       `markup_to_markdown` to `markup_as_markdown`, or rename
#       `markup_as_plain_text` to `markup_to_plain_text`.
#       I am partial to `x_to_y`.


def markup_to_markdown(s: str) -> str:
    """Parse a string that might contain Rich markup and convert it to Markdown.

    This is a very naive implementation that only supports a subset of Rich
    markup: code block styles, inline code styles, italic and bold. Every
    other style is ignored.

    Args:
        s: Text that may contain Rich markup tags.

    Returns:
        The text with the supported styles rendered as Markdown markers and
        with leading and trailing whitespace removed.
    """
    t = Text.from_markup(normalize_spaces(s))
    spans: list[MarkdownSpan] = []
    # Markdown has more limited styles than Rich markup, so we just
    # identify the ones we care about and ignore the rest.
    for span in t.spans:
        new_span = MarkdownSpan(span.start, span.end)
        styles = str(span.style).lower().split(" ")
        # Code (block) styles ignore other styles
        if any(style in CODEBLOCK_STYLES for style in styles):
            new_span.codeblock = True
            lang = next((style for style in styles if style in CODEBLOCK_LANGS), "")
            new_span.language = CODEBLOCK_LANGS.get(lang, "")
        elif any(style in CODE_STYLES for style in styles):
            new_span.code = True
        else:
            if "italic" in styles:
                new_span.italic = True
            if "bold" in styles:
                new_span.bold = True
        spans.append(new_span)

    # Convert MarkdownSpans to MarkdownSymbols
    # Each MarkdownSymbol represents a markdown formatting character along
    # with its position in the string.
    symbols = list(itertools.chain.from_iterable(sp.to_symbols() for sp in spans))
    symbols = sorted(symbols, key=cmp_to_key(mdsymbol_cmp))

    plain = t.plain
    # Span offsets refer to the unstripped text. Account for the leading
    # whitespace that is about to be removed so the markers still land on
    # the characters they belong to.
    leading = len(plain) - len(plain.lstrip())

    # List of characters that make up string
    plaintext = list(plain.strip())  # remove leading and trailing whitespace
    # Each symbol is inserted as a single list element (even when it is
    # several characters long), so every earlier insertion shifts the
    # following indices by exactly one.
    for offset, symbol in enumerate(symbols):
        # Clamp at 0: a negative index would count from the end of the list.
        # An index past the end needs no clamping; `insert` appends.
        index = max(symbol.position - leading, 0) + offset
        plaintext.insert(index, symbol.symbol)

    return "".join(plaintext)


def normalize_spaces(s: str) -> str:
    """Collapse runs of spaces in ``s`` while leaving newlines untouched.

    The string is split on single space characters and empty pieces are
    dropped. Every remaining piece is followed by exactly one space, except
    pieces that end with a newline, which are kept as they are.
    """
    return "".join(
        chunk if chunk.endswith("\n") else f"{chunk} "
        for chunk in s.split(" ")
        if chunk
    )


def markup_as_plain_text(s: str) -> str:
    """Return ``s`` as plain text, with any markup formatting removed."""
    parsed = Text.from_markup(s)
    return parsed.plain