File: markdown.py

package info (click to toggle)
python-readme-renderer 44.0-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 728 kB
  • sloc: python: 414; sh: 23; makefile: 6
file content (123 lines) | stat: -rw-r--r-- 3,595 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
# Copyright 2014 Donald Stufft
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import re
import warnings
from typing import cast, Any, Dict, Callable, Match, Optional

from html import unescape

import pygments
import pygments.lexers
import pygments.formatters

from .clean import clean

# Warning text used when the optional Markdown dependency cannot be imported:
# it is emitted once at import time and again by render() whenever no
# renderers are registered.
_EXTRA_WARNING = (
    "Markdown renderers are not available. "
    "Install 'readme_renderer[md]' to enable Markdown rendering."
)

# Build the table of available Markdown renderers, keyed by variant name.
# cmarkgfm is optional: when it cannot be imported a warning is issued and
# the table is left empty.
try:
    import cmarkgfm
    from cmarkgfm.cmark import Options as cmarkgfmOptions
except ImportError:
    warnings.warn(_EXTRA_WARNING)
    variants: Dict[str, Callable[[str], str]] = {}
else:
    def _render_gfm(raw: str) -> str:
        """Convert GitHub Flavored Markdown to HTML with cmarkgfm."""
        return cast(str, cmarkgfm.github_flavored_markdown_to_html(
            raw, options=cmarkgfmOptions.CMARK_OPT_UNSAFE
        ))

    def _render_commonmark(raw: str) -> str:
        """Convert CommonMark to HTML with cmarkgfm."""
        return cast(str, cmarkgfm.markdown_to_html(
            raw, options=cmarkgfmOptions.CMARK_OPT_UNSAFE
        ))

    variants = {
        "GFM": _render_gfm,
        "CommonMark": _render_commonmark,
    }

# Maps the language named on a code fence to the Pygments lexer name that
# _highlight() looks up in its place. Fences marked `python` default to
# highlighting as Python 3; the substituted name is also what _highlight()
# writes into the `lang` attribute of the <pre> tag it emits.
_LANG_ALIASES: Dict[str, str] = {
    'python': 'python3',
}


def render(
    raw: str,
    variant: str = "GFM",
    **kwargs: Any
) -> Optional[str]:
    """Render Markdown source to highlighted, cleaned HTML.

    Args:
        raw: The Markdown text to render.
        variant: Key into the module-level ``variants`` table selecting
            which renderer to use. Defaults to ``"GFM"``.
        **kwargs: Accepted but not used by this function.

    Returns:
        The renderer's output after it has been passed through
        ``_highlight`` and then ``clean``. ``None`` is returned instead
        when no renderers are registered (a warning is emitted in that
        case), when ``variant`` has no entry in ``variants``, or when the
        renderer produces an empty result.
    """
    if not variants:
        warnings.warn(_EXTRA_WARNING)
        return None

    to_html = variants.get(variant)
    markup = to_html(raw) if to_html else None
    if not markup:
        return None

    return clean(_highlight(markup))


def _highlight(html: str) -> str:
    """Apply Pygments syntax highlighting to code blocks in rendered HTML.

    Finds the ``<pre>``/``<code>`` sections that carry a language, i.e.
    those that conform to the GitHub fenced code info string as defined at
    https://github.github.com/gfm/#info-string, and replaces each one with
    a ``<pre lang="...">`` element wrapping the highlighted code.

    Args:
        html (str): The rendered HTML.

    Returns:
        str: The HTML with every matching code block replaced by its
            highlighted form. Text outside those blocks is unchanged.
    """

    # Two markups are recognised for a fenced block:
    # cmarkgfm<0.6.0: <pre><code class="language-python">print('hello')</code></pre>
    # cmarkgfm>=0.6.0: <pre lang="python"><code>print('hello')</code></pre>
    fence_pattern = re.compile(
        r'(<pre>(?P<in_code><code) class="language-|<pre lang=")(?P<lang>[^"]+?)">'
        '(?(in_code)|<code>)(?P<code>.+?)'
        r'</code></pre>', re.DOTALL)

    html_formatter = pygments.formatters.HtmlFormatter(nowrap=True)

    def _rehighlight(found: Match[Any]) -> str:
        """Return the highlighted replacement for one matched code block."""
        requested = found.group('lang')
        lang = _LANG_ALIASES.get(requested, requested)

        try:
            lexer = pygments.lexers.get_lexer_by_name(lang)
        except ValueError:
            # No lexer could be obtained for this name; highlight the block
            # as plain text instead.
            lexer = pygments.lexers.TextLexer()

        # Decode html entities in the code. cmark tries to be helpful and
        # translate '"' to '&quot;', but it confuses pygments. Pygments will
        # escape any html entities when re-writing the code, and we run
        # everything through bleach after.
        source = unescape(found.group('code'))

        body = pygments.highlight(source, lexer, html_formatter)
        return f'<pre lang="{lang}">{body}</pre>'

    return fence_pattern.sub(_rehighlight, html)