File: __init__.py | Debian Sources

File: init.py

package info (click to toggle)
pymdown-extensions 10.13-3
links: PTS, VCS
area: main
in suites: forky, sid
size: 7,104 kB
sloc: python: 60,117; javascript: 846; sh: 8; makefile: 5
file content (514 lines) | stat: -rw-r--r-- 16,540 bytes
parent folder | download | duplicates (2)
"""Generic blocks extension."""
from markdown import Extension
from markdown.blockprocessors import BlockProcessor
from markdown.treeprocessors import Treeprocessor
from markdown import util as mutil
from .. import util
import xml.etree.ElementTree as etree
import re
import yaml
import textwrap

# Fenced block placeholder for SuperFences
FENCED_BLOCK_RE = re.compile(
    r'^([\> ]*){}({}){}$'.format(
        mutil.HTML_PLACEHOLDER[0],
        mutil.HTML_PLACEHOLDER[1:-1] % r'([0-9]+)',
        mutil.HTML_PLACEHOLDER[-1]
    )
)

# Block start/end
RE_START = re.compile(
    r'(?:^|\n)[ ]{0,3}(/{3,})[ ]*([\w-]+)[ ]*(?:\|[ ]*(.*?)[ ]*)?(?:\n|$)'
)

RE_END = re.compile(
    r'(?m)(?:^|\n)[ ]{0,3}(/{3,})[ ]*(?:\n|$)'
)

# Frontmatter patterns
RE_YAML_START = re.compile(r'(?m)^[ ]{0,3}(-{3})[ ]*(?:\n|$)')

RE_YAML_END = re.compile(
    r'(?m)^[ ]{0,3}(-{3})[ ]*(?:\n|$)'
)

RE_INDENT_YAML_LINE = re.compile(r'(?m)^(?:[ ]{4,}(?!\s).*?(?:\n|$))+')


class BlockEntry:
    """Track Block entries."""

    def __init__(self, block, el, parent):
        """Block entry."""

        self.block = block
        self.el = el
        self.parent = parent
        self.hungry = False


def get_frontmatter(string):
    """
    Get frontmatter from string.

    YAML-ish key value pairs.
    """

    frontmatter = None

    try:
        frontmatter = yaml.safe_load(string)
        if frontmatter is None:
            frontmatter = {}
        if not isinstance(frontmatter, dict):
            frontmatter = None
    except Exception:
        pass

    return frontmatter


def reindent(text, pos, level):
    """Reindent the code to where it is supposed to be."""

    indented = []
    for line in text.split('\n'):
        index = pos - level
        indented.append(line[index:])
    return indented


def unescape_markdown(md, blocks, is_raw):
    """Look for SuperFences code placeholders and other HTML stash placeholders and revert them back to plain text."""

    superfences = None
    try:
        from ..superfences import SuperFencesBlockPreprocessor
        processor = md.preprocessors['fenced_code_block']
        if isinstance(processor, SuperFencesBlockPreprocessor):
            superfences = processor.extension
    except Exception:
        pass

    new_blocks = []
    for block in blocks:
        new_lines = []
        for line in block.split('\n'):
            m = FENCED_BLOCK_RE.match(line)
            if m:
                key = m.group(2)

                # Extract SuperFences content
                indent_level = len(m.group(1))
                original = None
                if superfences is not None:
                    original, pos = superfences.stash.get(key, (None, None))
                    if original is not None:
                        code = reindent(original, pos, indent_level)
                        new_lines.extend(code)
                        superfences.stash.remove(key)

                # Extract other HTML stashed content
                if original is None and is_raw:
                    index = int(key.split(':')[1])
                    if index < len(md.htmlStash.rawHtmlBlocks):
                        original = md.htmlStash.rawHtmlBlocks[index]
                        if isinstance(original, etree.Element):
                            original = etree.tostring(original, encoding='unicode', method='html')
                        new_lines.append(original)

                # Couldn't find anything to extract
                if original is None:  # pragma: no cover
                    new_lines.append(line)
            else:
                new_lines.append(line)
        new_blocks.append('\n'.join(new_lines))

    return new_blocks


class BlocksTreeprocessor(Treeprocessor):
    """Blocks tree processor."""

    def __init__(self, md, blocks):
        """Initialize."""

        super().__init__(md)

        self.blocks = blocks

    def run(self, doc):
        """Update tab IDs."""

        while self.blocks.inline_stack:
            entry = self.blocks.inline_stack.pop(0)
            entry.block.on_inline_end(entry.el)


class BlocksProcessor(BlockProcessor):
    """Generic block processor."""

    def __init__(self, parser, md):
        """Initialization."""

        self.md = md

        # The Block classes indexable by name
        self.blocks = {}
        self.config = {}
        self.empty_tags = {'hr',}
        self.block_level_tags = set(md.block_level_elements.copy())
        self.block_level_tags.add('html')

        # Block-level tags in which the content only gets span level parsing
        self.span_tags = {
            'address', 'dd', 'dt', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'legend', 'li', 'p', 'summary', 'td', 'th'
        }
        # Block-level tags which never get their content parsed.
        self.raw_tags = {'canvas', 'math', 'option', 'pre', 'script', 'style', 'textarea', 'code'}
        # Block-level tags in which the content gets parsed as blocks
        self.block_tags = set(self.block_level_tags) - (self.span_tags | self.raw_tags | self.empty_tags)
        self.span_and_blocks_tags = self.block_tags | self.span_tags

        super().__init__(parser)

        # Persistent storage across a document for blocks
        self.trackers = {}
        # Currently queued up blocks
        self.stack = []
        # Blocks that should be processed after inline.
        self.inline_stack = []
        # When set, the assigned block is actively parsing blocks.
        self.working = None
        # Cached the found parent when testing
        # so we can quickly retrieve it when running
        self.cached_parent = None
        self.cached_block = None

        # Used during the alpha/beta stage
        self.start = RE_START
        self.end = RE_END
        self.yaml_line = RE_INDENT_YAML_LINE

    def register(self, b, config):
        """Register a block."""

        if b.NAME in self.blocks:
            raise ValueError(f'The block name {b.NAME} is already registered!')
        self.blocks[b.NAME] = b
        self.config[b.NAME] = config

    def test(self, parent, block):
        """Test to see if we should process the block."""

        # Are we hungry for more?
        if self.get_parent(parent) is not None:
            return True

        # Is this the start of a new block?
        m = self.start.search(block)
        if m:

            pre_text = block[:m.start()] if m.start() > 0 else None

            # Create a block object
            name = m.group(2).lower()
            if name in self.blocks:
                generic_block = self.blocks[name](len(m.group(1)), self.trackers[name], self, self.config[name])

                # Remove first line
                block = block[m.end():]

                # Get frontmatter and argument(s)
                options, the_rest = self.split_header(block, generic_block.length)
                arguments = m.group(3)

                # Options must be valid
                status = options is not None

                # Update the config for the Block
                if status:
                    status = generic_block._validate(parent, arguments, **options)

                # Cache the found Block and any remaining content
                if status:
                    self.cached_block = (generic_block, the_rest)

                    # Any text before the block should get handled
                    if pre_text is not None:
                        self.parser.parseBlocks(parent, [pre_text])

                return status
        return False

    def _reset(self):
        """Reset."""

        self.stack.clear()
        self.inline_stack.clear()
        self.working = None
        self.trackers = {d: {} for d in self.blocks.keys()}

    def split_end(self, blocks, length):
        """Search for end and split the blocks while removing the end."""

        good = []
        bad = []
        end = False

        # Split on our end notation for the current Block
        for e, block in enumerate(blocks):

            # Find the end of the Block
            m = None
            for match in self.end.finditer(block):
                if len(match.group(1)) == length:
                    m = match
                    break

            # Separate everything from before the "end" and after
            if m:
                temp = block[:m.start(0)]
                if temp:
                    good.append(temp[:-1] if temp.endswith('\n') else temp)
                end = True

                # Since we found our end, everything after is unwanted
                temp = block[m.end(0):]
                if temp:
                    bad.append(temp)
                bad.extend(blocks[e + 1:])
                break
            else:
                # Gather blocks until we find our end
                good.append(block)

        # Augment the blocks
        blocks.clear()
        blocks.extend(bad)

        # Send back the new list of blocks to parse and note whether we found our end
        return good, end

    def split_header(self, block, length):
        """Split, YAML-ish header out."""

        # Search for end in first block
        m = None
        blocks = []
        for match in self.end.finditer(block):
            if len(match.group(1)) == length:
                m = match
                break

        # Move block ending to be parsed later
        if m:
            end = block[m.start(0):]
            blocks.insert(0, end)
            block = block[:m.start(0)]

        m = self.yaml_line.match(block)
        if m is not None:
            config = textwrap.dedent(m.group(0))
            blocks.insert(0, block[m.end():])
            if config.strip():
                return get_frontmatter(config), '\n'.join(blocks)

        blocks.insert(0, block)

        return {}, '\n'.join(blocks)

    def get_parent(self, parent):
        """Get parent."""

        # Returned the cached parent from our last attempt
        if self.cached_parent is not None:
            parent = self.cached_parent
            self.cached_parent = None
            return parent

        temp = parent
        while temp is not None:
            for entry in self.stack:
                if entry.hungry and entry.parent is temp:
                    self.cached_parent = temp
                    return temp
            if temp is not None:
                temp = self.lastChild(temp)
        return None

    def is_raw(self, tag):
        """Is tag raw."""

        return tag.tag in self.raw_tags

    def is_block(self, tag):
        """Is tag block."""

        return tag.tag in self.block_tags

    def parse_blocks(self, blocks, entry):
        """Parse the blocks."""

        # Get the target element and parse

        for b in blocks:
            target = entry.block.on_add(entry.el)

            # The Block does not or no longer accepts more content
            if target is None:  # pragma: no cover
                break

            mode = entry.block.on_markdown()
            if mode not in ('block', 'inline', 'raw'):
                mode = 'auto'
            is_block = mode == 'block' or (mode == 'auto' and self.is_block(target))
            is_atomic = mode == 'raw' or (mode == 'auto' and self.is_raw(target))

            # We should revert fenced code in spans or atomic tags.
            # Make sure atomic tags have content wrapped as `AtomicString`.
            if is_atomic or not is_block:
                child = list(target)[-1] if len(target) else None
                text = target.text if child is None else child.tail
                b = '\n\n'.join(unescape_markdown(self.md, [b], is_atomic)).strip('\n')

                if text:
                    text += b if not b else '\n\n' + b
                else:
                    text = b

                if child is None:
                    target.text = mutil.AtomicString(text) if is_atomic else text
                else:  # pragma: no cover
                    # TODO: We would need to build a special plugin to test this,
                    # as none of the default ones do this, but we have verified this
                    # locally. Once we've written a test, we can remove this.
                    child.tail = mutil.AtomicString(text) if is_atomic else text

            # Block tags should have content go through the normal block processor
            else:
                self.parser.state.set('blocks')
                working = self.working
                self.working = entry
                self.parser.parseChunk(target, b)
                self.parser.state.reset()
                self.working = working

    def run(self, parent, blocks):
        """Convert to details/summary block."""

        # Get the appropriate parent for this Block
        temp = self.get_parent(parent)
        if temp is not None:
            parent = temp

        # Did we find a new Block?
        if self.cached_block:
            # Get cached Block and reset the cache
            generic_block, block = self.cached_block
            self.cached_block = None

            # Discard first block as we've already processed what we need from it
            blocks.pop(0)
            if block:
                blocks.insert(0, block)

            # Ensure a "tight" parent list item is converted to "loose".
            if parent is not None and parent.tag in ('li', 'dd'):  # pragma: no cover
                text = parent.text
                if parent.text:
                    parent.text = ''
                    p = etree.SubElement(parent, 'p')
                    p.text = text

            # Create the block element
            el = generic_block._create(parent)

            # Push a Block entry on the stack.
            self.stack.append(BlockEntry(generic_block, el, parent))

            # Split out blocks we care about
            ours, end = self.split_end(blocks, generic_block.length)

            # Parse the text blocks under the Block
            index = len(self.stack) - 1
            self.parse_blocks(ours, self.stack[-1])

            # Remove Block from the stack if we are at the end
            # or add it to the hungry list.
            if end:
                # Run the "on end" event
                generic_block._end(el)
                self.inline_stack.append(self.stack[index])
                del self.stack[index]
            else:
                self.stack[index].hungry = True

        else:
            for r in range(len(self.stack)):
                entry = self.stack[r]
                if entry.hungry and parent is entry.parent:
                    # Find and remove end from the blocks
                    ours, end = self.split_end(blocks, entry.block.length)

                    # Get the target element and parse
                    entry.hungry = False
                    self.parse_blocks(ours, entry)

                    # Clean up if we completed the Block
                    if end:
                        # Run "on end" event
                        entry.block._end(entry.el)
                        self.inline_stack.append(entry)
                        del self.stack[r]
                    else:
                        entry.hungry = True

                    break


class BlocksMgrExtension(Extension):
    """Add generic Blocks extension."""

    def extendMarkdown(self, md):
        """Add Blocks to Markdown instance."""

        md.registerExtension(self)
        util.escape_chars(md, ['/'])
        self.extension = BlocksProcessor(md.parser, md)
        # We want to be right after list indentations are processed
        md.parser.blockprocessors.register(self.extension, "blocks", 89.99)

        tree = BlocksTreeprocessor(md, self.extension)
        md.treeprocessors.register(tree, 'blocks_on_inline_end', 19.99)

    def reset(self):
        """Reset."""

        self.extension._reset()


class BlocksExtension(Extension):
    """Blocks Extension."""

    def register_block_mgr(self, md):
        """Add Blocks to Markdown instance."""

        if 'blocks' not in md.parser.blockprocessors:
            ext = BlocksMgrExtension()
            ext.extendMarkdown(md)
            mgr = ext.extension
        else:
            mgr = md.parser.blockprocessors['blocks']
        return mgr

    def extendMarkdown(self, md):
        """Extend markdown."""

        mgr = self.register_block_mgr(md)
        self.extendMarkdownBlocks(md, mgr)

    def extendMarkdownBlocks(self, md, block_mgr):
        """Extend Markdown blocks."""