1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168
|
# -*- coding: utf-8 -*-
# markdown is released under the BSD license
# Copyright 2007, 2008 The Python Markdown Project (v. 1.7 and later)
# Copyright 2004, 2005, 2006 Yuri Takhteyev (v. 0.2-1.6b)
# Copyright 2004 Manfred Stienstra (the original version)
#
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of the <organization> nor the
# names of its contributors may be used to endorse or promote products
# derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE PYTHON MARKDOWN PROJECT ''AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL ANY CONTRIBUTORS TO THE PYTHON MARKDOWN PROJECT
# BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
from __future__ import unicode_literals
import re
import sys
"""
Python 3 Stuff
=============================================================================
"""
PY3 = sys.version_info[0] == 3
if PY3:
string_type = str
text_type = str
int2str = chr
else:
string_type = basestring
text_type = unicode
int2str = unichr
"""
Constants you might want to modify
-----------------------------------------------------------------------------
"""
BLOCK_LEVEL_ELEMENTS = re.compile("^(p|div|h[1-6]|blockquote|pre|table|dl|ol|ul"
"|script|noscript|form|fieldset|iframe|math"
"|hr|hr/|style|li|dt|dd|thead|tbody"
"|tr|th|td|section|footer|header|group|figure"
"|figcaption|aside|article|canvas|output"
"|progress|video)$", re.IGNORECASE)
# Placeholders
STX = '\u0002' # Use STX ("Start of text") for start-of-placeholder
ETX = '\u0003' # Use ETX ("End of text") for end-of-placeholder
INLINE_PLACEHOLDER_PREFIX = STX+"klzzwxh:"
INLINE_PLACEHOLDER = INLINE_PLACEHOLDER_PREFIX + "%s" + ETX
INLINE_PLACEHOLDER_RE = re.compile(INLINE_PLACEHOLDER % r'([0-9]{4})')
AMP_SUBSTITUTE = STX+"amp"+ETX
"""
Constants you probably do not need to change
-----------------------------------------------------------------------------
"""
RTL_BIDI_RANGES = ( ('\u0590', '\u07FF'),
# Hebrew (0590-05FF), Arabic (0600-06FF),
# Syriac (0700-074F), Arabic supplement (0750-077F),
# Thaana (0780-07BF), Nko (07C0-07FF).
('\u2D30', '\u2D7F'), # Tifinagh
)
# Extensions should use "markdown.util.etree" instead of "etree" (or do `from
# markdown.util import etree`). Do not import it by yourself.
try: # Is the C implemenation of ElementTree available?
import xml.etree.cElementTree as etree
from xml.etree.ElementTree import Comment
# Serializers (including ours) test with non-c Comment
etree.test_comment = Comment
if etree.VERSION < "1.0.5":
raise RuntimeError("cElementTree version 1.0.5 or higher is required.")
except (ImportError, RuntimeError):
# Use the Python implementation of ElementTree?
import xml.etree.ElementTree as etree
if etree.VERSION < "1.1":
raise RuntimeError("ElementTree version 1.1 or higher is required")
"""
AUXILIARY GLOBAL FUNCTIONS
=============================================================================
"""
def isBlockLevel(tag):
"""Check if the tag is a block level HTML tag."""
if isinstance(tag, string_type):
return BLOCK_LEVEL_ELEMENTS.match(tag)
# Some ElementTree tags are not strings, so return False.
return False
"""
MISC AUXILIARY CLASSES
=============================================================================
"""
class AtomicString(text_type):
"""A string which should not be further processed."""
pass
class Processor(object):
def __init__(self, markdown_instance=None):
if markdown_instance:
self.markdown = markdown_instance
class HtmlStash(object):
"""
This class is used for stashing HTML objects that we extract
in the beginning and replace with place-holders.
"""
def __init__ (self):
""" Create a HtmlStash. """
self.html_counter = 0 # for counting inline html segments
self.rawHtmlBlocks=[]
def store(self, html, safe=False):
"""
Saves an HTML segment for later reinsertion. Returns a
placeholder string that needs to be inserted into the
document.
Keyword arguments:
* html: an html segment
* safe: label an html segment as safe for safemode
Returns : a placeholder string
"""
self.rawHtmlBlocks.append((html, safe))
placeholder = self.get_placeholder(self.html_counter)
self.html_counter += 1
return placeholder
def reset(self):
self.html_counter = 0
self.rawHtmlBlocks = []
def get_placeholder(self, key):
return "%swzxhzdk:%d%s" % (STX, key, ETX)
|