1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345
|
"""
General utilities.
MIT license.
Copyright (c) 2017 Isaac Muse <isaacmuse@gmail.com>
"""
from markdown.inlinepatterns import InlineProcessor
import xml.etree.ElementTree as etree
from collections import namedtuple
import sys
import copy
import re
import html
from urllib.request import pathname2url, url2pathname
from urllib.parse import urlparse
from functools import wraps
import warnings
# A lone ASCII letter, i.e. what `urlparse` reports as the scheme for `c:/path`.
RE_WIN_DRIVE_LETTER = re.compile(r"^[A-Za-z]$")
# A drive letter and colon, optionally followed by a backslash-led remainder (`c:` or `c:\path`).
RE_WIN_DRIVE_PATH = re.compile(r"^[A-Za-z]:(?:\\.*)?$")
# Schemes that are treated as real URLs (tested with `match`, so anchored at the start only).
RE_URL = re.compile('(http|ftp)s?|data|mailto|tel|news')
# A path of the form `///c:/...`, i.e. a drive path prefixed with the default protocol marker.
RE_WIN_DEFAULT_PROTOCOL = re.compile(r"^///[A-Za-z]:(?:/.*)?$")

# Reduce `sys.platform` to one of three coarse platform names.
if sys.platform.startswith('win'):
    _PLATFORM = "windows"
elif sys.platform == "darwin":  # pragma: no cover
    _PLATFORM = "osx"
else:
    _PLATFORM = "linux"

# True when running on Python 3.9 or newer.
PY39 = sys.version_info >= (3, 9)
def clamp(value, mn, mx):
    """
    Restrict `value` to the range bounded by `mn` and `mx`.

    Either bound may be `None`, in which case that side is left unbounded.
    The upper bound is applied first and the lower bound second, so when
    both are given and `mn` exceeds `mx`, `mn` is returned.
    """

    result = value
    if mx is not None:
        result = min(result, mx)
    if mn is not None:
        result = max(result, mn)
    return result
def is_win():  # pragma: no cover
    """Return `True` when the detected platform name is `windows`."""

    return _PLATFORM == "windows"
def is_linux():  # pragma: no cover
    """Return `True` when the detected platform name is `linux` (the fallback for anything not Windows or macOS)."""

    return _PLATFORM == "linux"
def is_mac():  # pragma: no cover
    """Return `True` when the detected platform name is `osx`."""

    return _PLATFORM == "osx"
def url2path(path):
    """
    URL to path.

    Converts the path component of a URL into a local file system path by
    delegating to `urllib.request.url2pathname`.

    `path` (`str`):
        The URL path component to convert.

    Returns the converted path as a `str`.
    """

    return url2pathname(path)
def path2url(url):
    """
    Path to URL.

    Converts a local file system path into a URL path by delegating to
    `urllib.request.pathname2url`.

    `url` (`str`):
        Despite its name, this is the file system path to convert.

    Returns the converted URL path as a `str`. On Windows, a result of the
    form `///c:/...` has its leading `///` removed.
    """

    path = pathname2url(url)
    # If on windows, replace the notation to use a default protocol `///` with nothing.
    if is_win() and RE_WIN_DEFAULT_PROTOCOL.match(path):
        path = path.replace('///', '', 1)
    return path
def get_code_points(s):
    """Split `s` into a list with one entry per Unicode code point."""

    return [*s]
def get_ord(c):
    """Return the integer Unicode code point of the single character `c`."""

    code_point = ord(c)
    return code_point
def get_char(value):
    """Return the character whose Unicode code point is the integer `value`."""

    character = chr(value)
    return character
def escape_chars(md, echrs):
    """
    Register additional escapable characters on a Markdown instance.

    The existing `md.ESCAPED_CHARS` list is never mutated, as appending to it
    would permanently alter the shared, global list. Instead, a shallow copy
    is extended with every character of `echrs` not already present, and the
    copy is assigned back so that only **this** instance is affected.
    """

    chars = copy.copy(md.ESCAPED_CHARS)
    for char in echrs:
        if char in chars:
            # Already escapable (or a duplicate within `echrs`).
            continue
        chars.append(char)
    md.ESCAPED_CHARS = chars
def parse_url(url):
    """
    Split `url` into its components and classify it.

    The input is HTML-unescaped and run through `urlparse`. The result is then
    classified as either a URL or a file path (anything that is not clearly a
    file path is called a URL), and whether a file path is absolute.
    Recognized file paths are normalized: the parts are combined into `path`,
    backslashes become forward slashes, and the scheme is set to `file` where
    it was only implied.

    Something like `c:/` is assumed to be a Windows path. No check is made
    that this **is** a Windows system, but 'nix users really shouldn't be
    creating weird names like `c:` for their folders.

    Returns the tuple
    `(scheme, netloc, path, params, query, fragment, is_url, is_absolute)`.
    """

    is_url = False
    is_absolute = False
    scheme, netloc, path, params, query, fragment = urlparse(html.unescape(url))

    if RE_URL.match(scheme):
        # A scheme we know belongs to a URL
        is_url = True
    elif not (scheme or netloc or path):
        # Nothing but (possibly) a fragment, query or params
        is_url = True
    elif scheme == 'file':
        # Every `file` form is an absolute path; some need their parts recombined
        if RE_WIN_DRIVE_PATH.match(netloc):
            # file://c:/path or file://c:\path
            path = '/' + (netloc + path).replace('\\', '/')
            netloc = ''
        elif netloc.startswith('\\'):
            # file://\c:\path or file://\\path
            path = (netloc + path).replace('\\', '/')
            netloc = ''
        # Otherwise: file:///path, which needs no changes
        is_absolute = True
    elif RE_WIN_DRIVE_LETTER.match(scheme):
        # c:/path (the drive letter was parsed as the scheme)
        normalized = path.replace('\\', '/')
        path = f'/{scheme}:{normalized}'
        scheme = 'file'
        netloc = ''
        is_absolute = True
    elif not scheme and netloc and url.startswith('//'):
        # //file/path
        path = '//' + netloc + path
        scheme = 'file'
        netloc = ''
        is_absolute = True
    elif scheme and netloc:
        # Some other scheme with a network location: a non-file path or strange URL
        is_url = True
    elif path.startswith(('/', '\\')):
        # /root path
        is_absolute = True

    return (scheme, netloc, path, params, query, fragment, is_url, is_absolute)
class PatSeqItem(namedtuple('PatSeqItem', ['pattern', 'builder', 'tags', 'full_recursion'])):
    """
    Pattern sequence item.

    A named tuple of `pattern`, `builder`, `tags` and `full_recursion`,
    where `full_recursion` may be omitted and then defaults to `False`.
    """

    def __new__(cls, pattern, builder, tags, full_recursion=False):
        """Create the item, defaulting `full_recursion` to `False`."""

        fields = (pattern, builder, tags, full_recursion)
        return super().__new__(cls, *fields)
class PatternSequenceProcessor(InlineProcessor):
    """
    Processor for handling complex nested patterns such as strong and em matches.

    `PATTERNS` is an ordered list whose items expose `pattern`, `builder`,
    `tags` and `full_recursion` (as `PatSeqItem` does). Patterns are tried in
    list order and each match is turned into an element tree by the builder
    named in the item.
    """

    PATTERNS = []

    def build_single(self, m, tag, full_recursion, idx):
        """Return a single `tag` element whose content is parsed from group 2 of `m`."""

        element = etree.Element(tag)
        self.parse_sub_patterns(m.group(2), element, None, full_recursion, idx)
        return element

    def build_double(self, m, tags, full_recursion, idx):
        """
        Return double tags: the second tag nested at the start of the first.

        `tags` is a comma separated pair (`outer,inner`). Group 2 fills the
        inner element. If the match has exactly three groups, group 3 is
        parsed into the outer element after the inner one.
        """

        outer_tag, inner_tag = tags.split(",")
        outer = etree.Element(outer_tag)
        inner = etree.Element(inner_tag)
        self.parse_sub_patterns(m.group(2), inner, None, full_recursion, idx)
        outer.append(inner)
        if len(m.groups()) == 3:
            # Trailing content belongs to the outer element, following the inner one.
            self.parse_sub_patterns(m.group(3), outer, inner, full_recursion, idx)
        return outer

    def build_double2(self, m, tags, full_recursion, idx):
        """
        Return double tags (variant 2): `<strong>text <em>text</em></strong>`.

        `tags` is a comma separated pair (`outer,inner`). Group 2 is parsed
        into the outer element, then the inner element is appended and filled
        from group 3.
        """

        outer_tag, inner_tag = tags.split(",")
        outer = etree.Element(outer_tag)
        inner = etree.Element(inner_tag)
        self.parse_sub_patterns(m.group(2), outer, None, full_recursion, idx)
        outer.append(inner)
        self.parse_sub_patterns(m.group(3), inner, None, full_recursion, idx)
        return outer

    def parse_sub_patterns(self, data, parent, last, full_recursion, idx):
        """
        Parses sub patterns.

        `data` (`str`):
            text to evaluate.

        `parent` (`etree.Element`):
            Parent to attach text and sub elements to.

        `last` (`etree.Element`):
            Last appended child to parent. Can also be None if parent has no children.

        `full_recursion` (`bool`):
            When false, only patterns positioned after `idx` in `PATTERNS` are tried.

        `idx` (`int`):
            Current pattern index that was used to evaluate the parent.
        """

        offset = 0
        pos = 0
        length = len(data)

        while pos < length:
            # `compiled_re` is not defined in this class; presumably it is provided by
            # the `InlineProcessor` base and flags a potential emphasis/strong start.
            if not self.compiled_re.match(data, pos):
                # No potential start here, try the next character.
                pos += 1
                continue

            matched = False
            # See if we can match one of the sequence patterns at this position.
            for index, item in enumerate(self.PATTERNS):
                # Only evaluate patterns that are after what was used on the parent
                if not full_recursion and index <= idx:
                    continue
                hit = item.pattern.match(data, pos)
                if not hit:
                    continue

                # Text found before the match becomes the tail of the last
                # child when there is one, otherwise the parent's own text.
                pending = data[offset:hit.start(0)]
                if pending:
                    if last is not None:
                        last.tail = pending
                    else:
                        parent.text = pending

                child = self.build_element(hit, item.builder, item.tags, item.full_recursion, index)
                parent.append(child)
                last = child

                # Continue (including with the remaining patterns) past the matched hunk.
                offset = pos = hit.end(0)
                matched = True

            if not matched:
                # We matched nothing, move on to the next character
                pos += 1

        # Whatever was not consumed is attached as a trailing text node.
        remainder = data[offset:]
        if remainder:
            if last is not None:
                last.tail = remainder
            else:
                parent.text = remainder

    def build_element(self, m, builder, tags, full_recursion, index):
        """Build the element for `m` with the builder named `builder` (`single` is the fallback)."""

        if builder == 'double2':
            build = self.build_double2
        elif builder == 'double':
            build = self.build_double
        else:
            build = self.build_single
        return build(m, tags, full_recursion, index)

    def handleMatch(self, m, data):
        """
        Parse patterns.

        Tries each item of `PATTERNS` at the start of `m` and returns
        `(element, start, end)` for the first that matches, or
        `(None, None, None)` when none do.
        """

        for index, item in enumerate(self.PATTERNS):
            found = item.pattern.match(data, m.start(0))
            if found:
                start = found.start(0)
                end = found.end(0)
                el = self.build_element(found, item.builder, item.tags, item.full_recursion, index)
                return el, start, end
        return None, None, None
def deprecated(message, stacklevel=2):  # pragma: no cover
    """
    Emit a `DeprecationWarning` each time the wrapped function/method is called.

    The warning text is the wrapped function's name followed by `message`.
    With the default `stacklevel`, the warning is attributed to the caller
    of the wrapped function.

    Usage:

        @deprecated("This method will be removed in version X; use Y instead.")
        def some_method():
            pass
    """

    def decorate(func):
        @wraps(func)
        def wrapped(*args, **kwargs):
            # Built on every call so the current `func.__name__` is always used.
            notice = f"'{func.__name__}' is deprecated. {message}"
            warnings.warn(notice, DeprecationWarning, stacklevel)
            return func(*args, **kwargs)

        return wrapped

    return decorate
def warn_deprecated(message, stacklevel=2):  # pragma: no cover
    """
    Emit a `DeprecationWarning` carrying `message`.

    With the default `stacklevel`, the warning is attributed to the caller
    of the function that called `warn_deprecated`.
    """

    warnings.warn(message, DeprecationWarning, stacklevel)
|