1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352
|
#
# This file is part of m.css.
#
# Copyright © 2017, 2018, 2019, 2020, 2021, 2022, 2023
# Vladimír Vondruš <mosra@centrum.cz>
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the "Software"),
# to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
# and/or sell copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included
# in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
# DEALINGS IN THE SOFTWARE.
#
import os.path
import docutils
from docutils.parsers import rst
from docutils.parsers.rst.roles import set_classes
from docutils.utils.error_reporting import SafeString, ErrorString, locale_encoding
from docutils.parsers.rst import Directive, directives
import docutils.parsers.rst.directives.misc
from docutils import io, nodes, utils, statemachine
from pygments import highlight
from pygments.formatters import HtmlFormatter
from pygments.lexers import TextLexer, BashSessionLexer, get_lexer_by_name
import logging
logger = logging.getLogger(__name__)
try:
import ansilexer
except ImportError:
# The above worked well on Pelican 4.2 and before, and also works with
# other m.css tools like the Python doc generator. Pelican 4.5.0 changed to
# "namespace plugins" and broke packaged plugins completely, 4.5.1 was
# fixed to load namespaced plugins again, however the loading code is
# different from 4.2 and thus anything from the root plugins/ directory
# *isn't* in PATH anymore. Thus attempting to import those modules fails
# and as a DIRTY hack I have to add the path back.
#
# TODO: Pelican 4.5+ treats everything that isn't in the pelican.plugins
# namespace as "legacy plugins", which is unfortunate because then I
# wouldn't be able to easily share the plugin code with other m.css tools
# which don't (and shouldn't need to) care about Pelican at all. Allowing
# 3rd party plugins without enforcing implicit assumptions on them (the
# namespace, an unprefixed register() function...) would probably involve a
# complex discussion with Pelican maintainers which I don't have the energy
# for right now. Let's hope the "legacy plugins" codepath stays in for the
# foreseeable future.
import sys
sys.path.append(os.path.join(os.path.dirname(__file__), '..'))
import ansilexer
# Code filter tables. Both stay None until register_mcss() fills them from the
# M_CODE_FILTERS_PRE / M_CODE_FILTERS_POST settings. _highlight() looks up
# entries either by the lexer name alone (applied to every snippet highlighted
# with that lexer) or by a (lexer name, filter name) tuple (applied only when
# that filter is explicitly requested for the snippet).
filters_pre = None
filters_post = None
def _apply_filters(table, lexer_name, filters, text):
    """Run the filters registered in `table` for given lexer over `text`.

    Explicitly requested `filters` are looked up under a
    ``(lexer_name, filter_name)`` key and applied first, in the order given.
    The filter registered under the bare ``lexer_name`` key, if any, is
    applied last. A `table` of None (filters not configured yet) leaves the
    text untouched.
    """
    if table is None:
        return text

    # First apply local filters, if any
    for name in filters:
        f = table.get((lexer_name, name))
        if f: text = f(text)

    # Then a global filter, if any
    f = table.get(lexer_name)
    if f: text = f(text)

    return text

def _highlight(code, language, options, *, is_block, filters=()):
    """Highlight a code snippet with Pygments.

    Parameters:

    code        Source text to highlight
    language    Pygments lexer alias, or ``ansi`` to pick the bundled ANSI
                lexer. An unknown alias logs a warning and falls back to the
                plain-text lexer.
    options     Dict of formatter options. A ``hl-lines`` key is renamed to
                ``hl_lines`` *in place*, the rest is passed to the formatter
                as keyword arguments.
    is_block    If False, leading whitespace is stripped from the output in
                addition to trailing whitespace
    filters     Names of filters to apply on top of the per-language ones

    Returns a ``(css_class, highlighted_html)`` tuple, where the class is
    ``m-console`` for the Bash session and ANSI lexers and ``m-code`` for
    everything else.
    """
    # Use our own lexer for ANSI
    if language == 'ansi':
        lexer = ansilexer.AnsiLexer()
    else:
        try:
            lexer = get_lexer_by_name(language)
        except ValueError:
            logger.warning("No lexer found for language '{}', code highlighting disabled".format(language))
            lexer = TextLexer()

    is_ansi = isinstance(lexer, ansilexer.AnsiLexer)
    if is_ansi or isinstance(lexer, BashSessionLexer):
        class_ = 'm-console'
    else:
        class_ = 'm-code'

    # Pygments wants the underscored option
    if 'hl-lines' in options:
        options['hl_lines'] = options.pop('hl-lines')

    if is_ansi:
        formatter = ansilexer.HtmlAnsiFormatter(**options)
    else:
        formatter = HtmlFormatter(nowrap=True, **options)

    # The filter tables are only read here, so no `global` statement is
    # needed. They are None if register_mcss() wasn't called yet, in which
    # case no filtering is done instead of blowing up on None.get().
    code = _apply_filters(filters_pre, lexer.name, filters, code)

    highlighted = highlight(code, lexer, formatter).rstrip()
    # Strip whitespace around if inline code, strip only trailing whitespace if
    # a block
    if not is_block: highlighted = highlighted.lstrip()

    highlighted = _apply_filters(filters_post, lexer.name, filters, highlighted)

    return class_, highlighted
class Code(Directive):
    """
    Code block directive. Takes the language as its only argument and renders
    the content through _highlight() into a literal block that wraps the
    resulting raw HTML.
    """

    required_arguments = 1
    optional_arguments = 0
    final_argument_whitespace = True
    has_content = True
    option_spec = {
        'hl-lines': directives.unchanged,
        # Legacy alias to hl-lines (I hate underscores)
        'hl_lines': directives.unchanged,
        'class': directives.class_option,
        'filters': directives.unchanged
    }

    def run(self):
        """Return a single literal block node with the highlighted content."""
        self.assert_has_content()

        opts = self.options

        # Pull the user-specified CSS classes out of the options so they
        # don't get passed further to the formatter
        set_classes(opts)
        css_classes = list(opts.pop('classes', []))

        # Legacy alias to hl-lines
        if 'hl_lines' in opts:
            opts['hl-lines'] = opts.pop('hl_lines')

        requested_filters = opts.pop('filters', '').split()

        kind, markup = _highlight('\n'.join(self.content), self.arguments[0], opts, is_block=True, filters=requested_filters)
        css_classes.append(kind)

        block = nodes.literal_block('', classes=css_classes)
        block.append(nodes.raw('', markup, format='html'))
        return [block]
class Include(docutils.parsers.rst.directives.misc.Include):
    """
    Replacement for the builtin include directive that renders :code:
    includes through the Code directive and adds the :start-on:,
    :strip-prefix: and empty :end-before: options.
    """

    option_spec = {
        'code': directives.unchanged,
        'tab-width': int,
        'start-line': int,
        'end-line': int,
        'start-after': directives.unchanged_required,
        'start-on': directives.unchanged_required,
        'end-before': directives.unchanged,
        'strip-prefix': directives.unchanged,
        'class': directives.class_option,
        'filters': directives.unchanged,
        'hl-lines': directives.unchanged
    }

    has_content = False

    def run(self):
        """
        Verbatim copy of docutils.parsers.rst.directives.misc.Include.run()
        that just calls to our Code instead of builtin CodeBlock, is without
        the rarely useful :encoding:, :literal: and :name: options and adds
        support for :start-on:, empty :end-before: and :strip-prefix:.

        Returns the nodes produced by Code.run() if the :code: option is
        present. Otherwise the included lines are inserted into the input of
        the state machine and an empty list is returned. Problems with
        reading the file or with finding the delimiting texts are raised as
        severe system messages.
        """
        source = self.state_machine.input_lines.source(
            self.lineno - self.state_machine.input_offset - 1)
        source_dir = os.path.dirname(os.path.abspath(source))
        path = directives.path(self.arguments[0])
        if path.startswith('<') and path.endswith('>'):
            path = os.path.join(self.standard_include_path, path[1:-1])
        path = os.path.normpath(os.path.join(source_dir, path))
        path = utils.relative_path(None, path)
        path = nodes.reprunicode(path)
        e_handler = self.state.document.settings.input_encoding_error_handler
        tab_width = self.options.get(
            'tab-width', self.state.document.settings.tab_width)
        try:
            self.state.document.settings.record_dependencies.add(path)
            include_file = io.FileInput(source_path=path,
                                        error_handler=e_handler)
        except UnicodeEncodeError:
            raise self.severe('Problems with "%s" directive path:\n'
                              'Cannot encode input file path "%s" '
                              '(wrong locale?).' %
                              (self.name, SafeString(path)))
        except IOError as error:
            raise self.severe('Problems with "%s" directive path:\n%s.' %
                      (self.name, ErrorString(error)))
        startline = self.options.get('start-line', None)
        endline = self.options.get('end-line', None)
        try:
            if startline or (endline is not None):
                lines = include_file.readlines()
                rawtext = ''.join(lines[startline:endline])
            else:
                rawtext = include_file.read()
        except UnicodeError as error:
            raise self.severe('Problem with "%s" directive:\n%s' %
                              (self.name, ErrorString(error)))

        # Whether rawtext currently begins at a start of a line. True for the
        # whole file as well as for a :start-line: slice, a :start-after: cut
        # can end up anywhere inside a line.
        at_line_start = True

        # start-after/end-before: no restrictions on newlines in match-text,
        # and no restrictions on matching inside lines vs. line boundaries
        after_text = self.options.get('start-after', None)
        if after_text:
            # skip content in rawtext before *and incl.* a matching text
            after_index = rawtext.find(after_text)
            if after_index < 0:
                raise self.severe('Problem with "start-after" option of "%s" '
                                  'directive:\nText not found.' % self.name)
            rawtext = rawtext[after_index + len(after_text):]
            at_line_start = after_text.endswith('\n')

        # Compared to start-after, this includes the matched line
        on_text = self.options.get('start-on', None)
        if on_text:
            if at_line_start and rawtext.startswith(on_text):
                # The very first line has no preceding newline to anchor the
                # search below on, so it has to be checked for explicitly
                on_index = 0
            else:
                on_index = rawtext.find('\n' + on_text)
                if on_index < 0:
                    raise self.severe('Problem with "start-on" option of "%s" '
                                      'directive:\nText not found.' % self.name)
            # For a match that isn't on the first line the slice begins with
            # the newline preceding it
            rawtext = rawtext[on_index:]

        # Compared to builtin include directive, the end-before can be empty,
        # in which case it simply matches the first empty line (which is
        # usually end of the code block)
        before_text = self.options.get('end-before', None)
        if before_text is not None:
            # skip content in rawtext after *and incl.* a matching text
            if before_text == '':
                before_index = rawtext.find('\n\n')
            else:
                before_index = rawtext.find(before_text)
            if before_index < 0:
                raise self.severe('Problem with "end-before" option of "%s" '
                                  'directive:\nText not found.' % self.name)
            rawtext = rawtext[:before_index]

        is_code = 'code' in self.options

        # Don't convert tabs to spaces for code if `tab_width` is negative.
        # This has to be decided before the prefix stripping below, otherwise
        # re-splitting the raw text afterwards would throw the stripping away.
        if is_code and tab_width < 0:
            include_lines = rawtext.splitlines()
        else:
            include_lines = statemachine.string2lines(rawtext, tab_width,
                                                      convert_whitespace=True)

        # Strip a common prefix from all lines. Useful for example when
        # including a reST snippet that's embedded in a comment, or cutting
        # away excessive indentation. Can be wrapped in quotes in order to
        # avoid trailing whitespace in reST markup.
        prefix = self.options.get('strip-prefix')
        if prefix:
            if prefix[0] == prefix[-1] and prefix[0] in ['\'', '"']:
                prefix = prefix[1:-1]
            for i, line in enumerate(include_lines):
                if line.startswith(prefix): include_lines[i] = line[len(prefix):]
                # Strip the prefix also if the line is just the prefix alone,
                # with trailing whitespace removed
                elif line.rstrip() == prefix.rstrip(): include_lines[i] = ''

        if is_code:
            self.options['source'] = path
            codeblock = Code(self.name,
                             [self.options.pop('code')], # arguments
                             self.options,
                             include_lines, # content
                             self.lineno,
                             self.content_offset,
                             self.block_text,
                             self.state,
                             self.state_machine)
            return codeblock.run()

        self.state_machine.insert_input(include_lines, path)
        return []
def code(role, rawtext, text, lineno, inliner, options=None, content=None):
    """Inline code role.

    Without a ``language`` option the text is put verbatim into a literal
    node, otherwise it's highlighted through _highlight() first. The text is
    taken from between the first and last backtick of `rawtext` instead of
    from `text`. The `content` argument is unused.

    Returns a ``([node], [])`` tuple, i.e. a single literal node and no
    system messages.
    """
    # Work on a private copy so neither the caller's dict nor a shared
    # default value gets modified by the option handling below
    options = dict(options) if options else {}

    # In order to properly preserve backslashes (well, and backticks)
    text = rawtext[rawtext.find('`') + 1:rawtext.rfind('`')]

    set_classes(options)
    classes = list(options.pop('classes', []))

    # If language is not specified, render a simple literal
    if 'language' not in options:
        # NOTE(review): the classes extracted above are not passed to the
        # node here, so a :class: given without :language: has no effect.
        # Kept as-is, confirm whether that's intended.
        node = nodes.literal(rawtext, '', **options)
        node.append(nodes.raw('', utils.unescape(text), format='html'))
        return [node], []

    language = options.pop('language')
    # Not sure why language is duplicated in classes?
    if language in classes: classes.remove(language)

    filters = options.pop('filters', '').split()

    class_, highlighted = _highlight(utils.unescape(text), language, options, is_block=False, filters=filters)
    classes.append(class_)

    node = nodes.literal(rawtext, '', classes=classes, **options)
    node.append(nodes.raw('', highlighted, format='html'))
    return [node], []

code.options = {'class': directives.class_option,
                'language': directives.unchanged,
                'filters': directives.unchanged}
def register_mcss(mcss_settings, **kwargs):
    """
    Register the code / include directives and the code role with docutils
    and pick up the filter tables from `mcss_settings`. Extra keyword
    arguments are accepted and ignored.
    """
    global filters_pre, filters_post

    # `code-block` and `sourcecode` are builtin aliases to .. code:: in
    # docutils:
    #   https://github.com/docutils-mirror/docutils/blob/e88c5fb08d5cdfa8b4ac1020dd6f7177778d5990/docutils/parsers/rst/languages/en.py#L22-L24
    # Since a lot of existing markup (especially coming from Sphinx) uses
    # .. code-block:: and since there's no reason for .. code-block:: /
    # .. sourcecode:: to behave like unpatched docutils, those get pointed to
    # our directive as well.
    for directive_name in ('code', 'code-block', 'sourcecode'):
        rst.directives.register_directive(directive_name, Code)
    rst.directives.register_directive('include', Include)
    rst.roles.register_canonical_role('code', code)

    # Missing settings mean no filters at all
    filters_pre = mcss_settings.get('M_CODE_FILTERS_PRE', {})
    filters_post = mcss_settings.get('M_CODE_FILTERS_POST', {})
# Below is only Pelican-specific functionality. If Pelican is not found, these
# do nothing.
def _pelican_configure(pelicanobj):
    """
    Call register_mcss() with the code filter settings that are present in
    the settings of given Pelican instance.
    """
    available = pelicanobj.settings
    wanted = ('M_CODE_FILTERS_PRE', 'M_CODE_FILTERS_POST')
    register_mcss(mcss_settings={key: available[key] for key in wanted if key in available})
def register(): # for Pelican
    """Connect _pelican_configure() to the `initialized` signal of Pelican."""
    # Imported here so the module stays usable without Pelican installed
    import pelican.signals
    pelican.signals.initialized.connect(_pelican_configure)
|