1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482
|
# :Id: $Id: mathml_elements.py 10136 2025-05-20 15:48:27Z milde $
# :Copyright: 2024 Günter Milde.
#
# :License: Released under the terms of the `2-Clause BSD license`_, in short:
#
# Copying and distribution of this file, with or without modification,
# are permitted in any medium without royalty provided the copyright
# notice and this notice are preserved.
# This file is offered as-is, without any warranty.
#
# .. _2-Clause BSD license: https://opensource.org/licenses/BSD-2-Clause
"""MathML element classes based on `xml.etree`.
The module is intended for programmatic generation of MathML
and covers the part of `MathML Core`_ that is required by
Docutils' *TeX math to MathML* converter.
This module is PROVISIONAL:
the API is not settled and may change with any minor Docutils version.
.. _MathML Core: https://www.w3.org/TR/mathml-core/
"""
from __future__ import annotations
__docformat__ = 'reStructuredText'
# Usage:
#
# >>> from mathml_elements import *
import numbers
import xml.etree.ElementTree as ET
GLOBAL_ATTRIBUTES = (
    # space-separated list of element classes
    'class',
    # custom data attributes (see HTML) -- not listed:
    # 'data-*',
    # directionality ('ltr', 'rtl')
    'dir',
    # True: normal, False: compact
    'displaystyle',
    # unique identifier
    'id',
    # deprecated color and font-size definitions -- not listed:
    # 'mathbackground',
    # 'mathcolor',
    # 'mathsize',
    # cryptographic nonce ("number used once")
    'nonce',
    # math-depth for the element
    'scriptlevel',
    # CSS styling declarations
    'style',
    # indicate if the element takes input focus
    'tabindex',
)
"""Names of the global MathML attributes.

https://w3c.github.io/mathml-core/#global-attributes
"""
# Base classes
# ------------
class MathElement(ET.Element):
    """Base class for MathML elements.

    Extends `xml.etree.ElementTree.Element` with

    * a `parent` back-reference that is maintained by `append()`,
      `extend()` and item assignment,
    * an optional limit for the number of children (`nchildren`),
    * `append()`/`extend()`/`close()` methods that return the new
      "current node" (insertion point) for sequential tree building.

    The element's tag is the name of the (sub)class.
    """

    nchildren = None
    """Expected number of children or None"""
    # cf. https://www.w3.org/TR/MathML3/chapter3.html#id.3.1.3.2
    parent = None
    """Parent node in MathML element tree."""

    def __init__(self, *children, **attributes) -> None:
        """Set up node with `children` and `attributes`.

        Attribute names are normalised to lowercase.
        You may use "CLASS" to set a "class" attribute.
        Attribute values are converted to strings
        (with True -> "true" and False -> "false").

        >>> math(CLASS='test', level=3, split=True)
        math(class='test', level='3', split='true')
        >>> math(CLASS='test', level=3, split=True).toxml()
        '<math class="test" level="3" split="true"></math>'

        """
        attrib = {k.lower(): self.a_str(v) for k, v in attributes.items()}
        super().__init__(self.__class__.__name__, **attrib)
        self.extend(children)

    @staticmethod
    def a_str(v):
        """Return string representation for attribute value `v`.

        Booleans are written in lowercase ("true"/"false"),
        all other values are passed through `str()`.
        """
        if isinstance(v, bool):
            return str(v).lower()
        return str(v)

    def __repr__(self) -> str:
        """Return full string representation.

        Lists children, text, a `nchildren` value differing from the
        class default, a true `switch` flag, and the attributes.
        """
        args = [repr(child) for child in self]
        if self.text:
            args.append(repr(self.text))
        if self.nchildren != self.__class__.nchildren:
            args.append(f'nchildren={self.nchildren}')
        if getattr(self, 'switch', None):
            args.append('switch=True')
        args += [f'{k}={v!r}' for k, v in self.items() if v is not None]
        return f'{self.tag}({", ".join(args)})'

    def __str__(self) -> str:
        """Return concise, informal string representation.

        Shows the text if there is any, else the children (no attributes).
        """
        if self.text:
            args = repr(self.text)
        else:
            args = ', '.join(f'{child}' for child in self)
        return f'{self.tag}({args})'

    def set(self, key, value) -> None:
        """Set attribute `key` to the string representation of `value`."""
        super().set(key, self.a_str(value))

    def __setitem__(self, key, value) -> None:
        """Replace child element(s) at `key` and update `parent` links.

        `value` is a `MathElement` or an iterable of elements
        (for slice assignment).
        Raise TypeError if the element does not take children or
        if the size check for an iterable `value` fails.
        """
        if self.nchildren == 0:
            raise TypeError(f'Element "{self}" does not take children.')
        if isinstance(value, MathElement):
            value.parent = self
        else:  # value may be an iterable
            if self.nchildren and len(self) + len(value) > self.nchildren:
                raise TypeError(f'Element "{self}" takes only {self.nchildren}'
                                ' children')
            for e in value:
                e.parent = self
        super().__setitem__(key, value)

    def is_full(self):
        """Return boolean indicating whether children may be appended.

        True means the element is full (no more children may be appended).
        Elements with ``nchildren = None`` are never full.
        """
        return self.nchildren is not None and len(self) >= self.nchildren

    def close(self):
        """Close element and return first non-full ancestor or None.

        Closing fixes `nchildren` at the current number of children,
        so that no further children can be appended.
        """
        self.nchildren = len(self)  # mark node as full
        parent = self.parent
        while parent is not None and parent.is_full():
            parent = parent.parent
        return parent

    def append(self, element):
        """Append `element` and return new "current node" (insertion point).

        Append as child element and set the internal `parent` attribute.

        If self is already full, raise TypeError.

        If self is full after appending, call `self.close()`
        (returns first non-full ancestor or None) else return `self`.
        """
        if self.is_full():
            if self.nchildren:
                status = f'takes only {self.nchildren} children'
            else:
                status = 'does not take children'
            raise TypeError(f'Element "{self}" {status}.')
        super().append(element)
        element.parent = self
        if self.is_full():
            return self.close()
        return self

    def extend(self, elements):
        """Sequentially append `elements`. Return new "current node".

        The return value is the result of the last `append()` call
        or `self` if `elements` is empty.

        Raise TypeError if overfull.
        """
        current_node = self
        for element in elements:
            current_node = self.append(element)
        return current_node

    def pop(self, index=-1):
        """Remove and return the child at `index` (default: last child)."""
        element = self[index]
        del self[index]
        return element

    def in_block(self):
        """Return True, if `self` or an ancestor has ``display='block'``.

        Used to find out whether we are in inline vs. displayed maths.
        The nearest element with a "display" attribute decides.
        """
        if self.get('display') is None:
            try:
                return self.parent.in_block()
            except AttributeError:  # no parent (or parent without in_block)
                return False
        return self.get('display') == 'block'

    # XML output:

    def indent_xml(self, space='  ', level=0) -> None:
        """Format XML output with indents.

        Use with care:
          Formatting whitespace is permanently added to the
          `text` and `tail` attributes of `self` and descendants!
        """
        ET.indent(self, space, level)

    def unindent_xml(self) -> None:
        """Strip whitespace at the end of `text` and `tail` attributes...

        to revert changes made by the `indent_xml()` method.
        Use with care, trailing whitespace from the original may be lost.
        The `text` of token elements is left untouched.
        """
        for e in self.iter():
            if not isinstance(e, MathToken) and e.text:
                e.text = e.text.rstrip()
            if e.tail:
                e.tail = e.tail.rstrip()

    def toxml(self, encoding=None):
        """Return an XML representation of the element.

        By default, the return value is a `str` instance. With an explicit
        `encoding` argument, the result is a `bytes` instance in the
        specified encoding. The XML default encoding is UTF-8, any other
        encoding must be specified in an XML document header.

        Name and encoding handling match `xml.dom.minidom.Node.toxml()`
        while `etree.Element.tostring()` returns `bytes` by default.

        The invisible "Apply Function" character (U+2061) is written as
        the named entity ``&ApplyFunction;``.  If `encoding` cannot
        represent this character, the numeric character reference
        generated by `ElementTree` is kept.
        """
        xml = ET.tostring(self, encoding or 'unicode',
                          short_empty_elements=False)
        # Visible representation for "Apply Function" character:
        try:
            xml = xml.replace('\u2061', '&ApplyFunction;')
        except TypeError:  # `xml` is a `bytes` instance
            try:
                apply_function = '\u2061'.encode(encoding)
            except UnicodeEncodeError:
                # Not representable in `encoding`: ElementTree already
                # wrote a (visible) numeric character reference.
                return xml
            xml = xml.replace(apply_function, b'&ApplyFunction;')
        return xml
# Group sub-expressions in a horizontal row
#
# The elements <msqrt>, <mstyle>, <merror>, <mpadded>, <mphantom>,
# <menclose>, <mtd>, <mscarry>, and <math> treat their contents
# as a single inferred mrow formed from all their children.
# (https://www.w3.org/TR/mathml4/#presm_inferredmrow)
#
# MathML Core uses the term "anonymous mrow element".
class MathRow(MathElement):
    """Base class for elements whose children form one (inferred) mrow."""
# 2d Schemata
class MathSchema(MathElement):
    """Base class for schemata expecting 2 or more children.

    The keyword argument `switch` is stored as instance attribute (it does
    not become an XML attribute).  A true value indicates that the last
    two child elements are given in reversed order; they are swapped as
    soon as the element is full, i.e. before XML-export.
    See `msub` for an example.
    """

    nchildren = 2

    def __init__(self, *children, **kwargs) -> None:
        """Store the `switch` flag, then set up children and attributes."""
        # Must be set before the base class appends `children`,
        # because `append()` reads it.
        self.switch = kwargs.pop('switch', False)
        super().__init__(*children, **kwargs)

    def append(self, element):
        """Append element. Normalize order and close if full.

        Return the new "current node" as reported by the base class.
        """
        new_current = super().append(element)
        if not (self.switch and self.is_full()):
            return new_current
        # Swap the last two children to restore the export order.
        last, last_but_one = self[-1], self[-2]
        self[-1] = last_but_one
        self[-2] = last
        self.switch = False
        return new_current
# Token elements represent the smallest units of mathematical notation which
# carry meaning.
class MathToken(MathElement):
    """Token Element: contains textual data instead of children.

    The text data is a required argument of the constructor.
    """

    nchildren = 0

    def __init__(self, text, **attributes) -> None:
        """Set up a token with content `text` and XML `attributes`.

        `text` must be a `str` or a number, it is stored as `str`.
        Raise ValueError for other types.
        """
        super().__init__(**attributes)
        if isinstance(text, (str, numbers.Number)):
            self.text = str(text)
            return
        raise ValueError('MathToken element expects `str` or number,'
                         f' not "{text}".')
# MathML element classes
# ----------------------
class math(MathRow):
    """Top-level MathML element: one single mathematical formula."""
# Token elements
# ~~~~~~~~~~~~~~
class mtext(MathToken):
    """Token element for arbitrary text without notational meaning."""
class mi(MathToken):
    """Identifier (function name, variable, or symbolic constant)."""
class mn(MathToken):
    """Numeric literal.

    >>> mn(3.41).toxml()
    '<mn>3.41</mn>'

    The content is normally a sequence of digits, possibly with a
    separator (a dot or a comma).
    (A value containing a comma must be passed as `str`.)
    """
class mo(MathToken):
    """Operator, Fence, Separator, or Accent.

    Characters with special meaning in XML are escaped on export:

    >>> mo('<').toxml()
    '<mo>&lt;</mo>'

    Besides operators in strict mathematical meaning, this element also
    includes "operators" like parentheses, separators like comma and
    semicolon, or "absolute value" bars.
    """
class mspace(MathElement):
    """Blank space with a size that is defined by its attributes.

    Additional attributes: `depth`, `height`, `width`.
    The element takes neither children nor text.

    See also `mphantom`.
    """

    nchildren = 0
# General Layout Schemata
# ~~~~~~~~~~~~~~~~~~~~~~~
class mrow(MathRow):
    """Generic element to group children as a horizontal row.

    An <mrow> that is not required is removed when it is closed
    (see `mrow.close()`).
    """

    def transfer_attributes(self, other) -> None:
        """Transfer attributes from self to other.

        "List values" (class, style) are appended to existing values
        of `other`, all other values replace existing values.
        """
        separators = {'class': ' ', 'style': '; '}
        for name, value in self.items():
            separator = separators.get(name)
            if separator is not None and value:
                present = other.get(name)
                if present:
                    # drop trailing separator characters before joining
                    value = separator.join(
                        (present.rstrip(separator), value))
            other.set(name, value)

    def close(self):
        """Close element and return first non-full ancestor or None.

        Remove <mrow> if it has only one child element:
        the child takes the place of the <mrow> in the parent
        and inherits the attributes of the <mrow>.
        """
        parent = self.parent
        if parent is None or len(self) != 1:
            return super().close()
        # replace `self` with single child
        only_child = self[0]
        try:
            position = list(parent).index(self)
            parent[position] = only_child
            only_child.parent = parent
        except (AttributeError, ValueError):
            return None
        self.transfer_attributes(only_child)
        return super().close()
class mfrac(MathSchema):
    """Fraction or fraction-like object (e.g. a binomial coefficient)."""
class msqrt(MathRow):
    """Square root. For roots with an explicit index see `mroot`."""

    # \sqrt expects one argument or a group
    nchildren = 1
class mroot(MathSchema):
    """Root with an explicit index. For square roots see `msqrt`."""
class mstyle(MathRow):
    """Style Change.

    Modern browsers treat <mstyle> like an <mrow> element.
    The element may, however, still be relevant for compatibility
    with MathML implementations outside browsers.
    """
class merror(MathRow):
    """Container whose contents are displayed as error message."""
class menclose(MathRow):
    """Render content inside an enclosing notation...

    ... that is specified by the `notation` attribute.

    Non-standard but still required by Firefox for boxed expressions.
    """

    # \boxed expects one argument or a group
    nchildren = 1
class mpadded(MathRow):
    """Adjust the space around the content."""

    # nchildren = 1  # currently not used by latex2mathml
class mphantom(MathRow):
    """Placeholder: content is rendered invisibly but keeps its dimensions."""

    # \phantom expects one argument or a group
    nchildren = 1
# Script and Limit Schemata
# ~~~~~~~~~~~~~~~~~~~~~~~~~
class msub(MathSchema):
    """Expression with an attached subscript."""
class msup(MathSchema):
    """Expression with an attached superscript."""
class msubsup(MathSchema):
    """Expression with both a subscript and a superscript attached."""

    nchildren = 3
# Examples:
#
# The `switch` attribute reverses the order of the last two children:
# >>> msub(mn(1), mn(2)).toxml()
# '<msub><mn>1</mn><mn>2</mn></msub>'
# >>> msub(mn(1), mn(2), switch=True).toxml()
# '<msub><mn>2</mn><mn>1</mn></msub>'
#
# >>> msubsup(mi('base'), mn(1), mn(2)).toxml()
# '<msubsup><mi>base</mi><mn>1</mn><mn>2</mn></msubsup>'
# >>> msubsup(mi('base'), mn(1), mn(2), switch=True).toxml()
# '<msubsup><mi>base</mi><mn>2</mn><mn>1</mn></msubsup>'
class munder(msub):
    """Expression with an accent or a limit attached below."""
class mover(msup):
    """Expression with an accent or a limit attached above."""
class munderover(msubsup):
    """Expression with accents or limits attached both below and above."""
# Tabular Math
# ~~~~~~~~~~~~
class mtable(MathElement):
    """Table or matrix."""
class mtr(MathRow):
    """Row of a table or a matrix."""
class mtd(MathRow):
    """Cell of a table or a matrix."""
|