File: entity.py

package info (click to toggle)
thunderbird 1%3A140.6.0esr-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 4,617,324 kB
  • sloc: cpp: 7,722,016; javascript: 5,902,751; ansic: 3,901,079; python: 1,413,347; xml: 653,997; asm: 462,284; java: 180,927; sh: 113,491; makefile: 20,460; perl: 14,288; objc: 13,059; yacc: 4,583; pascal: 3,352; lex: 1,720; ruby: 1,222; exp: 762; sql: 715; awk: 580; php: 436; lisp: 430; sed: 70; csh: 10
file content (53 lines) | stat: -rw-r--r-- 1,651 bytes parent folder | download | duplicates (15)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
# Process html entity - &#123;, &#xAF;, &quot;, ...
import re

from ..common.entities import entities
from ..common.utils import fromCodePoint, isValidEntityCode
from .state_inline import StateInline

# Numeric character reference anchored at the start of the searched string:
# "&#" followed by either "x" plus 1-6 hex digits or 1-7 decimal digits, then
# ";". Group 1 is the part between "&#" and ";" (including a leading "x"/"X"
# for the hexadecimal form). Matching is case-insensitive.
DIGITAL_RE = re.compile(r"^&#((?:x[a-f0-9]{1,6}|[0-9]{1,7}));", re.IGNORECASE)
# Named character reference anchored at the start of the searched string:
# "&", a letter followed by 1-31 letters/digits, then ";". Group 1 is the
# name without the surrounding "&" and ";". Matching is case-insensitive, so
# whether a name is actually known must be checked separately by the caller.
NAMED_RE = re.compile(r"^&([a-z][a-z0-9]{1,31});", re.IGNORECASE)


def entity(state: StateInline, silent: bool) -> bool:
    """Try to consume an HTML entity starting at ``state.pos``.

    Two forms are recognised: numeric references (matched by ``DIGITAL_RE``)
    and named references (matched by ``NAMED_RE`` and additionally required
    to be a key of ``entities``).

    :param state: inline parser state; ``src``, ``pos`` and ``posMax`` are
        read, and ``pos`` is advanced past the entity on success.
    :param silent: when true, only advance ``state.pos`` without pushing
        a token.
    :return: ``True`` if an entity was consumed, ``False`` otherwise
        (in which case ``state`` is left unchanged).
    """
    start = state.pos
    limit = state.posMax

    # An entity needs a leading "&" and at least one more character after it.
    if state.src[start] != "&" or start + 1 >= limit:
        return False

    is_numeric = state.src[start + 1] == "#"
    # Both patterns are anchored with "^", so they are run against the tail.
    tail = state.src[start:]

    if is_numeric:
        found = DIGITAL_RE.search(tail)
        if found is None:
            return False
    else:
        found = NAMED_RE.search(tail)
        # A syntactically valid name that is not a known entity is not consumed.
        if found is None or found.group(1) not in entities:
            return False

    markup = found.group(0)

    if not silent:
        token = state.push("text_special", "", 0)
        if is_numeric:
            reference = found.group(1)
            if reference[0] in "xX":
                code = int(reference[1:], 16)
            else:
                code = int(reference, 10)
            # Invalid code points are rendered as U+FFFD.
            decoded = fromCodePoint(code if isValidEntityCode(code) else 0xFFFD)
        else:
            decoded = entities[found.group(1)]
        token.content = decoded
        token.markup = markup
        token.info = "entity"

    state.pos += len(markup)
    return True