File: __init__.py

package info (click to toggle)
plastex 3.1-4
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 6,132 kB
  • sloc: python: 23,341; xml: 18,076; javascript: 7,755; ansic: 46; makefile: 40; sh: 26
file content (124 lines) | stat: -rw-r--r-- 3,975 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
import re
from plasTeX.Renderers.PageTemplate import Renderer as _Renderer
from plasTeX.Renderers.PageTemplate import xmltemplate
from plasTeX import Command
# lxml is an optional dependency: have_lxml records whether the import
# succeeded so that DocBook.processFileContent can pick between the
# tree-based cleanup and the regular-expression fallback.
try:
    from lxml import etree
except ImportError:
    have_lxml = False
else:
    have_lxml = True
# Prefix -> namespace URI map passed as ``namespaces=`` to the ``d:...``
# element lookups in clean_para.
xns = {'d': 'http://docbook.org/ns/docbook'}

def drop_tag(elem):
    """
    Remove the tag, but not its children or text.
    The children and text are merged into the parent.

    ``elem`` must provide the lxml element interface used here
    (``getparent``, ``getprevious``, ``text``, ``tail``, child indexing)
    and must have a parent; the parent is modified in place.  Nothing is
    returned.
    """
    parent = elem.getparent()
    previous = elem.getprevious()

    # Leading text of the dropped element is appended to whatever precedes
    # it: the previous sibling's tail, or the parent's text when it is the
    # first child.  Nodes whose tag is not a string (lxml uses non-string
    # tags for comments and processing instructions) contribute no text.
    if elem.text and isinstance(elem.tag, str):
        if previous is None:
            parent.text = (parent.text or '') + elem.text
        else:
            previous.tail = (previous.tail or '') + elem.text

    if elem.tail:
        # Use len() instead of truth-testing the element: lxml emits a
        # FutureWarning for ``if elem:`` and announces that its meaning
        # will change.
        if len(elem):
            # The tail follows the last child once the children are hoisted.
            last = elem[-1]
            last.tail = (last.tail or '') + elem.tail
        elif previous is None:
            parent.text = (parent.text or '') + elem.tail
        else:
            previous.tail = (previous.tail or '') + elem.tail

    # Replace the element by its children at the same position.
    index = parent.index(elem)
    parent[index:index + 1] = elem[:]

def clean_para(tree, name):
    """
    Unwrap ``d:para`` elements that are direct children of any ``d:<name>``
    descendant of ``tree``.

    Each such para is removed with drop_tag, so its text and children are
    merged into the enclosing element.  ``tree`` is modified in place and
    also returned.
    """
    containers = tree.findall('.//d:%s' % name, namespaces=xns)
    for container in containers:
        # Only direct children are unwrapped; an empty result simply
        # means there is nothing to do for this container.
        for para in container.findall('d:para', namespaces=xns):
            drop_tag(para)
    return tree

def get_see(term):
    """
    Split an index term from its ``|`` suffix and classify that suffix.

    The string is cut at the first ``|`` only, so a suffix that itself
    contains ``|`` no longer raises ValueError.

    Returns a ``(term, see, seealso)`` tuple:

    * ``term`` is the text before the first ``|`` (the whole input when
      there is none);
    * ``seealso`` is the suffix if it contains ``seealso``, else None;
    * ``see`` is the suffix if it contains ``see`` but not ``seealso``,
      else None.

    Any other suffix is discarded.
    """
    see = None
    seealso = None
    term, _, fmt = term.partition('|')
    # 'seealso' must be tested first because it also contains 'see'.
    if 'seealso' in fmt:
        seealso = fmt
    elif 'see' in fmt:
        see = fmt

    return term, see, seealso

def parse_indexentry(s):
    """
    Break one level of an index entry into its components.

    Returns ``(term, sortstr, see, seealso)``:

    * ``see`` / ``seealso`` come from the ``|`` suffix as classified by
      get_see;
    * if what remains contains ``@``, the text before the first ``@`` is
      returned as ``term`` and the text after it as ``sortstr``;
      otherwise ``sortstr`` is None.

    The ``|`` suffix is removed before looking for ``@`` so that an entry
    such as ``a@b|see{c}`` yields ``sortstr == 'b'`` and keeps its
    cross-reference instead of leaving ``|see{c}`` inside ``sortstr``.
    Splitting happens on the first ``@`` only, so additional ``@``
    characters no longer raise ValueError.
    """
    # NOTE(review): makeindex writes entries as ``sortkey@displayed text``;
    # confirm that the templates consuming ``term``/``sortstr`` expect the
    # before/after-@ assignment used here.
    term, see, seealso = get_see(s)
    sortstr = None
    if '@' in term:
        term, sortstr = term.split('@', 1)
    return (term, sortstr, see, seealso)

class index(Command):
    """
    Command named ``index`` that parses its string argument into index
    entry components and stores them in ``self.data``.

    ``self.data`` is a dict with the keys ``primary``, ``secondary``,
    ``prisort``, ``secsort``, ``see`` and ``seealso``; components that are
    absent from the entry are None.
    """
    # Single argument, read as a plain string.
    args = 'argument:str'

    def invoke(self, tex):
        """
        Parse the argument and populate ``self.data``.

        The entry is split at the first ``!`` into a primary and a
        secondary part, each handed to parse_indexentry.  A ``see`` /
        ``seealso`` found on the primary part takes precedence over one
        on the secondary part.
        """
        Command.invoke(self, tex)
        entry = self.attributes['argument']

        # Defaults for single-level entries; without them the dict below
        # would reference unassigned names.
        secondary = None
        secsort = None

        if '!' in entry:
            # Split on the first '!' only so that deeper entries such as
            # ``a!b!c`` do not fail to unpack; the remainder stays in the
            # secondary part.
            primary, secondary = entry.split('!', 1)

            primary, prisort, see, seealso = parse_indexentry(primary)
            if see or seealso:
                # Keep the cross-reference already found on the primary.
                secondary, secsort, _, _ = parse_indexentry(secondary)
            else:
                secondary, secsort, see, seealso = parse_indexentry(secondary)
        else:
            primary, prisort, see, seealso = parse_indexentry(entry)

        self.data = {
            'primary': primary,
            'secondary': secondary,
            'prisort': prisort,
            'secsort': secsort,
            'see': see,
            'seealso': seealso,
        }

class DocBook(_Renderer):
    """ Page-template renderer producing DocBook XML files """
    fileExtension = '.xml'
    imageTypes = ['.png','.jpg','.jpeg','.gif']
    vectorImageTypes = ['.svg']

    def __init__(self, *args, **kwargs):
        """Initialise the base renderer and register the 'xml' engine."""
        _Renderer.__init__(self, *args, **kwargs)
        self.registerEngine('xml', None, '.xml', xmltemplate)

    def cleanup(self, document, files, postProcess=None):
        """Delegate to the base renderer's cleanup and return its result."""
        return _Renderer.cleanup(self, document, files, postProcess=postProcess)

    def processFileContent(self, document, s):
        """
        Post-process the content of one output file and return it.

        After the base class processing, para wrappers are stripped:
        through the lxml tree when lxml is importable, otherwise through
        a series of case-insensitive regular-expression substitutions.
        """
        content = _Renderer.processFileContent(self, document, s)

        if have_lxml:
            root = etree.fromstring(content)
            for container in ('itemizedlist', 'table', 'term', 'para'):
                root = clean_para(root, container)
            return etree.tostring(root, encoding='unicode')

        # Fallback without lxml: fixed substitutions, applied in order.
        fixed_rules = (
            (r'</partintro>\s*<partintro>', ''),
            (r'<para>\s*(<articleinfo>)', r'\1'),
            (r'(</articleinfo>)\s*</para>', r'\1'),
            (r'<para>\s*</para>', ''),
        )
        for pattern, replacement in fixed_rules:
            content = re.sub(pattern, replacement, content, flags=re.I)

        # Drop a <para> opening right after, and a </para> closing right
        # before, each of these container tags.
        for container in ('itemizedlist', 'term', 'para'):
            content = re.sub(r'(<%s>)\s*<para>' % container, r'\1',
                             content, flags=re.I)
            content = re.sub(r'</para>\s*(</%s>)' % container, r'\1',
                             content, flags=re.I)

        return content

# Expose the DocBook class under the module-level name ``Renderer``.
# NOTE(review): presumably plasTeX looks this name up when loading a
# renderer package -- confirm against the loader.
Renderer = DocBook