File: table.py

package info (click to toggle)
python-textile 1:4.0.3-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 460 kB
  • sloc: python: 2,791; makefile: 17; sh: 7
file content (233 lines) | stat: -rw-r--r-- 9,109 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
# -*- coding: utf-8 -*-
from xml.etree import ElementTree

from textile.regex_strings import (align_re_s, cls_re_s, regex_snippets,
                                   table_span_re_s, valign_re_s, pnct_re_s)
from textile.utils import generate_tag, parse_attributes

try:
    import regex as re
except ImportError:
    import re


class Table(object):
    """Render the source of one Textile table as an HTML ``<table>`` string.

    The raw source is split into row chunks.  An optional caption (first
    chunk only), an optional colgroup row and optional row-group markers
    (``^`` thead, ``~`` tfoot, ``-`` tbody) are recognised; every remaining
    chunk becomes a ``<tr>`` holding ``<td>``/``<th>`` cells.
    """
    caption_re = re.compile(
        (r'^\|\=(?P<capts>{s}{a}{c})\. '
         r'(?P<cap>[^\n]*)(?P<row>.*)'
         .format(**{'s': table_span_re_s, 'a': align_re_s, 'c': cls_re_s})),
        re.S)
    colgroup_re = re.compile(
        r'^\|:(?P<cols>{s}{a}{c}\. .*)'
        .format(**{'s': table_span_re_s, 'a': align_re_s, 'c': cls_re_s}),
        re.M)
    heading_re = re.compile(
        r'^_(?={0}|{1})'.format(regex_snippets['space'], pnct_re_s))

    # The patterns below are loop-invariant, so they are compiled once here
    # instead of once per row/cell inside process().

    # A row chunk ends at a `|` followed by any run of the `space` snippet at
    # the end of a line.
    row_split_re = re.compile(
        r'\|{0}*?$'.format(regex_snippets['space']), re.M)
    # Optional leading row-group marker line, then the remainder of the chunk.
    # The marker group is non-capturing; only the named groups are read.
    rowgroup_re = re.compile(
        (r"(?:^\|(?P<part>{v})(?P<rgrpatts>{s}{a}{c})"
         r"\.\s*$\n)?^(?P<row>.*)").format(
             **{'v': valign_re_s, 's': table_span_re_s,
                'a': align_re_s, 'c': cls_re_s}),
        re.S | re.M)
    # Row-level attributes in front of the first cell.
    row_atts_re = re.compile(
        r'^(?P<ratts>{0}{1}\. )(?P<row>.*)'.format(align_re_s, cls_re_s))
    # Cell-level attributes (optionally preceded by the `_` heading marker).
    cell_atts_re = re.compile(
        r'^(?P<catts>_?{0}{1}{2}\. )(?P<cell>.*)'.format(
            table_span_re_s, align_re_s, cls_re_s),
        re.S)
    # Leading spacing of a cell, kept apart so list processing cannot eat it.
    cell_space_re = re.compile(
        r'(?P<space>{0}*)(?P<cell>.*)'.format(regex_snippets['space']),
        re.S)

    def __init__(self, textile, tatts, rows, summary):
        """Store the table source and parse the table-level attributes.

        Args:
            textile: the calling processor object.  This class reads its
                ``restricted`` and ``lite`` attributes and calls its
                ``redcloth_list``, ``textileLists`` and ``doTagBr`` methods.
            tatts: attribute string for the table, handed to
                ``parse_attributes`` with the ``'table'`` element.
            rows: raw source of the table rows.
            summary: optional summary text; when truthy it is stripped and
                stored as the ``summary`` attribute.
        """
        self.textile = textile
        self.attributes = parse_attributes(tatts, 'table', restricted=self.textile.restricted)
        if summary:
            self.attributes.update(summary=summary.strip())
        self.input = rows
        self.caption = ''
        self.colgroup = ''
        self.content = []

    def process(self):
        """Build and return the HTML for the whole table.

        Returns:
            The generated ``<table>`` markup, prefixed with a tab and
            followed by two newlines.
        """
        restricted = self.textile.restricted
        # Looked up here rather than at class level because the section
        # classes are defined further down in the module.
        grptypes = {'^': Thead, '~': Tfoot, '-': Tbody}
        rgrp = None
        groups = []
        chunks = [x for x in self.row_split_re.split(self.input) if x]
        for i, row in enumerate(chunks):
            row = row.lstrip()

            # Caption -- only occurs on row 1, otherwise treat '|=. foo |...'
            # as a normal center-aligned cell.
            cmtch = self.caption_re.match(row)
            if i == 0 and cmtch:
                caption = Caption(restricted=restricted, **cmtch.groupdict())
                self.caption = '\n{0}'.format(caption.caption)
                row = cmtch.group('row').lstrip()
                if row == '':
                    continue

            # Colgroup -- A colgroup row will not necessarily end with a |.
            # Hence it may include the next row of actual table data.
            if row[:2] == '|:':
                # Only the first line is the colgroup; everything after the
                # first newline is kept as row data.  partition() copes with
                # zero, one or many newlines, where unpacking split('\n')
                # into two names fails on more than one newline.
                colgroup_data, _, row = row[2:].partition('\n')
                colgroup_atts, cols = colgroup_data, None
                if '|' in colgroup_data:
                    colgroup_atts, cols = colgroup_data.split('|', 1)
                colgrp = Colgroup(cols, colgroup_atts, restricted=restricted)
                self.colgroup = colgrp.process()
                if row == '':
                    continue

            # search the row for a table group - thead, tfoot, or tbody
            grpmatch = self.rowgroup_re.match(row.lstrip())
            part = grpmatch.group('part')
            if part:
                # we're about to start a new group, so process the current one
                # and add it to the output
                if rgrp:
                    groups.append('\n\t{0}'.format(rgrp.process()))
                rgrp = grptypes[part](
                    grpmatch.group('rgrpatts'), restricted=restricted)
            row = grpmatch.group('row')

            rmtch = self.row_atts_re.search(row.lstrip())
            if rmtch:
                row_atts = parse_attributes(rmtch.group('ratts'), 'tr', restricted=restricted)
                row = rmtch.group('row')
            else:
                row_atts = {}

            # create a row to hold the cells.
            r = Row(row_atts, row)
            # The text before the first `|` is not a cell, hence [1:].
            for cell in row.split('|')[1:]:
                ctag = 'th' if self.heading_re.match(cell) else 'td'

                cell_match = self.cell_atts_re.search(cell)
                if cell_match:
                    cell_atts = parse_attributes(
                        cell_match.group('catts'), 'td', restricted=restricted)
                    cell = cell_match.group('cell')
                else:
                    cell_atts = {}

                if not self.textile.lite:
                    # Run list processing on the cell text only and put the
                    # leading spacing back in front afterwards.
                    a = self.cell_space_re.search(cell)
                    cell = self.textile.redcloth_list(a.group('cell'))
                    cell = self.textile.textileLists(cell)
                    cell = '{0}{1}'.format(a.group('space'), cell)

                # create a cell
                c = Cell(ctag, cell, cell_atts)
                cline_tag = '\n\t\t\t{0}'.format(c.process())
                # add the cell to the row
                r.cells.append(self.textile.doTagBr(ctag, cline_tag))

            # if we're in a group, add it to the group's rows, else add it
            # directly to the content
            if rgrp:
                rgrp.rows.append(r.process())
            else:
                self.content.append(r.process())

        # if there's still an rgrp, process it and add it to the output
        if rgrp:
            groups.append('\n\t{0}'.format(rgrp.process()))

        content = '{0}{1}{2}{3}\n\t'.format(
            self.caption, self.colgroup, ''.join(groups), ''.join(self.content))
        tbl = generate_tag('table', content, self.attributes)
        return '\t{0}\n\n'.format(tbl)


class Caption(object):
    """Hold the rendered ``<caption>`` element of a table.

    The rendered markup is available as ``self.caption`` right after
    construction.
    """

    def __init__(self, capts, cap, row, restricted):
        """Parse the caption attributes and render the caption text.

        Args:
            capts: attribute string for the caption.
            cap: raw caption text.
            row: accepted but not used here; Table builds this object from
                the caption regex's ``groupdict()``, which includes ``row``.
            restricted: forwarded to ``parse_attributes``.
        """
        self.attributes = parse_attributes(capts, restricted=restricted)
        self.caption = self.process(cap)

    def process(self, cap):
        """Return the tab-indented ``<caption>`` tag for the stripped text."""
        caption_tag = generate_tag('caption', cap.strip(), self.attributes)
        return f'\t{caption_tag}'


class Colgroup(object):
    """Build the ``<colgroup>`` markup for a table's colgroup row."""

    def __init__(self, cols, atts, restricted):
        """Store the raw colgroup pieces; nothing is parsed until process().

        Args:
            cols: ``|``-separated column specs, or ``None`` when the colgroup
                row has no columns.
            atts: attribute string for the ``<colgroup>`` element itself.
            restricted: forwarded to ``parse_attributes``.
        """
        self.restricted = restricted
        self.cols = cols
        self.attributes = atts
        self.row = ''

    def process(self):
        """Return the serialized ``<colgroup>``, prefixed with newline + tab."""
        root = ElementTree.Element(
            'colgroup',
            attrib=parse_attributes(self.attributes, 'col', restricted=self.restricted))
        root.text = '\n\t'

        if self.cols is not None:
            # Dots are dropped from the spec, then every `|`-separated item
            # becomes one <col> child.
            for spec in self.cols.replace('.', '').split('|'):
                spec_atts = parse_attributes(spec.strip(), 'col', restricted=self.restricted)
                ElementTree.SubElement(root, 'col', spec_atts)

        markup = ElementTree.tostring(root, encoding='unicode')
        # Remove an xml declaration if one was emitted (python versions
        # differ), then place each element on its own tab-indented line.
        markup = (markup
                  .replace("<?xml version='1.0' encoding='UTF-8'?>\n", '')
                  .replace('><', '>\n\t<'))
        return '\n\t{0}'.format(markup)


class Row(object):
    """Collect rendered cell strings and wrap them in a ``<tr>`` element."""

    def __init__(self, attributes, row):
        """Start an empty row.

        Args:
            attributes: attributes passed to ``generate_tag`` for the ``tr``.
            row: accepted for the caller's convenience; not stored.
        """
        self.cells = []
        self.attributes = attributes
        self.tag = 'tr'

    def process(self):
        """Return the ``<tr>`` markup, prefixed with a newline and two tabs."""
        # Cells are concatenated as-is; the closing tag goes on its own
        # indented line.
        inner = f"{''.join(self.cells)}\n\t\t"
        row_tag = generate_tag('tr', inner, self.attributes)
        return f'\n\t\t{row_tag}'


class Cell(object):
    """A single table cell: tag name, content and attributes."""

    def __init__(self, tag, content, attributes):
        """Remember the pieces needed to render the cell later.

        Args:
            tag: element name handed to ``generate_tag``.
            content: cell content.
            attributes: attributes for the element.
        """
        self.attributes = attributes
        self.content = content
        self.tag = tag

    def process(self):
        """Return the cell rendered through ``generate_tag``."""
        rendered = generate_tag(self.tag, self.content, self.attributes)
        return rendered


class _TableSection(object):
    """Shared base for row groups: collects rows and wraps them in one tag."""

    def __init__(self, tag, attributes, restricted):
        """Set up an empty section.

        Args:
            tag: element name used when the section is rendered.
            attributes: attribute string, parsed with ``parse_attributes``.
            restricted: forwarded to ``parse_attributes``.
        """
        self.tag = tag
        self.attributes = parse_attributes(attributes, restricted=restricted)
        self.rows = []

    def process(self):
        """Return the section tag wrapping all collected rows."""
        # Rows are joined as-is, then a newline + tab precedes the closing tag.
        body = f"{''.join(self.rows)}\n\t"
        return generate_tag(self.tag, body, self.attributes)


class Thead(_TableSection):
    """Row group rendered with the ``thead`` tag."""

    def __init__(self, attributes, restricted):
        super().__init__(
            tag='thead', attributes=attributes, restricted=restricted)


class Tbody(_TableSection):
    """Row group rendered with the ``tbody`` tag."""

    def __init__(self, attributes, restricted):
        super().__init__(
            tag='tbody', attributes=attributes, restricted=restricted)


class Tfoot(_TableSection):
    """Row group rendered with the ``tfoot`` tag."""

    def __init__(self, attributes, restricted):
        super().__init__(
            tag='tfoot', attributes=attributes, restricted=restricted)