File: tables-align-columns.py

package info (click to toggle)
python-markdown2 2.5.4-1
  • links: PTS, VCS
  • area: main
  • in suites: experimental
  • size: 6,492 kB
  • sloc: python: 6,201; perl: 1,493; php: 865; makefile: 37
file content (142 lines) | stat: -rwxr-xr-x 4,798 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
#!/usr/bin/env python

"""
Convert [tables](https://github.com/trentm/python-markdown2/wiki/tables)
in a given Markdown document such that columns are aligned.

Limitations:
- Can't handle tables where cells have a pipe.
"""



# Version string of this script.
__version__ = "1.0.0"

import codecs
import re
import sys
from collections import defaultdict

p = print
def e(*args, **kwargs):
    """Print to stderr.

    Accepts the same arguments as `print`; a caller-supplied `file`
    keyword is overridden so output always goes to `sys.stderr`.
    """
    p(*args, **dict(kwargs, file=sys.stderr))



#---- internal support stuff

def tables_align_columns(path):
    """Return the text of the Markdown file at `path` with table columns aligned.

    Every matched table (a header row, an underline row such as
    `|:--|--:|`, and one or more body rows, preceded by a blank line or the
    start of the file) is rewritten so that all cells of a column are padded
    to a common width and follow the alignment declared in the underline
    row. Text that is not part of a matched table is returned unchanged.

    The file is read as UTF-8 and is not modified; the converted text is
    returned as a string.

    Limitation: a table in which a row has more cells than the underline
    row (typically because a cell contains a literal `|`) is left as-is and
    a warning is written to stderr.
    """
    def _split_row(line):
        # One table line -> list of cell texts without surrounding blanks.
        return [cell.strip() for cell in line.strip().strip('|').split('|')]

    def _table_sub(match):
        head, underline, body = match.groups()

        data_rows = [_split_row(head)]
        data_rows.extend(
            _split_row(line) for line in body.strip('\n').split('\n'))

        # Determine aligns for columns from the colons in the underline row.
        ucells = [cell.strip()
                  for cell in underline.strip('| \t\n').split('|')]
        align_from_col_idx = {}
        for col_idx, cell in enumerate(ucells):
            starts_with_colon = cell.startswith(':')
            ends_with_colon = cell.endswith(':')
            if starts_with_colon and ends_with_colon:
                align_from_col_idx[col_idx] = 'center'
            elif starts_with_colon:
                align_from_col_idx[col_idx] = 'left'
            elif ends_with_colon:
                align_from_col_idx[col_idx] = 'right'
            else:
                align_from_col_idx[col_idx] = None

        # Column width is that of the widest cell, but never narrower than
        # the shortest valid underline for the column: `:-:` (3 chars) when
        # centered, otherwise `--`, `:-` or `-:` (2 chars).
        width_from_col_idx = defaultdict(int)
        for data_row in data_rows:
            for col_idx, cell in enumerate(data_row):
                if align_from_col_idx.get(col_idx) == 'center':
                    min_width = 3
                else:
                    min_width = 2
                width_from_col_idx[col_idx] = max(
                    min_width, width_from_col_idx[col_idx], len(cell))

        table = []
        for data_row in data_rows:
            row = []
            for col_idx, cell in enumerate(data_row):
                if col_idx not in align_from_col_idx:
                    # Limitation: We hit a table row where a cell has a
                    # literal `|` in it. We can't currently handle that, so
                    # lets just skip this table.
                    e('tables-align-columns: warning: skipping a table '
                      'with literal `|`: %r' % match.group(0))
                    return match.group(0)
                width = width_from_col_idx[col_idx]
                align = align_from_col_idx[col_idx]
                if align == 'center':
                    space = width - len(cell)
                    # Floor division: `/` yields a float on Python 3 and
                    # `' ' * float` raises TypeError. Any odd leftover
                    # space goes to the right-hand side.
                    left = space // 2
                    right = space - left
                    row.append(' '*left + cell + ' '*right)
                elif align == 'right':
                    row.append(cell.rjust(width))
                else:
                    row.append(cell.ljust(width))
            table.append(row)

        # Rebuild the underline row at the new widths, one cell per header
        # column, and put it back right after the header row.
        underline_row = []
        for col_idx in range(len(data_rows[0])):
            width = width_from_col_idx[col_idx]
            align = align_from_col_idx[col_idx]
            if align == 'center':
                underline_row.append(':' + '-'*(width-2) + ':')
            elif align == 'right':
                underline_row.append('-'*(width-1) + ':')
            elif align == 'left':
                underline_row.append(':' + '-'*(width-1))
            else:
                underline_row.append('-'*width)
        table.insert(1, underline_row)

        table_str = '\n'.join(('| ' + ' | '.join(r) + ' |') for r in table)
        return table_str + '\n'

    # `newline=''` turns off newline translation so the text is returned
    # with the line endings found in the file; `with` closes the file.
    with open(path, encoding='utf-8', newline='') as f:
        text = f.read()

    less_than_tab = 3
    table_re = re.compile(r'''
            (?:(?<=\n\n)|\A\n?)             # leading blank line

            ^[ ]{0,%d}                      # allowed whitespace
            (.*[|].*)  \n                   # $1: header row (at least one pipe)

            ^[ ]{0,%d}                      # allowed whitespace
            (                               # $2: underline row
                # underline row with leading bar
                (?:  \|\ *:?-+:?\ *  )+  \|?  \n
                |
                # or, underline row without leading bar
                (?:  \ *:?-+:?\ *\|  )+  (?:  \ *:?-+:?\ *  )?  \n
            )

            (                               # $3: data rows
                (?:
                    ^[ ]{0,%d}(?!\ )         # ensure line begins with 0 to less_than_tab spaces
                    .*\|.*  \n
                )+
            )
        ''' % (less_than_tab, less_than_tab, less_than_tab), re.M | re.X)
    return table_re.sub(_table_sub, text)




#---- mainline

def main(argv):
    """Align the tables of each given Markdown file and write it to stdout.

    `argv[0]` is ignored; every following item is taken as a path and passed
    to `tables_align_columns`. The converted text is encoded with stdout's
    encoding (UTF-8 if stdout reports none); characters that encoding cannot
    represent are written as XML character references.
    """
    encoding = sys.stdout.encoding or "utf-8"
    # A Python 3 text stream rejects bytes, so the encoded output goes to
    # the underlying binary buffer when stdout has one.
    binary_out = getattr(sys.stdout, 'buffer', None)
    for path in argv[1:]:
        data = tables_align_columns(path).encode(encoding, 'xmlcharrefreplace')
        if binary_out is not None:
            binary_out.write(data)
        else:
            # Replaced stdout without a binary layer: hand it text instead.
            sys.stdout.write(data.decode(encoding))

# Script entry point: run main() on the command line arguments and use its
# return value as the process exit status.
if __name__ == "__main__":
    exit_status = main(sys.argv)
    sys.exit(exit_status)