File: pygments_ansi_color.py

package info (click to toggle)
datalad 1.3.1-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 7,132 kB
  • sloc: python: 69,299; sh: 1,521; makefile: 220
file content (191 lines) | stat: -rw-r--r-- 6,154 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
# -*- coding: utf-8 -*-
"""Pygments lexer for text containing ANSI color codes."""
from __future__ import (
    absolute_import,
    unicode_literals,
)

import itertools
import re

import pygments.lexer
import pygments.token

Color = pygments.token.Token.Color

_ansi_code_to_color = {
    0: 'Black',
    1: 'Red',
    2: 'Green',
    3: 'Yellow',
    4: 'Blue',
    5: 'Magenta',
    6: 'Cyan',
    7: 'White',
}


def _token_from_lexer_state(bold, fg_color, bg_color):
    """Construct a token given the current lexer state.

    We can only emit one token even though we have a multiple-tuple state.
    To do work around this, we construct tokens like "BoldRed".
    """
    token_name = ''

    if bold:
        token_name += 'Bold'

    if fg_color:
        token_name += fg_color

    if bg_color:
        token_name += 'BG' + bg_color

    if token_name == '':
        return pygments.token.Text
    else:
        return getattr(Color, token_name)


def color_tokens(fg_colors, bg_colors):
    """Return color tokens for a given set of colors.

    Pygments doesn't have a generic "color" token; instead everything is
    contextual (e.g. "comment" or "variable"). That doesn't make sense for us,
    where the colors actually *are* what we care about.

    This function will register combinations of tokens (things like "Red" or
    "BoldRedBGGreen") based on the colors passed in.

    You can also define the tokens yourself, but note that the token names are
    *not* currently guaranteed to be stable between releases as I'm not really
    happy with this approach.

    Usage:

        fg_colors = bg_colors = {
            'Black': '#000000',
            'Red': '#EF2929',
            'Green': '#8AE234',
            'Yellow': '#FCE94F',
            'Blue': '#3465A4',
            'Magenta': '#c509c5',
            'Cyan': '#34E2E2',
            'White': '#ffffff',
        }
        class MyStyle(pygments.styles.SomeStyle):
            styles = dict(pygments.styles.SomeStyle.styles)
            styles.update(color_tokens(fg_colors, bg_colors))
    """
    styles = {}

    for bold, fg_color, bg_color in itertools.product(
            (False, True),
            {None} | set(fg_colors),
            {None} | set(bg_colors),
    ):
        token = _token_from_lexer_state(bold, fg_color, bg_color)
        if token is not pygments.token.Text:
            value = []
            if bold:
                value.append('bold')
            if fg_color:
                value.append(fg_colors[fg_color])
            if bg_color:
                value.append('bg:' + bg_colors[bg_color])
            styles[token] = ' '.join(value)

    return styles


class AnsiColorLexer(pygments.lexer.RegexLexer):
    name = 'ANSI Color'
    aliases = ('ansi-color', 'ansi', 'ansi-terminal')
    flags = re.DOTALL | re.MULTILINE

    def __init__(self, *args, **kwargs):
        super(AnsiColorLexer, self).__init__(*args, **kwargs)
        self.reset_state()

    def reset_state(self):
        self.bold = False
        self.fg_color = None
        self.bg_color = None

    @property
    def current_token(self):
        return _token_from_lexer_state(
            self.bold, self.fg_color, self.bg_color,
        )

    def process(self, match):
        """Produce the next token and bit of text.

        Interprets the ANSI code (which may be a color code or some other
        code), changing the lexer state and producing a new token. If it's not
        a color code, we just strip it out and move on.

        Some useful reference for ANSI codes:
          * http://ascii-table.com/ansi-escape-sequences.php
        """
        # "after_escape" contains everything after the start of the escape
        # sequence, up to the next escape sequence. We still need to separate
        # the content from the end of the escape sequence.
        after_escape = match.group(1)

        # TODO: this doesn't handle the case where the values are non-numeric.
        # This is rare but can happen for keyboard remapping, e.g.
        # '\x1b[0;59;"A"p'
        parsed = re.match(
            r'([0-9;=]*?)?([a-zA-Z])(.*)$',
            after_escape,
            re.DOTALL | re.MULTILINE,
        )
        if parsed is None:
            # This shouldn't ever happen if we're given valid text + ANSI, but
            # people can provide us with utter junk, and we should tolerate it.
            text = after_escape
        else:
            value, code, text = parsed.groups()

            if code == 'm':  # "m" is "Set Graphics Mode"
                # Special case \x1b[m is a reset code
                if value == '':
                    self.reset_state()
                else:
                    values = value.split(';')
                    for value in values:
                        try:
                            value = int(value)
                        except ValueError:
                            # Shouldn't ever happen, but could with invalid
                            # ANSI.
                            continue
                        else:
                            fg_color = _ansi_code_to_color.get(value - 30)
                            bg_color = _ansi_code_to_color.get(value - 40)
                            if fg_color:
                                self.fg_color = fg_color
                            elif bg_color:
                                self.bg_color = bg_color
                            elif value == 1:
                                self.bold = True
                            elif value == 22:
                                self.bold = False
                            elif value == 39:
                                self.fg_color = None
                            elif value == 49:
                                self.bg_color = None
                            elif value == 0:
                                self.reset_state()

        yield match.start(), self.current_token, text

    tokens = {
        # states have to be native strings
        str('root'): [
            (r'\x1b\[([^\x1b]*)', process),
            (r'[^\x1b]+', pygments.token.Text),
        ],
    }