File: pygments_sh.py

package info (click to toggle)
turing 0.11-4
  • links: PTS, VCS
  • area: main
  • in suites:
  • size: 11,560 kB
  • sloc: python: 106,582; xml: 101; makefile: 53; sh: 29
file content (383 lines) | stat: -rw-r--r-- 14,333 bytes parent folder | download | duplicates (6)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
# -*- coding: utf-8 -*-
"""
This module contains Syntax Highlighting mode and the QSyntaxHighlighter based
on pygments.

.. note:: This code is taken and adapted from the IPython project.
"""
import logging
import mimetypes
import sys

from pygments.formatters.html import HtmlFormatter
from pygments.lexer import Error, RegexLexer, Text, _TokenType
from pygments.lexers import get_lexer_for_filename, get_lexer_for_mimetype
from pygments.lexers.agile import PythonLexer
from pygments.lexers.compiled import CLexer, CppLexer
from pygments.lexers.dotnet import CSharpLexer
from pygments.lexers.special import TextLexer
from pygments.styles import get_style_by_name, get_all_styles
from pygments.token import Whitespace, Comment, Token
from pygments.util import ClassNotFound
from pyqode.qt import QtGui
from pyqode.qt.QtCore import QRegExp

from pyqode.core.api.syntax_highlighter import (
    SyntaxHighlighter, ColorScheme, TextBlockUserData)


def _logger():
    """ Gives access to the logger named after this module. """
    module_logger = logging.getLogger(__name__)
    return module_logger


#: Names of all the pygments styles that could be found, in sorted order.
PYGMENTS_STYLES = sorted(get_all_styles())

# The 'darcula' and 'qt' styles are appended explicitly when running from a
# frozen application.
if hasattr(sys, 'frozen'):
    PYGMENTS_STYLES.extend(['darcula', 'qt'])


def get_tokens_unprocessed(self, text, stack=('root',)):
    """ Split ``text`` into ``(index, tokentype, value)`` triples.

    Monkeypatched replacement for ``RegexLexer.get_tokens_unprocessed``: the
    lexing starts from ``self._saved_state_stack`` when the lexer carries that
    attribute, and the final state stack is stored back on the lexer under
    the same attribute once the whole text has been consumed.

    :param text: the text to tokenize.
    :param stack: initial state stack; only used when the lexer has no
        ``_saved_state_stack`` attribute.
    :returns: a generator. For plain token types it yields
        ``(index, tokentype, value)``; for callable actions it yields
        whatever the action yields.
    """
    pos = 0
    tokendefs = self._tokens
    if hasattr(self, '_saved_state_stack'):
        # resume from the state left by a previous run
        statestack = list(self._saved_state_stack)
    else:
        statestack = list(stack)
    statetokens = tokendefs[statestack[-1]]
    while True:
        for rexmatch, action, new_state in statetokens:
            m = rexmatch(text, pos)
            if m:
                if action is not None:
                    if type(action) is _TokenType:
                        yield pos, action, m.group()
                    else:
                        # callback action: it generates its own items
                        for item in action(self, m):
                            yield item
                pos = m.end()
                if new_state is not None:
                    # state transition
                    if isinstance(new_state, tuple):
                        for state in new_state:
                            if state == '#pop':
                                statestack.pop()
                            elif state == '#push':
                                statestack.append(statestack[-1])
                            else:
                                statestack.append(state)
                    elif isinstance(new_state, int):
                        # pop
                        del statestack[new_state:]
                    elif new_state == '#push':
                        statestack.append(statestack[-1])
                    else:
                        assert False, "wrong state def: %r" % new_state
                    statetokens = tokendefs[statestack[-1]]
                break
        else:
            # none of the rules of the current state matched at ``pos``
            try:
                if text[pos] == '\n':
                    # at EOL, reset state to "root"
                    statestack = ['root']
                    statetokens = tokendefs['root']
                    # report the index of the newline itself, then step
                    # over it (the index used to be off by one)
                    yield pos, Text, '\n'
                    pos += 1
                    continue
                yield pos, Error, text[pos]
                pos += 1
            except IndexError:
                # ``pos`` ran past the end of the text: we are done
                break
    # only reached when the generator has been fully consumed
    self._saved_state_stack = list(statestack)

# Monkeypatch! Every RegexLexer now tokenizes through the module-level
# get_tokens_unprocessed function, which keeps the final state stack on the
# lexer instance.
RegexLexer.get_tokens_unprocessed = get_tokens_unprocessed


# Even with the above monkey patch to store state, multiline comments do not
# work since they are stateless (Pygments uses a single multiline regex for
# these comments, but Qt lexes by line). So we need to append a state for
# comments to the C and C++ lexers. This means that nested multiline
# comments will appear to be valid C/C++, but this is better than the
# alternative for now.
def replace_pattern(tokens, new_pattern):
    """ Substitute 'new_pattern' for every rule of the RegexLexer token
        dictionary 'tokens' whose token (second item of the rule tuple) is
        the same as the token of 'new_pattern'.

        The state lists are modified in place; entries that are not tuples
        are left alone.
    """
    for rules in tokens.values():
        for position, rule in enumerate(rules):
            if not isinstance(rule, tuple):
                continue
            if rule[1] == new_pattern[1]:
                rules[position] = new_pattern

# More monkeypatching!
# Rule that matches the opening '/*' of a comment and enters the 'comment'
# state.
COMMENT_START = (r'/\*', Comment.Multiline, 'comment')
# Rules of the 'comment' state; everything matched in this state is emitted
# as Comment.Multiline.
COMMENT_STATE = [(r'[^*/]', Comment.Multiline),
                 # a nested '/*' pushes the current state once more
                 (r'/\*', Comment.Multiline, '#push'),
                 # '*/' leaves the current comment level
                 (r'\*/', Comment.Multiline, '#pop'),
                 # lone '*' or '/' that is not part of a delimiter
                 (r'[*/]', Comment.Multiline)]
# The patterns must be replaced BEFORE the 'comment' state is registered:
# replace_pattern rewrites every rule whose token is Comment.Multiline, which
# would include the rules of COMMENT_STATE itself.
replace_pattern(CLexer.tokens, COMMENT_START)
replace_pattern(CppLexer.tokens, COMMENT_START)
# The same list object is shared by all the patched lexers.
CLexer.tokens['comment'] = COMMENT_STATE
CppLexer.tokens['comment'] = COMMENT_STATE
# NOTE(review): replace_pattern is not called for CSharpLexer, so nothing in
# this file makes its rules enter the 'comment' state - confirm this entry is
# actually reachable.
CSharpLexer.tokens['comment'] = COMMENT_STATE


class PygmentsSH(SyntaxHighlighter):
    """ Highlights code using the pygments parser.

    This mode enables syntax highlighting using the pygments library. This is
    a generic syntax highlighter, it is slower than a native highlighter and
    does not do any code folding detection. Use it as a fallback for languages
    that do not have a native highlighter available. Check the other pyqode
    namespace packages to see what other languages are available (at the time
    of writing, only python has specialised support).

    .. warning:: There are some issues with multi-line comments, they are not
                 properly highlighted until a full re-highlight is triggered.
                 The text is automatically re-highlighted on save.
    """
    #: Mode description
    DESCRIPTION = "Apply syntax highlighting to the editor using pygments"

    @property
    def pygments_style(self):
        """
        Gets/Sets the pygments style
        """
        return self.color_scheme.name

    @pygments_style.setter
    def pygments_style(self, value):
        self._pygments_style = value
        self._update_style()
        # triggers a rehighlight
        self.color_scheme = ColorScheme(value)

    def __init__(self, document, lexer=None, color_scheme=None):
        """
        :param document: document forwarded to the base highlighter.
        :param lexer: pygments lexer to use; a ``PythonLexer`` is created
            when no (or a falsy) lexer is given.
        :param color_scheme: color scheme forwarded to the base highlighter.
        """
        super(PygmentsSH, self).__init__(document, color_scheme=color_scheme)
        self._pygments_style = self.color_scheme.name
        self._style = None
        self._formatter = HtmlFormatter(nowrap=True)
        self._lexer = lexer if lexer else PythonLexer()

        # caches, see _get_brush and _get_format
        self._brushes = {}
        self._formats = {}
        self._init_style()
        # last block processed by highlight_block
        self._prev_block = None

    def _init_style(self):
        """ Init pygments style """
        self._update_style()

    def on_install(self, editor):
        """
        Resets the caches and the style before installing the mode.

        :type editor: pyqode.core.api.CodeEdit
        """
        self._clear_caches()
        self._update_style()
        super(PygmentsSH, self).on_install(editor)

    def set_mime_type(self, mime_type):
        """
        Update the highlighter lexer based on a mime type.

        If no lexer can be loaded for the mime type, a plain ``TextLexer`` is
        used instead.

        :param mime_type: mime type of the new lexer to setup.
        :returns: True if a lexer was found for the mime type, False if the
            highlighter fell back to the plain text lexer.
        """
        try:
            self.set_lexer_from_mime_type(mime_type)
        except ClassNotFound:
            _logger().exception('failed to get lexer from mimetype')
            self._lexer = TextLexer()
            return False
        except ImportError:
            # import error while loading some pygments plugins, the editor
            # should not crash
            _logger().warning('failed to get lexer from mimetype (%s)',
                              mime_type)
            self._lexer = TextLexer()
            return False
        else:
            return True

    def set_lexer_from_filename(self, filename):
        """
        Change the lexer based on the filename (actually only the extension is
        needed)

        The lexer is looked up from the filename first, then from the mime
        type guessed by ``mimetypes`` and finally from the ``text/plain``
        mime type. A ``TextLexer`` is used if all the lookups failed.

        :param filename: Filename or extension
        """
        self._lexer = None
        if filename.endswith("~"):
            # ignore the trailing '~' of backup files
            filename = filename[:-1]
        try:
            self._lexer = get_lexer_for_filename(filename)
        except (ClassNotFound, ImportError):
            _logger().debug('class not found for url %s', filename)
            try:
                m = mimetypes.guess_type(filename)
                _logger().debug('guessed mimetype: %r', m)
                self._lexer = get_lexer_for_mimetype(m[0])
            except (ClassNotFound, IndexError, ImportError):
                try:
                    self._lexer = get_lexer_for_mimetype('text/plain')
                except (ClassNotFound, ImportError):
                    # leave the lexer unset, the plain text fallback below
                    # takes over
                    self._lexer = None
        if self._lexer is None:
            _logger().warning('failed to get lexer from filename: %s, using '
                              'plain text instead...', filename)
            self._lexer = TextLexer()

    def set_lexer_from_mime_type(self, mime, **options):
        """
        Sets the pygments lexer from mime type.

        Exceptions raised by ``get_lexer_for_mimetype`` are not caught here.

        :param mime: mime type
        :param options: optional additional options, forwarded to
            ``get_lexer_for_mimetype``.
        """
        self._lexer = get_lexer_for_mimetype(mime, **options)
        _logger().debug('lexer for mimetype (%s): %r', mime, self._lexer)

    def highlight_block(self, text, block):
        """
        Highlights the block using a pygments lexer.

        :param text: text of the block to highlight
        :param block: block to highlight
        """
        if self.color_scheme.name != self._pygments_style:
            # the color scheme changed: resynchronise the pygments style
            self._pygments_style = self.color_scheme.name
            self._update_style()
        if not (self.editor and self._lexer and self.enabled):
            return

        if block.blockNumber():
            # Restore the lexer state stored on the previously highlighted
            # block. ``_prev_block`` is still None when no block has been
            # highlighted yet; use the preceding block of the document in
            # that case instead of failing on ``None.userData()``.
            prev_block = self._prev_block
            if prev_block is None:
                prev_block = block.previous()
            prev_data = prev_block.userData()
            if prev_data:
                if hasattr(prev_data, "syntax_stack"):
                    self._lexer._saved_state_stack = prev_data.syntax_stack
                elif hasattr(self._lexer, '_saved_state_stack'):
                    del self._lexer._saved_state_stack

        # Lex the text using Pygments
        index = 0
        usd = block.userData()
        if usd is None:
            usd = TextBlockUserData()
            block.setUserData(usd)
        for token, value in self._lexer.get_tokens(text):
            length = len(value)
            fmt = self._get_format(token)
            if token in [Token.Literal.String, Token.Literal.String.Doc,
                         Token.Comment]:
                fmt.setObjectType(fmt.UserObject)
            self.setFormat(index, length, fmt)
            index += length

        if hasattr(self._lexer, '_saved_state_stack'):
            # keep the final lexer state on the block, for the next block
            usd.syntax_stack = self._lexer._saved_state_stack
            # Clean up for the next go-round.
            del self._lexer._saved_state_stack

        # spaces: they get the whitespace format, on top of the token formats
        expression = QRegExp(r'\s+')
        index = expression.indexIn(text, 0)
        while index >= 0:
            index = expression.pos(0)
            length = len(expression.cap(0))
            self.setFormat(index, length, self._get_format(Whitespace))
            index = expression.indexIn(text, index + length)

        self._prev_block = block

    def _update_style(self):
        """ Sets the style to the specified Pygments style.

        Falls back to the 'default' style if the style is unknown. The
        brushes and formats caches are cleared in every case.
        """
        try:
            self._style = get_style_by_name(self._pygments_style)
        except ClassNotFound:
            # unknown style, also happen with plugins style when used from a
            # frozen app.
            if self._pygments_style == 'qt':
                from pyqode.core.styles import QtStyle
                self._style = QtStyle
            elif self._pygments_style == 'darcula':
                from pyqode.core.styles import DarculaStyle
                self._style = DarculaStyle
            else:
                self._style = get_style_by_name('default')
                self._pygments_style = 'default'
        self._clear_caches()

    def _clear_caches(self):
        """ Clear caches for brushes and formats.
        """
        self._brushes.clear()
        self._formats.clear()

    def _get_format(self, token):
        """ Returns the format to use for token.

        Whitespace tokens use the editor's ``whitespaces_foreground``; the
        format of any other token is built from the current style and cached.
        """
        if token == Whitespace:
            return self.editor.whitespaces_foreground

        if token in self._formats:
            return self._formats[token]

        result = self._get_format_from_style(token, self._style)

        self._formats[token] = result
        return result

    def _get_format_from_style(self, token, style):
        """ Returns a QTextCharFormat for token by reading a Pygments style.

        :param token: pygments token type.
        :param style: pygments style to read.
        """
        result = QtGui.QTextCharFormat()
        try:
            style = style.style_for_token(token)
        except KeyError:
            # fallback to plain text
            style = style.style_for_token(Text)
        for key, value in style.items():
            if value:
                if key == 'color':
                    result.setForeground(self._get_brush(value))
                elif key == 'bgcolor':
                    result.setBackground(self._get_brush(value))
                elif key == 'bold':
                    result.setFontWeight(QtGui.QFont.Bold)
                elif key == 'italic':
                    result.setFontItalic(True)
                elif key == 'underline':
                    result.setUnderlineStyle(
                        QtGui.QTextCharFormat.SingleUnderline)
                elif key == 'sans':
                    result.setFontStyleHint(QtGui.QFont.SansSerif)
                elif key == 'roman':
                    result.setFontStyleHint(QtGui.QFont.Times)
                elif key == 'mono':
                    result.setFontStyleHint(QtGui.QFont.TypeWriter)
        return result

    def _get_brush(self, color):
        """ Returns a brush for the color (brushes are cached per color).
        """
        result = self._brushes.get(color)
        if result is None:
            qcolor = self._get_color(color)
            result = QtGui.QBrush(qcolor)
            self._brushes[color] = result
        return result

    @staticmethod
    def _get_color(color):
        """ Returns a QColor built from a Pygments color string.

        Any '#' is stripped, then the first six characters are read as three
        hexadecimal pairs (red, green, blue).
        """
        color = str(color).replace("#", "")
        qcolor = QtGui.QColor()
        qcolor.setRgb(int(color[:2], base=16),
                      int(color[2:4], base=16),
                      int(color[4:6], base=16))
        return qcolor