File: python.rb

package info (click to toggle)
ruby-rouge 4.7.0-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 6,844 kB
  • sloc: ruby: 38,489; sed: 2,071; perl: 152; makefile: 8
file content (295 lines) | stat: -rw-r--r-- 8,854 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
# -*- coding: utf-8 -*- #
# frozen_string_literal: true

module Rouge
  module Lexers
    class Python < RegexLexer
      # Lexer metadata declared through the class-level DSL: human-readable
      # title and description, the primary tag with its alias, the file name
      # globs and the MIME types associated with this lexer.
      title "Python"
      desc "The Python programming language (python.org)"
      tag 'python'
      aliases 'py'
      # Besides the usual *.py variants this also lists several build-tool
      # file names (SConstruct, BUCK, BUILD, WORKSPACE, ...).
      # NOTE(review): presumably these formats use Python-like syntax — confirm.
      filenames '*.py', '*.pyi', '*.pyw', '*.sc', 'SConstruct', 'SConscript',
                '*.tac', '*.bzl', 'BUCK', 'BUILD', 'BUILD.bazel', 'WORKSPACE'
      mimetypes 'text/x-python', 'application/x-python'

      # Content-based detection hook.
      #
      # @param text [#shebang?] the source text being probed
      # @return [true, nil] +true+ when the shebang line names a +python+ or
      #   +pythonw+ interpreter, optionally suffixed with 2 or 3 and a minor
      #   version (e.g. +python3.11+); +nil+ otherwise
      def self.detect?(text)
        true if text.shebang?(/pythonw?(?:[23](?:\.\d+)?)?/)
      end

      # Words highlighted as keywords by the identifier rule in :root and
      # excluded after a soft keyword in :soft_keywords. The list is built
      # once and memoized on the class.
      #
      # @return [Array<String>]
      def self.keywords
        @keywords ||= %w[
          assert break continue del elif else except exec finally
          for global if lambda pass print raise return try while yield
          as with from import
          async await nonlocal
        ]
      end

      # Names highlighted as built-in functions/types by the identifier rule
      # in :root. Grouped by initial letter; built once and memoized on the
      # class.
      #
      # @return [Array<String>]
      def self.builtins
        @builtins ||= %w[
          __import__
          abs aiter all anext any apply ascii
          basestring bin bool buffer breakpoint bytearray bytes
          callable chr classmethod cmp coerce compile complex
          delattr dict dir divmod
          enumerate eval exec execfile exit
          file filter float format frozenset
          getattr globals
          hasattr hash help hex
          id input int intern isinstance issubclass iter
          len list locals long
          map max memoryview min
          next
          object oct open ord
          pow print property
          range raw_input reduce reload repr reversed round
          set setattr slice sorted staticmethod str sum super
          tuple type
          unichr unicode
          vars
          xrange
          zip
        ]
      end

      # Built-in constant names that the identifier rule in :root emits as
      # Name::Builtin::Pseudo. Memoized on the class.
      #
      # @return [Array<String>]
      def self.builtins_pseudo
        @builtins_pseudo ||= %w[None Ellipsis NotImplemented False True]
      end

      # Names of built-in exception and warning classes that the identifier
      # rule in :root emits as Name::Builtin. The list also carries legacy
      # names such as StandardError. Memoized on the class.
      #
      # NotImplemented is deliberately absent: it is a constant rather than an
      # exception class and is already listed in builtins_pseudo. Because the
      # identifier rule consults this list before builtins_pseudo, listing it
      # here would shadow that classification.
      #
      # @return [Array<String>]
      def self.exceptions
        @exceptions ||= %w(
          ArithmeticError AssertionError AttributeError BaseException
          BaseExceptionGroup BlockingIOError BrokenPipeError BufferError
          BytesWarning ChildProcessError ConnectionAbortedError ConnectionError
          ConnectionRefusedError ConnectionResetError DeprecationWarning
          EOFError EnvironmentError EncodingWarning Exception ExceptionGroup
          FileExistsError FileNotFoundError FloatingPointError FutureWarning
          GeneratorExit IOError ImportError ImportWarning IndentationError
          IndexError InterruptedError IsADirectoryError
          KeyError KeyboardInterrupt LookupError
          MemoryError ModuleNotFoundError
          NameError NotADirectoryError NotImplementedError
          OSError OverflowError OverflowWarning PendingDeprecationWarning
          PermissionError ProcessLookupError PythonFinalizationError
          RecursionError ReferenceError ResourceWarning RuntimeError RuntimeWarning
          StandardError StopAsyncIteration StopIteration SyntaxError SyntaxWarning
          SystemError SystemExit TabError TimeoutError TypeError
          UnboundLocalError UnicodeDecodeError UnicodeEncodeError UnicodeError
          UnicodeTranslateError UnicodeWarning UserWarning ValueError VMSError
          Warning WindowsError
          ZeroDivisionError
        )
      end

      # Pattern fragments captured by the state blocks below and interpolated
      # into their rules.
      #
      # identifier: a letter or underscore followed by any number of letters,
      # digits or underscores.
      identifier =        /[[:alpha:]_][[:alnum:]_]*/
      # dotted_identifier: like identifier, but '.' is accepted in every
      # position (including the first one and in consecutive runs).
      dotted_identifier = /[[:alpha:]_.][[:alnum:]_.]*/

      # Per-lexer register of the string literals currently being lexed.
      # Created on first use and reused afterwards.
      #
      # @return [StringRegister]
      def current_string
        return @string_register if @string_register

        @string_register = StringRegister.new
      end

      # Top-level state. It is also mixed into :raise and :doctest.
      # The rules are order-sensitive, so keep their sequence intact.
      state :root do
        # Runs of newlines are plain text.
        rule %r/\n+/m, Text
        # A ':' at the start of a line, optional whitespace, then a
        # triple-double-quoted string (with up to two r/u prefix letters, any
        # case) is emitted as punctuation, text and a doc string.
        rule %r/^(:)(\s*)([ru]{,2}""".*?""")/mi do
          groups Punctuation, Text, Str::Doc
        end

        # A literal '...' at the end of a line.
        rule %r/\.\.\.\B$/, Name::Builtin::Pseudo

        # Whitespace other than newlines.
        rule %r/[^\S\n]+/, Text
        # '#' through the end of the line, including the newline if present.
        rule %r(#(.*)?\n?), Comment::Single
        rule %r/[\[\]{}:(),;.]/, Punctuation
        # Backslash-newline (line continuation), then any other lone backslash.
        rule %r/\\\n/, Text
        rule %r/\\/, Text

        # '@' immediately followed by a dotted identifier.
        rule %r/@#{dotted_identifier}/i, Name::Decorator

        # Word operators first, then the two-character operators (optionally
        # followed by '='), then single-character operators and '!='.
        rule %r/(in|is|and|or|not)\b/, Operator::Word
        rule %r/(<<|>>|\/\/|\*\*)=?/, Operator
        rule %r/[-~+\/*%=<>&^|@]=?|!=/, Operator

        # 'from <dotted name> import'; the separators may be whitespace or
        # backslash-escaped whitespace.
        rule %r/(from)((?:\\\s|\s)+)(#{dotted_identifier})((?:\\\s|\s)+)(import)/ do
          groups Keyword::Namespace,
                 Text,
                 Name,
                 Text,
                 Keyword::Namespace
        end

        # 'import <dotted name>'.
        rule %r/(import)(\s+)(#{dotted_identifier})/ do
          groups Keyword::Namespace, Text, Name
        end

        # 'def' / 'class' plus the following whitespace; the name itself is
        # consumed by the pushed :funcname / :classname state.
        rule %r/(def)((?:\s|\\\s)+)/ do
          groups Keyword, Text
          push :funcname
        end

        rule %r/(class)((?:\s|\\\s)+)/ do
          groups Keyword, Text
          push :classname
        end

        # A name followed (lookahead only) by parenthesised text: names that
        # start with a lowercase letter or '_' become Name::Function, names
        # that start with an uppercase letter become Name::Class. With /m the
        # '.*' in the lookahead can span lines.
        # NOTE(review): these rules are declared before the keyword/builtin
        # identifier rule below, so a keyword or builtin directly followed by
        # '(...)' appears to be classified here first — confirm this is intended.
        rule %r/([a-z_]\w*)[ \t]*(?=(\(.*\)))/m, Name::Function
        rule %r/([A-Z_]\w*)[ \t]*(?=(\(.*\)))/m, Name::Class

        # TODO: backtick-quoted expressions are not part of Python 3 syntax.
        rule %r/`.*?`/, Str::Backtick
        # String opening: up to two prefix letters out of r/t/f/b/u (any case)
        # followed by a triple or single quote. The lower-cased prefix and the
        # delimiter are recorded so :generic_string can find the matching
        # closing delimiter and tell f-strings and raw strings apart.
        rule %r/([rtfbu]{0,2})('''|"""|['"])/i do |m|
          groups Str::Affix, Str::Heredoc
          current_string.register type: m[1].downcase, delim: m[2]
          push :generic_string
        end

        mixin :soft_keywords

        # The negative lookbehind skips names that directly follow a '.', so
        # attribute/property names are never classified as keywords or builtins.
        rule %r/(?<!\.)#{identifier}/ do |m|
          if self.class.keywords.include? m[0]
            token Keyword
          elsif self.class.exceptions.include? m[0]
            token Name::Builtin
          elsif self.class.builtins.include? m[0]
            token Name::Builtin
          elsif self.class.builtins_pseudo.include? m[0]
            token Name::Builtin::Pseudo
          else
            token Name
          end
        end

        # Identifiers rejected by the lookbehind above (i.e. preceded by '.').
        rule identifier, Name

        # Number building blocks: digit runs may contain single '_' separators.
        digits = /[0-9](_?[0-9])*/
        decimal = /((#{digits})?\.#{digits}|#{digits}\.)/
        exponent = /e[+-]?#{digits}/i
        # Floats: decimal with optional exponent, digits with exponent, and
        # digits with a 'j' suffix; a trailing 'j' is accepted on all three.
        rule %r/#{decimal}(#{exponent})?j?/i, Num::Float
        rule %r/#{digits}#{exponent}j?/i, Num::Float
        rule %r/#{digits}j/i, Num::Float

        # Prefixed integer literals, integers with a trailing 'L', and plain
        # decimal integers (either no leading zero, or zeros only).
        rule %r/0b(_?[0-1])+/i, Num::Bin
        rule %r/0o(_?[0-7])+/i, Num::Oct
        rule %r/0x(_?[a-f0-9])+/i, Num::Hex
        rule %r/\d+L/, Num::Integer::Long
        rule %r/([1-9](_?[0-9])*|0(_?0)*)/, Num::Integer
      end

      # Pushed by the `def` rule in :root: the next identifier is the function
      # name, after which the state is popped again.
      state(:funcname) { rule identifier, Name::Function, :pop! }

      # Pushed by the `class` rule in :root: the next identifier is the class
      # name, after which the state is popped again.
      state(:classname) { rule identifier, Name::Class, :pop! }

      # Soft keywords: `match` / `case` at the start of a line (after optional
      # indentation) are keywords unless they are followed by one of
      # : , ; = ^ & | @ ~ ) ] } or by a regular keyword.
      state :soft_keywords do
        soft_keyword_line = %r/
          (^[ \t]*)
          (match|case)\b
          (?![ \t]*
            (?:[:,;=^&|@~)\]}] |
              (?:#{Python.keywords.join('|')})\b))
        /x

        rule soft_keyword_line do |found|
          token Text::Whitespace, found[1] # leading indentation
          token Keyword, found[2]          # the soft keyword itself
          push :soft_keywords_inner
        end
      end

      # Entered right after a soft keyword. Whitespace, a run of characters
      # containing neither newline nor '_', and then a standalone '_' are
      # emitted as whitespace, text and keyword. Anything else leaves the
      # state without consuming input.
      state :soft_keywords_inner do
        rule %r((\s+)([^\n_]*)(_\b)) do
          groups Text::Whitespace, Text, Keyword
        end

        # The empty pattern always matches, so this is the fallback exit.
        rule %r// do
          pop!
        end
      end

      # Rules for the rest of a statement: `from`, `raise` and `yield` are
      # keywords, a newline or ';' pops the state, and everything else is
      # handled by the :root rules.
      # NOTE(review): nothing in this file pushes :raise (it is only mixed
      # into :yield) — confirm whether it is still used.
      state :raise do
        rule %r/from\b/, Keyword
        rule %r/raise\b/, Keyword
        rule %r/yield\b/, Keyword
        rule %r/\n/, Text, :pop!
        rule %r/;/, Punctuation, :pop!
        mixin :root
      end

      # Same rules as :raise.
      # NOTE(review): nothing in this file pushes :yield — confirm whether it
      # is still used.
      state :yield do
        mixin :raise
      end

      # Body of any string literal opened in :root. The most recent entry of
      # the string register decides which quote closes the literal and
      # whether '{' starts an interpolation.
      state :generic_string do
        # A '>>>' or '...' prompt at the start of a line switches to :doctest.
        rule %r/^\s*(>>>|\.\.\.)\B/, Generic::Prompt, :doctest
        # Lazy on purpose: one character at a time, so the prompt rule above
        # gets a chance at every line start.
        rule %r/[^'"\\{]+?/, Str
        rule %r/{{/, Str

        # Every quote is emitted with the same token; only the one matching
        # the registered delimiter ends the string.
        rule %r/'''|"""|['"]/ do |quote|
          token Str::Heredoc
          next unless current_string.delim?(quote[0])

          current_string.remove
          pop!
        end

        # Zero-width hand-off to :generic_escape when a backslash is next.
        rule %r/(?=\\)/, Str, :generic_escape

        # '{' is an interpolation opener in f-strings and plain text otherwise.
        rule %r/{/ do
          next token(Str) unless current_string.type?("f")

          token Str::Interpol
          push :generic_interpol
        end
      end

      # Entered from :generic_string when the next character is a backslash.
      # Consumes exactly one escape sequence and pops back.
      #
      # Recognised escapes: a backslash followed by one of \ a b f n r t v " ',
      # a newline, \N{NAME}, \uXXXX, \UXXXXXXXX, \xXX, or one to three octal
      # digits. They are emitted as Str::Escape, or as plain Str inside raw
      # strings. Any other backslash pair is plain Str.
      #
      # \N{NAME} accepts letters, digits, spaces and hyphens so that names such
      # as HYPHEN-MINUS are matched as one escape. Otherwise the '{' would
      # reach :generic_string and start an interpolation inside f-strings.
      state :generic_escape do
        rule %r(\\
          ( [\\abfnrtv"']
          | \n
          | N\{[A-Za-z][-A-Za-z0-9 ]*\}
          | u[a-fA-F0-9]{4}
          | U[a-fA-F0-9]{8}
          | x[a-fA-F0-9]{2}
          | [0-7]{1,3}
          )
        )x do
          if current_string.type?("r")
            token Str
          else
            token Str::Escape
          end
          pop!
        end

        # Fallback: a backslash followed by any single non-newline character.
        rule %r/\\./, Str, :pop!
      end

      # Doctest example inside a string, entered from :generic_string on a
      # '>>>' or '...' prompt. The content is lexed with the :root rules.
      state :doctest do
        # A blank line ends the example and returns to the enclosing string.
        rule %r/\n\n/, Text, :pop!

        # A triple quote ends both the example and the enclosing string.
        rule %r/'''|"""/ do
          token Str::Heredoc
          if in_state?(:generic_string)
            # Drop the register entry of the string being closed so it does
            # not linger as the "current" string for an outer literal.
            current_string.remove
            pop!(2) # pop :doctest and :generic_string
          end
        end

        mixin :root
      end

      # Inside the braces of an f-string replacement field. Text free of
      # '{', '}', '!' and ':' is handed to `recurse`. Conversion markers, ':',
      # and the braces themselves are interpolation punctuation, with nested
      # '{' pushing this state again.
      state :generic_interpol do
        rule(%r/[^{}!:]+/) { |chunk| recurse chunk[0] }
        rule %r/![asr]/, Str::Interpol
        rule %r/:/, Str::Interpol
        rule %r/{/, Str::Interpol, :generic_interpol
        rule %r/}/, Str::Interpol, :pop!
      end

      # Stack of [type, delim] pairs, one per string literal currently being
      # lexed; the last entry describes the innermost string. The predicates
      # answer false on an empty register instead of raising NoMethodError.
      class StringRegister < Array
        # @param delim [String] a quote sequence such as ' or """
        # @return [Boolean] whether the innermost string was opened with
        #   +delim+; false when no string is registered
        def delim?(delim)
          entry = last
          !entry.nil? && entry[1] == delim
        end

        # Records a newly opened string.
        #
        # @param type [String] prefix letters of the literal
        # @param delim [String] opening quote sequence
        # @return [StringRegister] self
        def register(type: "u", delim: "'")
          push([type, delim])
        end

        # Forgets the innermost string.
        #
        # @return [Array, nil] the removed [type, delim] pair, or nil when the
        #   register was empty
        def remove
          pop
        end

        # @param type [String] a prefix letter such as "f" or "r"
        # @return [Boolean] whether the innermost string's prefix contains
        #   +type+; false when no string is registered
        def type?(type)
          entry = last
          !entry.nil? && entry[0].include?(type)
        end
      end

      # StringRegister is an implementation detail: hide the constant from
      # code outside this class.
      private_constant :StringRegister
    end
  end
end