File: rust.rb

package info (click to toggle)
ruby-rouge 4.7.0-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 6,844 kB
  • sloc: ruby: 38,489; sed: 2,071; perl: 152; makefile: 8
file content (260 lines) | stat: -rw-r--r-- 8,613 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
# -*- coding: utf-8 -*- #
# frozen_string_literal: true

module Rouge
  module Lexers
    class Rust < RegexLexer
      # Lexer metadata declared through the Rouge lexer DSL: display title,
      # description, primary tag, alternate tags, filename glob and MIME type.
      title "Rust"
      desc 'The Rust programming language (rust-lang.org)'
      tag 'rust'
      aliases 'rs',
        # The `<tag>,<directive>` forms below are listed as aliases so that
        # directives from https://github.com/budziq/rust-skeptic
        # (`no_run`, `ignore`, `should_panic`) do not prevent highlighting.
        'rust,no_run', 'rs,no_run',
        'rust,ignore', 'rs,ignore',
        'rust,should_panic', 'rs,should_panic'
      filenames '*.rs'
      mimetypes 'text/x-rust'

      # Content-based detection hook.
      #
      # @param text [#shebang?] the source text under analysis
      # @return [true, nil] true when `text.shebang?('rustc')` is truthy,
      #   nil otherwise
      def self.detect?(text)
        true if text.shebang?('rustc')
      end

      # Words that the `:root` state highlights as `Keyword`.
      #
      # The list is built on first use and memoized in a class-level instance
      # variable. It is kept as an ordered Array because the caller joins it
      # with `|` to build a regexp alternation.
      #
      # `log` is intentionally absent: it is not a keyword in the Rust
      # Reference, and listing it made ordinary uses of the identifier (for
      # example the `log` crate in `log::info!`) highlight as a keyword.
      #
      # @return [Array<String>]
      def self.keywords
        @keywords ||= %w(
          as async await break const continue crate dyn else enum extern false
          fn for if impl in let loop match mod move mut pub ref return self
          Self static struct super trait true type unsafe use where while
          abstract become box do final macro
          override priv typeof unsized virtual
          yield try
          union
        )
      end

      # Identifiers that the catch-all identifier rule in `:root` reports as
      # `Name::Builtin` instead of plain `Name`.
      #
      # The Set is created on the first call and reused afterwards.
      #
      # @return [Set<String>]
      def self.builtins
        return @builtins if @builtins

        names = %w(
          Add BitAnd BitOr BitXor bool c_char c_double c_float char
          c_int clock_t c_long c_longlong Copy c_schar c_short
          c_uchar c_uint c_ulong c_ulonglong c_ushort c_void dev_t DIR
          dirent Div Eq Err f32 f64 FILE float fpos_t
          i16 i32 i64 i8 isize Index ino_t int intptr_t mode_t Mul
          Neg None off_t Ok Option Ord Owned pid_t ptrdiff_t
          Send Shl Shr size_t Some ssize_t str Sub time_t
          u16 u32 u64 u8 usize uint uintptr_t
          Box Vec String Rc Arc
          u128 i128 Result Sync Pin Unpin Sized Drop drop Fn FnMut FnOnce
          Clone PartialEq PartialOrd AsMut AsRef From Into Default
          DoubleEndedIterator ExactSizeIterator Extend IntoIterator Iterator
          FromIterator ToOwned ToString TryFrom TryInto
        )
        @builtins = Set.new(names)
      end

      # Reports whether every counter stored in @macro_delims is zero, i.e.
      # no delimiter counted by the `:macro` state is still outstanding.
      #
      # @return [Boolean]
      def macro_closed?
        @macro_delims.each_value.all? { |depth| depth.zero? }
      end

      # Block registered with the lexer's `start` hook.
      start do
        # One counter per closing delimiter; the `:macro` state increments
        # and decrements these and `macro_closed?` checks them.
        @macro_delims = { ']' => 0, ')' => 0, '}' => 0 }

        # Begin in `:bol` so the first line gets the same beginning-of-line
        # handling that `:root` applies after each newline.
        push :bol
      end

      # Opening delimiter => matching closing delimiter. The closing
      # characters are the keys of @macro_delims.
      delim_map = { '[' => ']', '(' => ')', '{' => '}' }

      # Identifier: an XID_Start character or underscore, followed by any
      # number of XID_Continue characters.
      id = /[\p{XID_Start}_]\p{XID_Continue}*/

      # A single hexadecimal digit, case-insensitive.
      hex = /[0-9a-f]/i

      # Backslash escapes: a single-character escape, `\xHH`, or `\u{...}`
      # holding one to six hex digits, each optionally followed by
      # underscores.
      escapes = %r(
        \\
        ( [nrt'"\\0]
        | x#{hex}{2}
        | u\{(#{hex}_*){1,6}\}
        )
      )x

      # Width part of an integer type suffix (used after `u` / `i`).
      size = /8|16|32|64|128|size/

      # Although not officially part of Rust, the rustdoc tool allows code in
      # comments to begin with `#`. Code like this will be evaluated but not
      # included in the HTML output produced by rustdoc. So that code intended
      # for these comments can be highlighted with Rouge, the Rust lexer needs
      # to check whether the line begins with a `# `.
      #
      # This state is pushed by the `start` block and again by `:root` after
      # every newline it matches.
      state :bol do
        # Leading whitespace and comments are consumed first.
        mixin :whitespace
        # `#` followed by one whitespace character: the rest of the line is
        # emitted as Comment::Special.
        rule %r/#\s[^\n]*/, Comment::Special
        # The empty pattern matches without consuming anything, so any other
        # input simply leaves this state.
        rule(//) { pop! }
      end

      # Contents of an attribute, entered from `:root` after `#[` or `#![`.
      # Everything except whitespace, comments and literals is emitted as
      # Name::Decorator.
      state :attribute do
        mixin :whitespace
        mixin :has_literals
        # Punctuation that may appear inside the attribute.
        rule %r/[(,)=:]/, Name::Decorator
        # The closing bracket ends the attribute and returns to the previous
        # state.
        rule %r/\]/, Name::Decorator, :pop!
        rule id, Name::Decorator
      end

      # Shared by other states via `mixin`: runs of whitespace are emitted as
      # Text, then the comment rules are tried.
      state :whitespace do
        rule %r/\s+/, Text
        mixin :comments
      end

      # Line and block comment openers. The rules are ordered from the most
      # specific pattern to the least specific one, so their order matters.
      state :comments do
        # Only 3 slashes are doc comments, `////` and beyond become normal
        # comments again (for some reason), so match this before the
        # doc line comments rather than complicating the doc-comment pattern
        # to exclude them.
        rule %r(////+[^\n]*), Comment::Single
        # doc line comments — either inner (`//!`), or outer (`///`).
        rule %r(//[/!][^\n]*), Comment::Doc
        # otherwise, `//` is just a plain line comment.
        rule %r(//[^\n]*), Comment::Single
        # /**/ and /***/ are self-closing block comments, not doc. Because this
        # is self-closing, it doesn't enter the states for nested comments
        rule %r(/\*\*\*?/), Comment::Multiline
        # 3+ stars and it's a normal non-doc block comment.
        rule %r(/\*\*\*+), Comment::Multiline, :nested_plain_block
        # `/*!` and `/**` begin doc comments. These nest and can have internal
        # block/doc comments, but they're still part of the documentation
        # inside.
        rule %r(/[*][*!]), Comment::Doc, :nested_doc_block
        # any other /* is a plain multiline comment
        rule %r(/[*]), Comment::Multiline, :nested_plain_block
      end

      # Block comments nest fully, and that includes documentation block
      # comments. The outermost comment decides the token type for everything
      # inside it:
      #
      # - Inside a doc block comment, nested plain block comments are still
      #   documentation, e.g. `/** /* still docs */ */`.
      # - Inside a plain block comment, nested doc block comments are still
      #   plain comments, e.g. `/* /** not docs */ */`.
      #
      # Line comments have no effect inside block comments.
      #
      # Two structurally identical states are generated, one per token type,
      # which is simpler than tracking the outer kind in an instance variable.
      {
        nested_plain_block: Comment::Multiline,
        nested_doc_block: Comment::Doc,
      }.each do |block_state, tok|
        state block_state do
          # A closer ends the current nesting level.
          rule %r(\*/), tok, :pop!
          # An opener starts another level of the same state, keeping the
          # outer token type.
          rule %r(/\*), tok, block_state
          # Consume at most one `*` or `/` at a time so openers and closers
          # are never swallowed; everything else is skipped in bulk.
          rule %r([^*/]+|[*/]), tok
        end
      end

      # Main state for ordinary Rust code. Rules are tried in the order
      # written, so earlier rules take priority over later ones.
      state :root do
        # Every newline re-enters :bol so the next line can be checked for a
        # leading `# `. This must come before the whitespace mixin, whose
        # `\s+` rule would otherwise consume the newline.
        rule %r/\n/, Text, :bol
        mixin :whitespace
        # `#[` or `#![` opens an attribute.
        rule %r/#!?\[/, Name::Decorator, :attribute
        # Whole-word keywords. Because this precedes the literals mixin,
        # `true` and `false` (both in the keyword list) are emitted as
        # Keyword in this state.
        rule %r/\b(?:#{Rust.keywords.join('|')})\b/, Keyword
        mixin :has_literals

        # `=>` and `->`
        rule %r([=-]>), Keyword
        rule %r(<->), Keyword
        rule %r/[()\[\]{}|,:;]/, Punctuation
        # Single-character operators, plus `..` and `...`.
        rule %r/[*\/!@~&+%^<>=\?-]|\.{2,3}/, Operator

        # An identifier (optionally preceded by `.`) that is followed by `(`.
        rule %r/([.]\s*)?#{id}(?=\s*[(])/m, Name::Function
        # `.await`
        rule %r/[.]\s*await\b/, Keyword
        # `.name`
        rule %r/[.]\s*#{id}/, Name::Property
        # `.` followed by digits, e.g. `pair.0`.
        rule %r/[.]\s*\d+/, Name::Attribute
        # `name::` — the identifier and the `::` get separate tokens.
        rule %r/(#{id})(::)/m do
          groups Name::Namespace, Punctuation
        end

        # macros
        # `macro_rules!` gets its own state so `$` metavariables are
        # recognized; any other `name!` enters the generic macro state.
        rule %r/\bmacro_rules!/, Name::Decorator, :macro_rules
        rule %r/#{id}!/, Name::Decorator, :macro

        # `'static` is a keyword; any other `'name` is emitted as
        # Name::Variable.
        rule %r/'static\b/, Keyword
        rule %r/'#{id}/, Name::Variable
        # Any remaining identifier: Name::Builtin when it is in the builtins
        # set, plain Name otherwise.
        rule %r/#{id}/ do |m|
          name = m[0]
          if self.class.builtins.include? name
            token Name::Builtin
          else
            token Name
          end
        end
      end

      # State entered after a `name!` macro invocation. It counts opening and
      # closing delimiters in @macro_delims and pops itself once every
      # counter is back at zero.
      state :macro do
        mixin :has_literals

        # Opening delimiter: bump the counter stored under the matching
        # closing delimiter.
        rule %r/[\[{(]/ do |m|
          @macro_delims[delim_map[m[0]]] += 1
          puts "    macro_delims: #{@macro_delims.inspect}" if @debug
          token Punctuation
        end

        # Closing delimiter: decrement its counter and leave the macro state
        # when `macro_closed?` reports that all counters are zero.
        rule %r/[\]})]/ do |m|
          @macro_delims[m[0]] -= 1
          puts "    macro_delims: #{@macro_delims.inspect}" if @debug
          pop! if macro_closed?
          token Punctuation
        end

        # same as the rule in root, but don't push another macro state
        rule %r/#{id}!/, Name::Decorator
        # Everything else is lexed with the ordinary rules. The rules above
        # take priority over the corresponding ones in :root because they
        # are listed first.
        mixin :root

        # No syntax errors in macros
        # (any single character other than a newline that nothing above
        # matched is emitted as Text).
        rule %r/./, Text
      end

      # State entered after `macro_rules!`. It adds `$` metavariable rules on
      # top of the generic :macro state.
      state :macro_rules do
        # `$name`, optionally followed by `:` and a second identifier
        # (e.g. `$x:expr`).
        rule %r/[$]#{id}(:#{id})?/, Name::Variable
        # A `$` not followed by an identifier.
        rule %r/[$]/, Name::Variable

        mixin :macro
      end

      # Literal rules shared via `mixin` by :root, :attribute and :macro:
      # booleans, character/byte literals, strings, raw strings and numbers.
      state :has_literals do
        # constants
        rule %r/\b(?:true|false)\b/, Keyword::Constant

        # characters/bytes
        # An optional `b` prefix, then a single escape sequence or a single
        # non-backslash character between single quotes.
        rule %r(
          b?' (?: #{escapes} | [^\\] ) '
        )x, Str::Char

        # Ordinary (byte) string: the body is handled by the :string state.
        rule %r/b?"/, Str, :string
        # Raw (byte) string: `r`, any number of `#`, a quoted body, then the
        # same number of `#`. The back-reference enforces the matching count.
        rule %r/b?r(#*)".*?"\1/m, Str

        # numbers
        dot = /[.][0-9][0-9_]*/
        exp = /[eE][-+]?[0-9_]+/
        flt = /f32|f64/

        # Floats: digits followed by at least one of a fractional part, an
        # exponent or an `f32`/`f64` suffix — or by a bare `.` that is not
        # followed by another `.`, `_` or an XID_Start character (so `1..2`
        # and `1.foo` are not treated as floats).
        rule %r(
          [0-9][0-9_]*
          (#{dot}  #{exp}? #{flt}?
          |#{dot}? #{exp}  #{flt}?
          |#{dot}? #{exp}? #{flt}
          |[.](?![._\p{XID_Start}])
          )
        )x, Num::Float

        # Integers: binary, hexadecimal, octal or decimal digits, with an
        # optional `u`/`i` type suffix.
        rule %r(
          ( 0b[10_]+
          | 0x[0-9a-fA-F_]+
          | 0o[0-7_]+
          | [0-9][0-9_]*
          ) (u#{size}?|i#{size})?
        )x, Num::Integer

      end

      # Body of a double-quoted (byte) string, entered from :has_literals
      # after the opening quote.
      state :string do
        # The closing quote ends the string.
        rule %r/"/, Str, :pop!
        rule escapes, Str::Escape
        # String continuation: a backslash immediately before a line break is
        # a valid escape in Rust string literals. `escapes` does not cover it
        # and the catch-all rule below excludes backslashes, so without this
        # rule the backslash would match nothing in this state.
        rule %r/\\\r?\n/, Str::Escape
        # Any run of characters that is neither a quote nor a backslash.
        rule %r/[^"\\]+/m, Str
      end
    end
  end
end