File: ruby_lexer.rex

package info (click to toggle)
ruby-ruby-parser 3.21.1-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 7,728 kB
  • sloc: ruby: 136,318; yacc: 6,245; makefile: 11
file content (185 lines) | stat: -rw-r--r-- 8,098 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
# encoding: UTF-8
#
# lexical scanner definition for ruby

class RubyLexer

# Generator switches (oedipus_lex): ask for line-number and column bookkeeping
# in the generated scanner.
option

  lineno
  column

# Named regexp fragments.  The rules further down splice them into their
# patterns by interpolating the macro name.
macro

  # One identifier character: an ASCII letter, digit or underscore, or any
  # non-ASCII character.
  IDENT_CHAR    /[a-zA-Z0-9_[:^ascii:]]/

  # ESC: a backslash followed by one of: 1-3 octal digits, x + 1-2 hex digits,
  # M-<char>, C-<char> or c<char>, u + 1-4 hex digits, u{...} holding
  # whitespace-separated hex runs, or any single character other than
  # 0-7, x, M, C, c.
  # SIMPLE_STRING: body of a double-quoted string built only from escapes,
  # a "#" followed by an escape or by a character other than { # @ $ " \,
  # and characters other than " \ #.
  # SSTRING: body of a single-quoted string: backslash + any character, or any
  # character other than the single quote.
  ESC           /\\((?>[0-7]{1,3}|x\h{1,2}|M-[^\\]|(C-|c)[^\\]|u\h{1,4}|u\{\h+(?:\s+\h+)*\}|[^0-7xMCc]))/
  SIMPLE_STRING /((#{ESC}|\#(#{ESC}|[^\{\#\@\$\"\\])|[^\"\\\#])*)/o
  SSTRING       /((\\.|[^\'])*)/

  # Well-formed numbers.  All are case-insensitive, accept an optional leading
  # "+" and an optional ri / r / i suffix.
  #   INT_DEC   a lone 0 or digits not starting with 0 (underscores allowed),
  #             not followed by ".<digit>"; or 0d + digits
  #   INT_HEX   0x + hex digits        INT_BIN   0b + binary digits
  #   INT_OCT   0 or 0o + octal digits, or a bare 0o
  #   FLOAT     digits "." digits with optional exponent, or digits + exponent
  #   INT_DEC2  fallback: digits/underscores not followed by "e"
  INT_DEC       /[+]?(?:(?:[1-9][\d_]*|0)(?!\.\d)(ri|r|i)?\b|0d[0-9_]+)(ri|r|i)?/i
  INT_HEX       /[+]?0x[a-f0-9_]+(ri|r|i)?/i
  INT_BIN       /[+]?0b[01_]+(ri|r|i)?/i
  INT_OCT       /[+]?0o?[0-7_]+(ri|r|i)?|0o(ri|r|i)?/i
  FLOAT         /[+]?\d[\d_]*\.[\d_]+(e[+-]?[\d_]+)?(?:(ri|r|i)\b)?|[+]?[\d_]+e[+-]?[\d_]+(?:(ri|r|i)\b)?/i
  INT_DEC2      /[+]?\d[0-9_]*(?![e])((ri|r|i)\b)?/i

  # Malformed numbers, matched so the rules can report an error.
  #   NUM_BAD      0x / 0b / 0d with nothing after the base letter
  #   INT_OCT_BAD  an octal-looking literal that reaches an 8 or 9
  #   FLOAT_BAD    digits ending in "_" right before "e" or "."
  NUM_BAD       /[+]?0[xbd]\b/i
  INT_OCT_BAD   /[+]?0o?[0-7_]*[89]/i
  FLOAT_BAD     /[+]?\d[\d_]*_(e|\.)/i

# Ruby code for the oedipus_lex "start" section; presumably emitted at the
# top of the generated scan method, ahead of the rules (confirm against the
# oedipus_lex documentation).
start

  # maybe_pop_stack runs first.  Then, while lex_strterm is set, the early
  # return hands the input to process_string_or_heredoc and skips the rules.
  maybe_pop_stack
  return process_string_or_heredoc if lex_strterm

  # Copy command_start into cmd_state for this token, then reset the flags
  # and remember the current lex_state in last_state.
  self.cmd_state = self.command_start
  self.command_start = false
  self.space_seen    = false # TODO: rename token_seen?
  self.last_state    = lex_state

rule

# [:state]      pattern                 [actions]
#
# Each entry is an optional state or predicate guard, a pattern, and an action
# that is either a method name or a { ruby block }.  A line starting with ":"
# opens a group keyed on a short prefix pattern, and the "|" lines under it
# are the alternatives of that group (see the oedipus_lex documentation for
# the exact matching semantics).

                # \s - \n + \v
                # A run of blanks, tabs, CR, FF or VT: set space_seen and
                # continue with `next` instead of returning a token.
                /[\ \t\r\f\v]+/         { self.space_seen = true; next }

                # A newline, or the "#" that begins a comment.
                /\n|\#/                 process_newline_or_comment

                # Any closing bracket: "]", ")" or "}".
                /[\]\)\}]/              process_brace_close

# "!" family.  "!@" is only tried when is_after_operator? holds; otherwise
# "!", "!=" or "!~".  The token type is looked up in TOKENS by matched text.
: /\!/
| is_after_operator? /\!\@/             { result EXPR_ARG,   TOKENS[text], text }
|               /\![=~]?/               { result :arg_state, TOKENS[text], text }

# "." family.  ".." and "..." go to process_dots; a "." directly followed by a
# digit is a compile error; a lone "." sets lex_state to EXPR_BEG and then
# calls result with EXPR_DOT and :tDOT.
: /\./
|               /\.\.\.?/               process_dots
|               /\.\d/                  { rb_compile_error "no .<digit> floating literal anymore put 0 before dot" }
|               /\./                    { self.lex_state = EXPR_BEG; result EXPR_DOT, :tDOT, "." }

                # Opening parenthesis.
                /\(/                    process_paren

                # Comma: token type looked up in TOKENS, state EXPR_PAR.
                /\,/                    { result EXPR_PAR, TOKENS[text], text }

# "=" family.  "===", "==", "=~", "=>" and a "=" that is not followed by the
# word "begin" are looked up in TOKENS.  "=begin" followed by whitespace goes
# to process_begin when bol? holds; any remaining "=" that sits directly
# before "begin" is handled like a plain "=".
: /=/
|               /\=\=\=|\=\=|\=~|\=>|\=(?!begin\b)/ { result arg_state, TOKENS[text], text }
| bol?          /\=begin(?=\s)/         process_begin
|               /\=(?=begin\b)/         { result arg_state, TOKENS[text], text }

# Double-quoted strings.  When ruby22_label? holds, a quoted SIMPLE_STRING
# immediately followed by ":" goes to process_label.  A complete quoted
# SIMPLE_STRING goes to process_simple_string.  Any other double quote opens
# a STR_DQUOTE string and calls result with :tSTRING_BEG.
ruby22_label?   /\"#{SIMPLE_STRING}\":/o process_label
                /\"(#{SIMPLE_STRING})\"/o process_simple_string
                /\"/                    { string STR_DQUOTE, '"'; result nil, :tSTRING_BEG, text }

                # "@" and "@@" names: a digit right after the sigil is a
                # compile error; otherwise one or more IDENT_CHARs follow.
                /\@\@?\d/               { rb_compile_error "`#{text}` is not allowed as a variable name" }
                /\@\@?#{IDENT_CHAR}+/o  process_ivar

# ":" family.  The first three alternatives are guarded by not_end? and all go
# to process_symbol: a ":" + identifier (first character a letter or "_",
# optionally ending in "?", in "!" not followed by "=", or in "=" under the
# lookahead conditions in the pattern), a ":" + double-quoted SIMPLE_STRING,
# and a ":" + single-quoted SSTRING.  Then "::" and finally a lone ":".
: /:/
| not_end?      /:([a-zA-Z_]#{IDENT_CHAR}*(?:[?]|[!](?!=)|=(?==>)|=(?![=>]))?)/o process_symbol
| not_end?      /\:\"(#{SIMPLE_STRING})\"/o process_symbol
| not_end?      /\:\'(#{SSTRING})\'/o       process_symbol
|               /\:\:/                      process_colon2
|               /\:/                        process_colon1

                # Lambda arrow: calls result with EXPR_ENDFN and :tLAMBDA.
                /->/                    { result EXPR_ENDFN, :tLAMBDA, text }

                # A single "+" or "-".
                /[+-]/                  process_plus_minus

# Numeric literals, keyed on "+" or a digit.  The alternatives are listed in
# this order: empty base prefixes (error), decimal, hex, binary, bad octal
# (error), octal, float with a trailing "_" (error), float, the INT_DEC2
# fallback, and finally any remaining digit (error).
: /[+\d]/
|               /#{NUM_BAD}/o           { rb_compile_error "Invalid numeric format"  }
|               /#{INT_DEC}/o           { int_with_base 10                           }
|               /#{INT_HEX}/o           { int_with_base 16                           }
|               /#{INT_BIN}/o           { int_with_base 2                            }
|               /#{INT_OCT_BAD}/o       { rb_compile_error "Illegal octal digit."    }
|               /#{INT_OCT}/o           { int_with_base 8                            }
|               /#{FLOAT_BAD}/o         { rb_compile_error "Trailing '_' in number." }
|               /#{FLOAT}/o             process_float
|               /#{INT_DEC2}/o          { int_with_base 10                           }
|               /[0-9]/                 { rb_compile_error "Bad number format" }

                # Opening square bracket.
                /\[/                    process_square_bracket

# Single-quoted strings.  Guarded by was_label?, a complete quoted SSTRING
# (optionally followed by ":") goes to process_label_or_string.  Otherwise a
# single quote opens a STR_SQUOTE string and calls result with :tSTRING_BEG.
was_label?        /\'#{SSTRING}\':?/o   process_label_or_string
                  /\'/                  { string STR_SQUOTE, "'"; result nil, :tSTRING_BEG, text }

# "|" family: "||=", "||", "|=" and "|".  A lone "|" uses EXPR_ARG when
# is_after_operator? holds and EXPR_PAR otherwise.
: /\|/
|               /\|\|\=/                { result EXPR_BEG, :tOP_ASGN, "||" }
|               /\|\|/                  { result EXPR_BEG, :tOROP,    "||" }
|               /\|\=/                  { result EXPR_BEG, :tOP_ASGN, "|" }
|               /\|/                    { state = is_after_operator? ? EXPR_ARG : EXPR_PAR; result state, :tPIPE, "|" }

                # Opening curly brace.
                /\{/                    process_brace_open

# "*" family: "**=", "**", "*=" and "*".  For "**" and "*" the token type is
# whatever space_vs_beginning returns for the three candidates given.
: /\*/
|               /\*\*=/                 { result EXPR_BEG, :tOP_ASGN, "**" }
|               /\*\*/                  { result :arg_state, space_vs_beginning(:tDSTAR, :tDSTAR, :tPOW), "**" }
|               /\*\=/                  { result EXPR_BEG, :tOP_ASGN, "*" }
|               /\*/                    { result :arg_state, space_vs_beginning(:tSTAR, :tSTAR, :tSTAR2), "*" }

# TODO: fix result+process_lchevron to set command_start = true
# "<" family: "<=>", "<=", "<<=", "<<" (handed to process_lchevron) and "<".
: /</
|               /\<\=\>/                { result :arg_state, :tCMP, "<=>"    }
|               /\<\=/                  { result :arg_state, :tLEQ, "<="     }
|               /\<\<\=/                { result EXPR_BEG,  :tOP_ASGN, "<<" }
|               /\<\</                  process_lchevron
|               /\</                    { result :arg_state, :tLT, "<"       }

# ">" family: ">=", ">>=", ">>" and ">".
: />/
|               /\>\=/                  { result :arg_state, :tGEQ, ">="     }
|               /\>\>=/                 { result EXPR_BEG,  :tOP_ASGN, ">>" }
|               /\>\>/                  { result :arg_state, :tRSHFT, ">>"   }
|               /\>/                    { result :arg_state, :tGT, ">"       }

# Backtick.  Under expr_fname? it is a :tBACK_REF2 token with EXPR_END; under
# expr_dot? it is a :tBACK_REF2 token with EXPR_CMDARG or EXPR_ARG depending
# on cmd_state; otherwise it opens a STR_XQUOTE string (:tXSTRING_BEG).
: /\`/
| expr_fname?   /\`/                   { result EXPR_END, :tBACK_REF2, "`" }
| expr_dot?     /\`/                   { result((cmd_state ? EXPR_CMDARG : EXPR_ARG), :tBACK_REF2, "`") }
|               /\`/                   { string STR_XQUOTE, '`'; result nil, :tXSTRING_BEG, "`" }

                # Question mark.
                /\?/                    process_questionmark

# "&" family: "&&=", "&&", "&=", "&." (:tLONELY) and "&" (process_amper).
: /&/
|               /\&\&\=/                { result EXPR_BEG, :tOP_ASGN, "&&" }
|               /\&\&/                  { result EXPR_BEG, :tANDOP,   "&&" }
|               /\&\=/                  { result EXPR_BEG, :tOP_ASGN, "&"  }
|               /\&\./                  { result EXPR_DOT, :tLONELY,  "&." }
|               /\&/                    process_amper

                # Forward slash.
                /\//                    process_slash

# "^" family: "^=" and "^".
: /\^/
|               /\^=/                   { result EXPR_BEG, :tOP_ASGN, "^" }
|               /\^/                    { result :arg_state, :tCARET, "^" }

                # Semicolon: sets command_start, then calls result with
                # EXPR_BEG and :tSEMI.
                /\;/                    { self.command_start = true; result EXPR_BEG, :tSEMI, ";" }

# "~" family: "~@" when is_after_operator? holds, otherwise "~".  Both call
# result with :tTILDE and the text "~".
: /~/
| is_after_operator? /\~@/              { result :arg_state, :tTILDE, "~" }
|               /\~/                    { result :arg_state, :tTILDE, "~" }

# Backslash.  Followed by an optional CR and a newline it is skipped: lineno
# is incremented, space_seen is set and scanning continues with `next`.
# Any other backslash is a compile error.
: /\\/
|               /\\\r?\n/               { self.lineno += 1; self.space_seen = true; next }
|               /\\/                    { rb_compile_error "bare backslash only allowed before newline" }

                # Percent sign.
                /\%/                    process_percent

: /\$/
|               /\$_\w+/                         process_gvar
|               /\$_/                            process_gvar
|               /\$[~*$?!@\/\\;,.=:<>\"]|\$-\w?/ process_gvar
| in_fname?     /\$([\&\`\'\+])/                 process_gvar
|               /\$([\&\`\'\+])/                 process_backref
| in_fname?     /\$([1-9]\d*)/                   process_gvar
|               /\$([1-9]\d*)/                   process_nthref
|               /\$0/                            process_gvar
|               /\$#{IDENT_CHAR}+/               process_gvar
|               /\$\W/                           process_gvar_oddity

                # Underscore.
                /\_/                    process_underscore

                # A run of one or more IDENT_CHARs.
                /#{IDENT_CHAR}+/o       process_token

                # Ctrl-D, Ctrl-Z, NUL or the end of the input: the action
                # evaluates to a pair of RubyLexer::EOF values.
                /\004|\032|\000|\Z/     { [RubyLexer::EOF, RubyLexer::EOF] }

                # Any other single character is reported as a compile error.
                /./                     { rb_compile_error "Invalid char #{text.inspect} in expression" }

end