File: lexer.l

package info (click to toggle)
rumur 2020.12.20-1
  • links: PTS, VCS
  • area: main
  • in suites: bullseye
  • size: 3,292 kB
  • sloc: cpp: 17,090; ansic: 2,537; objc: 1,542; python: 1,120; sh: 538; yacc: 536; lex: 229; lisp: 15; makefile: 5
file content (229 lines) | stat: -rw-r--r-- 8,089 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
%{

#include <cstddef>
#include "parser.yy.hh"
#include <rumur/except.h>
#include <rumur/scanner.h>

/* Override the declaration of yylex with the one from our derived scanner.
 * Its two parameters are what the rule actions below refer to as `lval` (where
 * a token's semantic value is built) and `loc` (the token's source location).
 */
#ifdef YY_DECL
  #undef YY_DECL
#endif
#define YY_DECL int rumur::scanner::yylex(\
  rumur::parser::semantic_type *const lval, rumur::parser::location_type *loc)

/* Executed each time a rule matches, before that rule's own action: update the
 * begin position with the last end position, then advance the end position by
 * the length of the matched text. This ensures we get correct starting column
 * numbers. Note that yyleng counts bytes, so a multi-byte UTF-8 character
 * advances the column by more than one.
 */
#define YY_USER_ACTION  loc->step(); loc->columns(yyleng);

%}

  /* Track line numbers while scanning. These are retrievable via
   * FlexLexer::lineno(). This counter is separate from the `loc` bookkeeping
   * that the rule actions perform themselves.
   */
%option yylineno

  /* When the end of the input is encountered, do not expect another file. */
%option noyywrap

  /* Generate a C++ scanner, not C. */
%option c++

  /* Use our own scanner class, rather than the default yyFlexLexer. */
%option yyclass="rumur::scanner"

  /* Match all patterns case-insensitively, so keywords and the hexadecimal 0x
   * prefix may be written in any letter case.
   */
%option caseless

  /* Exclusive start conditions for handling comments: while one of them is
   * active, only the rules explicitly tagged with it can match. SC_COMMENT is
   * entered on a line comment and SC_MULTILINE_COMMENT on a block comment.
   * This is not the most efficient way of dealing with comments, but it is
   * simple.
   */
%x SC_COMMENT SC_MULTILINE_COMMENT

%%

(0x[[:xdigit:]]+|[[:digit:]]+) {
  /* Integer literal, hexadecimal or decimal. The digits are handed on as raw
   * text; no numeric conversion happens in the lexer.
   */
  const std::string digits(YYText());
  lval->build<std::string>(digits);
  return rumur::parser::token::NUMBER;
}

([[:digit:]]*\.[[:digit:]]+) {
  /* Real literal such as 3.14 or .5, which is rejected outright. Any number of
   * leading digits is accepted so that the whole literal is consumed as one
   * token and the reported location covers all of it; with at most one leading
   * digit, an input like 12.5 was split into the NUMBER 12 followed by .5.
   * A range such as 1..5 is unaffected because a digit must directly follow
   * the dot.
   */
  throw rumur::Error("real types are not supported", *loc);
}

alias         { return rumur::parser::token::ALIAS;         }
array         { return rumur::parser::token::ARRAY;         }
assert        { return rumur::parser::token::ASSERT;        }
assume        { return rumur::parser::token::ASSUME;        }
  /* The token is BEGIN_TOK rather than BEGIN, presumably because BEGIN is the
   * flex macro used further down to switch start conditions.
   */
begin         { return rumur::parser::token::BEGIN_TOK;     }
boolean       { return rumur::parser::token::BOOLEAN;       }
by            { return rumur::parser::token::BY;            }
case          { return rumur::parser::token::CASE;          }
clear         { return rumur::parser::token::CLEAR;         }
const         { return rumur::parser::token::CONST;         }
cover         { return rumur::parser::token::COVER;         }
do            { return rumur::parser::token::DO;            }
else          { return rumur::parser::token::ELSE;          }
elsif         { return rumur::parser::token::ELSIF;         }
end           { return rumur::parser::token::END;           }
endalias      { return rumur::parser::token::ENDALIAS;      }
endexists     { return rumur::parser::token::ENDEXISTS;     }
endfor        { return rumur::parser::token::ENDFOR;        }
endforall     { return rumur::parser::token::ENDFORALL;     }
endfunction   { return rumur::parser::token::ENDFUNCTION;   }
endif         { return rumur::parser::token::ENDIF;         }
endprocedure  { return rumur::parser::token::ENDPROCEDURE;  }
endrecord     { return rumur::parser::token::ENDRECORD;     }
endrule       { return rumur::parser::token::ENDRULE;       }
endruleset    { return rumur::parser::token::ENDRULESET;    }
endstartstate { return rumur::parser::token::ENDSTARTSTATE; }
endswitch     { return rumur::parser::token::ENDSWITCH;     }
endwhile      { return rumur::parser::token::ENDWHILE;      }
enum          { return rumur::parser::token::ENUM;          }
error         { return rumur::parser::token::ERROR;         }
exists        { return rumur::parser::token::EXISTS;        }
for           { return rumur::parser::token::FOR;           }
forall        { return rumur::parser::token::FORALL;        }
function      { return rumur::parser::token::FUNCTION;      }
if            { return rumur::parser::token::IF;            }
invariant     { return rumur::parser::token::INVARIANT;     }
isundefined   { return rumur::parser::token::ISUNDEFINED;   }
liveness      { return rumur::parser::token::LIVENESS;      }
of            { return rumur::parser::token::OF;            }
procedure     { return rumur::parser::token::PROCEDURE;     }
put           { return rumur::parser::token::PUT;           }
  /* Not a token: the keyword is recognised only so that it can be rejected
   * with a specific message at its own location.
   */
real          { throw rumur::Error("real types are not supported", *loc); }
record        { return rumur::parser::token::RECORD;        }
return        { return rumur::parser::token::RETURN;        }
rule          { return rumur::parser::token::RULE;          }
ruleset       { return rumur::parser::token::RULESET;       }
scalarset     { return rumur::parser::token::SCALARSET;     }
startstate    { return rumur::parser::token::STARTSTATE;    }
switch        { return rumur::parser::token::SWITCH;        }
then          { return rumur::parser::token::THEN;          }
to            { return rumur::parser::token::TO;            }
type          { return rumur::parser::token::TYPE;          }
undefine      { return rumur::parser::token::UNDEFINE;      }
  /* Not a token either: union types are rejected in the same way as real. */
union         { throw rumur::Error("union types are not supported", *loc); }
var           { return rumur::parser::token::VAR;           }
while         { return rumur::parser::token::WHILE;         }

  /* The reserved words above win over the generic ID rule (below) only when
   * both match exactly the same text: flex prefers the longest match and
   * breaks ties in favour of the earlier rule. A longer word that merely
   * starts with a keyword is therefore still an ID, and end does not shadow
   * endalias and friends.
   *
   * The two rules that follow are Unicode spellings of forall and exists.
   */
"∀"           { return rumur::parser::token::FORALL;        }
"∃"           { return rumur::parser::token::EXISTS;        }

  /* Recognise true and false explicitly rather than as generic IDs (below).
   * Each is still returned as an ID token, but its text is always the
   * lower-case spelling, regardless of the letter case used in the input.
   */
false {
  const std::string canonical = "false";
  lval->build<std::string>(canonical);
  return rumur::parser::token::ID;
}
true {
  const std::string canonical = "true";
  lval->build<std::string>(canonical);
  return rumur::parser::token::ID;
}

[[:alpha:]][_[:alnum:]]* {
  /* Generic identifier: a letter followed by letters, digits or underscores.
   * The text is passed on exactly as written in the input.
   */
  lval->build<std::string>(YYText());
  return rumur::parser::token::ID;
}

"--" {
  /* Start of a line comment: switch to the SC_COMMENT rules, which discard
   * everything up to and including the next newline.
   */
  BEGIN(SC_COMMENT);
}

"/*" {
  /* Start of a block comment: switch to the SC_MULTILINE_COMMENT rules. */
  BEGIN(SC_MULTILINE_COMMENT);
}

  /* Multi-character operators. Where a Unicode spelling directly follows an
   * ASCII one and returns the same token, the two are interchangeable.
   */
":=" { return rumur::parser::token::COLON_EQ;  }
"≔"  { return rumur::parser::token::COLON_EQ;  }
".." { return rumur::parser::token::DOTDOT;    }
">=" { return rumur::parser::token::GEQ;       }
"≥"  { return rumur::parser::token::GEQ;       }
">>" { return rumur::parser::token::RSH;       }
"->" { return rumur::parser::token::IMPLIES;   }
"→"  { return rumur::parser::token::IMPLIES;   }
"<=" { return rumur::parser::token::LEQ;       }
"≤"  { return rumur::parser::token::LEQ;       }
"<<" { return rumur::parser::token::LSH;       }
"!=" { return rumur::parser::token::NEQ;       }
"≠"  { return rumur::parser::token::NEQ;       }
  /* The arrow ==> and the comparison == share a prefix; flex takes the longest
   * match, so the arrow is never split into a comparison plus a stray >.
   */
"==>" { return rumur::parser::token::ARROW;    }
"⇒"  { return rumur::parser::token::ARROW;    }
"==" { return rumur::parser::token::DEQ;       }
  /* Negation has no named token: the Unicode sign yields the same character
   * token that the single-character rule below returns for an ASCII !.
   */
"¬"  { return '!';                             }
  /* The Unicode conjunction and disjunction signs have their own tokens (LAND,
   * LOR), distinct from the AMPAMP and PIPEPIPE tokens of the doubled ASCII
   * forms.
   */
"∧"  { return rumur::parser::token::LAND;      }
"∨"  { return rumur::parser::token::LOR;       }
"&&" { return rumur::parser::token::AMPAMP;    }
"||" { return rumur::parser::token::PIPEPIPE;  }

[\^~&!|:=><\-\%\+;{},\[\]\.()/\*\?] {
  /* Single-character operators and punctuation are returned as their own
   * character code rather than as a named token.
   */
  const char symbol = *YYText();
  return symbol;
}

(\"|“)(\\.|[^\\\"]|\\”)*(\"|”) {
  /* String literal delimited by ASCII or typographic double quotes. The value
   * of the token is the text between the delimiters; escape sequences are
   * passed through untouched.
   */
  const std::string s(YYText());

  /* The pattern lets a string contain raw newlines, but YY_USER_ACTION has only
   * advanced the column. Account for the line breaks here, as every other rule
   * that consumes a newline does, so that locations reported after a
   * multi-line string stay correct: advance the line count (which restarts the
   * column), then advance the column by the bytes after the last newline.
   */
  int newlines = 0;
  for (const char c : s) {
    if (c == '\n')
      ++newlines;
  }
  if (newlines > 0) {
    loc->lines(newlines);
    loc->columns(static_cast<int>(s.size() - s.rfind('\n') - 1));
  }

  /* figure out which quote character this string starts with */
  const std::string opener = s.find("“") == 0 ? "“" : "\"";
  assert(s.find(opener) == 0 && "logic bug in string lexing rule");

  /* figure out which quote character this string ends with; the size check
   * keeps the suffix comparison in bounds for very short strings
   */
  static const std::string squote = "”";
  const bool curly_close = s.size() > squote.size()
    && s.compare(s.size() - squote.size(), squote.size(), squote) == 0;
  const std::string closer = curly_close ? squote : "\"";

  /* build a string, stripping the quote delimiters */
  lval->build<std::string>(
    s.substr(opener.size(), s.size() - opener.size() - closer.size()));

  return rumur::parser::token::STRING;
}

    /* Ensure we keep a correct line count for error reporting. This rule is
     * listed before the generic white space rule so that it, rather than that
     * rule, handles newlines (both match a single character and the earlier
     * rule wins).
     */
"\n" {
  loc->lines(yyleng);
  loc->step(); /* maintain correct column count */
}

  /* Ignore white space. One character is consumed per match and no token is
   * returned.
   */
[[:space:]] {
  loc->step(); /* maintain correct column count */
}

. {
  /* Catch-all: anything no earlier rule accepted is reported as an error. */
  std::string message("unexpected character: '");
  message += YYText();
  message += "'";
  throw rumur::Error(message, *loc);
}

  /* Comment handling. Inside a line comment every character is discarded until
   * the terminating newline, which also returns the scanner to normal rules.
   */
<SC_COMMENT>. {
  /* do nothing */
}

<SC_COMMENT>\n {
  loc->lines(yyleng);
  loc->step(); /* maintain correct column count */
  BEGIN(INITIAL);
}

<SC_MULTILINE_COMMENT>[^*\n]* {
  /* do nothing: skip a run of ordinary comment text; stars and newlines are
   * left to the rules below
   *
   * NOTE(review): there is no <<EOF>> rule for this state, so an unterminated
   * block comment reaches the end of the input without any diagnostic from the
   * lexer. Confirm that this is intended.
   */
}

<SC_MULTILINE_COMMENT>"*"+[^*/\n]* {
  /* do nothing: stars that are not followed by a slash belong to the comment
   * body (when a slash does follow, the longer closing rule below wins)
   */
}

<SC_MULTILINE_COMMENT>\n {
  /* keep the line count correct across a comment that spans several lines */
  loc->lines(yyleng);
}

<SC_MULTILINE_COMMENT>"*"+"/" {
  /* closing delimiter: resume normal scanning */
  loc->step(); /* maintain correct column count */
  BEGIN(INITIAL);
}