File: lex.h

package info (click to toggle)
harec 0.26.0-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 1,480 kB
  • sloc: ansic: 20,054; asm: 335; makefile: 116; lisp: 80; sh: 45
file content (190 lines) | stat: -rw-r--r-- 2,476 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
#ifndef HAREC_LEX_H
#define HAREC_LEX_H
#include <stdint.h>
#include <stdio.h>
#include "types.h"
#include "utf8.h"

// Alias for UTF8_INVALID (defined in utf8.h).
// NOTE(review): presumably the sentinel code point the lexer uses to signal
// end of input -- confirm against lex.c.
#define C_EOF UTF8_INVALID

// Every kind of token the lexer can produce.
//
// Keep sorted
//
// The enumerators carry no explicit values, so their numeric value is their
// position in this list; inserting or reordering entries renumbers everything
// that follows. T_LAST_KEYWORD and T_LAST_OPERATOR are aliases (they do not
// consume a value of their own) marking the end of the keyword and operator
// ranges.
//
// NOTE(review): T_ABORT..T_YIELD are in ascending ASCII order of their names;
// presumably this mirrors the source spellings so that a lookup table in
// lex.c can be searched -- confirm before adding a keyword.
enum lexical_token {
	// Attribute tokens (T_ATTR_*)
	T_ATTR_FINI,
	T_ATTR_INIT,
	T_ATTR_PACKED,
	T_ATTR_SYMBOL,
	T_ATTR_TEST,
	T_ATTR_THREADLOCAL,
	T_ATTR_UNDEFINED,
	// Keywords
	T_UNDERSCORE,
	T_ABORT,
	T_ALIGN,
	T_ALLOC,
	T_APPEND,
	T_AS,
	T_ASSERT,
	T_BOOL,
	T_BREAK,
	T_CASE,
	T_CONST,
	T_CONTINUE,
	T_DEF,
	T_DEFER,
	T_DELETE,
	T_DONE,
	T_ELSE,
	T_ENUM,
	T_EXPORT,
	T_F32,
	T_F64,
	T_FALSE,
	T_FN,
	T_FOR,
	T_FREE,
	T_I16,
	T_I32,
	T_I64,
	T_I8,
	T_IF,
	T_INSERT,
	T_INT,
	T_IS,
	T_LEN,
	T_LET,
	T_MATCH,
	T_NEVER,
	T_NOMEM,
	T_NULL,
	T_NULLABLE,
	T_OFFSET,
	T_OPAQUE,
	T_RETURN,
	T_RUNE,
	T_SIZE,
	T_STATIC,
	T_STR,
	T_STRUCT,
	T_SWITCH,
	T_TRUE,
	T_TYPE,
	T_U16,
	T_U32,
	T_U64,
	T_U8,
	T_UINT,
	T_UINTPTR,
	T_UNION,
	T_USE,
	T_VAARG,
	T_VAEND,
	T_VALIST,
	T_VASTART,
	T_VOID,
	T_YIELD,
	// Alias of T_YIELD: every enumerator with a value <= T_LAST_KEYWORD is
	// an attribute or keyword token. T_ARROW below is T_YIELD + 1.
	T_LAST_KEYWORD = T_YIELD,

	// Operators
	// NOTE(review): this section is not in name order (e.g. T_BANDEQ comes
	// before T_BAND, T_BOREQ follows T_NEQUAL, T_BXOR follows T_TIMESEQ).
	// It is unclear from this header whether "Keep sorted" applies here;
	// do not reorder without checking every table indexed by this enum.
	T_ARROW,
	T_BANDEQ,
	T_BAND,
	T_BNOT,
	T_BOR,
	T_COLON,
	T_COMMA,
	T_DIV,
	T_DIVEQ,
	T_DOT,
	T_DOUBLE_COLON,
	T_DOUBLE_DOT,
	T_ELLIPSIS,
	T_EQUAL,
	T_GREATER,
	T_GREATEREQ,
	T_LAND,
	T_LANDEQ,
	T_LBRACE,
	T_LBRACKET,
	T_LEQUAL,
	T_LESS,
	T_LESSEQ,
	T_LNOT,
	T_LOR,
	T_LOREQ,
	T_LPAREN,
	T_LSHIFT,
	T_LSHIFTEQ,
	T_LXOR,
	T_LXOREQ,
	T_MINUS,
	T_MINUSEQ,
	T_MODEQ,
	T_MODULO,
	T_NEQUAL,
	T_BOREQ,
	T_PLUS,
	T_PLUSEQ,
	T_QUESTION,
	T_RBRACE,
	T_RBRACKET,
	T_RPAREN,
	T_RSHIFT,
	T_RSHIFTEQ,
	T_SEMICOLON,
	T_TIMES,
	T_TIMESEQ,
	T_BXOR,
	T_BXOREQ,
	// Alias of T_BXOREQ: enumerators in (T_LAST_KEYWORD, T_LAST_OPERATOR]
	// are operator/punctuation tokens.
	T_LAST_OPERATOR = T_BXOREQ,

	// Tokens with additional information
	// (struct token has payload members such as `name` and the literal
	// value fields for these.)
	T_NAME,
	T_LITERAL,

	// Magic tokens
	// NOTE(review): T_EOF presumably marks end of input and T_NONE "no
	// token" -- confirm in lex.c.
	T_EOF,
	T_NONE,
};

// A position within a source file, attached to tokens (struct token.loc) and
// tracked by the lexer (struct lexer.loc).
struct location {
	// Integer identifier of the file.
	// NOTE(review): presumably the `fileid` passed to lex_init -- confirm.
	int file;
	// Line number; whether counting starts at 0 or 1 is not visible here.
	int lineno;
	// Column number; same caveat about the base as lineno.
	int colno;
};

// A single lexed token: its kind, where it was found, and an optional payload.
//
// The payload lives in an anonymous union (C11), so its members are accessed
// directly on the token (tok.name, tok.ival, tok.string.len, ...). Only one
// member is meaningful at a time.
// NOTE(review): which member is active is presumably determined by `token`
// (T_NAME vs. T_LITERAL) together with `storage` -- confirm in lex.c.
struct token {
	struct location loc; // source position of the token
	enum lexical_token token; // kind of token
	// enum type_storage is declared in types.h.
	// NOTE(review): presumably the type of a T_LITERAL's value -- confirm.
	enum type_storage storage;
	union {
		const char *name; // presumably the identifier text for T_NAME
		uint32_t rune; // presumably a rune literal's code point
		int64_t ival; // signed integer value
		uint64_t uval; // unsigned integer value
		double fval; // floating-point value
		struct {
			size_t len; // length of `value`; unit/terminator not visible here
			const char *value;
		} string; // presumably a string literal's contents
	};
};

// State of one lexer instance; set up by lex_init and torn down by lex_finish.
//
// NOTE(review): this header uses `bool` without including <stdbool.h>
// itself; presumably it arrives via "types.h" -- confirm, or include it here.
struct lexer {
	// Input stream being tokenized.
	FILE *in;

	// Character buffer with its size bookkeeping.
	// NOTE(review): presumably scratch space for the text of the token
	// being read, with bufsz the capacity and buflen the bytes in use --
	// confirm in lex.c.
	char *buf;
	size_t bufsz;
	size_t buflen;

	// Two 32-bit code-point slots.
	// NOTE(review): presumably lookahead/pushback characters -- confirm.
	uint32_t c[2];

	// One stored token.
	// NOTE(review): presumably the token handed back through unlex().
	struct token un;

	// Current source position.
	struct location loc;

	// Mode flags; their exact effect is defined in lex.c, not here.
	bool require_int;
	bool in_annotation;

	// Intern table supplied to lex_init.
	// NOTE(review): presumably used to intern name strings -- confirm.
	struct intern_table *itbl;
};

// Prepares `lexer` for reading from stream `f`.
// NOTE(review): `fileid` presumably ends up in location.file and `itbl` in
// lexer->itbl -- confirm in lex.c.
void lex_init(struct lexer *lexer, FILE *f, int fileid, struct intern_table *itbl);
// Counterpart to lex_init.
// NOTE(review): presumably releases resources owned by `lexer`; whether it
// closes the underlying FILE is not visible from this header.
void lex_finish(struct lexer *lexer);
// Reads the next token from `lexer` into `*out` and returns a token kind.
// NOTE(review): the return value presumably equals out->token, with T_EOF at
// end of input -- confirm in lex.c.
enum lexical_token lex(struct lexer *lexer, struct token *out);
// Hands token `in` back to `lexer`.
// NOTE(review): presumably stored in lexer->un so the next lex() call
// returns it again, i.e. a single token of pushback -- confirm.
void unlex(struct lexer *lexer, const struct token *in);

// Return a string describing a token / a token kind.
// NOTE(review): ownership and lifetime of the returned strings are not
// visible here (static vs. shared buffer) -- check lex.c before storing them.
const char *token_str(const struct token *tok);
const char *lexical_token_str(enum lexical_token tok);

#endif