// re2c $INPUT -o $OUTPUT --input-encoding utf8 --loop-switch -i
// This example supports multiple input encodings: UTF-8 and UTF-32.
// Both lexers are generated from the same rules block, and the use
// blocks add only encoding-specific configurations.
#include <assert.h>
#include <stdint.h>
// Shared rules block, reused by the `use:re2c` blocks inside both lexers.
// YYFILL is disabled, so the generated lexers do no buffer refilling.
// The literal text "∀x ∃y" (with a single space) returns 0; the default
// rule `*` returns 1 for any other input.
/*!rules:re2c
re2c:yyfill:enable = 0;
"∀x ∃y" { return 0; }
* { return 1; }
*/
// UTF-8 lexer generated from the shared rules block.
// YYCURSOR points at the first byte of the input to match.
// Returns 0 if the input begins with "∀x ∃y" encoded as UTF-8, otherwise 1.
// NOTE(review): no end-of-input check is visible here (YYFILL is disabled and
// no length is passed), so the caller presumably has to supply enough
// readable input for the match to be decided -- confirm.
static int lex_utf8(const uint8_t *YYCURSOR)
{
// Declared for the re2c-generated code, which uses YYMARKER to save a
// backtracking position.
const uint8_t *YYMARKER;
// Instantiate the shared rules with 8-bit code units and UTF-8 encoding.
/*!use:re2c
re2c:define:YYCTYPE = uint8_t;
re2c:flags:8 = 1;
*/
}
// UTF-32 lexer generated from the shared rules block.
// YYCURSOR points at the first 32-bit code unit of the input to match.
// Returns 0 if the input begins with "∀x ∃y" encoded as UTF-32, otherwise 1.
// NOTE(review): no end-of-input check is visible here (YYFILL is disabled and
// no length is passed), so the caller presumably has to supply enough
// readable input for the match to be decided -- confirm.
static int lex_utf32(const uint32_t *YYCURSOR)
{
// Declared for the re2c-generated code, which uses YYMARKER to save a
// backtracking position.
const uint32_t *YYMARKER;
// Instantiate the shared rules with 32-bit code units: UTF-8 is switched off
// explicitly before UTF-32 is switched on.
// NOTE(review): presumably because the encoding flags are mutually
// exclusive -- confirm against the re2c documentation.
/*!use:re2c
re2c:define:YYCTYPE = uint32_t;
re2c:flags:8 = 0;
re2c:flags:u = 1;
*/
}
// Self-test: feeds the same text, "∀x ∃y", to both lexers -- once as UTF-8
// bytes and once as UTF-32 code points -- and asserts that each reports a
// match (return value 0).
int main(void)
{
    // U+2200 'x' U+0020 U+2203 'y' as UTF-8 bytes.
    static const uint8_t s8[] = // UTF-8
        { 0xe2, 0x88, 0x80, 0x78, 0x20, 0xe2, 0x88, 0x83, 0x79 };

    // The same five code points, one per 32-bit unit.
    static const uint32_t s32[] = // UTF-32
        { 0x00002200, 0x00000078, 0x00000020, 0x00002203, 0x00000079 };

    assert(lex_utf8(s8) == 0);
    assert(lex_utf32(s32) == 0);
    return 0;
}