1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19
|
%%{
  # Ragel definitions for multi-byte (non-ASCII) UTF-8 sequences.
  # The rules mirror the ABNF in RFC 3629, section 4,
  # "Syntax of UTF-8 Byte Sequences":
  # https://tools.ietf.org/html/rfc3629#section-4
  machine rfc3629_utf8;

  # NOTE(review): presumably int is chosen so that byte values >= 0x80 can be
  # written as plain positive literals -- confirm against the host code that
  # feeds this machine.
  alphtype int;

  # Continuation byte (UTF8-tail in the RFC).
  utf8_tail = 0x80..0xBF;

  # Two-byte sequence. The lead byte starts at 0xC2, so 0xC0 and 0xC1 are
  # not accepted.
  utf8_2byte = 0xC2..0xDF utf8_tail;

  # Three-byte sequence. The second byte is narrowed after two lead bytes:
  #   0xE0 -> 0xA0..0xBF only
  #   0xED -> 0x80..0x9F only
  utf8_3byte =
      0xE0       0xA0..0xBF utf8_tail
    | 0xE1..0xEC utf8_tail{2}
    | 0xED       0x80..0x9F utf8_tail
    | 0xEE..0xEF utf8_tail{2};

  # Four-byte sequence. The second byte is narrowed after two lead bytes:
  #   0xF0 -> 0x90..0xBF only
  #   0xF4 -> 0x80..0x8F only
  utf8_4byte =
      0xF0       0x90..0xBF utf8_tail{2}
    | 0xF1..0xF3 utf8_tail{3}
    | 0xF4       0x80..0x8F utf8_tail{2};

  # Any well-formed UTF-8 sequence of two, three or four bytes.
  utf8_non_ascii = utf8_2byte | utf8_3byte | utf8_4byte;
}%%
|