1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60
|
%%{
# RFC 5322 Internet Message Format
# Section 3.2. Lexical Tokens
# https://tools.ietf.org/html/rfc5322#section-3.2
#
# Defines the low-level tokens of the message grammar: quoted-pair, folding
# white space, comments, atoms, quoted strings and phrases.
machine rfc5322_lexical_tokens;
# The machine alphabet is declared as int instead of Ragel's default (char).
alphtype int;
# Import the RFC 5234 ABNF core rules. WSP, CRLF, CR, LF, VCHAR, ALPHA, DIGIT
# and DQUOTE are used below but not defined in this file; they are presumably
# provided by this include -- verify against rfc5234_abnf_core_rules.rl.
include rfc5234_abnf_core_rules "rfc5234_abnf_core_rules.rl";
# 3.2.1. Quoted characters
#
# obs_NO_WS_CTL: every US-ASCII control character except NUL (0x00),
# HTAB (0x09), LF (0x0a) and CR (0x0d), plus DEL (0x7f).
# 0x0b is vertical tab, 0x0c is form feed.
obs_NO_WS_CTL = 0x01..0x08 | 0x0b | 0x0c | 0x0e..0x1f | 0x7f;
# obs_qp: obsolete escapes -- a backslash followed by NUL, a bare CR or LF,
# or one of the obsolete control characters.
obs_qp = "\\" . (0x00 | CR | LF | obs_NO_WS_CTL);
# quoted_pair: a backslash followed by a visible character or white space,
# or any of the obsolete escapes.
quoted_pair = obs_qp | ("\\" . (WSP | VCHAR));
# 3.2.2. Folding White Space and Comments
#
# obs_FWS: one or more white-space characters, each of which may be preceded
# by a line break.
obs_FWS = (WSP | (CRLF WSP))+;
# FWS: optional leading white space, a line break, then at least one
# white-space character -- or the obsolete form.
FWS = obs_FWS | (WSP* CRLF WSP+);
# ctext: printable US-ASCII (0x21..0x7e) except "(" (0x28), ")" (0x29) and
# "\" (0x5c), plus the obsolete control characters.
obs_ctext = obs_NO_WS_CTL;
rfc5322_ctext = (0x21..0x7e - (0x28 | 0x29 | 0x5c)) | obs_ctext;
ctext = utf8_non_ascii | rfc5322_ctext; # RFC6532 for UTF-8
# Recursive comments
#
# Comments may contain comments. Nesting is handled with Ragel's call stack:
# each "(" calls into the comment_tail machine and the matching ")" returns
# from it.
# NOTE(review): the comment_s and comment_e actions are not defined in this
# file; they must be supplied by the machine that includes this one.

# Push the current state and jump to the comment_tail entry point.
action comment_begin { fcall comment_tail; }
# Pop back to the state that was active when the comment was opened.
action comment_exit { fret; }
# One item of comment content; an opening parenthesis starts a nested comment.
ccontent = ctext | quoted_pair | "(" @comment_begin;
# Everything after the opening "(": the content (comment_s fires on entering
# it), optional trailing FWS, and the closing ")" which triggers the return.
comment_tail := ((FWS? ccontent)* >comment_s) FWS? ")" @comment_exit;
# Only the opening "(" is matched here; the remainder is consumed by
# comment_tail. comment_e is attached as a leaving action.
comment = "(" @comment_begin %comment_e;
# Comments and/or folding white space: one or more comments with optional
# FWS around them, or FWS alone.
CFWS = ((FWS? comment)+ FWS?) | FWS;
# 3.2.3. Atom
#
# atext: the characters allowed in an atom -- letters, digits and the
# printable specials listed below. utf8_non_ascii extends this per RFC 6532;
# it is not defined in this file.
rfc5322_atext = ALPHA | DIGIT | "!" | "#" | "$" | "%" | "&" |
                "'" | "*" | "+" | "-" | "/" | "=" | "?" | "^" |
                "_" | "`" | "{" | "|" | "}" | "~";
atext = rfc5322_atext | utf8_non_ascii; # RFC6532 for UTF-8
# atom: a run of atext with optional surrounding comments / white space.
atom = CFWS? atext+ CFWS?;
# dot-atom-text = 1*atext *("." 1*atext)
# Each dot-separated label is ONE OR MORE atext characters. A single atext
# per label would accept "a.b.c" but reject "example.com".
dot_atom_text = atext+ ("." atext+)*;
# dot_atom: dot_atom_text with optional surrounding comments / white space.
dot_atom = CFWS? dot_atom_text CFWS?;
# 3.2.4. Quoted Strings
#
# qtext: printable US-ASCII (0x21..0x7e) except DQUOTE (0x22) and "\" (0x5c),
# plus the obsolete control characters.
obs_qtext = obs_NO_WS_CTL;
rfc5322_qtext = (0x21..0x7e - (0x22 | 0x5c)) | obs_qtext;
qtext = utf8_non_ascii | rfc5322_qtext; # RFC6532 for UTF-8
# qcontent: one unit of quoted-string content.
qcontent = quoted_pair | qtext;
# quoted_string: optional CFWS, then the double-quoted body, then optional
# CFWS. The qstr_s (entering) and qstr_e (leaving) actions bracket the text
# between the quotes; they are not defined in this file.
quoted_string =
  CFWS? .
  ( DQUOTE . (((FWS? qcontent)* FWS?) >qstr_s %qstr_e) . DQUOTE ) .
  CFWS?;
# 3.2.5. Miscellaneous Tokens
#
# word: either a quoted string or an atom.
word = quoted_string | atom;
# obs_phrase: a non-empty sequence of words, "." and "@" in any order.
obs_phrase = ("." | "@" | word)+;
# phrase: one or more words, or the obsolete form. The phrase_s (entering)
# and phrase_e (leaving) actions are not defined in this file.
phrase = (word+ | obs_phrase) >phrase_s %phrase_e;
# Not part of RFC, used for keywords per 3.6.5 Information Fields:
# comma-separated phrases, with optional FWS after each comma.
phrase_lists = phrase . ("," . FWS* . phrase)*;
}%%
|