File: rfc5322_lexical_tokens.rl

package info (click to toggle)
ruby-mail 2.8.1%2Bdfsg1-2
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 5,704 kB
  • sloc: ruby: 73,709; makefile: 3
file content (60 lines) | stat: -rw-r--r-- 2,043 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
%%{
  # RFC 5322 Internet Message Format
  # Section 3.2. Lexical Tokens
  # https://tools.ietf.org/html/rfc5322#section-3.2
  #
  # Ragel rendering of the lexical-token ABNF from that section. Rule names
  # follow the ABNF names with hyphens written as underscores. The embedded
  # actions comment_s, comment_e, qstr_s, qstr_e, phrase_s and phrase_e are
  # referenced below but are not defined in this file.
  machine rfc5322_lexical_tokens;
  # Select int as the alphabet type instead of the Ragel default (char).
  # NOTE(review): presumably so that byte values above 0x7f, as needed by
  # utf8_non_ascii, stay non-negative - confirm against the host driver.
  alphtype int;

  # CR, LF, CRLF, WSP, VCHAR, ALPHA, DIGIT and DQUOTE are used below without
  # being defined here; presumably they come from this include - confirm.
  # utf8_non_ascii is likewise defined outside this file.
  include rfc5234_abnf_core_rules "rfc5234_abnf_core_rules.rl";

  # 3.2.1.  Quoted characters
  #
  # Obsolete control characters: decimal 1-8, 11 (VT), 12 (FF), 14-31 and
  # 127 (DEL). NUL, HTAB, LF and CR are deliberately left out.
  obs_NO_WS_CTL = 0x01..0x08
                | "\v"
                | "\f"
                | 0x0e..0x1f
                | 0x7f;

  # Obsolete quoted-pair: a backslash escaping NUL, an obsolete control
  # character, or a bare LF or CR.
  obs_qp = "\\" (0x00 | obs_NO_WS_CTL | LF | CR);

  # Backslash plus the escaped character, in either the obsolete form or
  # the current form (a visible character or white space).
  quoted_pair = obs_qp | ("\\" (VCHAR | WSP));

  # 3.2.2. Folding White Space and Comments
  #
  # Obsolete folding: one or more WSP, each optionally preceded by a CRLF.
  obs_FWS = (CRLF? WSP)+;
  # Current folding, written in the RFC shape ([*WSP CRLF] 1*WSP): an
  # optional WSP run ending in CRLF, followed by at least one WSP.
  FWS = ((WSP* CRLF)? WSP+) | obs_FWS;

  # Comment text: printable US-ASCII except the two parentheses (0x28, 0x29)
  # and backslash (0x5c), plus the obsolete control characters.
  obs_ctext = obs_NO_WS_CTL;
  rfc5322_ctext = obs_ctext
                | 0x21..0x27
                | 0x2a..0x5b
                | 0x5d..0x7e;
  ctext = rfc5322_ctext | utf8_non_ascii; # RFC6532 for UTF-8

  # Recursive comments
  #
  # RFC 5322 comments may nest. Nesting is handled with the Ragel call and
  # return statements: each opening parenthesis calls into comment_tail and
  # the matching closing parenthesis returns from it.
  # NOTE(review): fcall and fret rely on a state stack provided by the host
  # program; that setup is not visible in this file.
  #
  # comment_begin: call the comment_tail machine.
  action comment_begin { fcall comment_tail; }
  # comment_exit: return to the state that issued the fcall.
  action comment_exit { fret; }
  # One unit of comment content. A nested opening parenthesis fires
  # comment_begin, which starts another comment_tail call.
  ccontent = ctext | quoted_pair | "(" @comment_begin;
  # Body of a comment after its opening parenthesis, up to and including the
  # closing parenthesis. Defined with := so it is instantiated as a named
  # entry point that fcall can target. comment_s is embedded as an entering
  # action on the content part; it is not defined in this file.
  comment_tail := ((FWS? ccontent)* >comment_s) FWS? ")" @comment_exit;
  # A comment as seen by the calling machine: only the opening parenthesis is
  # matched here, the remainder is consumed inside comment_tail. comment_e is
  # embedded as a leaving action; it is not defined in this file.
  comment = "(" @comment_begin %comment_e;
  # Comments and/or folding white space: either one or more comments with
  # optional FWS around them, or FWS alone.
  CFWS = ((FWS? comment)+ FWS?) | FWS;

  # 3.2.3. Atom
  #
  # atext: letters, digits and the printable US-ASCII characters that are
  # not specials, extended with non-ASCII UTF-8 below.
  rfc5322_atext = ALPHA | DIGIT | "!" | "#" | "$" | "%" | "&" |
                  "'" | "*" | "+" | "-" | "/" | "=" | "?" | "^" |
                  "_" | "`" | "{" | "|" | "}" | "~";
  atext = rfc5322_atext | utf8_non_ascii; # RFC6532 for UTF-8
  # atom = [CFWS] 1*atext [CFWS]
  atom = CFWS? atext+ CFWS?;
  # dot-atom-text = 1*atext *("." 1*atext)
  # Every dot-separated label is one OR MORE atext characters. A label must
  # not be empty, so leading, trailing and doubled dots are still rejected.
  dot_atom_text = atext+ ("." atext+)*;
  # dot-atom = [CFWS] dot-atom-text [CFWS]
  dot_atom = CFWS? dot_atom_text CFWS?;

  # 3.2.4. Quoted Strings
  #
  # Quoted text: printable US-ASCII except DQUOTE (0x22) and backslash
  # (0x5c), plus the obsolete control characters.
  obs_qtext = obs_NO_WS_CTL;
  rfc5322_qtext = obs_qtext
                | 0x21
                | 0x23..0x5b
                | 0x5d..0x7e;
  qtext = rfc5322_qtext | utf8_non_ascii; # RFC6532 for UTF-8

  # One unit of quoted-string content: plain quoted text or an escaped pair.
  qcontent = qtext | quoted_pair;

  # quoted-string = [CFWS] DQUOTE *([FWS] qcontent) [FWS] DQUOTE [CFWS]
  # The qstr_s and qstr_e actions (defined outside this file) are attached
  # as entering and leaving actions to the text between the two DQUOTEs.
  quoted_string =
    CFWS?
    (DQUOTE (((FWS? qcontent)* FWS?) >qstr_s %qstr_e) DQUOTE)
    CFWS?;

  # 3.2.5. Miscellaneous Tokens
  #
  # word = atom / quoted-string
  word = atom | quoted_string;

  # One or more words, dots or at-signs in any order. This is more permissive
  # than the RFC rule (obs-phrase = word *(word / "." / CFWS)), which has to
  # begin with a word and does not allow an at-sign.
  obs_phrase = (word | "." | "@")+;
  # phrase = 1*word / obs-phrase. phrase_s and phrase_e are embedded as
  # entering and leaving actions; they are not defined in this file.
  phrase = (obs_phrase | word+) >phrase_s %phrase_e;

  # Not part of RFC, used for keywords per 3.6.5 Information Fields
  # Comma-separated phrases; each comma may be followed by any number of FWS.
  phrase_lists = phrase ("," FWS* phrase)*;
}%%