1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218
|
#########################################################################
#
# mairix - message index builder and finder for maildir folders.
#
# Copyright (C) Richard P. Curnow 2002-2004,2006
# Copyright (C) Jonathan Kamens 2010
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of version 2 of the GNU General Public License as
# published by the Free Software Foundation.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
#
# =======================================================================
%{
#include "from.h"
%}
# Define tokens
# LF : \n
# CR : \r
# DIGIT : [0-9]
# AT : @
# COLON : :
# WHITE : ' ', \t
# LOWER : [a-z]
# UPPER : [A-Z]
# PLUSMINUS : [+-]
# OTHER_EMAIL : other stuff valid in the LHS of an address
# DOMAIN : stuff valid in the RHS of an address
# Character-class abbreviations referenced by the BLOCK definitions below.
# Line terminators: BLOCK main accepts a From line ending in LF or in CR LF.
Abbrev LF = [\n]
Abbrev CR = [\r]
Abbrev DIGIT = [0-9]
# PERIOD is only used on its own inside a bracketed numeric domain
# (STATE dotted_quad in BLOCK email).
Abbrev PERIOD = [.]
Abbrev AT = [@]
Abbrev LOWER = [a-z]
Abbrev UPPER = [A-Z]
Abbrev COLON = [:]
Abbrev WHITE = [ \t]
Abbrev PLUSMINUS = [+\-]
# Explained clearly at
# http://en.wikipedia.org/wiki/E-mail_address#RFC_specification
# Punctuation accepted in an unquoted local part.  Note that '.' is included
# here, so EMAIL matches a period as well.
Abbrev OTHER_EMAIL = [.!#$%&'*/=?^_`{|}~]
Abbrev LT = [<]
Abbrev GT = [>]
# EMAIL : any single character accepted in the unquoted local part (LHS).
Abbrev EMAIL = LOWER | UPPER | DIGIT | PLUSMINUS | OTHER_EMAIL
# DOMAIN : any single character accepted in the domain part (RHS):
# letters, digits, '-', '_' and '.'.
Abbrev OTHER_DOMAIN = [\-_.]
Abbrev DOMAIN = LOWER | UPPER | DIGIT | OTHER_DOMAIN
Abbrev DQUOTE = ["]
# Extra characters that are accepted only between double quotes in the local
# part (see STATE quoted_before_at in BLOCK email).
Abbrev OTHER_QUOTED = [@:<>]
# Square brackets delimit a numeric domain such as [127.0.0.1].
Abbrev LEFTSQUARE = [[]
Abbrev RIGHTSQUARE = [\]]
# BLOCK email : recognise a bare (un-angled) return address.
#
# Accepted shapes, as built from the states below:
#   local                      (local part only, no '@')
#   local@domain
#   local@[digits.and.periods]
# where 'local' may contain double-quoted sections, and the whole address may
# be preceded by one or more '@domain:' prefixes (STATE domain_route).
# The block is entered at 'in' and left at 'out'.
BLOCK email {
STATE in
# An EMAIL character may be the last character before leaving 'in'
# (-> before_at) or may be followed by further leading characters (-> in).
EMAIL -> in, before_at
# Opening quote of a quoted section in the local part.
DQUOTE -> quoted_before_at
# A leading '@' starts an '@domain:' prefix.
AT -> domain_route
STATE domain_route
DOMAIN -> domain_route
# The ':' ends the prefix; scanning of the address restarts at 'in'.
COLON -> in
STATE quoted_before_at
# Inside quotes, whitespace and the OTHER_QUOTED characters are also allowed.
EMAIL | WHITE | OTHER_QUOTED -> quoted_before_at
# Closing quote.
DQUOTE -> before_at
STATE before_at
EMAIL -> before_at
# Another quoted section may follow.
DQUOTE -> quoted_before_at
# Local part only : >=1 characters will suffice, which we've already
# matched.
-> out
AT -> start_of_domain
STATE start_of_domain
# '[' introduces a bracketed numeric domain; anything else must be a
# DOMAIN character.
LEFTSQUARE -> dotted_quad
DOMAIN -> after_at
STATE dotted_quad
# Any run of digits and periods is accepted here; the number of groups and
# their values are not checked.
DIGIT | PERIOD -> dotted_quad
RIGHTSQUARE -> out
STATE after_at
# Each further DOMAIN character may either continue the domain or end it.
DOMAIN -> after_at, out
}
# BLOCK angled_email : recognise an address wrapped in angle brackets,
# i.e. '<' followed by anything BLOCK email accepts, followed by '>'.
# No whitespace is allowed between the brackets and the address.
BLOCK angled_email {
STATE in
LT -> in_angles
STATE in_angles
# Instantiate BLOCK email between its 'in' and 'out' states.
<email:in->out> -> before_gt
STATE before_gt
GT -> out
}
# BLOCK zone : recognise one timezone word.
#
# A zone is either a single upper-case letter, or an upper-case letter or
# '+'/'-' followed by one or more letters or digits.  The contents are not
# otherwise checked.
BLOCK zone {
# Make this pretty lenient
STATE in
# First character: an upper-case letter may start a longer word (-> zone2)
# or be the whole zone on its own (-> out).
UPPER -> zone2
UPPER -> out
# A sign must be followed by at least one more character, so it cannot
# go straight to 'out'.
PLUSMINUS -> zone2
STATE zone2
# Each following letter or digit may continue the word or end it.
UPPER | LOWER -> zone2, out
DIGIT -> zone2, out
}
# BLOCK date : recognise the timestamp part of a From line.
#
# Layout accepted (runs of WHITE separate the fields):
#   <white> Www Mmm D|DD HH:MM[:SS] [zone [zone]] YYYY
#   <white> Www Mmm D|DD HH:MM[:SS] YYYY zone [zone]
# 'Www' and 'Mmm' are only checked for shape (one upper-case letter followed
# by two lower-case letters), not against real day or month names.
# Trailing whitespace is allowed before the block's 'out' state.
BLOCK date {
STATE in
# At least one whitespace character must precede the weekday.
WHITE -> in, before_weekday
STATE before_weekday
# Three-letter weekday followed by whitespace.
UPPER ; LOWER ; LOWER ; WHITE -> after_weekday
STATE after_weekday
WHITE -> after_weekday
# Three-letter month followed by whitespace.
UPPER ; LOWER ; LOWER ; WHITE -> after_month
STATE after_month
WHITE -> after_month
# Day of month: one or two digits, followed by whitespace.
DIGIT ; WHITE -> after_day
DIGIT ; DIGIT ; WHITE -> after_day
STATE after_day
WHITE -> after_day
# Accept HH:MM:SS
DIGIT ; DIGIT ; COLON ; DIGIT ; DIGIT ; COLON ; DIGIT ; DIGIT ; WHITE -> after_time
# Accept HH:MM
DIGIT ; DIGIT ; COLON ; DIGIT ; DIGIT ; WHITE -> after_time
# Allow either 1 or 2 words of timezone
STATE after_time
WHITE -> after_time
# No timezone at all: go straight on to the year.
-> after_timezone
# Exactly one timezone word.
<zone:in->out> ; WHITE -> after_timezone
# First of two timezone words; the second is matched in after_timezone_1.
<zone:in->out> ; WHITE -> after_timezone_1
# It appears that Pine puts the timezone after the year
DIGIT ; DIGIT ; DIGIT ; DIGIT -> after_year_before_zone
STATE after_year_before_zone
WHITE -> after_year_before_zone
# Year-first layout: at least one timezone word is required here.
<zone:in->out> -> after_timezone_after_year
# First of two timezone words following the year.
<zone:in->out> ; WHITE -> after_timezone_after_year_1
STATE after_timezone_after_year_1
WHITE -> after_timezone_after_year_1
# Second timezone word following the year.
<zone:in->out> -> after_timezone_after_year
STATE after_timezone_after_year
# Optional trailing whitespace, then done.
WHITE -> after_timezone_after_year
-> out
STATE after_timezone_1
WHITE -> after_timezone_1
# Second timezone word preceding the year.
<zone:in->out> ; WHITE -> after_timezone
STATE after_timezone
WHITE -> after_timezone
# Four-digit year.
DIGIT ; DIGIT ; DIGIT ; DIGIT -> after_year
STATE after_year
# Optional trailing whitespace, then done.
WHITE -> after_year
-> out
}
# Assume the earlier code has identified the '\nFrom ' sequence,
# and the validator starts scanning from the character beyond the space
# BLOCK main : top-level scanner for the remainder of a From line.
#
# The line is: optional leading whitespace, a return address (which may be
# bare, angle-bracketed, a single '+' or '-', or absent), a date as
# recognised by BLOCK date, and then LF or CR LF.  Reaching the end of such
# a line yields FROMCHECK_PASS.
BLOCK main {
STATE in
# Real return address.
WHITE -> in
<email:in->out> -> before_date
<angled_email:in->out> -> before_date
# Cope with Mozilla mbox folder format which just uses a '-' as
# the return address field.
# (PLUSMINUS also matches '+', so a lone '+' is accepted as well.)
PLUSMINUS -> before_date
# Empty return address
-> before_date
STATE before_date
# BLOCK date itself demands at least one leading whitespace character, so
# the address and the date are always separated.
<date:in->out> ; LF = FROMCHECK_PASS
# Cope with mozilla mbox format
<date:in->out> ; CR ; LF = FROMCHECK_PASS
# Mention this state last : the last mentioned state in the last defined
# block becomes the entry state of the scanner.
STATE in
}
# Result attributes.  FROMCHECK_PASS is attached to the accepting paths in
# BLOCK main; FROMCHECK_FAIL is named as the default attribute below.
ATTR FROMCHECK_PASS
ATTR FROMCHECK_FAIL
# NOTE(review): presumably every state without an explicit attribute reports
# FROMCHECK_FAIL - confirm against the dfasyn documentation.
DEFATTR FROMCHECK_FAIL
# NOTE(review): presumably the prefix applied to the names of the generated
# tables/functions - confirm against the dfasyn documentation.
PREFIX fromcheck
# NOTE(review): presumably the C type used for the attribute values; the
# enum is expected to be declared in from.h, included in the prologue at the
# top of this file - confirm.
TYPE "enum fromcheck_result"
# vim:ft=txt:et:sw=4:sts=4:ht=4
|