# PLY (yacc) grammar rules and parser objects for mailbox / URL address lists.
import logging
from collections import namedtuple
import ply.yacc as yacc
from flanker.addresslib._parser.lexer import lexer, tokens
# Module-level logger for this grammar module.
log = logging.getLogger(__name__)

# Result records produced by the grammar actions: a mailbox is a
# (display name, local part, domain) triple, a URL is a one-field record.
Mailbox = namedtuple('Mailbox', ('display_name', 'local_part', 'domain'))
Url = namedtuple('Url', ('address',))

# Parsing rules

# Module-level start symbol of the grammar.
start = 'mailbox_or_url_list'
def p_expression_mailbox_or_url_list(p):
    '''mailbox_or_url_list : mailbox_or_url_list delim mailbox_or_url
                           | mailbox_or_url_list delim
                           | mailbox_or_url'''
    # The docstring above is the grammar rule read by PLY, not documentation.
    # The semantic value of the rule is a list of parsed entries.
    size = len(p)
    if size == 2:
        # A single entry starts a new list.
        p[0] = [p[1]]
    elif size == 3:
        # A trailing delimiter adds nothing; keep the list built so far.
        p[0] = p[1]
    elif size == 4:
        # list, delimiter, entry: result is a new list with the entry appended.
        p[0] = p[1] + [p[3]]
def p_delim(p):
    '''delim : delim fwsp COMMA
             | delim fwsp SEMICOLON
             | COMMA
             | SEMICOLON'''
    # Grammar-only rule: one or more commas/semicolons separated by folding
    # whitespace. No semantic value is assigned to p[0].
def p_expression_mailbox_or_url(p):
    '''mailbox_or_url : mailbox
                      | url'''
    # Pass-through: the value of whichever alternative matched becomes the
    # value of this rule.
    matched = p[1]
    p[0] = matched
def p_expression_url(p):
    'url : ofwsp URL ofwsp'
    # Only the URL token (position 2) is kept; the optional surrounding
    # whitespace/comments are dropped.
    address = p[2]
    p[0] = Url(address)
def p_expression_mailbox(p):
    '''mailbox : addr_spec
               | angle_addr
               | name_addr'''
    # Pass-through: each alternative's own value is forwarded unchanged.
    matched = p[1]
    p[0] = matched
def p_expression_name_addr(p):
    'name_addr : ofwsp phrase angle_addr'
    # Combine the display-name phrase with the local part and domain taken
    # from the value of the angle_addr that follows it.
    display_name = p[2]
    inner = p[3]
    p[0] = Mailbox(display_name, inner.local_part, inner.domain)
def p_expression_angle_addr(p):
    'angle_addr : ofwsp LANGLE addr_spec RANGLE ofwsp'
    # Rebuild the mailbox from the addr_spec between the angle brackets,
    # with an empty display name.
    inner = p[3]
    p[0] = Mailbox('', inner.local_part, inner.domain)
def p_expression_addr_spec(p):
    'addr_spec : ofwsp local_part AT domain ofwsp'
    # Positions 2 and 4 hold the local part and the domain; a bare addr_spec
    # carries an empty display name.
    local_part, domain = p[2], p[4]
    p[0] = Mailbox('', local_part, domain)
def p_expression_local_part(p):
    '''local_part : DOT_ATOM
                  | ATOM
                  | quoted_string'''
    # Pass-through: the matched token or quoted string is the local part.
    matched = p[1]
    p[0] = matched
def p_expression_domain(p):
    '''domain : DOT_ATOM
              | ATOM
              | domain_literal'''
    # Pass-through: the matched token or domain literal is the domain.
    matched = p[1]
    p[0] = matched
def p_expression_quoted_string(p):
    '''quoted_string : DQUOTE quoted_string_text DQUOTE
                     | DQUOTE DQUOTE'''
    # The value keeps its surrounding double quotes.
    size = len(p)
    if size == 3:
        # Two adjacent quotes: an empty quoted string.
        p[0] = '""'
    elif size == 4:
        inner_text = p[2]
        p[0] = '"{}"'.format(inner_text)
def p_expression_quoted_string_text(p):
    '''quoted_string_text : quoted_string_text QTEXT
                          | quoted_string_text QPAIR
                          | quoted_string_text fwsp
                          | QTEXT
                          | QPAIR
                          | fwsp'''
    # Concatenate everything matched so far with the newly matched piece.
    pieces = p[1:]
    p[0] = ''.join(pieces)
def p_expression_domain_literal(p):
    '''domain_literal : LBRACKET domain_literal_text RBRACKET
                      | LBRACKET RBRACKET'''
    # The value keeps its surrounding square brackets.
    size = len(p)
    if size == 3:
        # Empty literal: just the two brackets.
        p[0] = '[]'
    elif size == 4:
        inner_text = p[2]
        p[0] = '[{}]'.format(inner_text)
def p_expression_domain_literal_text(p):
    '''domain_literal_text : domain_literal_text DTEXT
                           | domain_literal_text fwsp
                           | DTEXT
                           | fwsp'''
    # Concatenate everything matched so far with the newly matched piece.
    pieces = p[1:]
    p[0] = ''.join(pieces)
def p_expression_comment(p):
    '''comment : LPAREN comment_text RPAREN
               | LPAREN RPAREN'''
    # The content of a comment is discarded: its value is always the empty
    # string.
    discarded = ''
    p[0] = discarded
def p_expression_comment_text(p):
    '''comment_text : comment_text CTEXT
                    | comment_text fwsp
                    | CTEXT
                    | fwsp'''
    # Concatenate everything matched so far with the newly matched piece.
    pieces = p[1:]
    p[0] = ''.join(pieces)
def p_expression_phrase(p):
    '''phrase : phrase fwsp ATOM
              | phrase fwsp DOT_ATOM
              | phrase fwsp DOT
              | phrase fwsp quoted_string
              | phrase ATOM
              | phrase DOT_ATOM
              | phrase DOT
              | phrase quoted_string
              | ATOM
              | DOT_ATOM
              | DOT
              | quoted_string'''
    # The docstring above is the grammar rule read by PLY, not documentation.
    # The three production shapes are mutually exclusive, so they are handled
    # by one if/elif chain, as in the other rules of this module.
    size = len(p)
    if size == 4:
        # phrase, folding whitespace, word: the whitespace (p[2]) is replaced
        # by a single space.
        p[0] = '{} {}'.format(p[1], p[3])
    elif size == 3:
        # phrase directly followed by a word: joined with no separator.
        p[0] = '{}{}'.format(p[1], p[2])
    elif size == 2:
        # A single word starts the phrase.
        p[0] = p[1]
def p_expression_ofwsp(p):
    '''ofwsp : fwsp comment fwsp
             | fwsp comment
             | comment fwsp
             | comment
             | fwsp
             |'''
    # Optional whitespace/comment run; the last alternative is the empty
    # production, for which the joined value is the empty string.
    pieces = p[1:]
    p[0] = ''.join(pieces)
def p_expression_fwsp(p):
    'fwsp : FWSP'
    # Drop every CRLF pair from the whitespace token; other whitespace
    # characters are kept.
    raw = p[1]
    p[0] = raw.replace('\r\n', '')
def p_error(p):
    # Error hook: always raises SyntaxError. A falsy `p` is reported as an
    # end-of-input error; otherwise the offending token's value and lexer
    # position are included in the message.
    if not p:
        raise SyntaxError('syntax error: eof')
    details = (p.value, p.lexpos)
    raise SyntaxError('syntax error: token=%s, lexpos=%s' % details)
# Build the parsers
#
# Five parser objects are created at import time, one per grammar entry point.
# The calls differ only in the start symbol and the table-module name. Each
# one passes the module logger as PLY's error log, turns debug output off and
# disables writing the generated tables (write_tables=False).
# NOTE(review): check_recursion=False presumably skips PLY's grammar recursion
# check to shorten construction time -- confirm against the PLY documentation.

# Parser whose start symbol is a single mailbox.
log.debug('building mailbox parser')
mailbox_parser = yacc.yacc(start='mailbox',
errorlog=log,
tabmodule='mailbox_parsetab',
debug=False,
write_tables=False,
check_recursion=False)
# Parser whose start symbol is a bare addr_spec (local_part AT domain).
log.debug('building addr_spec parser')
addr_spec_parser = yacc.yacc(start='addr_spec',
errorlog=log,
tabmodule='addr_spec_parsetab',
debug=False,
write_tables=False,
check_recursion=False)
# Parser whose start symbol is a single URL.
log.debug('building url parser')
url_parser = yacc.yacc(start='url',
errorlog=log,
tabmodule='url_parsetab',
debug=False,
write_tables=False,
check_recursion=False)
# Parser whose start symbol is one entry that is either a mailbox or a URL.
log.debug('building mailbox_or_url parser')
mailbox_or_url_parser = yacc.yacc(start='mailbox_or_url',
errorlog=log,
tabmodule='mailbox_or_url_parsetab',
debug=False,
write_tables=False,
check_recursion=False)
# Parser whose start symbol is a delimited list of mailboxes and/or URLs.
log.debug('building mailbox_or_url_list parser')
mailbox_or_url_list_parser = yacc.yacc(start='mailbox_or_url_list',
errorlog=log,
tabmodule='mailbox_or_url_list_parsetab',
debug=False,
write_tables=False,
check_recursion=False)
# Interactive prompt for easy debugging
if __name__ == '__main__':
    # raw_input() exists only on Python 2; on Python 3 the same line-reading
    # behaviour is provided by input(). Without this fallback the prompt dies
    # with an uncaught NameError on Python 3.
    try:
        read_line = raw_input
    except NameError:
        read_line = input

    while True:
        # Ctrl-C or end of input leaves the prompt.
        try:
            s = read_line('\nflanker> ')
        except (KeyboardInterrupt, EOFError):
            break
        if s == '':
            continue

        # First show the raw token stream produced by the lexer.
        print('\nTokens list:\n')
        lexer.input(s)
        while True:
            tok = lexer.token()
            if not tok:
                break
            print(tok)

        # Then parse the same input as an address list, with PLY's parse
        # trace sent to the module logger.
        print('\nParsing behavior:\n')
        result = mailbox_or_url_list_parser.parse(s, debug=log)
        print('\nResult:\n')
        print(result)
|