1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188
|
/**
* \file rfc822.c
* Code for slicing and dicing RFC822 mail headers.
*
* How to parse RFC822 headers in C. This is not a fully conformant
* implementation of RFC822 or RFC2822, but it has been in production use in a
* widely-deployed MTA (fetchmail) since 1996 without complaints. Really
* perverse combinations of quoting and commenting could break it.
*
* \author Eric S. Raymond <esr@thyrsus.com>, 1997. This source code example
* is part of fetchmail and the Unix Cookbook, and is released under the MIT
* license.
*/
#include <stdio.h>
#include <ctype.h>
#include <string.h>
#include <stdlib.h>
/** True when \a p points at a newline that is not followed by a space or
 *  tab, i.e. a line end that is not a folded continuation line. */
#define HEADER_END(p)   ((p)[0] == '\n' && ((p)[1] != ' ' && (p)[1] != '\t'))

/* Parser states used by next_address(). */
#define START_HDR       0   /**< before header colon */
#define SKIP_JUNK       1   /**< skip whitespace, \n, and junk */
#define BARE_ADDRESS    2   /**< collecting address without delimiters */
#define INSIDE_DQUOTE   3   /**< inside double quotes */
#define INSIDE_PARENS   4   /**< inside parentheses */
#define INSIDE_BRACKETS 5   /**< inside bracketed address */
#define ENDIT_ALL       6   /**< after last address */

/**
 * Parse addresses in succession out of a specified RFC822 header.
 *
 * The first call passes the header text; each following call passes NULL to
 * fetch the next address from the same header. Scan position, parser state
 * and the result buffer are kept in function-level statics, so the function
 * is not reentrant and the returned string is overwritten by the next call.
 *
 * The last address of a header is only delivered when the header is
 * terminated by a newline that is not followed by a space or tab.
 *
 * \param hdr header to be parsed, NULL to continue previous \p hdr.
 * \return pointer to a NUL-terminated address held in a static buffer, or
 *         NULL when no further address is available (including the case
 *         where NULL is passed before any header was ever supplied).
 */
char *next_address(const char *hdr)
{
    static char address[BUFSIZ];    /* result buffer shared by all calls */
    static size_t tp;               /* index of next free byte in address[] */
    static const char *hp;          /* current scan position in the header */
    static int state, oldstate;     /* oldstate: where to go back to after
                                       a quoted string or comment */
    /* Need not be static: the function never returns with more input still
       to be parsed while the state is INSIDE_PARENS. */
    int parendepth = 0;

/* Index for the next output byte; once address[] is full it sticks at the
   last slot so later writes overwrite that byte instead of overflowing. */
#define NEXTTP()    ((tp < sizeof(address) - 1) ? tp++ : tp)

    if (hdr)
    {
        hp = hdr;
        state = START_HDR;
        tp = 0;
    }

    /* Continuation requested before any header was supplied. */
    if (!hp)
        return NULL;

    for (; *hp; hp++)
    {
        if (state == ENDIT_ALL)         /* after last address */
            return NULL;
        else if (HEADER_END(hp))
        {
            state = ENDIT_ALL;
            if (tp)
            {
                /* Strip trailing whitespace without ever stepping below
                   the start of the buffer. */
                while (tp > 0 && isspace((unsigned char)address[tp - 1]))
                    tp--;
                address[tp] = '\0';
                tp = 0;
                return address;
            }
            return NULL;
        }
        else if (*hp == '\\')           /* handle RFC822 escaping */
        {
            if (state == INSIDE_PARENS)
            {
                /* A quoted parenthesis or backslash inside a comment must
                   not change the nesting depth: skip the escaped char. */
                if (hp[1] == '(' || hp[1] == ')' || hp[1] == '\\')
                    hp++;
            }
            else
            {
                address[NEXTTP()] = *hp;            /* take the escape */
                /* Only consume the following char if there is one, so the
                   scan can never move past the terminating NUL. */
                if (hp[1] != '\0')
                    address[NEXTTP()] = *++hp;      /* take following char */
            }
        }
        else switch (state)
        {
        case START_HDR:     /* before header colon */
            if (*hp == ':')
                state = SKIP_JUNK;
            break;

        case SKIP_JUNK:     /* looking for address start */
            if (*hp == '"')             /* quoted string */
            {
                oldstate = SKIP_JUNK;
                state = INSIDE_DQUOTE;
                address[NEXTTP()] = *hp;
            }
            else if (*hp == '(')        /* address comment -- ignore */
            {
                parendepth = 1;
                oldstate = SKIP_JUNK;
                state = INSIDE_PARENS;
            }
            else if (*hp == '<')        /* begin <address> */
            {
                state = INSIDE_BRACKETS;
                tp = 0;
            }
            else if (*hp != ',' && !isspace((unsigned char)*hp))
            {
                /* Step back so the loop increment re-reads this char in
                   the BARE_ADDRESS state. */
                --hp;
                state = BARE_ADDRESS;
            }
            break;

        case BARE_ADDRESS:  /* collecting address without delimiters */
            if (*hp == ',')             /* end of address */
            {
                if (tp)
                {
                    address[NEXTTP()] = '\0';
                    state = SKIP_JUNK;
                    tp = 0;
                    return address;
                }
            }
            else if (*hp == '(')        /* beginning of comment */
            {
                parendepth = 1;
                oldstate = BARE_ADDRESS;
                state = INSIDE_PARENS;
            }
            else if (*hp == '<')        /* beginning of real address */
            {
                /* Whatever was collected so far was a display name. */
                state = INSIDE_BRACKETS;
                tp = 0;
            }
            else if (*hp == '"')        /* quoted word, copy verbatim */
            {
                oldstate = state;
                state = INSIDE_DQUOTE;
                address[NEXTTP()] = *hp;
            }
            else if (!isspace((unsigned char)*hp))
                address[NEXTTP()] = *hp;    /* take it, ignoring whitespace */
            break;

        case INSIDE_DQUOTE: /* we're in a quoted string, copy verbatim */
            address[NEXTTP()] = *hp;
            if (*hp == '"')             /* closing quote is copied too */
                state = oldstate;
            break;

        case INSIDE_PARENS: /* we're in a parenthesized comment, ignore */
            if (*hp == '(')
                ++parendepth;
            else if (*hp == ')')
                --parendepth;
            if (parendepth == 0)
                state = oldstate;
            break;

        case INSIDE_BRACKETS:   /* possible <>-enclosed address */
            if (*hp == '>')             /* end of address */
            {
                address[NEXTTP()] = '\0';
                state = SKIP_JUNK;
                ++hp;                   /* resume after the '>' next call */
                tp = 0;
                return address;
            }
            else if (*hp == '<')        /* nested <> */
                tp = 0;
            else if (*hp == '"')        /* quoted address */
            {
                address[NEXTTP()] = *hp;
                oldstate = INSIDE_BRACKETS;
                state = INSIDE_DQUOTE;
            }
            else                        /* just copy address */
                address[NEXTTP()] = *hp;
            break;
        }
    }

    return NULL;
}
|