1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357
|
/* MIME Message Parse HTMIME.c
** ==================
**
** This is RFC 1341-specific code.
** The input stream pushed into this parser is assumed to be
** stripped of CRs, i.e. lines end with LF, not CR LF.
** (It is easy to change this except for the body part where
** conversion can be slow.)
**
** History:
** Feb 92 Written Tim Berners-Lee, CERN
** 8 Jul 94 FM Insulate free() from _free structure element.
**
*/
#include "HTFormat.h"
#include "HTMIME.h" /* Implemented here */
#include "HTAlert.h"
/* MIME Object
** -----------
*/
/*
** States of the MIME header parser.  The numeric values follow the
** declaration order; they are spelled out only for readability.
*/
typedef enum _MIME_state {
    MIME_TRANSPARENT          = 0,  /* header finished: put input straight through to target ASAP! */
    BEGINNING_OF_LINE         = 1,  /* expecting the first character of a header line */
    CONTENT_T                 = 2,  /* matched "content-t", next character selects the field */
    CONTENT_TRANSFER_ENCODING = 3,  /* field name matched, value follows */
    CONTENT_TYPE              = 4,  /* field name matched, value follows */
    SKIP_GET_VALUE            = 5,  /* skip white space, then get the value */
    GET_VALUE                 = 6,  /* collecting the characters of the value */
    JUNK_LINE                 = 7,  /* ignore the rest of this (possibly folded) line */
    NEWLINE                   = 8,  /* just found a LF .. maybe a continuation follows */
    CHECK                     = 9,  /* comparing input against check_pointer */
    MIME_NET_ASCII            = 10, /* translate from net ascii */
    MIME_IGNORE               = 11  /* ignore the entire input */
    /* The MIME_ prefix is used because plain TRANSPARENT and IGNORE
    ** are defined as something else in _WINDOWS */
} MIME_state;
#define VALUE_SIZE 128 /* Size of value[] below, terminating NUL included. @@@ Arbitrary? */
/*
** Instance data of one MIME parser stream.
**
** The object is allocated zero-filled by HTMIMEConvert(), so every
** member not set there starts out as 0 / NULL.
*/
struct _HTStream {
CONST HTStreamClass * isa; /* Method table; set to &HTMIME on creation */
BOOL net_ascii; /* Is input net ascii? If so CR is dropped and LF becomes '\n' while parsing */
MIME_state state; /* current state */
MIME_state if_ok; /* go to this state when CHECK has matched the whole string */
MIME_state field; /* remember which field the value being read belongs to */
MIME_state fold_state; /* state to return to if the next line is a continuation (fold) */
CONST char * check_pointer; /* remaining text the input is compared against in CHECK */
char * value_pointer; /* next free position in value[] while storing a value */
char value[VALUE_SIZE]; /* buffer the field value is collected in */
HTStream * sink; /* Given on creation; becomes the target if no converter can be built */
HTRequest * request; /* Given on creation; passed to HTStreamStack() */
char * boundary; /* For multipart (not referenced by the code visible in this file) */
HTFormat encoding; /* Content-Transfer-Encoding */
HTFormat format; /* Content-Type; preset to WWW_PLAINTEXT on creation */
HTStream * target; /* While writing out: stream receiving the body, NULL until end of header */
HTStreamClass targetClass; /* Copy of *target->isa, taken when target is set */
HTAtom * targetRep; /* Converting into? (output format given on creation) */
};
/*_________________________________________________________________________
**
** A C T I O N R O U T I N E S
*/
/*	Character handling
**	------------------
**
**	Feeds one character into the MIME header parser.
**
**	This is a FSM parser which is as tolerant as it can be of all
**	syntax errors. It ignores field names it does not understand,
**	and resynchronises on line beginnings.  Only the Content-Type
**	and Content-Transfer-Encoding fields are interpreted; of their
**	value only the first token is kept, i.e. everything up to the
**	first white space or ';' (parameters are ignored).
**
**	On the blank line which ends the header a target stream is
**	built with HTStreamStack() for the content type found (falling
**	back on the sink given at creation), and from then on all input
**	is put straight through to that target.
**
**	Folding: a line which starts with white space continues the
**	previous one.  It is therefore handled in fold_state, the state
**	recorded when the previous line ended, so the continuation of an
**	ignored line is ignored too.
**
** On entry,
**	me	the parser stream
**	c	the next character of input
*/
PRIVATE void HTMIME_put_character ARGS2(HTStream *, me, char, c)
{
    if (me->state == MIME_TRANSPARENT) {
        (*me->targetClass.put_character)(me->target, c);    /* MUST BE FAST */
        return;
    }

    /* This slightly simple conversion just strips CR and turns LF to
    ** newline. On unix LF is \n but on Mac \n is CR for example.
    ** See NetToText for an implementation which preserves single CR or LF.
    */
    if (me->net_ascii) {
        c = FROMASCII(c);
        if (c == CR) return;
        else if (c == LF) c = '\n';
    }

    switch(me->state) {

    case MIME_IGNORE:
        return;

    case MIME_TRANSPARENT:              /* Not reached see above */
        (*me->targetClass.put_character)(me->target, c);
        return;

    case MIME_NET_ASCII:
        (*me->targetClass.put_character)(me->target, c); /* MUST BE FAST */
        return;

    case NEWLINE:
        if (c != '\n' && WHITE(c)) {            /* Folded line */
            me->state = me->fold_state;         /* pop state before newline */
            break;
        }
        /* else Falls through */

    case BEGINNING_OF_LINE:
        switch(c) {
        case 'c':
        case 'C':
            me->check_pointer = "ontent-t";
            me->if_ok = CONTENT_T;
            me->state = CHECK;
            break;

        case '\n':                      /* Blank line: End of Header! */
            {
                if (TRACE) fprintf(stderr,
                        "HTMIME: MIME content type is %s, converting to %s\n",
                        HTAtom_name(me->format), HTAtom_name(me->targetRep));
                me->target = HTStreamStack(me->format, me->request, NO);
                if (!me->target) {
                    if (TRACE) fprintf(stderr, "MIME: Can't translate! ** \n");
                    me->target = me->sink;      /* Cheat */
                }
                if (me->target) {
                    /* Keep a private copy of the method table so that the
                    ** transparent fast path needs one indirection less */
                    me->targetClass = *me->target->isa;
                    /* Check for encoding and select state from there @@ */
                    me->state = MIME_TRANSPARENT; /* From now push straight through */
                } else {
                    me->state = MIME_IGNORE;    /* What else to do? */
                }
            }
            break;

        default:
            goto bad_field_name;

        } /* switch on character */
        break;

    case CHECK:                         /* Check against string */
        if (TOLOWER(c) == *(me->check_pointer)++) {
            if (!*me->check_pointer) me->state = me->if_ok;
        } else {                        /* Error */
            if (TRACE) fprintf(stderr,
                "HTMIME: Bad character `%c' found where `%s' expected\n",
                c, me->check_pointer - 1);
            goto bad_field_name;
        }
        break;

    case CONTENT_T:
        switch(c) {
        case 'r':
        case 'R':
            me->check_pointer = "ansfer-encoding:";
            me->if_ok = CONTENT_TRANSFER_ENCODING;
            me->state = CHECK;
            break;

        case 'y':
        case 'Y':
            me->check_pointer = "pe:";
            me->if_ok = CONTENT_TYPE;
            me->state = CHECK;
            break;

        default:
            goto bad_field_name;

        } /* switch on character */
        break;

    case CONTENT_TYPE:
    case CONTENT_TRANSFER_ENCODING:
        me->field = me->state;          /* remember it */
        me->state = SKIP_GET_VALUE;
        /* Fall through! */

    case SKIP_GET_VALUE:
        if (c == '\n') {
            me->fold_state = me->state; /* value may follow on a folded line */
            me->state = NEWLINE;
            break;
        }
        if (WHITE(c)) break;            /* Skip white space */

        me->value_pointer = me->value;
        me->state = GET_VALUE;
        /* Fall through to store first character */

    case GET_VALUE:
        if (WHITE(c) || c == ';') {     /* End of field value */
            *me->value_pointer = 0;
            if (me->value[0]) {         /* never record an empty value */
                switch (me->field) {
                case CONTENT_TYPE:
                    me->format = HTAtom_for(me->value);
                    break;
                case CONTENT_TRANSFER_ENCODING:
                    me->encoding = HTAtom_for(me->value);
                    break;
                default:                /* Should never get here */
                    break;
                }
            }
            /* The value is complete.  Anything else on this line, such
            ** as parameters, must not be appended to it, so the rest of
            ** the line and its continuations are skipped. */
            me->state = JUNK_LINE;
        } else if (me->value_pointer < me->value + VALUE_SIZE - 1) {
            *me->value_pointer++ = c;   /* room is kept for the terminator */
            break;
        } else {
            goto value_too_long;
        }
        /* Fall through: c may be the newline which ends this line */

    case JUNK_LINE:
        if (c == '\n') {
            me->fold_state = JUNK_LINE; /* a continuation line is junk too */
            me->state = NEWLINE;
        }
        break;

    } /* switch on state*/

    return;

value_too_long:
    if (TRACE) fprintf(stderr,
        "HTMIME: *** Syntax error. (string too long)\n");

bad_field_name:                         /* Ignore it */
    if (c == '\n') {
        /* The offending character is itself the end of the line: the
        ** next character starts a new line (or a continuation of this
        ** ignored one) and must not be skipped as part of this line. */
        me->fold_state = JUNK_LINE;
        me->state = NEWLINE;
    } else {
        me->state = JUNK_LINE;
    }
    return;
}
/*	String handling
**	---------------
**
**	Once the header has been parsed the whole string is handed to
**	the target in one call.  While the header is still being parsed
**	the string is fed to the parser one character at a time, and
**	when the input is being ignored nothing is done at all.
**
** On entry,
**	me	the parser stream
**	s	zero-terminated string to put
*/
PRIVATE void HTMIME_put_string ARGS2(HTStream *, me, CONST char*, s)
{
    CONST char * cursor = s;

    if (me->state == MIME_TRANSPARENT) {        /* Optimisation */
        (*me->targetClass.put_string)(me->target, s);
        return;
    }
    if (me->state == MIME_IGNORE) {
        return;
    }
    while (*cursor) {
        HTMIME_put_character(me, *cursor);
        cursor++;
    }
}
/*	Buffer write.  Buffers can (and should!) be big.
**	------------
**
**	Once the header has been parsed the block is passed to the
**	target as it is.  Otherwise each of its bytes is fed to the
**	character parser in turn.
**
** On entry,
**	me	the parser stream
**	s	start of the block
**	l	number of bytes in the block
*/
PRIVATE void HTMIME_write ARGS3(HTStream *, me, CONST char*, s, int, l)
{
    int i;

    if (me->state == MIME_TRANSPARENT) {        /* Optimisation */
        (*me->targetClass.put_block)(me->target, s, l);
        return;
    }
    for (i = 0; i < l; i++) {
        HTMIME_put_character(me, s[i]);
    }
}
/*	Free a MIME parser object
**	-------------------------
**
**	Frees the target stream, if one has been set up, through the
**	target's own _free method and then releases the parser itself.
**	The sink is touched here only when it has become the target.
**
** On entry,
**	me	the parser stream; it must not be used afterwards
*/
PRIVATE void HTMIME_free ARGS1(HTStream *, me)
{
    HTStream * downstream = me->target;

    if (downstream != NULL) {
        (*me->targetClass._free)(downstream);
    }
    free(me);
}
/*	Abort writing
**	-------------
**
**	Passes the abort on to the target stream, if one has been set
**	up, and then releases the parser itself.
**
** On entry,
**	me	the parser stream; it must not be used afterwards
**	e	the error, handed unchanged to the target's abort method
*/
PRIVATE void HTMIME_abort ARGS2(HTStream *, me, HTError, e)
{
    HTStream * downstream = me->target;

    if (downstream != NULL) {
        (*me->targetClass.abort)(downstream, e);
    }
    free(me);
}
/* Structured Object Class
** -----------------------
**
** Method table of the MIME parser stream; HTMIMEConvert() stores its
** address in the isa member of every parser it creates.
** The initialiser is positional, so the order of the entries has to
** follow the declaration of HTStreamClass (declared elsewhere).
*/
PRIVATE CONST HTStreamClass HTMIME =
{
"MIMEParser", /* name of the class */
HTMIME_free, /* release the parser and its target */
HTMIME_abort, /* abort the target, then release the parser */
HTMIME_put_character, /* parse one character */
HTMIME_put_string, /* parse a zero-terminated string */
HTMIME_write /* parse a block of given length */
};
/*	Subclass-specific Methods
**	-------------------------
*/

/*	Create a MIME parser stream
**	---------------------------
**
**	The new parser starts at the beginning of a header line and
**	assumes WWW_PLAINTEXT until a Content-Type field says otherwise.
**	All other members start out zeroed.
**
** On entry,
**	request		kept for building the target stream later
**	param		not used
**	input_format	not used
**	output_format	kept as the representation converted into
**	output_stream	kept as the sink
**
** On exit,
**	returns		the new stream, or NULL if there is no memory
**			and outofmem() returns (it is expected not to
**			return -- confirm against its definition)
*/
PUBLIC HTStream* HTMIMEConvert ARGS5(
        HTRequest *,            request,
        void *,                 param,
        HTFormat,               input_format,
        HTFormat,               output_format,
        HTStream *,             output_stream)
{
    HTStream* me;

    me = (HTStream*)calloc(1, sizeof(*me));
    if (me == NULL) {
        /* Report under the name of this routine */
        outofmem(__FILE__, "HTMIMEConvert");
        return NULL;            /* never touch a NULL object */
    }
    me->isa = &HTMIME;
    me->sink = output_stream;
    me->request = request;
    me->state = BEGINNING_OF_LINE;
    me->format = WWW_PLAINTEXT;
    me->targetRep = output_format;
    return me;
}
/*	Create a MIME parser stream for net ascii input
**	-----------------------------------------------
**
**	Same as HTMIMEConvert(), all arguments being handed on to it,
**	but the parser is told that its input is net ascii: while the
**	header is parsed CR is dropped and LF is turned into '\n'.
**
** On exit,
**	returns		the new stream, or NULL if HTMIMEConvert()
**			returned NULL
*/
PUBLIC HTStream* HTNetMIME ARGS5(
        HTRequest *,            request,
        void *,                 param,
        HTFormat,               input_format,
        HTFormat,               output_format,
        HTStream *,             output_stream)
{
    HTStream* parser;

    parser = HTMIMEConvert(
        request, param, input_format, output_format, output_stream);
    if (parser != NULL) {
        parser->net_ascii = YES;
    }
    return parser;
}
|