1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254
|
/*
netrik -- The ANTRIK Internet Viewer
Copyright (C) Olaf D. Buddenhagen AKA antrik, et al (see AUTHORS)
Published under the GNU GPL; see LICENSE for details.
*/
/*
* http-parse-header.c -- parse the HTTP response head
*
* (C) 2002 antrik
*
* This file contains the parse_header() function, which reads the complete
* HTTP head from the resource, and extracts all headers.
*/
#include <unistd.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include "debug.h"
#include "http-parse-header.h"
#include "load.h"
/*
 * Character class used by parse_header() to signal "no more input".
 * parse_header() ORs the character class with the current PM_ parse mode to
 * build its switch selector, so this value must neither collide with an
 * ordinary character code nor share bits with any PM_ value.
 */
#define NODATA 0x4000 /* don't interfere with normal chars or PM_-values */
/* forward declarations of the file-local helpers */
static int sgetc(struct Resource *input); /* get one character from the socket */
static void add_char(char **str, char chr); /* append character to string */
#ifdef DEBUG
static void dump_headers(const struct Http_headers *headers); /* dump all extracted headers */
#endif
#ifdef DEBUG
/*
 * Debugging aid: print every extracted header as a "name: value" line through
 * debug_printf(), preceded by a caption and followed by an empty line.
 */
static void dump_headers(const struct Http_headers *headers)
{
   int idx=0;    /* position of the header currently being printed */

   debug_printf("extracted headers:\n");

   while(idx<headers->count) {
      debug_printf("%s: %s\n", headers->header[idx].name, headers->header[idx].value);
      ++idx;
   }

   debug_printf("\n");
}
#endif
/*
 * Read one character from the resource's input buffer.
 *
 * When the read pointer has reached the end of the buffer, load() is called to
 * fetch the next block and the read pointer is reset to the buffer start; if
 * the buffer is still empty afterwards, the input is exhausted.
 *
 * Returns the next byte as a value in the range of unsigned char, or EOF when
 * no more data is available.
 */
static int sgetc(struct Resource *input)
{
   /* fill buffer */
   if(input->buf_ptr==input->buf_end) {    /* buf empty */
      load(input);    /* -> read next block */
      input->buf_ptr=input->buf;    /* reset read pointer */

      if(input->buf_ptr==input->buf_end)    /* still empty => eof */
         return EOF;
   }

   /*
    * Convert through unsigned char (like getc() does): a plain cast to
    * unsigned would sign-extend bytes >= 0x80 if the buffer elements are
    * signed, making byte 0xFF indistinguishable from EOF and handing
    * out-of-range values to the <ctype.h> functions used by the caller.
    */
   return (unsigned char)*input->buf_ptr++;
}
/*
 * Append a character to a heap-allocated string.
 *
 * str: address of the string pointer; *str must point to a NUL-terminated
 *      string obtained from malloc()/realloc()/strdup(). It is replaced by
 *      the (possibly moved) grown string.
 * chr: character to append.
 *
 * If the string can't be grown, an error is reported on stderr and the
 * program is terminated (the function has no way to report failure).
 */
static void add_char(char **str, char chr)
{
   const size_t len=strlen(*str);
   char *grown=realloc(*str, len+2);    /* old text + new char + terminating '\0' */

   if(grown==NULL) {    /* *str is still valid here, but we can't continue */
      fprintf(stderr, "memory allocation error while parsing HTTP header (in function add_char)\n");
      exit(1);
   }

   grown[len]=chr;
   grown[len+1]='\0';
   *str=grown;
}
/*
 * Map an input character to the character class used for dispatching:
 * NODATA for EOF, 'a' for letters, ' ' for space/tab, '0' for digits, the
 * character itself for '-', ':', '\r' and '\n', and '*' for anything else.
 *
 * NOTE: a NUL byte matches the terminator of the strchr() search string and
 * thus yields class 0, which no case label handles -- it always ends up in
 * the "unexpected character" branch of parse_header().
 */
static int classify_char(int in)
{
   if(in==EOF)
      return NODATA;
   if(isalpha(in))
      return 'a';
   if(in==' ' || in=='\t')
      return ' ';
   if(isdigit(in))
      return '0';
   if(strchr("-:\r\n", in))
      return in;

   return '*';
}

/*
 * Append a new header entry with empty name and value to the header list.
 * Terminates the program with a message on stderr if memory runs out.
 */
static void start_header(struct Http_headers *headers)
{
   void *grown=realloc(headers->header, (size_t)(headers->count+1)*sizeof *headers->header);

   if(grown==NULL) {
      fprintf(stderr, "memory allocation error while parsing HTTP header (in function parse_header)\n");
      exit(1);
   }
   headers->header=grown;

   headers->header[headers->count].name=strdup("");
   headers->header[headers->count].value=strdup("");
   if(headers->header[headers->count].name==NULL || headers->header[headers->count].value==NULL) {
      fprintf(stderr, "memory allocation error while parsing HTTP header (in function parse_header)\n");
      exit(1);
   }

   ++headers->count;    /* entry is complete -> make it the current one */
}

/*
 * Read the complete HTTP response head from "res" and extract all headers.
 *
 * The input is consumed character by character through sgetc() and run
 * through a state machine: the class of the input character (see
 * classify_char()) is ORed with the current parse mode, and the result
 * selects the action in one big switch.
 *
 * The status line is skipped without being evaluated. Every header line adds
 * an entry to res->handle.http->headers; the name is stored in lower case,
 * carriage returns are dropped, and a line starting with blank space is
 * appended to the previous value (folding), separated by a single space.
 * Parsing ends after the empty line terminating the head.
 *
 * If the input ends before that, the function returns early; unless
 * res->user_break is set, it also prints a message, sets res->type to
 * RES_FAIL and res->url->proto.type to PT_INTERNAL.
 *
 * Malformed input is reported on stderr, and parsing goes on.
 */
void parse_header(struct Resource *res)
{
   struct Http_headers *headers=&res->handle.http->headers;

   enum Parse_mode {
      PM_START=0x000,    /* before anything read */
      PM_STATUSLINE=0x100,    /* inside response status line */
      PM_FIRST_NEWLINE=0x200,    /* after end of status line (like PM_NEWLINE, except no folding possible) */
      PM_NAME=0x300,    /* inside header name */
      PM_SPACE=0x400,    /* ' ' after ':' at end of header name */
      PM_VALUE=0x500,    /* inside header value */
      PM_NEWLINE=0x600,    /* after '\n' */
      PM_FOLDING=0x700,    /* ' ' after newline (folded header value) */
      PM_END=0x800    /* whole header parsed */
   } parse_mode=PM_START;

   int recycle;    /* have to do more handling with same input char */

   DMSG(("parsing HTTP headers...\n"));

   do {    /* while not PM_END */
      const int in=sgetc(res);    /* input character from HTTP */
      const int dispatch_char=classify_char(in);    /* character class used in switch */

#ifdef DEBUG
      if(cfg.debug && in!=EOF) {    /* echo raw input (EOF is not a character) */
         fputc(in, stderr);
         fflush(stderr);
      }
#endif

      do {    /* while recycle */
         recycle=0;

         switch(dispatch_char|parse_mode) {

            /*
             * EOF -- has to be handled in every mode: sgetc() keeps
             * returning EOF, so falling into the default branch would loop
             * forever.
             */
            case NODATA|PM_START:
            case NODATA|PM_STATUSLINE:
            case NODATA|PM_FIRST_NEWLINE:
            case NODATA|PM_NEWLINE:
            case NODATA|PM_NAME:
            case NODATA|PM_SPACE:
            case NODATA|PM_VALUE:
            case NODATA|PM_FOLDING:
               if(!res->user_break) {
                  fputs(parse_mode==PM_START ? "\nNo HTTP response.\n" : "\nUnexpected end of file while parsing HTTP header.\n", stderr);
                  res->type=RES_FAIL;
                  res->url->proto.type=PT_INTERNAL;    /* don't keep in history */
               }
               return;

            /* status line */
            case 'a'|PM_START:
               parse_mode=PM_STATUSLINE;
               /* fallthrough */
            case ' '|PM_STATUSLINE:
            case 'a'|PM_STATUSLINE:
            case '0'|PM_STATUSLINE:
            case '*'|PM_STATUSLINE:
            case '\r'|PM_STATUSLINE:
               break;    /* we don't handle status line... */

            case '\n'|PM_STATUSLINE:
               parse_mode=PM_FIRST_NEWLINE;
               break;

            /* beginning of header line */
            case 'a'|PM_FIRST_NEWLINE:
            case 'a'|PM_NEWLINE:
               start_header(headers);
               parse_mode=PM_NAME;
               /* fallthrough */
            case 'a'|PM_NAME:
            case '0'|PM_NAME:
            case '-'|PM_NAME:
               add_char(&headers->header[headers->count-1].name, tolower(in));    /* store name char, ignoring case */
               break;

            case ':'|PM_NAME:
               parse_mode=PM_SPACE;
               break;

            /* exactly one blank after the ':' is swallowed */
            case ' '|PM_SPACE:
               parse_mode=PM_VALUE;
               break;

            /* header value */
            case 'a'|PM_VALUE:
            case '0'|PM_VALUE:
            case ' '|PM_VALUE:
            case '-'|PM_VALUE:
            case ':'|PM_VALUE:
            case '*'|PM_VALUE:
               add_char(&headers->header[headers->count-1].value, in);
               break;

            case '\r'|PM_VALUE:
               break;

            case '\n'|PM_VALUE:
               parse_mode=PM_NEWLINE;
               break;

            /* ' ' at line start => folded header line */
            case ' '|PM_NEWLINE:
               add_char(&headers->header[headers->count-1].value, ' ');    /* add one space */
               parse_mode=PM_FOLDING;
               break;

            case ' '|PM_FOLDING:    /* skip more blank space at folding start */
               break;

            /* first real character of the continuation line: handle it as value char */
            case 'a'|PM_FOLDING:
            case '0'|PM_FOLDING:
            case '-'|PM_FOLDING:
            case ':'|PM_FOLDING:
            case '*'|PM_FOLDING:
               parse_mode=PM_VALUE;
               recycle=1;
               break;

            /* empty line? */
            case '\r'|PM_NEWLINE:
            case '\r'|PM_FIRST_NEWLINE:
               break;

            case '\n'|PM_NEWLINE:
            case '\n'|PM_FIRST_NEWLINE:
               parse_mode=PM_END;
               break;

            /* errors handling */
            case 'a'|PM_SPACE:
            case '0'|PM_SPACE:
            case '-'|PM_SPACE:
            case ':'|PM_SPACE:
            case '*'|PM_SPACE:
               DMSG(("\n"));
               fprintf(stderr, "HTTP header parsing error (missing ' ' after ':')\n");
               parse_mode=PM_VALUE;    /* treat the character as start of the value */
               recycle=1;
               break;

            case '\n'|PM_NAME:
            case '\n'|PM_SPACE:
            case '\n'|PM_FOLDING:
               DMSG(("\n"));
               fprintf(stderr, "HTTP header parsing error (unexpected line end)\n");
               parse_mode=PM_NEWLINE;
               break;

            /* anything not allowed in the current mode: complain and skip it */
            default:
               DMSG(("\n"));
               fprintf(stderr, "HTTP header parsing error (unexpected character)\n");
               break;

         }    /* switch */
      } while(recycle);
   } while(parse_mode!=PM_END);

#ifdef DEBUG
   if(cfg.debug)
      dump_headers(&res->handle.http->headers);
#endif
}
|