File: http-parse-header.c

package info (click to toggle)
netrik 1.16.1-4
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 3,288 kB
  • sloc: ansic: 6,657; sh: 994; makefile: 120
file content (254 lines) | stat: -rw-r--r-- 6,841 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
/*
   netrik -- The ANTRIK Internet Viewer
   Copyright (C) Olaf D. Buddenhagen AKA antrik, et al (see AUTHORS)
   Published under the GNU GPL; see LICENSE for details.
*/

/*
 * http-parse-header.c -- parse the HTTP response head
 *
 * (C) 2002 antrik
 *
 * This file contains the parse_header() function, which reads the complete
 * HTTP head from the resource, and extracts all headers.
 */

#include <unistd.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>

#include "debug.h"
#include "http-parse-header.h"
#include "load.h"

/*
 * Character class used by parse_header() when sgetc() returned EOF.
 * parse_header() ORs the character class with the current parse mode to build
 * its switch key, so this value has to stay clear of both.
 */
#define NODATA 0x4000    /* don't interfere with normal chars or PM_-values */

/* file-local helpers, defined below */
static int sgetc(struct Resource *input);    /* get one character from the socket */
static void add_char(char **str, char chr);    /* append character to string */

/* only compiled into debug builds */
#ifdef DEBUG
static void dump_headers(const struct Http_headers *headers);    /* dump all extracted headers */
#endif


#ifdef DEBUG
/*
 * Debug helper: print every extracted header as a "name: value" line via
 * debug_printf(), preceded by a heading line and followed by an empty line.
 */
static void dump_headers(const struct Http_headers *headers)
{
   int  idx=0;

   debug_printf("extracted headers:\n");

   while(idx < headers->count) {
      debug_printf("%s: %s\n", headers->header[idx].name, headers->header[idx].value);
      ++idx;
   }

   debug_printf("\n");
}
#endif

/*
 * Read one character from the resource's input buffer.
 *
 * When the buffer is exhausted, load() is called to read the next block and
 * the read pointer is reset to the start of the buffer.
 *
 * Returns the character as a non-negative value (like getc() does), or EOF if
 * the buffer is still empty after load().
 */
static int sgetc(struct Resource *input)
{
   /* fill buffer */
   if(input->buf_ptr==input->buf_end) {    /* buf empty */
      load(input);    /* -> read next block */
      input->buf_ptr=input->buf;    /* reset read pointer */
      if(input->buf_ptr==input->buf_end)    /* still empty => eof */
         return EOF;
   }

   /*
    * Read char from buffer, converting through unsigned char. A plain
    * (unsigned) cast would not help if the buffer holds (possibly signed)
    * chars: bytes >= 0x80 would be sign-extended first and come back negative
    * -- 0xFF would even be indistinguishable from EOF -- and negative values
    * other than EOF must not be passed to the <ctype.h> functions.
    */
   return (unsigned char)*input->buf_ptr++;
}

/*
 * Append a single character to a heap-allocated string.
 *
 * str  address of the string pointer. The string must come from
 *      malloc()/realloc()/strdup(); a NULL string is treated as empty. The
 *      pointer is updated, as growing the string may move it.
 * chr  character to append.
 *
 * If the string cannot be grown, an error is printed and the program exits,
 * as there is no way to report the failure to the caller.
 */
static void add_char(char **str, char chr)
{
   const size_t  len=(*str!=NULL) ? strlen(*str) : 0;
   char  *grown=realloc(*str, len+2);    /* old contents + new char + '\0' */

   if(grown==NULL) {    /* don't overwrite *str with NULL and dereference it */
      fprintf(stderr, "memory allocation error while parsing HTTP header (in function add_char)\n");
      exit(1);
   }

   grown[len]=chr;
   grown[len+1]='\0';
   *str=grown;
}

void parse_header(res)
struct Resource	*res;
{
   struct Http_headers	*headers=&res->handle.http->headers;

   enum Parse_mode {
      PM_START=0x000,    /* before anything read */
      PM_STATUSLINE=0x100,    /* inside response status line */
      PM_FIRST_NEWLINE=0x200,    /* after end of status line (like PM_NEWLINE, except no folding possible) */
      PM_NAME=0x300,    /* inside header name */
      PM_SPACE=0x400,    /* ' ' after ':' at end of header name */
      PM_VALUE=0x500,    /* inside header value */
      PM_NEWLINE=0x600,    /* after '\n' */
      PM_FOLDING=0x700,    /* ' ' after newline (folded header value) */
      PM_END=0x800    /* whole header parsed */
   } parse_mode=PM_START;

   int	recycle;    /* have to do more handling with same input char */

   DMSG(("parsing HTTP headers...\n"));

   do {    /* while not PM_END */
      const int	in=sgetc(res);    /* input character from HTTP */
      int	dispatch_char;    /* character class used in switch */

#ifdef DEBUG
      if(cfg.debug)
	 fputc(in, stderr); fflush(stderr);
#endif

      if(isalpha(in))
	 dispatch_char='a';
      else if(in==' ' || in=='\t')
	 dispatch_char=' ';
      else if(isdigit(in))
	 dispatch_char='0';
      else if(strchr("-:\r\n", in))
	 dispatch_char=in;
      else if(in==EOF)
	 dispatch_char=NODATA;
      else
	 dispatch_char='*';

      do {    /* while recycle */
	 recycle=0;
	 switch(dispatch_char|parse_mode) {
	    /* EOF */
	    case NODATA|PM_START:
            case NODATA|PM_STATUSLINE:
            case NODATA|PM_NEWLINE:
            case NODATA|PM_NAME:
            case NODATA|PM_SPACE:
            case NODATA|PM_VALUE:
            case NODATA|PM_FOLDING:
	       if(!res->user_break) {
		  fprintf(stderr, parse_mode==PM_START ? "\nNo HTTP response.\n" : "\nUnexpected end of file while parsing HTTP header.\n");
		  res->type=RES_FAIL;
		  res->url->proto.type=PT_INTERNAL;    /* don't keep in history */
	       }
	       return;

	    case 'a'|PM_START:
	       parse_mode=PM_STATUSLINE;
	       /* fallthrough */

	    case ' '|PM_STATUSLINE:
	    case 'a'|PM_STATUSLINE:
	    case '0'|PM_STATUSLINE:
	    case '*'|PM_STATUSLINE:
	    case '\r'|PM_STATUSLINE:
	       break;    /* we don't handle status line... */

	    case '\n'|PM_STATUSLINE:
	       parse_mode=PM_FIRST_NEWLINE;
	       break;

	    /* beginning of header line */
	    case 'a'|PM_FIRST_NEWLINE:
	    case 'a'|PM_NEWLINE:
	       headers->header=realloc(headers->header, sizeof(struct Header[++headers->count]));
	       headers->header[headers->count-1].name=strdup("");
	       headers->header[headers->count-1].value=strdup("");

	       parse_mode=PM_NAME;
	       /* fallthrough */

	    case 'a'|PM_NAME:
	    case '0'|PM_NAME:
	    case '-'|PM_NAME:
	       add_char(&headers->header[headers->count-1].name, tolower(in));    /* store name char, ignoring case */
	       break;
	       
	    case ':'|PM_NAME:
	       parse_mode=PM_SPACE;
	       break;

	    case ' '|PM_SPACE:
	       parse_mode=PM_VALUE;
	       break;

	    case 'a'|PM_VALUE:
	    case '0'|PM_VALUE:
	    case ' '|PM_VALUE:
	    case '-'|PM_VALUE:
	    case ':'|PM_VALUE:
	    case '*'|PM_VALUE:
	       add_char(&headers->header[headers->count-1].value, in);
	       break;

	    case '\r'|PM_VALUE:
	       break;

	    case '\n'|PM_VALUE:
	       parse_mode=PM_NEWLINE;
	       break;

	    /* ' ' at line start => folded header line */
	    case ' '|PM_NEWLINE:
	       add_char(&headers->header[headers->count-1].value, ' ');    /* add one space */
	       
	       parse_mode=PM_FOLDING;
	       break;

	    case ' '|PM_FOLDING:    /* skip more blank space at folding start */
	       break;

	    case 'a'|PM_FOLDING:
	    case '0'|PM_FOLDING:
	    case '-'|PM_FOLDING:
	    case ':'|PM_FOLDING:
	    case '*'|PM_FOLDING:
	       parse_mode=PM_VALUE;
	       recycle=1;
	       break;

	    /* empty line? */
	    case '\r'|PM_NEWLINE:
	    case '\r'|PM_FIRST_NEWLINE:
	       break;

	    case '\n'|PM_NEWLINE:
	    case '\n'|PM_FIRST_NEWLINE:
	       parse_mode=PM_END;
	       break;

	    /* errors handling */

	    case 'a'|PM_SPACE:
	    case '0'|PM_SPACE:
	    case '-'|PM_SPACE:
	    case ':'|PM_SPACE:
	    case '*'|PM_SPACE:
	       DMSG(("\n"));
	       fprintf(stderr, "HTTP header parsing error (missing ' ' after ':')\n");
	       parse_mode=PM_VALUE;
	       recycle=1;
	       break;

	    case '\n'|PM_NAME:
	    case '\n'|PM_SPACE:
	    case '\n'|PM_FOLDING:
	       DMSG(("\n"));
	       fprintf(stderr, "HTTP header parsing error (unexpected line end)\n");
	       parse_mode=PM_NEWLINE;
	       break;

	    default:
	       DMSG(("\n"));
	       fprintf(stderr, "HTTP header parsing error (unexpected character)\n");
	 }    /* switch */
      } while(recycle);
   } while(parse_mode!=PM_END);

#ifdef DEBUG
   if(cfg.debug)
      dump_headers(&res->handle.http->headers);
#endif
}