File: HttpHeaderParser.java

package info (click to toggle)
tomcat11 11.0.11-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 47,028 kB
  • sloc: java: 366,244; xml: 55,681; jsp: 4,783; sh: 1,304; perl: 324; makefile: 25; ansic: 14
file content (409 lines) | stat: -rw-r--r-- 16,963 bytes parent folder | download | duplicates (7)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
/*
 *  Licensed to the Apache Software Foundation (ASF) under one or more
 *  contributor license agreements.  See the NOTICE file distributed with
 *  this work for additional information regarding copyright ownership.
 *  The ASF licenses this file to You under the Apache License, Version 2.0
 *  (the "License"); you may not use this file except in compliance with
 *  the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing, software
 *  distributed under the License is distributed on an "AS IS" BASIS,
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  See the License for the specific language governing permissions and
 *  limitations under the License.
 */
package org.apache.tomcat.util.http.parser;

import java.io.IOException;
import java.nio.ByteBuffer;

import org.apache.tomcat.util.buf.MessageBytes;
import org.apache.tomcat.util.http.HeaderUtil;
import org.apache.tomcat.util.http.MimeHeaders;
import org.apache.tomcat.util.res.StringManager;

public class HttpHeaderParser {

    // Source of the localised messages used in the exceptions thrown by this class
    private static final StringManager sm = StringManager.getManager(HttpHeaderParser.class);

    // Byte values of the characters the parser compares against
    private static final byte CR = (byte) '\r';
    private static final byte LF = (byte) '\n';
    private static final byte SP = (byte) ' ';
    private static final byte HT = (byte) '\t';
    private static final byte COLON = (byte) ':';
    private static final byte A = (byte) 'A';
    private static final byte a = (byte) 'a';
    private static final byte Z = (byte) 'Z';
    // 'A' - 'a' (a negative value). Subtracting it from an upper case ASCII letter gives the lower case letter.
    private static final byte LC_OFFSET = A - a;

    // Provides the buffer holding the raw header bytes and refills it on request
    private final HeaderDataSource source;
    // Collection each parsed header is added to
    private final MimeHeaders headers;
    // When false, a line terminated by LF without a preceding CR triggers an IllegalArgumentException
    private final boolean tolerantEol;
    // Buffer indexes and the value holder for the header line currently being parsed
    private final HeaderParseData headerData = new HeaderParseData();

    // Current state of the parser. Kept in a field so parsing can resume after NEED_MORE_DATA is returned.
    private HeaderParsePosition headerParsePos = HeaderParsePosition.HEADER_START;
    // The byte read immediately before chr (used to recognise CRLF sequences)
    private byte prevChr = 0;
    // The byte most recently read from the buffer
    private byte chr = 0;


    /**
     * Create a parser that reads raw header bytes from the given source and adds the parsed headers to the given
     * collection.
     *
     * @param source      Provides the buffer containing the header bytes and refills it when requested
     * @param headers     The collection each parsed header is added to
     * @param tolerantEol {@code true} if a line terminated by LF without a preceding CR is accepted, {@code false}
     *                        if such a line must trigger an {@link IllegalArgumentException}
     */
    public HttpHeaderParser(HeaderDataSource source, MimeHeaders headers, boolean tolerantEol) {
        this.tolerantEol = tolerantEol;
        this.headers = headers;
        this.source = source;
    }


    /**
     * Return the parser to its initial state: the per-header data is cleared, the parse position goes back to
     * {@link HeaderParsePosition#HEADER_START} and the current and previous bytes are reset to zero.
     */
    public void recycle() {
        headerData.recycle();
        headerParsePos = HeaderParsePosition.HEADER_START;
        prevChr = 0;
        chr = 0;
    }


    /**
     * Parse a single HTTP header (name and value, including any continuation lines that start with SP or HT) from
     * the buffer supplied by the {@link HeaderDataSource}.
     * <p>
     * Parsing is resumable. The parse position is held in a field so that, when the source cannot provide more data
     * and {@link HeaderParseStatus#NEED_MORE_DATA} is returned, the next call continues from the state that was
     * reached.
     * <p>
     * The buffer is modified in place: upper case ASCII letters in the header name are converted to lower case and
     * the header value is compacted (leading and trailing SP/HT removed, each continuation line joined using the
     * single SP or HT that starts it).
     *
     * @return One of {@link HeaderParseStatus#NEED_MORE_DATA}, {@link HeaderParseStatus#HAVE_MORE_HEADERS} or
     *             {@link HeaderParseStatus#DONE}.
     *
     * @throws IOException              If an error occurs during the parsing of the headers
     * @throws IllegalArgumentException If a line ends with LF that is not preceded by CR while tolerant end of line
     *                                      handling is disabled, or if the header line is invalid (reported once
     *                                      the rest of the line has been read)
     */
    public HeaderParseStatus parseHeader() throws IOException {

        // At the start of a line: an empty line ends the headers, anything else is the start of a header name
        while (headerParsePos == HeaderParsePosition.HEADER_START) {

            // Read new bytes if needed
            if (source.getHeaderByteBuffer().position() >= source.getHeaderByteBuffer().limit()) {
                if (!source.fillHeaderBuffer()) {
                    return HeaderParseStatus.NEED_MORE_DATA;
                }
            }

            prevChr = chr;
            chr = source.getHeaderByteBuffer().get();

            if (chr == CR && prevChr != CR) {
                // Possible start of CRLF - process the next byte.
            } else if (chr == LF) {
                // Empty line. A bare LF is only accepted when tolerant end of line handling is enabled.
                if (!tolerantEol && prevChr != CR) {
                    throw new IllegalArgumentException(sm.getString("httpHeaderParser.invalidCrlfNoCR"));
                }
                return HeaderParseStatus.DONE;
            } else {
                // Not an empty line. Rewind so that the bytes just read are parsed as part of the header name.
                if (prevChr == CR) {
                    // Must have read two bytes (first was CR, second was not LF)
                    source.getHeaderByteBuffer().position(source.getHeaderByteBuffer().position() - 2);
                } else {
                    // Must have only read one byte
                    source.getHeaderByteBuffer().position(source.getHeaderByteBuffer().position() - 1);
                }
                break;
            }
        }

        // Only true when the loop above exited via break, i.e. a header line is about to be parsed
        if (headerParsePos == HeaderParsePosition.HEADER_START) {
            // Mark the current buffer position
            headerData.start = source.getHeaderByteBuffer().position();
            headerData.lineStart = headerData.start;
            headerParsePos = HeaderParsePosition.HEADER_NAME;
        }

        //
        // Reading the header name
        // Header name is always US-ASCII
        //

        while (headerParsePos == HeaderParsePosition.HEADER_NAME) {

            // Read new bytes if needed
            if (source.getHeaderByteBuffer().position() >= source.getHeaderByteBuffer().limit()) {
                if (!source.fillHeaderBuffer()) {
                    return HeaderParseStatus.NEED_MORE_DATA;
                }
            }

            // pos is the index of the byte about to be read
            int pos = source.getHeaderByteBuffer().position();
            chr = source.getHeaderByteBuffer().get();
            if (chr == COLON) {
                if (headerData.start == pos) {
                    // Zero length header name - not valid.
                    // skipLine() will handle the error
                    return skipLine();
                }
                // End of the name. Register the header now; its value is filled in once it has been parsed.
                headerParsePos = HeaderParsePosition.HEADER_VALUE_START;
                headerData.headerValue = headers.addValue(source.getHeaderByteBuffer().array(), headerData.start,
                        pos - headerData.start);
                pos = source.getHeaderByteBuffer().position();
                // Mark the current buffer position
                // (the first byte after ':', which is where the compacted value will be written)
                headerData.start = pos;
                headerData.realPos = pos;
                headerData.lastSignificantChar = pos;
                break;
            } else if (!HttpParser.isToken(chr)) {
                // Non-token characters are illegal in header names
                // Parsing continues so the error can be reported in context
                headerData.lastSignificantChar = pos;
                // Rewind one byte so skipLine() re-reads the offending byte
                source.getHeaderByteBuffer().position(source.getHeaderByteBuffer().position() - 1);
                // skipLine() will handle the error
                return skipLine();
            }

            // chr is next byte of header name. Convert to lowercase.
            if (chr >= A && chr <= Z) {
                source.getHeaderByteBuffer().put(pos, (byte) (chr - LC_OFFSET));
            }
        }

        // Resume reading an invalid line. This state is only seen here when an earlier call to skipLine() had to
        // return NEED_MORE_DATA before reaching the end of the line.
        if (headerParsePos == HeaderParsePosition.HEADER_SKIPLINE) {
            return skipLine();
        }

        //
        // Reading the header value (which can be spanned over multiple lines)
        //

        while (headerParsePos == HeaderParsePosition.HEADER_VALUE_START ||
                headerParsePos == HeaderParsePosition.HEADER_VALUE ||
                headerParsePos == HeaderParsePosition.HEADER_MULTI_LINE) {

            if (headerParsePos == HeaderParsePosition.HEADER_VALUE_START) {
                // Skipping spaces
                while (true) {
                    // Read new bytes if needed
                    if (source.getHeaderByteBuffer().position() >= source.getHeaderByteBuffer().limit()) {
                        if (!source.fillHeaderBuffer()) {
                            return HeaderParseStatus.NEED_MORE_DATA;
                        }
                    }

                    chr = source.getHeaderByteBuffer().get();
                    if (chr != SP && chr != HT) {
                        headerParsePos = HeaderParsePosition.HEADER_VALUE;
                        // Rewind so the first non-whitespace byte is read again as part of the value
                        source.getHeaderByteBuffer().position(source.getHeaderByteBuffer().position() - 1);
                        // Avoids prevChr = chr at start of header value
                        // parsing which causes problems when chr is CR
                        // (in the case of an empty header value)
                        chr = 0;
                        break;
                    }
                }
            }
            if (headerParsePos == HeaderParsePosition.HEADER_VALUE) {

                // Reading bytes until the end of the line
                boolean eol = false;
                while (!eol) {

                    // Read new bytes if needed
                    if (source.getHeaderByteBuffer().position() >= source.getHeaderByteBuffer().limit()) {
                        if (!source.fillHeaderBuffer()) {
                            return HeaderParseStatus.NEED_MORE_DATA;
                        }
                    }

                    prevChr = chr;
                    chr = source.getHeaderByteBuffer().get();
                    if (chr == CR && prevChr != CR) {
                        // CR is only permitted at the start of a CRLF sequence.
                        // Possible start of CRLF - process the next byte.
                    } else if (chr == LF) {
                        if (!tolerantEol && prevChr != CR) {
                            throw new IllegalArgumentException(sm.getString("httpHeaderParser.invalidCrlfNoCR"));
                        }
                        eol = true;
                    } else if (prevChr == CR) {
                        // CR was not followed by LF
                        // Invalid value - also need to delete header
                        return skipLine();
                    } else if (HttpParser.isControl(chr) && chr != HT) {
                        // Control character other than HT
                        // Invalid value - also need to delete header
                        return skipLine();
                    } else if (chr == SP || chr == HT) {
                        // Whitespace is copied but lastSignificantChar is not advanced, so whitespace at the end of
                        // the line is dropped when realPos is reset below
                        source.getHeaderByteBuffer().put(headerData.realPos, chr);
                        headerData.realPos++;
                    } else {
                        // Significant byte: copy it and record the position just after it
                        source.getHeaderByteBuffer().put(headerData.realPos, chr);
                        headerData.realPos++;
                        headerData.lastSignificantChar = headerData.realPos;
                    }
                }

                // Ignore whitespaces at the end of the line
                headerData.realPos = headerData.lastSignificantChar;

                // Checking the first character of the new line. If the character
                // is a LWS, then it's a multiline header
                headerParsePos = HeaderParsePosition.HEADER_MULTI_LINE;
            }
            // Read new bytes if needed
            if (source.getHeaderByteBuffer().position() >= source.getHeaderByteBuffer().limit()) {
                if (!source.fillHeaderBuffer()) {
                    return HeaderParseStatus.NEED_MORE_DATA;
                }
            }

            // Absolute get: looks at the next byte without advancing the buffer position
            byte peek = source.getHeaderByteBuffer().get(source.getHeaderByteBuffer().position());
            if (headerParsePos == HeaderParsePosition.HEADER_MULTI_LINE) {
                if (peek != SP && peek != HT) {
                    // The next line does not continue this header: the value is complete
                    headerParsePos = HeaderParsePosition.HEADER_START;
                    break;
                } else {
                    // Copying one extra space in the buffer (since there must
                    // be at least one space inserted between the lines)
                    source.getHeaderByteBuffer().put(headerData.realPos, peek);
                    headerData.realPos++;
                    headerParsePos = HeaderParsePosition.HEADER_VALUE_START;
                }
            }
        }
        // Set the header value
        // (the compacted bytes from the first byte after ':' up to, but excluding, lastSignificantChar)
        headerData.headerValue.setBytes(source.getHeaderByteBuffer().array(), headerData.start,
                headerData.lastSignificantChar - headerData.start);
        headerData.recycle();
        return HeaderParseStatus.HAVE_MORE_HEADERS;
    }


    /**
     * Read the remainder of an invalid header line so that the complete line can be included in the exception that
     * reports it.
     *
     * @return {@link HeaderParseStatus#NEED_MORE_DATA} if the end of the line has not been reached and the source
     *             cannot provide more data. This method never returns any other value: once the end of the line
     *             has been read it always throws.
     *
     * @throws IOException              If the source fails while reading more header data
     * @throws IllegalArgumentException Once the end of the line has been read
     */
    private HeaderParseStatus skipLine() throws IOException {
        // Recorded so that parseHeader() comes straight back here if more data has to be requested
        headerParsePos = HeaderParsePosition.HEADER_SKIPLINE;

        while (true) {
            // Refill the buffer once everything in it has been consumed
            boolean exhausted =
                    source.getHeaderByteBuffer().position() >= source.getHeaderByteBuffer().limit();
            if (exhausted && !source.fillHeaderBuffer()) {
                return HeaderParseStatus.NEED_MORE_DATA;
            }

            int readIndex = source.getHeaderByteBuffer().position();
            prevChr = chr;
            chr = source.getHeaderByteBuffer().get();

            if (chr == LF) {
                // End of the line. A bare LF is only accepted when tolerant end of line handling is enabled.
                if (prevChr != CR && !tolerantEol) {
                    throw new IllegalArgumentException(sm.getString("httpHeaderParser.invalidCrlfNoCR"));
                }
                break;
            }
            if (chr != CR) {
                // Track the last byte of the line that is neither CR nor LF
                headerData.lastSignificantChar = readIndex;
            }
        }

        // lastSignificantChar is an inclusive index here, hence the + 1
        int reportedLength = headerData.lastSignificantChar - headerData.lineStart + 1;
        throw new IllegalArgumentException(sm.getString("httpHeaderParser.invalidHeader",
                HeaderUtil.toPrintableString(source.getHeaderByteBuffer().array(), headerData.lineStart,
                        reportedLength)));
    }


    /**
     * The possible outcomes of a call to {@link HttpHeaderParser#parseHeader()}.
     */
    public enum HeaderParseStatus {
        /**
         * The empty line at the start-of-header position was read. There are no more headers to parse.
         */
        DONE,
        /**
         * A header was parsed and its value has been set. Further headers may follow.
         */
        HAVE_MORE_HEADERS,
        /**
         * The buffer was fully consumed and the data source did not provide more data. Parsing can be resumed with
         * another call once more data is available.
         */
        NEED_MORE_DATA
    }


    /**
     * The states of the header parser.
     */
    public enum HeaderParsePosition {
        /**
         * Start of a new header. A CRLF here means that there are no more headers. Any other character starts a header
         * name.
         */
        HEADER_START,
        /**
         * Reading a header name. All characters of header are HTTP_TOKEN_CHAR. Header name is followed by ':'. No
         * whitespace is allowed.<br>
         * Any non-HTTP_TOKEN_CHAR (this includes any whitespace) encountered before ':' will result in the rest of
         * the line being read and the whole line being reported as an invalid header.
         */
        HEADER_NAME,
        /**
         * Skipping whitespace before text of header value starts, either on the first line of header value (just after
         * ':') or on subsequent lines when it is known that subsequent line starts with SP or HT.
         */
        HEADER_VALUE_START,
        /**
         * Reading the header value. We are inside the value. Either on the first line or on any subsequent line. We
         * come into this state from HEADER_VALUE_START after the first non-SP/non-HT byte is encountered on the line.
         */
        HEADER_VALUE,
        /**
         * Before reading a new line of a header. Once the next byte is peeked, the state changes without advancing our
         * position. The state becomes either HEADER_VALUE_START (if that first byte is SP or HT), or HEADER_START
         * (otherwise).
         */
        HEADER_MULTI_LINE,
        /**
         * Reading all bytes until the end of an invalid line. Once the end of the line is reached the line is
         * reported by throwing an {@link IllegalArgumentException}.
         */
        HEADER_SKIPLINE
    }


    /**
     * Holds the buffer indexes and the value holder for the header line that is currently being parsed. All indexes
     * refer to positions in the header buffer.
     */
    private static class HeaderParseData {
        /**
         * Index of the first byte of the header line. Used as the start of the text reported for an invalid line.
         */
        int lineStart;
        /**
         * While the header name is parsed: index of the first byte of the name.<br>
         * While the header value is parsed: index of the first byte after ':'.
         */
        int start;
        /**
         * Only meaningful while the header value is parsed. Index at which the next byte of the value will be
         * written. Starts at the first byte after ':' and advances as bytes are copied, so that the bytes from
         * [start] to [realPos-1] are the value collected so far.
         */
        int realPos;
        /**
         * While the header name is parsed: only set when a byte that is not a token character is found, to the
         * index of that byte.<br>
         * While an invalid line is skipped: index of the last byte read that is neither CR nor LF.<br>
         * While the header value is parsed: index just after the last byte of the value that is not SP or HT.
         */
        int lastSignificantChar;
        /**
         * Holder for the value of the header. It is {@code null} while the header name is parsed and is obtained
         * when the header is added to the header collection once the name is complete.
         */
        MessageBytes headerValue;

        /**
         * Reset every index to zero and drop the reference to the value holder.
         */
        public void recycle() {
            headerValue = null;
            lastSignificantChar = 0;
            realPos = 0;
            start = 0;
            lineStart = 0;
        }
    }


    /**
     * The source of the raw header bytes consumed by the parser.
     * <p>
     * The parser reads from the buffer using its current position and limit, changes the position to re-read bytes,
     * overwrites bytes in place and accesses the content via {@link ByteBuffer#array()}, so the buffer needs to be
     * writable and backed by an accessible array.
     */
    public interface HeaderDataSource {
        /**
         * Read more data into the header buffer. The implementation is expected to determine if blocking or
         * non-blocking IO should be used. The parser calls this method when the position of the header buffer has
         * reached its limit.
         *
         * @return {@code true} if more data was added to the buffer, otherwise {@code false}
         *
         * @throws IOException If an I/O error occurred while obtaining more header data
         */
        boolean fillHeaderBuffer() throws IOException;

        /**
         * Obtain a reference to the buffer containing the header data. The parser calls this method every time it
         * needs the buffer rather than retaining the reference.
         *
         * @return The buffer containing the header data
         */
        ByteBuffer getHeaderByteBuffer();
    }
}