File: AttributeParser.java

package info (click to toggle)
tomcat7 7.0.56-3%2Bdeb8u11
  • links: PTS, VCS
  • area: main
  • in suites: jessie
  • size: 35,688 kB
  • ctags: 41,823
  • sloc: java: 249,464; xml: 51,553; jsp: 3,037; sh: 1,361; perl: 269; makefile: 195
file content (351 lines) | stat: -rw-r--r-- 13,144 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.jasper.compiler;

/**
 * Converts a JSP attribute value into the unquoted equivalent. The attribute
 * may contain EL expressions, in which case care needs to be taken to avoid any
 * ambiguities. For example, consider the attribute values "${1+1}" and
 * "\${1+1}". After unquoting, both appear as "${1+1}" but the first should
 * evaluate to "2" and the second to "${1+1}". Literal \, $ and # need special
 * treatment to ensure there is no ambiguity. The JSP attribute unquoting
 * covers \\, \", \', \$, \#, %\>, <\%, &amp;apos; and &amp;quot;
 * <p>
 * Instances are single use: each call to one of the static
 * <code>getUnquoted</code> methods creates a parser, runs it over the input
 * once and discards it.
 */
public class AttributeParser {

    /*
     * System property that controls if the strict quoting rules are applied.
     * Strict quoting is enabled when the property is not set.
     */
    private static final boolean STRICT_QUOTE_ESCAPING = Boolean.valueOf(
            System.getProperty(
                    "org.apache.jasper.compiler.Parser.STRICT_QUOTE_ESCAPING",
                    "true")).booleanValue();

    /**
     * Parses the provided input String as a JSP attribute and returns an
     * unquoted value.
     *
     * @param input         The input. Must not be <code>null</code>.
     * @param quote         The quote character for the attribute or 0 for
     *                      scripting expressions.
     * @param isELIgnored   Is expression language being ignored on the page
     *                      where the JSP attribute is defined.
     * @param isDeferredSyntaxAllowedAsLiteral
     *                      Are deferred expressions treated as literals?
     * @return              An unquoted JSP attribute that, if it contains
     *                      expression language can be safely passed to the EL
     *                      processor without fear of ambiguity.
     * @throws IllegalArgumentException if strict quote escaping is enabled and
     *                      the input contains an unescaped occurrence of the
     *                      quote character.
     * @throws NullPointerException if input is <code>null</code>.
     */
    public static String getUnquoted(String input, char quote,
            boolean isELIgnored, boolean isDeferredSyntaxAllowedAsLiteral) {
        return (new AttributeParser(input, quote, isELIgnored,
                isDeferredSyntaxAllowedAsLiteral,
                STRICT_QUOTE_ESCAPING)).getUnquoted();
    }

    /**
     * Provided solely for unit test purposes and allows per call overriding of
     * the STRICT_QUOTE_ESCAPING system property.
     *
     * @param input         The input. Must not be <code>null</code>.
     * @param quote         The quote character for the attribute or 0 for
     *                      scripting expressions.
     * @param isELIgnored   Is expression language being ignored on the page
     *                      where the JSP attribute is defined.
     * @param isDeferredSyntaxAllowedAsLiteral
     *                      Are deferred expressions treated as literals?
     * @param strict        The value to use for STRICT_QUOTE_ESCAPING.
     * @return              An unquoted JSP attribute that, if it contains
     *                      expression language can be safely passed to the EL
     *                      processor without fear of ambiguity.
     * @throws IllegalArgumentException if strict is <code>true</code> and the
     *                      input contains an unescaped occurrence of the quote
     *                      character.
     * @throws NullPointerException if input is <code>null</code>.
     */
    protected static String getUnquoted(String input, char quote,
            boolean isELIgnored, boolean isDeferredSyntaxAllowedAsLiteral,
            boolean strict) {
        return (new AttributeParser(input, quote, isELIgnored,
                isDeferredSyntaxAllowedAsLiteral, strict)).getUnquoted();
    }

    /* The quoted input string. */
    private final String input;

    /* The quote used for the attribute - 0 for scripting expressions. */
    private final char quote;

    /* Is expression language being ignored - affects unquoting. \$ and \# are
     * treated as literals rather than quoted values. */
    private final boolean isELIgnored;

    /* Are deferred expression treated as literals */
    private final boolean isDeferredSyntaxAllowedAsLiteral;

    /* Whether an unescaped quote character in the input is an error. Set from
     * STRICT_QUOTE_ESCAPING unless overridden by a unit test. */
    private final boolean strict;

    /* The type ($ or #) of expression. Literals have a type of 0. */
    private final char type;

    /* The length of the quoted input string. */
    private final int size;

    /* Tracks the current position of the parser in the input String. */
    private int i = 0;

    /* Indicates if the last character returned by nextChar() was escaped. */
    private boolean lastChEscaped = false;

    /* The unquoted result. */
    private final StringBuilder result;


    /**
     * Creates a single use parser for the given attribute value. Use the
     * static <code>getUnquoted</code> methods rather than calling this
     * directly.
     *
     * @param input         The quoted attribute value.
     * @param quote         The quote character for the attribute or 0 for
     *                      scripting expressions.
     * @param isELIgnored   Is expression language being ignored?
     * @param isDeferredSyntaxAllowedAsLiteral
     *                      Are deferred expressions treated as literals?
     * @param strict        Should an unescaped quote character be rejected?
     */
    private AttributeParser(String input, char quote,
            boolean isELIgnored, boolean isDeferredSyntaxAllowedAsLiteral,
            boolean strict) {
        this.input = input;
        this.quote = quote;
        this.isELIgnored = isELIgnored;
        this.isDeferredSyntaxAllowedAsLiteral =
            isDeferredSyntaxAllowedAsLiteral;
        this.strict = strict;
        // getType() reads isELIgnored and isDeferredSyntaxAllowedAsLiteral so
        // it must be called after those fields have been assigned.
        this.type = getType(input);
        this.size = input.length();
        result = new StringBuilder(size);
    }

    /*
     * Work through input looking for literals and expressions until the input
     * has all been read. Literal text and EL alternate, starting with a
     * (possibly empty) literal.
     */
    private String getUnquoted() {
        while (i < size) {
            parseLiteral();
            parseEL();
        }
        return result.toString();
    }

    /*
     * This method gets the next unquoted character and looks for
     * - literals that need to be converted for EL processing
     *   \ -> type{'\\'}
     *   $ -> type{'$'}
     *   # -> type{'#'}
     * - start of EL
     *   ${
     *   #{
     * Note all the examples above *do not* include the escaping required to use
     * the values in Java code.
     *
     * Returns when the start of an EL expression has been found (with the
     * position moved back to the first character of that expression) or when
     * the end of the input has been reached.
     */
    private void parseLiteral() {
        boolean foundEL = false;
        while (i < size && !foundEL) {
            char ch = nextChar();
            if (!isELIgnored && ch == '\\') {
                appendConverted("\\", "{'\\\\'}");
            } else if (!isELIgnored && ch == '$' && lastChEscaped){
                appendConverted("\\$", "{'$'}");
            } else if (!isELIgnored && ch == '#' && lastChEscaped){
                // Note if isDeferredSyntaxAllowedAsLiteral==true, \# will
                // not be treated as an escape
                appendConverted("\\#", "{'#'}");
            } else if (ch == type){
                if (i < size && input.charAt(i) == '{') {
                    foundEL = true;
                    // Move back to start of EL. The character just read was
                    // not escaped so it occupied exactly one input position.
                    i--;
                } else {
                    // A lone $ or # that does not start an expression
                    result.append(ch);
                }
            } else {
                result.append(ch);
            }
        }
    }

    /*
     * Appends a literal character that needs protecting from the EL
     * processor. If the attribute contains no EL (type is 0) the plain form is
     * appended, otherwise the type character followed by the EL string
     * literal form is appended.
     */
    private void appendConverted(String plain, String elLiteral) {
        if (type == 0) {
            result.append(plain);
        } else {
            result.append(type);
            result.append(elLiteral);
        }
    }

    /*
     * For EL need to unquote everything but no need to convert anything. The
     * EL is terminated by '}'. The only other valid location for '}' is inside
     * a StringLiteral. The literals are delimited by '\'' or '\"'. The only
     * other valid location for '\'' or '\"' is also inside a StringLiteral. A
     * quote character inside a StringLiteral must be escaped if the same quote
     * character is used to delimit the StringLiteral.
     */
    private void parseEL() {
        boolean endEL = false;
        boolean insideLiteral = false;
        char literalQuote = 0;
        while (i < size && !endEL) {
            char ch = nextChar();
            if (ch == '\'' || ch == '\"') {
                if (insideLiteral) {
                    if (literalQuote == ch) {
                        insideLiteral = false;
                    }
                } else {
                    insideLiteral = true;
                    literalQuote = ch;
                }
                result.append(ch);
            } else if (ch == '\\') {
                result.append(ch);
                // Inside a StringLiteral a backslash escapes the character
                // that follows it. Copy that character straight to the output
                // so that an escaped quote is not mistaken for the end of the
                // StringLiteral. The bounds check was previously written as
                // "size < i", which can never be true, so this branch never
                // ran.
                if (insideLiteral && i < size) {
                    ch = nextChar();
                    result.append(ch);
                }
            } else if (ch == '}') {
                if (!insideLiteral) {
                    endEL = true;
                }
                result.append(ch);
            } else {
                result.append(ch);
            }
        }
    }

    /*
     * Returns the next unquoted character and sets the lastChEscaped flag to
     * indicate if it was quoted/escaped or not.
     * &apos; is always unquoted to '
     * &quot; is always unquoted to "
     * \" is always unquoted to "
     * \' is always unquoted to '
     * \\ is always unquoted to \
     * \$ is unquoted to $ if EL is not being ignored
     * \# is unquoted to # if EL is not being ignored and deferred syntax is
     *    not allowed as a literal
     * <\% is always unquoted to <%
     * %\> is always unquoted to %>
     *
     * The position is advanced past everything that was consumed. The
     * lastChEscaped flag is only set for the backslash escapes, not for the
     * entities or the <\% and %\> sequences.
     *
     * Throws IllegalArgumentException if strict quoting is enabled and the
     * next character is an unescaped occurrence of the attribute quote.
     */
    private char nextChar() {
        lastChEscaped = false;
        char ch = input.charAt(i);

        if (ch == '&') {
            if (input.startsWith("&apos;", i)) {
                ch = '\'';
                i += 6;
            } else if (input.startsWith("&quot;", i)) {
                ch = '\"';
                i += 6;
            } else {
                // Some other use of & - return it unchanged
                ++i;
            }
        } else if (ch == '\\' && i + 1 < size) {
            ch = input.charAt(i + 1);
            if (ch == '\\' || ch == '\"' || ch == '\'' ||
                    (!isELIgnored &&
                            (ch == '$' ||
                                    (!isDeferredSyntaxAllowedAsLiteral &&
                                            ch == '#')))) {
                // Recognised escape - consume both characters
                i += 2;
                lastChEscaped = true;
            } else {
                // Not an escape - return the backslash itself and leave the
                // following character to be read by the next call
                ch = '\\';
                ++i;
            }
        } else if (input.startsWith("<\\%", i)) {
            // Note this is a hack since nextChar only returns a single char
            // It is safe since <% does not require special treatment for EL
            // or for literals
            result.append('<');
            i+=3;
            return '%';
        } else if (input.startsWith("%\\>", i)) {
            // Note this is a hack since nextChar only returns a single char
            // It is safe since %> does not require special treatment for EL
            // or for literals
            result.append('%');
            i+=3;
            return '>';
        } else if (ch == quote && strict) {
            String msg = Localizer.getMessage("jsp.error.attribute.noescape",
                    input, String.valueOf(quote));
            throw new IllegalArgumentException(msg);
        } else {
            ++i;
        }

        return ch;
    }

    /*
     * Determines the type of expression by looking for the first unquoted ${
     * or #{. Returns '$' or '#' accordingly, or 0 if the value is null, EL is
     * being ignored or no expression start is found. #{ is not considered when
     * deferred syntax is allowed as a literal.
     */
    private char getType(String value) {
        if (value == null) {
            return 0;
        }

        if (isELIgnored) {
            return 0;
        }

        int j = 0;
        int len = value.length();
        char current;

        while (j < len) {
            current = value.charAt(j);
            if (current == '\\') {
                // Escape character - skip a character
                j++;
            } else if (current == '#' && !isDeferredSyntaxAllowedAsLiteral) {
                if (j < (len -1) && value.charAt(j + 1) == '{') {
                    return '#';
                }
            } else if (current == '$') {
                if (j < (len - 1) && value.charAt(j + 1) == '{') {
                    return '$';
                }
            }
            j++;
        }
        return 0;
    }
}