File: HtmlParameterEncoder.cs

package info (click to toggle)
mono 6.8.0.105+dfsg-3.3
  • links: PTS, VCS
  • area: main
  • in suites: bookworm
  • size: 1,284,512 kB
  • sloc: cs: 11,172,132; xml: 2,850,069; ansic: 671,653; cpp: 122,091; perl: 59,366; javascript: 30,841; asm: 22,168; makefile: 20,093; sh: 15,020; python: 4,827; pascal: 925; sql: 859; sed: 16; php: 1
file content (239 lines) | stat: -rw-r--r-- 9,491 bytes parent folder | download | duplicates (7)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
//------------------------------------------------------------------------------
// <copyright file="HtmlParameterEncoder.cs" company="Microsoft">
//     Copyright (c) Microsoft Corporation.  All rights reserved.
// </copyright>
//------------------------------------------------------------------------------

namespace System.Web.Security.AntiXss {
    using System;
    using System.Collections;
    using System.Text;
    using System.Threading;

    /// <summary>
    /// The type of space encoding to use.
    /// Selects how a space (byte 0x20) is written by <c>HtmlParameterEncoder</c>;
    /// all other bytes are encoded the same way regardless of this value.
    /// </summary>
    internal enum EncodingType {
        /// <summary>
        /// Encode spaces for use in query strings.
        /// A space is written as <c>%20</c>.
        /// </summary>
        QueryString = 1,

        /// <summary>
        /// Encode spaces for use in form data.
        /// A space is written as <c>+</c>.
        /// </summary>
        HtmlForm = 2
    }

    /// <summary>
    /// Provides Html Parameter Encoding methods.
    /// </summary>
    /// <remarks>
    /// The input text is converted to bytes with the caller-supplied <see cref="Encoding"/>. Each byte is then
    /// either copied to the output unchanged (when its entry in the lookup table is <c>null</c>) or replaced by
    /// the character sequence held in that entry. Spaces are handled separately, according to the requested
    /// <see cref="EncodingType"/>.
    /// </remarks>
    internal static class HtmlParameterEncoder {

        /// <summary>
        /// The value to use when encoding a space for query strings.
        /// </summary>
        private static readonly char[] QueryStringSpace = "%20".ToCharArray();

        /// <summary>
        /// The value to use when encoding a space for form data.
        /// </summary>
        private static readonly char[] FormStringSpace = "+".ToCharArray();

        /// <summary>
        /// The values to output for each byte when encoding query string or form parameters.
        /// Built on first use; a <c>null</c> entry means the byte is emitted unchanged.
        /// </summary>
        private static readonly Lazy<char[][]> characterValuesLazy = new Lazy<char[][]>(InitialiseSafeList);

        /// <summary>
        /// Encodes a string for query string encoding and returns the encoded string.
        /// </summary>
        /// <param name="s">The text to URL-encode.</param>
        /// <param name="encoding">The encoding for the text parameter.</param>
        /// <returns>The URL-encoded text. A null or empty <paramref name="s"/> is returned unchanged.</returns>
        /// <remarks>URL encoding ensures that all browsers will correctly transmit text in URL strings. 
        /// Characters such as a question mark (?), ampersand (&amp;), slash mark (/), and spaces might be truncated or corrupted by some browsers. 
        /// As a result, these characters must be encoded in &lt;a&gt; tags or in query strings where the strings can be re-sent by a browser 
        /// in a request string. Spaces are encoded as <c>%20</c>.</remarks>
        /// <exception cref="ArgumentNullException">Thrown if the encoding is null and <paramref name="s"/> is not null or empty.</exception>
        internal static string QueryStringParameterEncode(string s, Encoding encoding) {
            return FormQueryEncode(s, encoding, EncodingType.QueryString);
        }

        /// <summary>
        /// Encodes a string for form URL encoding and returns the encoded string.
        /// </summary>
        /// <param name="s">The text to URL-encode.</param>
        /// <param name="encoding">The encoding for the text parameter.</param>
        /// <returns>The URL-encoded text. A null or empty <paramref name="s"/> is returned unchanged.</returns>
        /// <remarks>URL encoding ensures that all browsers will correctly transmit text in URL strings. 
        /// Characters such as a question mark (?), ampersand (&amp;), slash mark (/), and spaces might be truncated or corrupted by some browsers. 
        /// As a result, these characters must be encoded in &lt;a&gt; tags or in query strings where the strings can be re-sent by a browser 
        /// in a request string. Spaces are encoded as <c>+</c>.</remarks>
        /// <exception cref="ArgumentNullException">Thrown if the encoding is null and <paramref name="s"/> is not null or empty.</exception>
        internal static string FormStringParameterEncode(string s, Encoding encoding) {
            return FormQueryEncode(s, encoding, EncodingType.HtmlForm);
        }

        /// <summary>
        /// Encodes a string for Query String or Form Data encoding.
        /// </summary>
        /// <param name="s">The text to URL-encode.</param>
        /// <param name="encoding">The encoding for the text parameter.</param>
        /// <param name="encodingType">The encoding type to use.</param>
        /// <returns>The encoded text.</returns>
        private static string FormQueryEncode(string s, Encoding encoding, EncodingType encodingType) {
            return FormQueryEncode(s, encoding, encodingType, characterValuesLazy);
        }

        /// <summary>
        /// Encodes a string byte by byte using the supplied lookup table.
        /// </summary>
        /// <param name="s">The text to encode.</param>
        /// <param name="encoding">The encoding used to convert <paramref name="s"/> to bytes.</param>
        /// <param name="encodingType">Selects how a space (0x20) is written.</param>
        /// <param name="encodingTable">
        /// Lazily built table indexed by byte value. A <c>null</c> entry means the byte is copied through
        /// unchanged; any other entry is the character sequence written in place of the byte.
        /// </param>
        /// <returns>The encoded text. A null or empty <paramref name="s"/> is returned unchanged.</returns>
        /// <exception cref="ArgumentNullException">Thrown if <paramref name="encoding"/> is null and <paramref name="s"/> is not null or empty.</exception>
        /// <exception cref="ArgumentOutOfRangeException">Thrown if the text contains a space and <paramref name="encodingType"/> is not a known value.</exception>
        private static string FormQueryEncode(string s, Encoding encoding, EncodingType encodingType, Lazy<char[][]> encodingTable) {
            if (string.IsNullOrEmpty(s)) {
                return s;
            }

            if (encoding == null) {
                throw new ArgumentNullException("encoding");
            }

            char[][] characterValues = encodingTable.Value;

            // The text is percent-encoded one byte at a time, using the bytes produced by the
            // caller-supplied encoding (RFC 3986 recommends UTF-8; see http://tools.ietf.org/html/rfc3986).
            // Conversion to char[] keeps null characters inline.
            byte[] inputBytes = encoding.GetBytes(s.ToCharArray());
            char[] encodedInput = new char[inputBytes.Length * 3]; // Each byte can potentially be encoded as %xx
            int outputLength = 0;

            for (int bytePosition = 0; bytePosition < inputBytes.Length; bytePosition++) {
                byte currentByte = inputBytes[bytePosition];

                // An index equal to the table length is already out of range, so the test is a strict "<".
                bool inTable = currentByte < characterValues.Length;

                if (currentByte == 0x00 || currentByte == 0x20 || !inTable || characterValues[currentByte] != null) {
                    // byte needs to be encoded
                    char[] encodedCharacter;

                    if (currentByte == 0x20) {
                        encodedCharacter = GetEncodedSpace(encodingType);
                    }
                    else {
                        encodedCharacter = inTable ? characterValues[currentByte] : null;

                        if (encodedCharacter == null) {
                            // No usable table entry (byte outside the table, or a NUL whose entry is empty):
                            // fall back to the generator the tables are built from rather than failing.
                            encodedCharacter = SafeList.PercentThenHexValueGenerator(currentByte);
                        }
                    }

                    for (int j = 0; j < encodedCharacter.Length; j++) {
                        encodedInput[outputLength++] = encodedCharacter[j];
                    }
                }
                else {
                    // byte does not need encoding
                    encodedInput[outputLength++] = (char)currentByte;
                }
            }

            return new string(encodedInput, 0, outputLength);
        }

        /// <summary>
        /// Returns the character sequence that represents a space for the given encoding type.
        /// </summary>
        /// <param name="encodingType">The encoding type to use.</param>
        /// <returns><c>%20</c> for query strings, <c>+</c> for form data.</returns>
        /// <exception cref="ArgumentOutOfRangeException">Thrown if <paramref name="encodingType"/> is not a known value.</exception>
        private static char[] GetEncodedSpace(EncodingType encodingType) {
            switch (encodingType) {
                case EncodingType.QueryString:
                    return QueryStringSpace;

                // Special case for Html Form data, from http://www.w3.org/TR/html401/appendix/notes.html#non-ascii-chars
                case EncodingType.HtmlForm:
                    return FormStringSpace;

                default:
                    throw new ArgumentOutOfRangeException("encodingType");
            }
        }

        /// <summary>
        /// Builds the lookup table used for query string and form parameter encoding:
        /// a percent-then-hex table with the URL parameter safe list punched into it.
        /// </summary>
        /// <returns>The lookup table.</returns>
        private static char[][] InitialiseSafeList() {
            char[][] result = SafeList.Generate(255, SafeList.PercentThenHexValueGenerator);
            SafeList.PunchSafeList(ref result, UrlParameterSafeList());
            return result;
        }

        /// <summary>
        /// Provides the safe characters for URL parameter encoding.
        /// </summary>
        /// <returns>The safe characters for URL parameter encoding: hyphen, full stop, digits,
        /// upper and lower case ASCII letters, underscore and tilde.</returns>
        private static IEnumerable UrlParameterSafeList() {
            // Hyphen
            yield return 0x2D;

            // Full stop/period
            yield return 0x2E;

            // Digits
            for (int i = 0x30; i <= 0x39; i++) {
                yield return i;
            }

            // Upper case alphabet
            for (int i = 0x41; i <= 0x5A; i++) {
                yield return i;
            }

            // Underscore
            yield return 0x5F;

            // Lower case alphabet
            for (int i = 0x61; i <= 0x7A; i++) {
                yield return i;
            }

            // Tilde
            yield return 0x7E;
        }

        #region UrlPathEncode Helpers

        /// <summary>
        /// The values to output for each byte when encoding URL paths.
        /// Built on first use; a <c>null</c> entry means the byte is emitted unchanged.
        /// </summary>
        private static readonly Lazy<char[][]> pathCharacterValuesLazy = new Lazy<char[][]>(InitialisePathSafeList);

        /// <summary>
        /// Encodes a string for use in a URL path and returns the encoded string.
        /// </summary>
        /// <param name="s">The text to encode.</param>
        /// <param name="encoding">The encoding for the text parameter.</param>
        /// <returns>The encoded text. A null or empty <paramref name="s"/> is returned unchanged.</returns>
        /// <remarks>Uses the URL path safe list, which extends the URL parameter safe list with
        /// <c>#</c>, <c>%</c>, <c>/</c>, <c>\</c>, <c>(</c> and <c>)</c>. Spaces are encoded as <c>%20</c>.</remarks>
        /// <exception cref="ArgumentNullException">Thrown if the encoding is null and <paramref name="s"/> is not null or empty.</exception>
        internal static string UrlPathEncode(string s, Encoding encoding) {
            return FormQueryEncode(s, encoding, EncodingType.QueryString, pathCharacterValuesLazy);
        }

        /// <summary>
        /// Builds the lookup table used for URL path encoding:
        /// a percent-then-hex table with the URL path safe list punched into it.
        /// </summary>
        /// <returns>The lookup table.</returns>
        private static char[][] InitialisePathSafeList() {
            char[][] result = SafeList.Generate(255, SafeList.PercentThenHexValueGenerator);
            SafeList.PunchSafeList(ref result, UrlPathSafeList());
            return result;
        }

        /// <summary>
        /// Provides the safe characters for URL path encoding.
        /// </summary>
        /// <returns>The safe characters for URL path encoding: everything in the URL parameter
        /// safe list plus hash, percent, forward slash, backslash and parentheses.</returns>
        private static IEnumerable UrlPathSafeList() {

            foreach (var c in UrlParameterSafeList()) {
                yield return c;
            }

            // Hash
            yield return 0x23;

            // Percent
            yield return 0x25;

            // Forward slash
            yield return 0x2F;

            // Backwards slash
            yield return 0x5C;

            // Left parenthesis
            yield return 0x28;

            // Right parenthesis
            yield return 0x29;
        }

        #endregion
    }
}