File: SurrogatePairs.java

package info (click to toggle)
openjdk-11 11.0.4%2B11-1
  • links: PTS, VCS
  • area: main
  • in suites: sid
  • size: 757,028 kB
  • sloc: java: 5,016,041; xml: 1,191,974; cpp: 934,731; ansic: 555,697; sh: 24,299; objc: 12,703; python: 3,602; asm: 3,415; makefile: 2,772; awk: 351; sed: 172; perl: 114; jsp: 24; csh: 3
file content (125 lines) | stat: -rw-r--r-- 4,496 bytes parent folder | download | duplicates (10)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
/*
 * Copyright (c) 2001, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */

/*
 * @test
 * @bug 4396708
 * @summary Test URL encoder and decoder on a string that contains
 * surrogate pairs.
 *
 */

import java.io.*;
import java.net.*;

/*
 * Surrogate pairs are sequences of two UTF-16 code units (Java chars) where
 * the first one (the high surrogate) lies in the range [d800, dbff] and the
 * second one (the low surrogate) lies in the range [dc00, dfff]. Each pair
 * encodes a single supplementary code point in the range U+10000 to
 * U+10FFFF, which adds about 1M more characters to Unicode.
 */
/**
 * Regression test checking that {@link URLEncoder} and {@link URLDecoder}
 * handle strings containing surrogate pairs when the charset is UTF-8.
 *
 * <p>Every entry of {@code testStrings} is encoded and compared with the
 * entry of {@code correctEncodings} at the same index; the encoded form is
 * then decoded again and compared with the original string. The first
 * mismatch aborts the run with an exception.
 */
public class SurrogatePairs {

    /**
     * Input strings: surrogate pairs built from the first and last high
     * surrogate (D800, DBFF) and the first and last low surrogate
     * (DC00, DFFF), alone and placed before or after '1', '@' and U+0101.
     */
    static final String[] testStrings = {"\uD800\uDC00",
                                         "\uD800\uDFFF",
                                         "\uDBFF\uDC00",
                                         "\uDBFF\uDFFF",
                                         "1\uDBFF\uDC00",
                                         "@\uDBFF\uDC00",
                                         "\uDBFF\uDC001",
                                         "\uDBFF\uDC00@",
                                         "\u0101\uDBFF\uDC00",
                                         "\uDBFF\uDC00\u0101"
    };

    /**
     * Expected result of URL-encoding the element of {@code testStrings}
     * at the same index with the UTF-8 charset.
     */
    static final String[] correctEncodings = {"%F0%90%80%80",
                                              "%F0%90%8F%BF",
                                              "%F4%8F%B0%80",
                                              "%F4%8F%BF%BF",
                                              "1%F4%8F%B0%80",
                                              "%40%F4%8F%B0%80",
                                              "%F4%8F%B0%801",
                                              "%F4%8F%B0%80%40",
                                              "%C4%81%F4%8F%B0%80",
                                              "%F4%8F%B0%80%C4%81"
    };

    /**
     * Runs the encode/decode round trip for every table entry.
     *
     * @param args ignored
     * @throws Exception if an encoding or a decoded string does not match
     *         its expected value
     * @throws IllegalStateException if the two tables differ in length
     */
    public static void main(String[] args) throws Exception {

        // Fail fast with a clear message rather than with an
        // ArrayIndexOutOfBoundsException (or with silently unchecked
        // expectations) if the two tables ever get out of sync.
        if (testStrings.length != correctEncodings.length) {
            throw new IllegalStateException(
                "testStrings has " + testStrings.length
                + " entries but correctEncodings has "
                + correctEncodings.length);
        }

        for (int i = 0; i < testStrings.length; i++) {
            test(testStrings[i], correctEncodings[i]);
        }
    }

    /**
     * Encodes {@code str}, checks the result against
     * {@code correctEncoding}, then decodes it and checks that the original
     * string comes back. Progress is printed to standard output.
     *
     * @param str the string to encode
     * @param correctEncoding the expected URL encoding of {@code str}
     * @throws Exception if the encoding or the round trip does not match
     */
    private static void test(String str, String correctEncoding)
        throws Exception {

        System.out.println("Unicode bytes of test string are: "
                           + getHexBytes(str));

        // The String-named-charset overloads are the API under test.
        String encoded = URLEncoder.encode(str, "UTF-8");

        System.out.println("URLEncoding is: " + encoded);

        if (!encoded.equals(correctEncoding)) {
            // Report the actual value too, so a failure can be diagnosed
            // from the exception alone.
            throw new Exception("The encoding is incorrect!" +
                                " It should be " + correctEncoding +
                                " but was " + encoded);
        }
        System.out.println("The encoding is correct!");

        String decoded = URLDecoder.decode(encoded, "UTF-8");

        System.out.println("Unicode bytes for URLDecoding are: "
                           + getHexBytes(decoded));

        if (!str.equals(decoded)) {
            throw new Exception("The decoded is not equal to the original:"
                                + " expected [" + getHexBytes(str).trim()
                                + "] but was [" + getHexBytes(decoded).trim()
                                + "]");
        }
        System.out.println("The decoding is correct");
        System.out.println("---");
    }

    /**
     * Renders each char of {@code s} as four lowercase hexadecimal digits
     * followed by a single space, e.g. {@code "d800 dc00 "}.
     *
     * @param s the string to dump
     * @return the hexadecimal dump; empty if {@code s} is empty
     */
    private static String getHexBytes(String s) {
        // Five output characters per input char: four digits and a space.
        StringBuilder sb = new StringBuilder(s.length() * 5);
        for (int i = 0; i < s.length(); i++) {
            sb.append(String.format("%04x ", (int) s.charAt(i)));
        }
        return sb.toString();
    }

}