File: SurrogatePairs.java

package info (click to toggle)
openjdk-23 23.0.2%2B7-1
  • links: PTS, VCS
  • area: main
  • in suites: sid
  • size: 815,324 kB
  • sloc: java: 5,632,909; cpp: 1,303,022; xml: 1,237,193; ansic: 419,177; asm: 404,932; objc: 20,978; sh: 15,486; javascript: 11,040; python: 6,802; makefile: 2,331; perl: 357; awk: 351; sed: 172; pascal: 103; exp: 26; jsp: 24; csh: 3
file content (91 lines) | stat: -rw-r--r-- 3,927 bytes parent folder | download | duplicates (6)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
/*
 * Copyright (c) 2001, 2023, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */

/*
 * @test
 * @bug 4396708
 * @summary Test URL encoder and decoder on a string that contains
 * surrogate pairs.
 * @run junit SurrogatePairs
 */

import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.MethodSource;

import java.net.URLDecoder;
import java.net.URLEncoder;
import java.util.stream.Collectors;

import static java.nio.charset.StandardCharsets.UTF_8;
import static org.junit.jupiter.api.Assertions.*;

/*
 * Surrogate pairs are two-character Unicode sequences where the first
 * character lies in the range [D800, DBFF] and the second character lies
 * in the range [DC00, DFFF]. They are used as an escaping mechanism to add
 * about one million more characters to Unicode.
 */
/**
 * Checks that {@link URLEncoder} and {@link URLDecoder} handle strings that
 * contain surrogate pairs, as well as unpaired surrogates, when UTF-8 is used.
 */
public class SurrogatePairs {

    /**
     * Supplies the test cases for {@link #test(String, String)}.
     *
     * @return rows of the form {@code {input, expected URL-encoded input}}
     */
    public static String[][] arguments() {
        return new String[][] {
                // Well-formed pairs at the four corners of the surrogate ranges
                {"\uD800\uDC00", "%F0%90%80%80"},
                {"\uD800\uDFFF", "%F0%90%8F%BF"},
                {"\uDBFF\uDC00", "%F4%8F%B0%80"},
                {"\uDBFF\uDFFF", "%F4%8F%BF%BF"},
                // A pair next to an unreserved character ('1') or an escaped one ('@')
                {"1\uDBFF\uDC00", "1%F4%8F%B0%80"},
                {"@\uDBFF\uDC00", "%40%F4%8F%B0%80"},
                {"\uDBFF\uDC001", "%F4%8F%B0%801"},
                {"\uDBFF\uDC00@", "%F4%8F%B0%80%40"},
                // A pair surrounded by runs of a two-byte (in UTF-8) character
                {"\u0101\uDBFF\uDC00", "%C4%81%F4%8F%B0%80"},
                {"\u0101\u0101\u0101\u0101\u0101\u0101\uDBFF\uDC00\u0101", "%C4%81%C4%81%C4%81%C4%81%C4%81%C4%81%F4%8F%B0%80%C4%81"},
                {"\u0101\u0101\u0101\u0101\u0101\u0101\u0101\uDBFF\uDC00\u0101", "%C4%81%C4%81%C4%81%C4%81%C4%81%C4%81%C4%81%F4%8F%B0%80%C4%81"},
                {"\u0101\u0101\u0101\u0101\u0101\u0101\u0101\u0101\uDBFF\uDC00\u0101", "%C4%81%C4%81%C4%81%C4%81%C4%81%C4%81%C4%81%C4%81%F4%8F%B0%80%C4%81"},
                {"\uDBFF\uDC00\u0101", "%F4%8F%B0%80%C4%81"},
                // Low surrogate before high surrogate: neither forms a pair,
                // so each one is expected to be encoded as '?' (%3F)
                {"\uDE0A\uD83D", "%3F%3F"},
                {"1\uDE0A\uD83D", "1%3F%3F"},
                {"@\uDE0A\uD83D", "%40%3F%3F"},
                // Unpaired low surrogate, a valid pair, a space ('+') and a
                // trailing unpaired high surrogate in one string
                {"1@1\uDE0A\uD800\uDC00 \uD83D", "1%401%3F%F0%90%80%80+%3F"}
        };
    }

    /**
     * Encodes {@code str} as UTF-8 and compares the result with the expected
     * encoding, then decodes the encoded form and compares it with the input
     * after unpaired surrogates have been replaced by {@code '?'}.
     *
     * @param str             the string to encode
     * @param correctEncoding the expected result of URL-encoding {@code str}
     */
    @ParameterizedTest
    @MethodSource("arguments")
    public void test(String str, String correctEncoding) {
        String encoded = URLEncoder.encode(str, UTF_8);
        assertEquals(correctEncoding, encoded, () ->
                "str=%s, expected=%s, actual=%s"
                        .formatted(escape(str), escape(correctEncoding), escape(encoded)));

        // Map unmappable characters to '?': the encoder has already replaced
        // unpaired surrogates, so the decoded text can only match the input
        // after the same replacement has been applied to it.
        String cleanStr = new String(str.getBytes(UTF_8), UTF_8);
        String decoded = URLDecoder.decode(encoded, UTF_8);
        // Report cleanStr (not str) as the expected value, since that is what
        // the assertion actually compares against.
        assertEquals(cleanStr, decoded, () ->
                "expected=%s, actual=%s".formatted(escape(cleanStr), escape(decoded)));
    }

    /**
     * Renders every UTF-16 code unit of {@code s} as a {@code \\uXXXX} escape
     * so that failure messages stay readable for surrogates and other
     * non-ASCII characters.
     *
     * @param s the string to render
     * @return the concatenated escapes, one per {@code char} of {@code s}
     */
    private static String escape(String s) {
        return s.chars().mapToObj(c -> String.format("\\u%04x", c))
                .collect(Collectors.joining());
    }
}