1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212
|
/*
* Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
import static java.nio.charset.StandardCharsets.UTF_8;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.util.jar.Attributes;
import java.util.jar.Attributes.Name;
import java.util.jar.Manifest;
import java.util.List;
import java.util.ArrayList;
import org.testng.annotations.Test;
import static org.testng.Assert.*;
/**
* @test
* @bug 8066619
* @run testng ValueUtf8Coding
* @summary Tests encoding and decoding manifest header values to and from
* UTF-8 with the complete Unicode character set.
*/ /*
* see also "../tools/launcher/UnicodeTest.java" for manifest attributes
* parsed during launch
*/
public class ValueUtf8Coding {

    /**
     * Maximum number of bytes of UTF-8 encoded characters in one header value.
     * <p>
     * There are too many different Unicode code points (more than one million)
     * to fit all into one manifest value. The specifications state:
     * <q>Implementations should support 65535-byte (not character) header
     * values, and 65535 headers per file. They might run out of memory,
     * but there should not be hard-coded limits below these values.</q>
     *
     * @see <a
     * href="{@docRoot}/../specs/jar/jar.html#Notes_on_Manifest_and_Signature_Files">
     * Notes on Manifest and Signature Files</a>
     */
    static final int SUPPORTED_VALUE_LENGTH = 65535;

    /**
     * Returns {@code true} if {@code codePoint} is known not to be a supported
     * character in manifest header values.
     * <p>
     * The specifications explicitly forbid some characters in manifest header
     * values: <q>otherchar: any UTF-8 character except NUL, CR and LF</q>.
     * {@code NUL} ({@code 0x0}), however, works just fine and might have been
     * used and might still be, which is why only {@code CR} and {@code LF}
     * are reported as unsupported here.
     *
     * @param codePoint the Unicode code point to check
     * @return {@code true} for carriage return and line feed, {@code false}
     *         for every other code point
     * @see <a href="{@docRoot}/../specs/jar/jar.html#Section-Specification">
     * Jar File Specification</a>
     */
    static boolean isUnsupportedManifestValueCharacter(int codePoint) {
        return codePoint == '\r' /* CR */ || codePoint == '\n' /* LF */;
    }

    /**
     * Produces a list of strings with all Unicode characters except those
     * explicitly invalid in manifest header values (see
     * {@link #isUnsupportedManifestValueCharacter}).
     * <p>
     * Each string is filled with as many characters as fit into
     * {@link #SUPPORTED_VALUE_LENGTH} bytes with UTF-8 encoding, except the
     * last string which contains the remaining characters. Each of those
     * strings becomes one header value. According to the specifications,
     * 65535 headers should be supported per file, hence the number of
     * strings produced is asserted not to exceed that limit.
     *
     * @return the header values in ascending code point order
     * @see <a
     * href="{@docRoot}/../specs/jar/jar.html#Notes_on_Manifest_and_Signature_Files">
     * Notes on Manifest and Signature Files</a>
     */
    static List<String> produceValuesWithAllUnicodeCharacters() {
        List<String> values = new ArrayList<>();
        byte[] valueBuf = new byte[SUPPORTED_VALUE_LENGTH];
        int pos = 0;
        for (int codePoint = Character.MIN_CODE_POINT;
                codePoint <= Character.MAX_CODE_POINT; codePoint++) {
            if (isUnsupportedManifestValueCharacter(codePoint)) {
                continue;
            }

            // NOTE(review): lone surrogate code points have no UTF-8 encoding
            // of their own; String.getBytes presumably substitutes the
            // charset's replacement bytes for them -- confirm if relevant.
            byte[] charBuf = Character.toString(codePoint).getBytes(UTF_8);

            // Never split the encoding of one character across two values:
            // flush the current value before it would overflow.
            if (pos + charBuf.length > valueBuf.length) {
                values.add(new String(valueBuf, 0, pos, UTF_8));
                pos = 0;
            }
            System.arraycopy(charBuf, 0, valueBuf, pos, charBuf.length);
            pos += charBuf.length;
        }

        // the last, usually only partially filled, value
        if (pos > 0) {
            values.add(new String(valueBuf, 0, pos, UTF_8));
        }

        // minimum number of headers supported is the same as the minimum size
        // of each header value in bytes
        assertTrue(values.size() <= SUPPORTED_VALUE_LENGTH);
        return values;
    }

    /**
     * Returns simple, valid, short, and distinct manifest header names.
     * <p>
     * The name consists of letters only: the leftmost letter is upper-case
     * and all following letters, if any, are lower-case, for example
     * {@code 0 -> "A"}, {@code 25 -> "Z"}, {@code 26 -> "Aa"},
     * {@code 27 -> "Ab"}, and {@code 52 -> "Ba"}.
     * The returned name cannot collide with "{@code Manifest-Version}" because
     * the returned string does not contain "{@code -}".
     *
     * @param seed index of the name to produce; the test only passes
     *        non-negative values
     * @return the header name for {@code seed}
     */
    static Name azName(int seed) {
        // purely local buffer, no synchronization needed
        StringBuilder name = new StringBuilder();
        do {
            // The iteration handling the leftmost letter is the only one in
            // which the remaining seed is below 26; it gets the upper-case
            // letter.
            name.insert(0, (char) (seed % 26 + (seed < 26 ? 'A' : 'a')));
            seed = seed / 26 - 1;
        } while (seed >= 0);
        return new Name(name.toString());
    }

    /**
     * Writes and reads a manifest with the complete Unicode character set.
     * The characters are grouped into manifest header values with about as
     * many bytes as allowed each, utilizing a single big manifest.
     * <p>
     * This test assumes that a manifest is encoded and decoded correctly if
     * writing and then reading it again results in a manifest with identical
     * values as the original. The test is not about other aspects of writing
     * and reading manifests than only that, given the fact and the way it
     * works for some characters such as the most widely and often used ones,
     * it also works for the complete Unicode character set just the same.
     * <p>
     * Only header values are tested. The set of allowed characters for header
     * names is much more limited and is a different topic entirely, and the
     * most simple ones are used here as necessary just to get valid and
     * different ones (see {@link #azName}).
     * <p>
     * Because the current implementation under test uses different portions
     * of code depending on where the value occurs to read or write, each
     * character is tested in each of the three positions:<ul>
     * <li>main attribute header,</li>
     * <li>named section name, and</li>
     * <li>named sections header values</li>
     * </ul>
     * Implementation of writing the main section headers in
     * {@link Attributes#writeMain(java.io.DataOutputStream)} differs from the
     * one writing named section headers in
     * {@link Attributes#write(java.io.DataOutputStream)} regarding the special
     * order of {@link Name#MANIFEST_VERSION} and
     * {@link Name#SIGNATURE_VERSION} and also
     * {@link Manifest#read(java.io.InputStream)} at least potentially reads
     * main sections differently than reading named sections names headers in
     * {@link Attributes#read(Manifest.FastInputStream, byte[])}.
     *
     * @throws IOException if writing or reading the manifest fails
     */
    @Test
    public void testCompleteUnicodeCharacterSet() throws IOException {
        Manifest mf = new Manifest();
        mf.getMainAttributes().put(Name.MANIFEST_VERSION, "1.0");

        List<String> values = produceValuesWithAllUnicodeCharacters();
        for (int i = 0; i < values.size(); i++) {
            Name name = azName(i);
            String value = values.get(i);

            // position 1: main attribute header value
            mf.getMainAttributes().put(name, value);

            // position 2: the value doubles as the name of a named section
            Attributes attributes = new Attributes();
            mf.getEntries().put(value, attributes);

            // position 3: header value inside that named section
            attributes.put(name, value);
        }

        mf = writeAndRead(mf);

        for (int i = 0; i < values.size(); i++) {
            String value = values.get(i);
            Name name = azName(i);

            assertEquals(mf.getMainAttributes().getValue(name), value,
                    "main attributes header value");
            Attributes attributes = mf.getAttributes(value);
            assertNotNull(attributes, "named section");
            assertEquals(attributes.getValue(name), value,
                    "named section attributes value");
        }
    }

    /**
     * Writes {@code mf} into a byte array, prints the written bytes decoded
     * as UTF-8 to {@link System#out} between two rulers of 72 dashes, and
     * returns a new manifest read back from the very same bytes.
     *
     * @param mf the manifest to write
     * @return a new manifest parsed from the bytes written for {@code mf}
     * @throws IOException if writing or reading the manifest fails
     */
    static Manifest writeAndRead(Manifest mf) throws IOException {
        ByteArrayOutputStream out = new ByteArrayOutputStream();
        mf.write(out);
        byte[] mfBytes = out.toByteArray();

        System.out.println("-".repeat(72));
        System.out.print(new String(mfBytes, UTF_8));
        System.out.println("-".repeat(72));

        ByteArrayInputStream in = new ByteArrayInputStream(mfBytes);
        return new Manifest(in);
    }
}
|