1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127
|
From: Markus Koschany <apo@debian.org>
Date: Sun, 3 Jan 2016 02:49:40 +0100
Subject: no chardet
Remove chardet-related code because the build-dependency is currently not
available.
Forwarded: not-needed
---
.../validator/htmlparser/extra/ChardetSniffer.java | 84 ----------------------
.../htmlparser/io/HtmlInputStreamReader.java | 5 --
2 files changed, 89 deletions(-)
delete mode 100644 src/nu/validator/htmlparser/extra/ChardetSniffer.java
diff --git a/src/nu/validator/htmlparser/extra/ChardetSniffer.java b/src/nu/validator/htmlparser/extra/ChardetSniffer.java
deleted file mode 100644
index a757503..0000000
--- a/src/nu/validator/htmlparser/extra/ChardetSniffer.java
+++ /dev/null
@@ -1,84 +0,0 @@
-/*
- * Copyright (c) 2008 Mozilla Foundation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- */
-
-package nu.validator.htmlparser.extra;
-
-import java.io.IOException;
-import java.nio.charset.UnsupportedCharsetException;
-
-import nu.validator.htmlparser.io.Encoding;
-
-import org.mozilla.intl.chardet.nsDetector;
-import org.mozilla.intl.chardet.nsICharsetDetectionObserver;
-import org.mozilla.intl.chardet.nsPSMDetector;
-
-import com.ibm.icu.text.CharsetDetector;
-
-public class ChardetSniffer implements nsICharsetDetectionObserver {
-
- private final byte[] source;
-
- private final int length;
-
- private Encoding returnValue = null;
-
- /**
- * @param source
- */
- public ChardetSniffer(final byte[] source, final int length) {
- this.source = source;
- this.length = length;
- }
-
- public Encoding sniff() throws IOException {
- nsDetector detector = new nsDetector(nsPSMDetector.ALL);
- detector.Init(this);
- detector.DoIt(source, length, false);
- detector.DataEnd();
- if (returnValue != null && returnValue != Encoding.WINDOWS1252 && returnValue.isAsciiSuperset()) {
- return returnValue;
- } else {
- return null;
- }
- }
-
- public static void main(String[] args) {
- String[] detectable = CharsetDetector.getAllDetectableCharsets();
- for (int i = 0; i < detectable.length; i++) {
- String charset = detectable[i];
- System.out.println(charset);
- }
- }
-
- public void Notify(String charsetName) {
- try {
- Encoding enc = Encoding.forName(charsetName);
- Encoding actual = enc.getActualHtmlEncoding();
- if (actual != null) {
- enc = actual;
- }
- returnValue = enc;
- } catch (UnsupportedCharsetException e) {
- returnValue = null;
- }
- }
-}
diff --git a/src/nu/validator/htmlparser/io/HtmlInputStreamReader.java b/src/nu/validator/htmlparser/io/HtmlInputStreamReader.java
index 553c937..0ec65ff 100755
--- a/src/nu/validator/htmlparser/io/HtmlInputStreamReader.java
+++ b/src/nu/validator/htmlparser/io/HtmlInputStreamReader.java
@@ -35,7 +35,6 @@ import java.nio.charset.CodingErrorAction;
import nu.validator.htmlparser.common.ByteReadable;
import nu.validator.htmlparser.common.Heuristics;
import nu.validator.htmlparser.common.XmlViolationPolicy;
-import nu.validator.htmlparser.extra.ChardetSniffer;
import nu.validator.htmlparser.extra.IcuDetectorSniffer;
import nu.validator.htmlparser.impl.Tokenizer;
@@ -127,10 +126,6 @@ public final class HtmlInputStreamReader extends Reader implements
position = 0;
encoding = (new MetaSniffer(errorHandler, this)).sniff(this);
if (encoding == null
- && (heuristics == Heuristics.CHARDET || heuristics == Heuristics.ALL)) {
- encoding = (new ChardetSniffer(byteArray, limit)).sniff();
- }
- if (encoding == null
&& (heuristics == Heuristics.ICU || heuristics == Heuristics.ALL)) {
position = 0;
encoding = (new IcuDetectorSniffer(this)).sniff();
|