File: no-chardet.patch

package info (click to toggle)
libhtml5parser-java 1.4%2Br1.3.1-2
  • links: PTS, VCS
  • area: main
  • in suites: buster
  • size: 3,376 kB
  • ctags: 4,545
  • sloc: java: 27,064; cpp: 158; xml: 141; sh: 136; ruby: 44; makefile: 5
file content (127 lines) | stat: -rw-r--r-- 5,048 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
From: Markus Koschany <apo@debian.org>
Date: Sun, 3 Jan 2016 02:49:40 +0100
Subject: no chardet

Remove the chardet-related code (ChardetSniffer and its use in
HtmlInputStreamReader) because its build-dependency is currently not
available.

Forwarded: not-needed
---
 .../validator/htmlparser/extra/ChardetSniffer.java | 84 ----------------------
 .../htmlparser/io/HtmlInputStreamReader.java       |  5 --
 2 files changed, 89 deletions(-)
 delete mode 100644 src/nu/validator/htmlparser/extra/ChardetSniffer.java

diff --git a/src/nu/validator/htmlparser/extra/ChardetSniffer.java b/src/nu/validator/htmlparser/extra/ChardetSniffer.java
deleted file mode 100644
index a757503..0000000
--- a/src/nu/validator/htmlparser/extra/ChardetSniffer.java
+++ /dev/null
@@ -1,84 +0,0 @@
-/*
- * Copyright (c) 2008 Mozilla Foundation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a 
- * copy of this software and associated documentation files (the "Software"), 
- * to deal in the Software without restriction, including without limitation 
- * the rights to use, copy, modify, merge, publish, distribute, sublicense, 
- * and/or sell copies of the Software, and to permit persons to whom the 
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in 
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 
- * DEALINGS IN THE SOFTWARE.
- */
-
-package nu.validator.htmlparser.extra;
-
-import java.io.IOException;
-import java.nio.charset.UnsupportedCharsetException;
-
-import nu.validator.htmlparser.io.Encoding;
-
-import org.mozilla.intl.chardet.nsDetector;
-import org.mozilla.intl.chardet.nsICharsetDetectionObserver;
-import org.mozilla.intl.chardet.nsPSMDetector;
-
-import com.ibm.icu.text.CharsetDetector;
-
-public class ChardetSniffer implements nsICharsetDetectionObserver {
-
-    private final byte[] source;
-
-    private final int length;
-    
-    private Encoding returnValue = null;
-    
-    /**
-     * @param source
-     */
-    public ChardetSniffer(final byte[] source, final int length) {
-        this.source = source;
-        this.length = length;
-    }
-    
-    public Encoding sniff() throws IOException {
-        nsDetector detector = new nsDetector(nsPSMDetector.ALL);
-        detector.Init(this);
-        detector.DoIt(source, length, false);
-        detector.DataEnd();
-        if (returnValue != null && returnValue != Encoding.WINDOWS1252 && returnValue.isAsciiSuperset()) {
-            return returnValue;
-        } else {
-            return null;
-        }
-    }
-    
-    public static void main(String[] args) {
-        String[] detectable = CharsetDetector.getAllDetectableCharsets();
-        for (int i = 0; i < detectable.length; i++) {
-            String charset = detectable[i];
-            System.out.println(charset);
-        }
-    }
-
-    public void Notify(String charsetName) {
-        try {
-            Encoding enc = Encoding.forName(charsetName);
-            Encoding actual = enc.getActualHtmlEncoding();
-            if (actual != null) {
-                enc = actual;
-            }
-            returnValue = enc;
-        } catch (UnsupportedCharsetException e) {
-            returnValue = null;
-        }
-    }
-}
diff --git a/src/nu/validator/htmlparser/io/HtmlInputStreamReader.java b/src/nu/validator/htmlparser/io/HtmlInputStreamReader.java
index 553c937..0ec65ff 100755
--- a/src/nu/validator/htmlparser/io/HtmlInputStreamReader.java
+++ b/src/nu/validator/htmlparser/io/HtmlInputStreamReader.java
@@ -35,7 +35,6 @@ import java.nio.charset.CodingErrorAction;
 import nu.validator.htmlparser.common.ByteReadable;
 import nu.validator.htmlparser.common.Heuristics;
 import nu.validator.htmlparser.common.XmlViolationPolicy;
-import nu.validator.htmlparser.extra.ChardetSniffer;
 import nu.validator.htmlparser.extra.IcuDetectorSniffer;
 import nu.validator.htmlparser.impl.Tokenizer;
 
@@ -127,10 +126,6 @@ public final class HtmlInputStreamReader extends Reader implements
             position = 0;
             encoding = (new MetaSniffer(errorHandler, this)).sniff(this);
             if (encoding == null
-                    && (heuristics == Heuristics.CHARDET || heuristics == Heuristics.ALL)) {
-                encoding = (new ChardetSniffer(byteArray, limit)).sniff();
-            }
-            if (encoding == null
                     && (heuristics == Heuristics.ICU || heuristics == Heuristics.ALL)) {
                 position = 0;
                 encoding = (new IcuDetectorSniffer(this)).sniff();